From f7924ced4c8dd5319c47d865be8577bfdb988a58 Mon Sep 17 00:00:00 2001
From: Lu Wang <l423wang@uwaterloo.ca>
Date: Wed, 14 Nov 2018 18:21:45 -0500
Subject: [PATCH] Added grammar rules, building parse tree

---
 Makefile      |  17 +++++
 SQLPGrammar.y | 202 ++++++++++++++++++++++++++++++++++++++++++++++++++
 SQLPParser.c  |  37 +++++++++
 SQLPScanner.l | 164 ++++++++++++++++++++++++++++++++++++++++
 util.c        |  32 ++++++++
 util.h        |  38 ++++++++++
 6 files changed, 490 insertions(+)
 create mode 100644 Makefile
 create mode 100644 SQLPGrammar.y
 create mode 100644 SQLPParser.c
 create mode 100644 SQLPScanner.l
 create mode 100644 util.c
 create mode 100644 util.h

diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..89f986e
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,22 @@
+# Build the SQLP parser driver from the bison grammar and the flex scanner.
+.PHONY: all clean
+
+all: SQLPParser
+
+
+# SQLPParser.c #includes the generated lex.yy.c and SQLPGrammar.tab.c, so both
+# generators run before gcc; the generated sources are deleted afterwards.
+SQLPParser: SQLPParser.c SQLPGrammar.y SQLPScanner.l util.o
+	bison --verbose -d SQLPGrammar.y
+	flex SQLPScanner.l
+	gcc -w SQLPParser.c util.o -o SQLPParser
+	rm -f lex.yy.c SQLPGrammar.tab.c SQLPGrammar.tab.h
+
+util.o: util.c util.h
+	gcc -c util.c
+
+# Remove every build product: generated sources, the SQLPGrammar.output report
+# written by `bison --verbose`, the object file and the SQLPParser binary.
+clean:
+	rm -f lex.yy.c *.tab.c *.tab.h *.fasl SQLPGrammar.output util.o SQLPParser
+
diff --git a/SQLPGrammar.y b/SQLPGrammar.y
new file mode 100644
index 0000000..4077a3c
--- /dev/null
+++ b/SQLPGrammar.y
@@ -0,0 +1,203 @@
+%{
+ #include "util.h"
+%}
+
+// %union {
+// struct node *n;
+//}
+
+/* Every semantic value is a parse-tree node (struct node, declared in util.h). */
+%define api.value.type {struct node *}
+%token IMPLIES OR AND NOT LE GE LT GT NE HAS MAX MIN AS ASC DESC MOD ASSIGN EQ STAR COMMA DOT
+
+%token SIZE SELECTIVITY OVERLAP
+%token FREQUENCY UNIT TIME SPACE
+
+%token IDENTIFIER CONSTANT STRING_LITERAL SIZEOF
+
+%token STORE STORING DYNAMIC STATIC OF TYPE ORDERED BY
+%token INDEX LIST ARRAY BINARY TREE DISTRIBUTED POINTER
+
+%token SCHEMA CLASS ISA PROPERTIES CONSTRAINTS PROPERTY
+%token ON DETERMINED COVER QUERY GIVEN FROM SELECT WHERE ORDER
+%token PRECOMPUTED ONE EXIST FOR ALL TRANSACTION INTCLASS STRCLASS
+%token INTEGER REAL DOUBLEREAL STRING MAXLEN RANGE TO
+%token INSERT END CHANGE DELETE DECLARE RETURN UNION
+
+%start SQLPProgram
+%%
+/* Entry point: wrap the parsed query in a SQLPProgram node and print the tree. */
+SQLPProgram
+    : Query
+        { printf("Input Query\n");
+          struct node *n = new_node(1, SQLPProgram);
+          n->children[0] = $1;
+          printf("Printing Tree\n");
+          print_tree(n, 0);
+        }
+    ;
+
+/*
+ * The single child is the IDENTIFIER token's semantic value.  The scanner in
+ * SQLPScanner.l never assigns yylval, so the identifier text is only echoed
+ * (from yytext) here and is not stored in the tree.
+ */
+Identifier
+    : IDENTIFIER
+        { printf("|%s| ", yytext);
+          $$ = new_node(1, Identifier);
+          $$->children[0] = $1;
+        }
+    ;
+
+Query
+    : Union_Query
+        { printf("Union Query\n");
+          $$ = new_node(1, Query);
+          $$->children[0] = $1;
+        }
+    ;
+
+/* Keyword tokens are recorded as childless leaf nodes tagged with the token code. */
+Select_Query
+    : SELECT Select_List Body
+        { printf("SQLP Query\n");
+          $$ = new_node(3, Select_Query);
+          $$->children[0] = new_node(0, SELECT);
+          $$->children[1] = $2;
+          $$->children[2] = $3;
+        }
+    ;
+
+Body
+    : FROM TablePath
+        { printf("Body 1\n");
+          $$ = new_node(2, Body);
+          $$->children[0] = new_node(0, FROM);
+          $$->children[1] = $2;
+        }
+    | FROM TablePath WHERE Bool
+        { printf("Body 2\n");
+          $$ = new_node(4, Body);
+          $$->children[0] = new_node(0, FROM);
+          $$->children[1] = $2;
+          $$->children[2] = new_node(0, WHERE);
+          $$->children[3] = $4;
+        }
+    | FROM TablePath WHERE Pred
+        { printf("Body 3\n");
+          $$ = new_node(4, Body);
+          $$->children[0] = new_node(0, FROM);
+          $$->children[1] = $2;
+          $$->children[2] = new_node(0, WHERE);
+          $$->children[3] = $4;
+        }
+    ;
+
+/* One or more comma-separated pairs of identifiers. */
+TablePath
+    : Identifier Identifier
+        { printf("table path1\n");
+          $$ = new_node(2, TablePath);
+          $$->children[0] = $1;
+          $$->children[1] = $2;
+        }
+    | Identifier Identifier ',' TablePath
+        { printf("table path2\n");
+          $$ = new_node(4, TablePath);
+          $$->children[0] = $1;
+          $$->children[1] = $2;
+          $$->children[2] = new_node(0, COMMA);
+          $$->children[3] = $4;
+        }
+    ;
+
+Union_Query
+    : Select_Query
+        { printf("union query 1\n");
+          $$ = new_node(1, Union_Query);
+          $$->children[0] = $1;
+        }
+    | Union_Query UNION Select_Query
+        { printf("union query 2\n");
+          $$ = new_node(3, Union_Query);
+          $$->children[0] = $1;
+          $$->children[1] = new_node(0, UNION);
+          $$->children[2] = $3;
+        }
+    ;
+
+Select_List
+    : STAR
+        { printf("star\n");
+          $$ = new_node(1, Select_List);
+          $$->children[0] = new_node(0, STAR);
+        }
+    | AttrPath
+        { printf("select list attr path\n");
+          $$ = new_node(1, Select_List);
+          $$->children[0] = $1;
+        }
+    | AttrPath ',' Select_List
+        { printf("Select list\n");
+          $$ = new_node(3, Select_List);
+          $$->children[0] = $1;
+          $$->children[1] = new_node(0, COMMA);
+          $$->children[2] = $3;
+        }
+    ;
+
+/* Dot-separated chain of identifiers. */
+AttrPath
+    : Identifier
+        { printf("path id\n");
+          $$ = new_node(1, AttrPath);
+          $$->children[0] = $1;
+        }
+    | Identifier '.' AttrPath
+        { printf("Path Function\n");
+          $$ = new_node(3, AttrPath);
+          $$->children[0] = $1;
+          $$->children[1] = new_node(0, DOT);
+          $$->children[2] = $3;
+        }
+    ;
+
+/*
+ * Comparison operators.  Each alternative builds an Operator node holding a
+ * leaf tagged with the operator's token code; without an action the value
+ * would be the token's never-assigned yylval and the operator would be lost.
+ * '<' and '>' are what SQLPScanner.l returns for those characters; LT and GT
+ * are kept for scanners that return the named tokens.
+ */
+Operator
+    : EQ
+        { $$ = new_node(1, Operator); $$->children[0] = new_node(0, EQ); }
+    | NE
+        { $$ = new_node(1, Operator); $$->children[0] = new_node(0, NE); }
+    | LE
+        { $$ = new_node(1, Operator); $$->children[0] = new_node(0, LE); }
+    | GE
+        { $$ = new_node(1, Operator); $$->children[0] = new_node(0, GE); }
+    | LT
+        { $$ = new_node(1, Operator); $$->children[0] = new_node(0, LT); }
+    | GT
+        { $$ = new_node(1, Operator); $$->children[0] = new_node(0, GT); }
+    | '<'
+        { $$ = new_node(1, Operator); $$->children[0] = new_node(0, LT); }
+    | '>'
+        { $$ = new_node(1, Operator); $$->children[0] = new_node(0, GT); }
+    ;
+
+/*
+ * Literal operand, recorded as a leaf tagged with its token code.  INTEGER,
+ * REAL and STRING are the literal tokens SQLPScanner.l returns; CONSTANT is
+ * kept so existing token streams keep parsing.
+ */
+Constant
+    : CONSTANT
+        { $$ = new_node(0, CONSTANT); }
+    | INTEGER
+        { $$ = new_node(0, INTEGER); }
+    | REAL
+        { $$ = new_node(0, REAL); }
+    | STRING
+        { $$ = new_node(0, STRING); }
+    ;
+
+Bool
+    : AttrPath Operator AttrPath
+        { printf("AttrPath op AttrPath\n");
+          $$ = new_node(3, Bool);
+          $$->children[0] = $1;
+          $$->children[1] = $2;
+          $$->children[2] = $3;
+        }
+    | AttrPath Operator Constant
+        { printf("AttrPath op Constant\n");
+          $$ = new_node(3, Bool);
+          $$->children[0] = $1;
+          $$->children[1] = $2;
+          $$->children[2] = $3;
+        }
+    ;
+
+/*
+ * NOTE(review): every alternative ends in NOT Pred or EXIST (...), so a chain
+ * such as `a = b and c = d` has no derivation.  Confirm whether a plain Bool
+ * should also be a Pred (that would need Body's WHERE alternatives merged).
+ */
+Pred
+    : Bool AND Pred
+        { printf("pred and pred\n");
+          $$ = new_node(3, Pred);
+          $$->children[0] = $1;
+          $$->children[1] = new_node(0, AND);
+          $$->children[2] = $3;
+        }
+    | NOT Pred
+        { printf("Not Pred\n");
+          $$ = new_node(2, Pred);
+          $$->children[0] = new_node(0, NOT);
+          $$->children[1] = $2;
+        }
+    | EXIST '(' Query ')'
+        { printf("Exist query\n");
+          $$ = new_node(2, Pred);
+          $$->children[0] = new_node(0, EXIST);
+          $$->children[1] = $3;
+        }
+    ;
\ No newline at end of file
diff --git a/SQLPParser.c b/SQLPParser.c
new file mode 100644
index 0000000..5421d36
--- /dev/null
+++ b/SQLPParser.c
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 1989, G. E. Weddell.
+ *
+ * This file is part of RDM.
+ *
+ * RDM is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * RDM is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with RDM. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <stdio.h>
+#include <string.h>
+/* Project header: quoted form so the compiler searches this directory. */
+#include "util.h"
+/* The generated scanner and parser are compiled as part of this file. */
+#include "lex.yy.c"
+#include "SQLPGrammar.tab.c"
+
+/*
+ * Parse one SQLP program from standard input.
+ *
+ * Output on stdout is bracketed by "(" and ")" lines; "**error**" is printed
+ * between them when yyparse() reports failure.  On success a progress note is
+ * written to stderr instead.  Returns the yyparse() result as the exit status
+ * (0 on success).
+ */
+int main(void)
+{
+    int Result;
+
+    strcpy(LineBuffer, "");    /* start with an empty current-line echo */
+    printf("(\n");
+    Result = yyparse();
+    if (Result)
+        printf("**error**");
+    else
+        fprintf(stderr, "checking semantics.\n");
+    printf(")\n");
+    return (Result);
+}
\ No newline at end of file
diff --git a/SQLPScanner.l b/SQLPScanner.l
new file mode 100644
index 0000000..e358f7e
--- /dev/null
+++ b/SQLPScanner.l
@@ -0,0 +1,164 @@
+%{
+/*
+ * Copyright (C) 1989, G. E. Weddell.
+ *
+ * This file is part of RDM.
+ *
+ * RDM is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * RDM is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with RDM. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "SQLPGrammar.tab.h"
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+
+int LineNumber = 1;     /* number of the line being scanned, for error messages */
+char LineBuffer[200];   /* text of the current line scanned so far */
+
+/*
+ * Append text to LineBuffer without writing past its end.  Input lines longer
+ * than the buffer are truncated in the error echo instead of overflowing it.
+ */
+static void line_append(const char *text)
+{
+    size_t used = strlen(LineBuffer);
+
+    if (used < sizeof LineBuffer - 1)
+        strncat(LineBuffer, text, sizeof LineBuffer - 1 - used);
+}
+%}
+
+%p 3000
+
+D [0-9]
+L [a-zA-Z_]
+E [Ee][+-]?{D}+
+
+%%
+
+    /*
+     * Every rule that matches visible text appends it to LineBuffer so that
+     * yyerror() can echo the line and underline the offending token.  Both
+     * rules that consume a newline start a fresh line: they bump LineNumber
+     * and clear LineBuffer.  A '%' starts a comment running to end of line.
+     */
+\%.*\n { LineNumber++; LineBuffer[0] = '\0'; }
+\n { LineNumber++; LineBuffer[0] = '\0'; }
+"all" { line_append(yytext); return(ALL); }
+"and" { line_append(yytext); return(AND); }
+"array" { line_append(yytext); return(ARRAY); }
+"as" { line_append(yytext); return(AS); }
+"asc" { line_append(yytext); return(ASC); }
+"binary" { line_append(yytext); return(BINARY); }
+"by" { line_append(yytext); return(BY); }
+"change" { line_append(yytext); return(CHANGE); }
+"class" { line_append(yytext); return(CLASS); }
+"constraints" { line_append(yytext); return(CONSTRAINTS); }
+"cover" { line_append(yytext); return(COVER); }
+"declare" { line_append(yytext); return(DECLARE); }
+"delete" { line_append(yytext); return(DELETE); }
+"desc" { line_append(yytext); return(DESC); }
+"union" { line_append(yytext); return(UNION); }
+"determined" { line_append(yytext); return(DETERMINED); }
+"distributed" { line_append(yytext); return(DISTRIBUTED); }
+"dynamic" { line_append(yytext); return(DYNAMIC); }
+"exist" { line_append(yytext); return(EXIST); }
+"for" { line_append(yytext); return(FOR); }
+"frequency" { line_append(yytext); return(FREQUENCY); }
+"from" { line_append(yytext); return(FROM); }
+"given" { line_append(yytext); return(GIVEN); }
+"has" { line_append(yytext); return(HAS); }
+"implies" { line_append(yytext); return(IMPLIES); }
+"index" { line_append(yytext); return(INDEX); }
+"insert" { line_append(yytext); return(INSERT); }
+"Integer" { line_append(yytext); return(INTCLASS); }
+"isa" { line_append(yytext); return(ISA); }
+"list" { line_append(yytext); return(LIST); }
+"max" { line_append(yytext); return(MAX); }
+"maxlen" { line_append(yytext); return(MAXLEN); }
+"min" { line_append(yytext); return(MIN); }
+"mod" { line_append(yytext); return(MOD); }
+"not" { line_append(yytext); return(NOT); }
+"of" { line_append(yytext); return(OF); }
+"on" { line_append(yytext); return(ON); }
+"one" { line_append(yytext); return(ONE); }
+"or" { line_append(yytext); return(OR); }
+"order" { line_append(yytext); return(ORDER); }
+"ordered" { line_append(yytext); return(ORDERED); }
+"overlap" { line_append(yytext); return(OVERLAP); }
+"pointer" { line_append(yytext); return(POINTER); }
+"precomputed" { line_append(yytext); return(PRECOMPUTED); }
+"property" { line_append(yytext); return(PROPERTY); }
+"properties" { line_append(yytext); return(PROPERTIES); }
+"query" { line_append(yytext); return(QUERY); }
+"range" { line_append(yytext); return(RANGE); }
+"return" { line_append(yytext); return(RETURN); }
+"schema" { line_append(yytext); return(SCHEMA); }
+"select" { line_append(yytext); return(SELECT); }
+"selectivity" { line_append(yytext); return(SELECTIVITY); }
+"size" { line_append(yytext); return(SIZE); }
+"space" { line_append(yytext); return(SPACE); }
+"static" { line_append(yytext); return(STATIC); }
+"store" { line_append(yytext); return(STORE); }
+"storing" { line_append(yytext); return(STORING); }
+"String" { line_append(yytext); return(STRCLASS); }
+"time" { line_append(yytext); return(TIME); }
+"to" { line_append(yytext); return(TO); }
+"transaction" { line_append(yytext); return(TRANSACTION); }
+"tree" { line_append(yytext); return(TREE); }
+"type" { line_append(yytext); return(TYPE); }
+"unit" { line_append(yytext); return(UNIT); }
+"where" { line_append(yytext); return(WHERE); }
+
+{L}({L}|{D})* { line_append(yytext); return(IDENTIFIER); }
+
+{D}+ { line_append(yytext); return(INTEGER); }
+{D}+{E} { line_append(yytext); return(REAL); }
+{D}*"."{D}+({E})? { line_append(yytext); return(REAL); }
+{D}+"."{D}*({E})? { line_append(yytext); return(REAL); }
+
+\"(\\.|[^\\"])*\" { line_append(yytext); return(STRING); }
+
+":=" { line_append(yytext); return(ASSIGN); }
+"<=" { line_append(yytext); return(LE); }
+">=" { line_append(yytext); return(GE); }
+"<>" { line_append(yytext); return(NE); }
+";" { line_append(yytext); return(';'); }
+"{" { line_append(yytext); return('{'); }
+"}" { line_append(yytext); return('}'); }
+"," { line_append(yytext); return(','); }
+"=" { line_append(yytext); return(EQ); }
+"(" { line_append(yytext); return('('); }
+")" { line_append(yytext); return(')'); }
+"[" { line_append(yytext); return('['); }
+"]" { line_append(yytext); return(']'); }
+"." { line_append(yytext); return('.'); }
+"-" { line_append(yytext); return('-'); }
+"+" { line_append(yytext); return('+'); }
+"*" { line_append(yytext); return(STAR); }
+"/" { line_append(yytext); return('/'); }
+"<" { line_append(yytext); return('<'); }
+">" { line_append(yytext); return('>'); }
+
+" " { line_append(yytext); }
+"\r" { line_append(yytext); }
+. 
{ strncat(LineBuffer, yytext, sizeof LineBuffer - strlen(LineBuffer) - 1);
+                  yyerror("illegal character"); }
+%%
+
+/*
+ * Report a scan or parse error on stderr.
+ *
+ * Echoes the text of the current line (what has been scanned so far plus the
+ * rest of the line, which is consumed from the scanner), underlines the
+ * current token (yytext) with carets, and prints the message s together with
+ * the current line number.  Always returns 0.
+ */
+int yyerror(const char *s)
+{
+    size_t line_len = strlen(LineBuffer);
+    size_t token_len = strlen(yytext);
+    /* size_t subtraction wraps around, so never subtract a longer token. */
+    size_t pad = line_len > token_len ? line_len - token_len : 0;
+    size_t i;
+    int c;    /* int, not char: input() may return EOF as well as 0 */
+
+    fprintf( stderr, "\n%s", LineBuffer );
+    c = input();
+    while ( c != '\n' && c != 0 && c != EOF ) {
+        putc(c, stderr);
+        c = input();
+    }
+    putc('\n', stderr);
+    for (i = 0; i < pad; i++)
+        putc(' ', stderr);
+    for (i = 0; i < token_len; i++)
+        putc('^', stderr);
+    putc('\n', stderr);
+    fprintf ( stderr, "** error: %s found in line %d.\n\n", s, LineNumber );
+    return 0;
+}
+
+/* Tell the scanner there is no further input file once end of file is reached. */
+int yywrap(void)
+{
+    return(1);
+}
\ No newline at end of file
diff --git a/util.c b/util.c
new file mode 100644
index 0000000..e22cc07
--- /dev/null
+++ b/util.c
@@ -0,0 +1,65 @@
+#include "util.h"
+
+/*
+ * Allocate a parse-tree node of the given type with room for count child
+ * pointers.  The child slots start out as NULL; a node with count <= 0 has
+ * children == NULL.  Terminates the program if memory cannot be obtained,
+ * because the grammar actions dereference the result unconditionally.
+ */
+struct node *new_node(int count, int type) {
+    struct node *n = malloc(sizeof *n);
+    if (n == NULL) {
+        perror("new_node");
+        exit(EXIT_FAILURE);
+    }
+    n->type = type;
+    n->count = count;
+    n->children = NULL;
+    if (count > 0) {
+        n->children = calloc((size_t)count, sizeof *n->children);
+        if (n->children == NULL) {
+            perror("new_node");
+            exit(EXIT_FAILURE);
+        }
+    }
+    return n;
+}
+
+
+/* Printable names, indexed by enum type (util.h); keep both in the same order. */
+const char *node_types[] = {"program", "id", "query", "select_query", "body", "table path", "union_query",
+"select list", "attrpath", "operator", "bool", "pred"};
+
+/*
+ * Print the type name of root and, recursively, of its children, one node per
+ * line, indented according to nesting depth (indent is the depth of root).
+ * NULL nodes and nodes without children (token leaves) are skipped.
+ */
+void print_tree(struct node *root, int indent) {
+    const int known = (int)(sizeof node_types / sizeof node_types[0]);
+
+    if (root == NULL) return;
+    if (root->count == 0) return;
+    for (int i = 0; i < indent; i++) {
+        printf(" ");
+    }
+    /* Token codes are also used as node types; never index past the table. */
+    printf("type: %s\n",
+           (root->type >= 0 && root->type < known) ? node_types[root->type] : "unknown");
+    for (int i = 0; i < root->count; i++) {
+        print_tree(root->children[i], indent+1);
+    }
+}
+
+/*
+ * Free root, its child-pointer array and every node below it.  NULL is
+ * accepted, so trees containing empty child slots can be released safely.
+ */
+void destroy_tree(struct node *root) {
+    if (root == NULL) return;
+    for (int i = 0; i < root->count; i++) {
+        destroy_tree(root->children[i]);
+    }
+    free(root->children);
+    free(root);
+}
\ No newline at end of file
diff --git a/util.h b/util.h
new file mode 100644
index 0000000..621ccb0
--- /dev/null
+++ b/util.h
@@ -0,0 +1,40 @@
+#ifndef UTIL_H
+#define UTIL_H
+
+#include <stdio.h>
+#include <stdlib.h>
+
+/* Parse-tree node: a type tag plus an array of count child pointers. */
+struct node {
+    int type;                /* enum type value, or a token code for leaves */
+    int count;               /* number of entries in children */
+    struct node **children;  /* child nodes */
+};
+
+
+
+/* Node kinds for the grammar's nonterminals; indexes node_types[] in util.c. */
+enum type {
+    SQLPProgram,
+    Identifier,
+    Query,
+    Select_Query,
+    Body,
+    TablePath,
+    Union_Query,
+    Select_List,
+    AttrPath,
+    Operator,
+    Bool,
+    Pred
+};
+
+/* Allocates a node of the given type with space for count child pointers. */
+struct node *new_node(int count, int type);
+
+/* Prints the type names of root and its descendants; indent is the depth of root. */
+void print_tree(struct node *root, int indent);
+/* Recursively frees root and the nodes reachable through its children. */
+void destroy_tree(struct node *root);
+
+#endif
+
+
+
-- 
GitLab