From f7924ced4c8dd5319c47d865be8577bfdb988a58 Mon Sep 17 00:00:00 2001
From: Lu Wang <l423wang@uwaterloo.ca>
Date: Wed, 14 Nov 2018 18:21:45 -0500
Subject: [PATCH] Added grammar rules, building parse tree

---
 Makefile      |  17 +++++
 SQLPGrammar.y | 202 ++++++++++++++++++++++++++++++++++++++++++++++++++
 SQLPParser.c  |  37 +++++++++
 SQLPScanner.l | 164 ++++++++++++++++++++++++++++++++++++++++
 util.c        |  32 ++++++++
 util.h        |  38 ++++++++++
 6 files changed, 490 insertions(+)
 create mode 100644 Makefile
 create mode 100644 SQLPGrammar.y
 create mode 100644 SQLPParser.c
 create mode 100644 SQLPScanner.l
 create mode 100644 util.c
 create mode 100644 util.h

diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..89f986e
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,17 @@
+.PHONY: all clean
+# Build the SQLP parser; bison and flex generate C that SQLPParser.c #includes.
+all: SQLPParser
+
+# -I. lets the `#include <util.h>` in SQLPParser.c resolve to the local header.
+SQLPParser: SQLPParser.c SQLPGrammar.y SQLPScanner.l util.o
+	bison --verbose -d SQLPGrammar.y
+	flex SQLPScanner.l
+	gcc -w -I. SQLPParser.c util.o -o SQLPParser
+	rm -f lex.yy.c SQLPGrammar.tab.c SQLPGrammar.tab.h
+
+util.o: util.c util.h
+	gcc -c util.c
+# Remove everything the build produces, including the report written by bison --verbose.
+clean:
+	rm -f lex.yy.c *.tab.c *.tab.h *.fasl SQLPParser util.o SQLPGrammar.output
+
diff --git a/SQLPGrammar.y b/SQLPGrammar.y
new file mode 100644
index 0000000..4077a3c
--- /dev/null
+++ b/SQLPGrammar.y
@@ -0,0 +1,202 @@
+%{
+	#include "util.h"
+%}
+
+// %union {
+// 	struct node *n;
+//}
+/* Every semantic value ($$ and $n) is a struct node pointer (struct node is declared in util.h). */
+%define api.value.type {struct node *}
+%token IMPLIES OR AND NOT LE GE LT GT NE HAS MAX MIN AS ASC DESC MOD ASSIGN EQ STAR COMMA DOT
+/* Of the tokens declared from here on, the rules in this file use only IDENTIFIER, CONSTANT, FROM, SELECT, WHERE, EXIST and UNION. */
+%token SIZE SELECTIVITY OVERLAP
+%token FREQUENCY UNIT TIME SPACE
+
+%token IDENTIFIER CONSTANT STRING_LITERAL SIZEOF
+
+%token STORE STORING DYNAMIC STATIC OF TYPE ORDERED BY
+%token INDEX LIST ARRAY BINARY TREE DISTRIBUTED POINTER
+
+%token SCHEMA  CLASS ISA PROPERTIES CONSTRAINTS PROPERTY
+%token ON DETERMINED COVER QUERY GIVEN FROM SELECT WHERE ORDER
+%token PRECOMPUTED ONE EXIST FOR ALL TRANSACTION INTCLASS STRCLASS
+%token INTEGER REAL DOUBLEREAL STRING MAXLEN RANGE TO
+%token INSERT END CHANGE DELETE DECLARE RETURN UNION
+/* Parsing starts at SQLPProgram. */
+%start SQLPProgram
+%%
+SQLPProgram	/* start symbol: wrap the parsed Query in a root node and print the tree */
+    : Query
+        { struct node *root;
+          printf("Input Query\n");
+          root = new_node(1, SQLPProgram);
+          root->children[0] = $1;
+          printf("Printing Tree\n"); print_tree(root, 0);	/* dump starts at indent level 0 */
+        }
+    ;
+
+Identifier	/* a single IDENTIFIER token wrapped in an Identifier node */
+	: IDENTIFIER
+		{ printf("|%s| ", yytext);	/* echo the scanner's current token text */
+		  struct node *id = new_node(1, Identifier);
+		  id->children[0] = $1;	/* the token's own semantic value */
+		  $$ = id; }
+	;
+ 
+Query	/* a Query node with the Union_Query as its only child */
+	: Union_Query
+		{ printf("Union Query\n");
+		  struct node *q = new_node(1, Query);
+		  q->children[0] = $1;
+		  $$ = q; }
+	;
+
+Select_Query	/* children: SELECT leaf, Select_List, Body */
+	: SELECT Select_List Body
+		{ printf("SQLP Query\n");
+		  struct node *sel = new_node(3, Select_Query);
+		  sel->children[0] = new_node(0, SELECT);	/* keyword kept as a childless leaf */
+		  sel->children[1] = $2;
+		  sel->children[2] = $3;
+		  $$ = sel; }
+	;
+
+Body	/* FROM clause, optionally followed by WHERE and either one Bool or a Pred */
+	: FROM TablePath
+		{ printf("Body 1\n");
+		  struct node *b = new_node(2, Body);
+		  b->children[0] = new_node(0, FROM);
+		  b->children[1] = $2; $$ = b; }
+	| FROM TablePath WHERE Bool
+		{ printf("Body 2\n");
+		  struct node *b = new_node(4, Body);
+		  b->children[0] = new_node(0, FROM);
+		  b->children[1] = $2;
+		  b->children[2] = new_node(0, WHERE);
+		  b->children[3] = $4;	/* the single comparison */
+		  $$ = b; }
+	| FROM TablePath WHERE Pred
+		{ printf("Body 3\n");
+		  struct node *b = new_node(4, Body);
+		  b->children[0] = new_node(0, FROM);
+		  b->children[1] = $2;
+		  b->children[2] = new_node(0, WHERE);
+		  b->children[3] = $4;	/* the compound predicate */
+		  $$ = b; }
+	;
+
+TablePath	/* one or more "Identifier Identifier" pairs separated by ',' (right-recursive) */
+    : Identifier Identifier
+        { printf("table path1\n");
+          struct node *tp = new_node(2, TablePath);
+          tp->children[0] = $1;
+          tp->children[1] = $2;
+          $$ = tp; }
+    | Identifier Identifier ',' TablePath
+        { printf("table path2\n");
+          struct node *tp = new_node(4, TablePath);
+          tp->children[0] = $1;
+          tp->children[1] = $2;
+          tp->children[2] = new_node(0, COMMA);	/* the ',' is recorded as a COMMA leaf */
+          tp->children[3] = $4;
+          $$ = tp; }
+    ;
+
+Union_Query	/* one Select_Query, or a left-recursive chain of them joined by UNION */
+   : Select_Query
+        { printf("union query 1\n");
+          struct node *u = new_node(1, Union_Query);
+          u->children[0] = $1;
+          $$ = u; }
+   | Union_Query UNION Select_Query
+        { printf("union query 2\n");
+          struct node *u = new_node(3, Union_Query);
+          u->children[0] = $1;
+          u->children[1] = new_node(0, UNION);
+          u->children[2] = $3;
+          $$ = u; }
+   ;
+
+Select_List	/* '*' alone, or one or more AttrPaths separated by ',' (right-recursive) */
+	: STAR
+		{ printf("star\n");
+		  struct node *sl = new_node(1, Select_List);
+		  sl->children[0] = new_node(0, STAR);
+		  $$ = sl; }
+	| AttrPath
+		{ printf("select list attr path\n");
+		  struct node *sl = new_node(1, Select_List);
+		  sl->children[0] = $1;
+		  $$ = sl; }
+	| AttrPath ',' Select_List
+		{ printf("Select list\n");
+		  struct node *sl = new_node(3, Select_List);
+		  sl->children[0] = $1;
+		  sl->children[1] = new_node(0, COMMA);
+		  sl->children[2] = $3;
+		  $$ = sl; }
+	;
+
+AttrPath	/* an Identifier, optionally followed by '.' and a further AttrPath */
+	: Identifier
+		{ printf("path id\n");
+		  struct node *ap = new_node(1, AttrPath);
+		  ap->children[0] = $1;
+		  $$ = ap; }
+	| Identifier '.' AttrPath
+		{ printf("Path Function\n");
+		  struct node *ap = new_node(3, AttrPath);
+		  ap->children[0] = $1;
+		  ap->children[1] = new_node(0, DOT);	/* the '.' is recorded as a DOT leaf */
+		  ap->children[2] = $3;
+		  $$ = ap; }
+	;
+Operator	/* comparison operator, kept as a childless leaf whose type is its token code */
+	: EQ	{ $$ = new_node(0, EQ); }	/* without an action, $$ would be the token's unset value */
+	| NE	{ $$ = new_node(0, NE); }
+	| LE	{ $$ = new_node(0, LE); }
+	| GE	{ $$ = new_node(0, GE); }
+	| LT	{ $$ = new_node(0, LT); }
+	| GT	{ $$ = new_node(0, GT); }
+	;
+
+Bool	/* one comparison: AttrPath Operator (AttrPath | CONSTANT) */
+	: AttrPath Operator AttrPath
+		{ printf("AttrPath op AttrPath\n");
+		  struct node *cmp = new_node(3, Bool);
+		  cmp->children[0] = $1;
+		  cmp->children[1] = $2;
+		  cmp->children[2] = $3;
+		  $$ = cmp; }
+	| AttrPath Operator CONSTANT	/* NOTE(review): SQLPScanner.l has no rule returning CONSTANT - confirm */
+		{ printf("AttrPath op Constant\n");
+		  struct node *cmp = new_node(3, Bool);
+		  cmp->children[0] = $1;
+		  cmp->children[1] = $2;
+		  cmp->children[2] = new_node(0, CONSTANT);	/* leaf only; the constant's text is not stored */
+		  $$ = cmp; }
+	;
+
+Pred	/* NOTE(review): EXIST is the only non-recursive alternative, so every Pred ends in one - confirm */
+	: Bool AND Pred
+		{ printf("pred and pred\n");
+		  struct node *p = new_node(3, Pred);
+		  p->children[0] = $1;
+		  p->children[1] = new_node(0, AND);
+		  p->children[2] = $3;
+		  $$ = p;
+		}
+	| NOT Pred
+		{ printf("Not Pred\n");
+		  struct node *p = new_node(2, Pred);
+		  p->children[0] = new_node(0, NOT);
+		  p->children[1] = $2;
+		  $$ = p;
+		}
+	| EXIST '(' Query ')'
+		{ printf("Exist query\n");
+		  struct node *p = new_node(2, Pred);
+		  p->children[0] = new_node(0, EXIST);
+		  p->children[1] = $3;	/* the parentheses themselves are not stored */
+		  $$ = p; }
+	;
\ No newline at end of file
diff --git a/SQLPParser.c b/SQLPParser.c
new file mode 100644
index 0000000..5421d36
--- /dev/null
+++ b/SQLPParser.c
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 1989, G. E. Weddell.
+ *
+ * This file is part of RDM.
+ *
+ * RDM is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * RDM is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with RDM.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <stdio.h>
+#include <util.h>
+#include "lex.yy.c"
+#include "SQLPGrammar.tab.c"
+
+int main() /* Run yyparse() between "(" and ")" markers on stdout; the exit status is yyparse()'s result. */
+{
+  int Status;
+  strcpy(LineBuffer, ""); /* start with an empty line-echo buffer */
+  printf("(\n");
+  Status = yyparse();
+  if (Status == 0)
+    fprintf(stderr, "checking semantics.\n");
+  else
+    printf("**error**");
+  printf(")\n");
+  return Status;
+}
\ No newline at end of file
diff --git a/SQLPScanner.l b/SQLPScanner.l
new file mode 100644
index 0000000..e358f7e
--- /dev/null
+++ b/SQLPScanner.l
@@ -0,0 +1,164 @@
+%{
+/*
+ * Copyright (C) 1989, G. E. Weddell.
+ *
+ * This file is part of RDM.
+ *
+ * RDM is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * RDM is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with RDM.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "SQLPGrammar.tab.h"
+#include <stdio.h>
+#include <ctype.h>
+
+int LineNumber = 1; /* current input line, starting at 1; incremented by the newline and comment rules */
+char LineBuffer[200]; /* text of the current line scanned so far; printed by yyerror() */
+%}
+
+%p 3000
+/* Named patterns: D = decimal digit, L = letter or underscore, E = exponent suffix. */
+D			[0-9]
+L			[a-zA-Z_]
+E			[Ee][+-]?{D}+
+
+%%
+
+%{
+#define APPEND_TEXT() strncat(LineBuffer, yytext, sizeof LineBuffer - strlen(LineBuffer) - 1) /* bounded: truncates instead of overflowing LineBuffer */
+%}
+\%.*\n			{ LineNumber++; strcpy(LineBuffer, ""); /* a comment swallows its newline, so start a fresh line */ }
+\n			{ LineNumber++; strcpy(LineBuffer, ""); }
+"all"			{ APPEND_TEXT(); return(ALL); }
+"and"			{ APPEND_TEXT(); return(AND); }
+"array"			{ APPEND_TEXT(); return(ARRAY); }
+"as"			{ APPEND_TEXT(); return(AS); }
+"asc"			{ APPEND_TEXT(); return(ASC); }
+"binary"		{ APPEND_TEXT(); return(BINARY); }
+"by"			{ APPEND_TEXT(); return(BY); }
+"change"		{ APPEND_TEXT(); return(CHANGE); }
+"class"			{ APPEND_TEXT(); return(CLASS); }
+"constraints"		{ APPEND_TEXT(); return(CONSTRAINTS); }
+"cover"			{ APPEND_TEXT(); return(COVER); }
+"declare"		{ APPEND_TEXT(); return(DECLARE); }
+"delete"		{ APPEND_TEXT(); return(DELETE); }
+"desc"			{ APPEND_TEXT(); return(DESC); }
+"union"			{ APPEND_TEXT(); return(UNION); }
+"determined"		{ APPEND_TEXT(); return(DETERMINED); }
+"distributed"		{ APPEND_TEXT(); return(DISTRIBUTED); }
+"dynamic"		{ APPEND_TEXT(); return(DYNAMIC); }
+"exist"			{ APPEND_TEXT(); return(EXIST); }
+"for"			{ APPEND_TEXT(); return(FOR); }
+"frequency"		{ APPEND_TEXT(); return(FREQUENCY); }
+"from"			{ APPEND_TEXT(); return(FROM); }
+"given"			{ APPEND_TEXT(); return(GIVEN); }
+"has"			{ APPEND_TEXT(); return(HAS); }
+"implies"		{ APPEND_TEXT(); return(IMPLIES); }
+"index"			{ APPEND_TEXT(); return(INDEX); }
+"insert"		{ APPEND_TEXT(); return(INSERT); }
+"Integer"		{ APPEND_TEXT(); return(INTCLASS); }
+"isa"			{ APPEND_TEXT(); return(ISA); }
+"list"			{ APPEND_TEXT(); return(LIST); }
+"max"			{ APPEND_TEXT(); return(MAX); }
+"maxlen"		{ APPEND_TEXT(); return(MAXLEN); }
+"min"			{ APPEND_TEXT(); return(MIN); }
+"mod"			{ APPEND_TEXT(); return(MOD); }
+"not"			{ APPEND_TEXT(); return(NOT); }
+"of"			{ APPEND_TEXT(); return(OF); }
+"on"			{ APPEND_TEXT(); return(ON); }
+"one"			{ APPEND_TEXT(); return(ONE); }
+"or"			{ APPEND_TEXT(); return(OR); }
+"order"			{ APPEND_TEXT(); return(ORDER); }
+"ordered"		{ APPEND_TEXT(); return(ORDERED); }
+"overlap"		{ APPEND_TEXT(); return(OVERLAP); }
+"pointer"		{ APPEND_TEXT(); return(POINTER); }
+"precomputed"		{ APPEND_TEXT(); return(PRECOMPUTED); }
+"property"		{ APPEND_TEXT(); return(PROPERTY); }
+"properties"		{ APPEND_TEXT(); return(PROPERTIES); }
+"query"			{ APPEND_TEXT(); return(QUERY); }
+"range"			{ APPEND_TEXT(); return(RANGE); }
+"return"		{ APPEND_TEXT(); return(RETURN); }
+"schema"		{ APPEND_TEXT(); return(SCHEMA); }
+"select"		{ APPEND_TEXT(); return(SELECT); }
+"selectivity"		{ APPEND_TEXT(); return(SELECTIVITY); }
+"size"			{ APPEND_TEXT(); return(SIZE); }
+"space"			{ APPEND_TEXT(); return(SPACE); }
+"static"		{ APPEND_TEXT(); return(STATIC); }
+"store"			{ APPEND_TEXT(); return(STORE); }
+"storing"		{ APPEND_TEXT(); return(STORING); }
+"String"		{ APPEND_TEXT(); return(STRCLASS); }
+"time"			{ APPEND_TEXT(); return(TIME); }
+"to"			{ APPEND_TEXT(); return(TO); }
+"transaction"		{ APPEND_TEXT(); return(TRANSACTION); }
+"tree"			{ APPEND_TEXT(); return(TREE); }
+"type"			{ APPEND_TEXT(); return(TYPE); }
+"unit"			{ APPEND_TEXT(); return(UNIT); }
+"where"			{ APPEND_TEXT(); return(WHERE); }
+
+{L}({L}|{D})*		{ APPEND_TEXT(); return(IDENTIFIER); }
+{D}+			{ APPEND_TEXT(); return(INTEGER); /* NOTE(review): SQLPGrammar.y's Bool rule expects CONSTANT, which no rule here returns */ }
+{D}+{E}			{ APPEND_TEXT(); return(REAL); }
+{D}*"."{D}+({E})?	{ APPEND_TEXT(); return(REAL); }
+{D}+"."{D}*({E})?	{ APPEND_TEXT(); return(REAL); }
+\"(\\.|[^\\"])*\"	{ APPEND_TEXT(); return(STRING); }
+":="			{ APPEND_TEXT(); return(ASSIGN); }
+"<="			{ APPEND_TEXT(); return(LE); }
+">="			{ APPEND_TEXT(); return(GE); }
+"<>"			{ APPEND_TEXT(); return(NE); }
+";"			{ APPEND_TEXT(); return(';'); }
+"{"			{ APPEND_TEXT(); return('{'); }
+"}"			{ APPEND_TEXT(); return('}'); }
+","			{ APPEND_TEXT(); return(','); }
+"="			{ APPEND_TEXT(); return(EQ); }
+"("			{ APPEND_TEXT(); return('('); }
+")"			{ APPEND_TEXT(); return(')'); }
+"["			{ APPEND_TEXT(); return('['); }
+"]"			{ APPEND_TEXT(); return(']'); }
+"."			{ APPEND_TEXT(); return('.'); }
+"-"			{ APPEND_TEXT(); return('-'); }
+"+"			{ APPEND_TEXT(); return('+'); }
+"*"			{ APPEND_TEXT(); return(STAR); }
+"/"			{ APPEND_TEXT(); return('/'); }
+"<"			{ APPEND_TEXT(); return(LT); /* the grammar's Operator rule uses the LT token */ }
+">"			{ APPEND_TEXT(); return(GT); /* the grammar's Operator rule uses the GT token */ }
+
+[ \t]			{ APPEND_TEXT(); /* blanks and tabs are skipped but kept for the error echo */ }
+"\r"			{ APPEND_TEXT(); }
+.			{ APPEND_TEXT();
+			  yyerror("illegal character"); }
+%%
+
+int yyerror(const char *s) /* Echo the current input line, underline the current token, then print message s. */
+{
+   size_t i, line_len = strlen( LineBuffer ), tok_len = strlen( yytext );
+   int c; /* int rather than char, so an EOF returned by input() is not mistaken for a character */
+   fprintf( stderr, "\n%s", LineBuffer );
+   c = input();
+   while ( c != '\n'  &&  c != 0  &&  c != EOF ) { /* echo the unread rest of the line */
+      putc(c, stderr);
+      c = input();
+   }
+   putc('\n', stderr);
+   for (i = tok_len; i < line_len; i++) /* pad to the token's column; no unsigned underflow if the token is longer */
+      putc(' ', stderr);
+   for (i = 0; i < tok_len; i++)
+      putc('^', stderr);
+   putc('\n', stderr);
+   fprintf ( stderr, "** error: %s found in line %d.\n\n", s, LineNumber );
+   return 0; /* the value is unused by the callers in this patch */
+}
+
+int yywrap(void) /* end-of-input hook for the flex scanner; always returns 1 (no further input) */
+{
+   return 1;
+}
\ No newline at end of file
diff --git a/util.c b/util.c
new file mode 100644
index 0000000..e22cc07
--- /dev/null
+++ b/util.c
@@ -0,0 +1,32 @@
+#include "util.h"
+
+/* Allocate a node of `type` with `count` child slots, all NULL; exits the program if out of memory. */
+struct node *new_node(int count, int type) {
+	struct node *n = malloc(sizeof *n);
+	struct node **kids = count > 0 ? calloc((size_t)count, sizeof *kids) : NULL; /* zeroed child slots */
+	if (n == NULL || (count > 0 && kids == NULL)) { perror("new_node"); exit(EXIT_FAILURE); }
+	*n = (struct node){ .type = type, .count = count, .children = kids }; /* children is NULL when count <= 0 */
+	return n;
+}
+
+const char *node_types[] = {"program", "id", "query", "select_query", "body", "table path", "union_query", 
+"select list", "attrpath", "operator", "bool", "pred"}; /* print_tree() indexes this by node type; entries follow the order of enum type in util.h */
+
+void print_tree(struct node *root, int indent) { /* Print root's subtree, one "type:" line per node, two spaces per level. */
+	if (root == NULL || root->count == 0) return; /* NULL and childless nodes print nothing */
+	for (int pad = indent; pad > 0; pad--) {
+		printf("  ");
+	}
+	printf("type: %s\n", node_types[root->type]);
+	struct node **kid = root->children;
+	for (struct node **end = kid + root->count; kid < end; kid++) {
+		print_tree(*kid, indent + 1);
+	}
+}
+/* Free root's subtree: every node and every child array. A NULL root is a no-op. */
+void destroy_tree(struct node *root) {
+	if (root == NULL) return; /* child slots may be NULL; print_tree() guards against the same case */
+	for (int i = 0; i < root->count; i++) destroy_tree(root->children[i]);
+	if (root->count > 0) free(root->children); /* a child array exists only when count > 0 */
+	free(root);
+}
\ No newline at end of file
diff --git a/util.h b/util.h
new file mode 100644
index 0000000..621ccb0
--- /dev/null
+++ b/util.h
@@ -0,0 +1,38 @@
+#ifndef UTIL_H
+#define UTIL_H
+
+#include <stdio.h>
+#include <stdlib.h>
+
+struct node { /* one parse-tree node */
+	int type; /* an enum type value for grammar nodes; the grammar passes a bison token code for leaves */
+	int count; /* number of entries in children; 0 for leaves */
+	struct node **children; /* array of count child pointers, allocated by new_node() when count > 0 */
+};
+
+
+
+enum type { /* node kinds, one per grammar nonterminal; values index node_types[] in util.c, so keep the order in sync */
+	SQLPProgram, /* root of the tree */
+	Identifier,
+	Query,
+	Select_Query, 
+	Body,
+	TablePath,
+	Union_Query,
+	Select_List,
+	AttrPath,
+	Operator,
+	Bool,
+	Pred
+};
+
+struct node *new_node(int count, int type); /* allocate a node of the given type with room for count children */
+
+void print_tree(struct node *root, int indent); /* print the subtree at root, starting at the given indent level */
+void destroy_tree(struct node *root); /* recursively free the nodes of the subtree at root */
+
+#endif
+
+
+
-- 
GitLab