diff --git a/Parser b/Parser index 0440ebd7ba1e19ef0801e8a356e211c737d58823..82c275741d8da0deb6c35c9eab373c8d70a19251 100755 Binary files a/Parser and b/Parser differ diff --git a/README.md b/README.md index 210ac94be33f02a98fe6aeaff18bdd703de06355..2edc947111d1729a782ec6bd2e264102e7a5ec68 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,9 @@ -To run the parser, run `make` then run `./SQLPParser`. An example of a test input is `select s.name from student s where s.name = "John";`. Currently a syntax error will be reported when the semi-colon is read, but the parse tree that is outputted is correct. +To run the SQLP parser, run `make` then run `./SQLPParser`. An example of a test input is `select s.name from student s where s.name = "John" and s.num = 10;`. Currently a syntax error will be reported when the semi-colon is read, but the parse tree that is outputted is correct. The grammar is located in `SQLPGrammar.y`, and non-keyword types (such as `INTEGER`, `IDENTIFIER`, `STRING`) are defined in `SQLPScanner.l`. `util.c` contains the functions needed to build the con cell structure. +For parsing any general string into a tree of `cons_cells`, run `make` then run `./Parser`. An example of a test input is `PROJ (AS (SPCOL (VAR (s) PF (ATTR (name)))) ATOM (TABLE (student) VAR (s)))`. Every clause enclosed in brackets is nested within a child `cons_cell`, and any two adjacent terms are connected via the `next` pointer. 
+ A common way of defining the behavior of a process or thread in cases where performance is critical is in terms of a collection of n+1 files containing code in the C programming language: n "module" files with a ".c" suffix that diff --git a/SQLPParser b/SQLPParser index e22a4f61718ea40e54e8f783f0c190796c33b7fb..0645f020f73123bc69119ce5228dfdf102a26ea1 100755 Binary files a/SQLPParser and b/SQLPParser differ diff --git a/parser.c b/parser.c index 0f1002e1f4fbd1ffbb9061f970a7c123a6efaa12..276c7e66f96c313d490675ba4137e3b3b55f2542 100644 --- a/parser.c +++ b/parser.c @@ -10,8 +10,10 @@ cons_cell* parse(char* input, int size) { return NULL; } if (input[0] == '(') { + // Nested case int count_bracket = 1; int it = 0; + // Find where the brackets enclose while (count_bracket > 0) { it++; if (input[it] == '(') { @@ -23,16 +25,26 @@ cons_cell* parse(char* input, int size) { char buffer[it]; strncpy(buffer, &input[1], it - 1); buffer[it - 1] = '\0'; + + // Recurse on what is enclosed by the brackets cons_cell* car_cons = parse(buffer, it - 1); + + // Copy everything after the brackets int next_char = it + 1; while (input[next_char] == ' ' && input[next_char] != NULL) { next_char++; } char buffer_next[500]; strncpy(buffer_next, &input[next_char], size - next_char + 1); + + // Recurse on everything after the brackets cons_cell* cdr_cons = parse(buffer_next, size - next_char); + return create_cons_cell(car_cons, cdr_cons); } else { + // Not nested case + + // Copy the first term int first_space_idx = 0; while (input[first_space_idx] != ' ' && input[first_space_idx] != NULL) { first_space_idx++; @@ -40,6 +52,8 @@ cons_cell* parse(char* input, int size) { char buff_first[500]; strncpy(buff_first, input, first_space_idx); buff_first[first_space_idx] = '\0'; + + // Create cons_cell for first term cons_cell* car_cons = create_cons_cell_w_atom(buff_first, NULL); char buff_other[500]; int next_char = first_space_idx; @@ -47,7 +61,9 @@ cons_cell* parse(char* input, int size) { 
next_char++; } + // Copy everything after the first term strncpy(buff_other, &input[next_char], size - next_char + 1); + // Recurse on everything after the first term cons_cell* cdr_cons = parse(buff_other, size - next_char); car_cons->cdr = cdr_cons; return car_cons; diff --git a/util.h b/util.h index e225a0f572d0978cd12794c33b1c3b1f7cb4a8ff..ef4abde1d4f60ed956095b8c587de602ab2586f7 100644 --- a/util.h +++ b/util.h @@ -32,6 +32,35 @@ atom* create_atom(char* val); // Creates a cons_cell that has an atom as its car cons_cell* create_cons_cell_w_atom(char* val, cons_cell* cdr); +/* + +The functions declared below this comment block implement the following relational +algebra expressions for SQL queries: + +<ra> := (comp <op> <term> <term> <ra>) + | (atom <table> <var>) + | (union-all <ra> <ra>) + | (cross <ra> <ra>) + | (proj ((rename <col> <col>)) <ra>) + | (not <ra> <ra>) + | (elim <ra>) + | (limit n <ra> <ra>) + | (eval <ra> <ra>) + +<term> := <col> | <const> +<col> := <spcol> | <scol> +<spcol> := <var> "." <pf> // for SQLP +<scol> := <var> "." <attr> // for SQL +<pf> := <attr> <attr> +<var> := identifier +<attr> := identifier +<table> := identifier +<term> := identifier +<const> := identifier +<op> := [GT, GE, EQ, LE, LT] + +*/ + // The following are all helper functions to create the // various relational algebra expressions cons_cell* create_spcol(cons_cell* var, cons_cell* pf); diff --git a/util.o b/util.o index 5cfc16b647bdf6acfb66fb5796182f59bc5e1c47..997f706995632ac4a978ec92192bd65c22298e13 100644 Binary files a/util.o and b/util.o differ