From 61c703f45ed58689573a33a5b550d4dd933c5d91 Mon Sep 17 00:00:00 2001 From: Nicholas Robinson <nwrobins@edu.uwaterloo.ca> Date: Wed, 12 Feb 2020 22:01:50 -0500 Subject: [PATCH] AstBuilding & Cleaning - added AstBuilding.py - new AstNodes.py that stores all Nodes and different types that we'll create - cleaned up Parsing.py heavily, now returns the parse tree - cleaned up Test.py as best I could to accommodate everyone - removed unneeded file utils.py - commented out my code in Weeding.py --- AstBuilding.py | 69 +++++++++++++++++++++++++++++++++++++++++++ AstNodes.py | 58 ++++++++++++++++++++++++++++++++++++ Parsing.py | 65 +++++++++++++++------------------------- Test.py | 80 +++++++++++++++++++++++++++++++------------------- Weeding.py | 30 +++++++++---------- utils.py | 49 ------------------------------- 6 files changed, 215 insertions(+), 136 deletions(-) create mode 100644 AstBuilding.py create mode 100644 AstNodes.py delete mode 100644 utils.py diff --git a/AstBuilding.py b/AstBuilding.py new file mode 100644 index 0000000..8a59878 --- /dev/null +++ b/AstBuilding.py @@ -0,0 +1,69 @@ +import string +from AstNodes import * + +##################### Build AST ########################################## +# go through parse tree +# determine what nodes are going to be converted into specific type of node +# condense tree (a->b->c => a->c) + +# node: Node +def astbuildNode(node): + newNode = None + if (node.name == 'classDcl'): + newNode = ClassDcl(node) + else: + newNode = Node(node.name, node.lex, []) + + childs = node.children + newChilds = [] + + for c in childs: + newChilds.append(astbuildNode(c)) + + newNode.children = newChilds + + return newNode + +# tree: Node +def astbuild(tree): + try: + ast = astbuildNode(tree) + except: + return (None, 'Couldn\'t build AST') + + return (ast, 'success') + +# ast: Node +def weed(ast): + res = ast.weed() + for c in ast.children: + res += weed(c) + return res + +##################### Tests 
########################################## + +x = Node('start', '', [ + Node('BOF', 'BOF', []), + Node('packageDcl', '', []), + Node('importDcls', '', []), + Node('topDcls', '', [ + Node('topDcl', '', [ + Node('classDcl', '', [ + Node('classMod', '', [ + Node('PUBLIC', 'public', []), + ]), + Node('CLASS', 'class', []), + Node('ID', 'MyClass', []), + Node('superclass', '', []), + Node('superInterface', '', []), + Node('classBody', '', [ + Node('LBRACK', '(', []), + Node('classBodyDcls', '', []), + Node('RBRACK', ')', []) + ]), + ]) + ]), + Node('topDcls', '', []) + ]) +]) +# print(astbuild(x)) \ No newline at end of file diff --git a/AstNodes.py b/AstNodes.py new file mode 100644 index 0000000..73c1b2d --- /dev/null +++ b/AstNodes.py @@ -0,0 +1,58 @@ +import string +import sys + +##################### Nodes ########################################## + +# children: Node[] +# name: string +# lex: string +class Node(): + def __init__(self, name, lex, children): + self.name = name + self.lex = lex + self.children = children + def __str__(self): + return printNodePretty(self) + def weed(self): + return '' + +# node: Node +class ClassDcl(Node): + def __init__(self, node): + self.name = node.name + self.lex = node.lex + self.children = node.children + self.classMod = node.children[0] + self.ID = node.children[2].lex + self.superClass = node.children[3] + self.superInterface = node.children[4] + self.classBody = node.children[5] + def weed(self): + # Every class must contain at least one explicit constructor + hasConstructor = False + classBodyDcls = self.classBody.children[1] + for d in classBodyDcls.children: + if d.children: + if d.children[0].name == 'constructorDcl': + hasConstructor = True + if not hasConstructor: + return 'Class ' + self.ID + ' does not contain a constructor.' 
+ + return '' + + +##################### Helpers ########################################## + +# node: Node +# prefix: string +# last: boolean +def printNodePretty(node, prefix='', last=True): + res = prefix + res += '`- ' if last else '|- ' + res += node.name + ' ' + node.lex + '\n' + prefix += ' ' if last else '| ' + num = len(node.children) + for i, child in enumerate(node.children): + last = i == (num - 1) + res += printNodePretty(child, prefix, last) + return res \ No newline at end of file diff --git a/Parsing.py b/Parsing.py index 2500a8e..ac82a44 100644 --- a/Parsing.py +++ b/Parsing.py @@ -1,18 +1,9 @@ import string import sys from Scanning import Token +from AstNodes import Node -##################### Node & State ########################################## - -# TODO: different types for nodes -# children: Node[] -# name: string -# lex: string -class Node(): - def __init__(self, children, name, lex): - self.children = children - self.name = name - self.lex = lex +##################### State ########################################## # A State has a list of rules, and a num indicating which state it is # num: int @@ -21,6 +12,12 @@ class State(): def __init__(self): self.num = -1 self.rules = [] + def __str__(self): + print('state ' + self.num) + print('rules: ['), + for r in self.rules: + print(r + ', '), + print(']') # addRule simply adds the string rule to rules # rule: string @@ -48,14 +45,13 @@ class State(): if not ruleToUse: raise ValueError('No rule found for next=({}) in self.rules'.format(next), - self.rules, statesVisited, rulesToOutput) + theStack, rulesToOutput) # what type of rule is it? shift or reduce? 
if (ruleToUse[2] == 'shift'): self.goToState(statesVisited, ruleToUse) return True elif (ruleToUse[2] == 'reduce'): - # print('reduce') ruleUsed = int(ruleToUse[3]) cfgrule = cfgrules[ruleUsed] toPop = len(cfgrule) - 1 @@ -65,29 +61,20 @@ class State(): while (toPop != 0): statesVisited.pop() top = theStack.pop() - newChildren.insert(0, Node(top.children, top.name, top.lex)) + newChildren.insert(0, Node(top.name, top.lex, top.children)) toPop -= 1 - newNode = Node(newChildren, cfgrule[0], '') + newNode = Node(cfgrule[0], '', newChildren) theStack.append(newNode) return False else: - raise ValueError('rule neither shift nor reduce.') - - def output(self): - print('state ' + self.num) - print('rules: ['), - for r in self.rules: - print(r + ', '), - print(']') + raise ValueError('rule neither shift nor reduce.', ruleToUse) ##################### Globals ############################### -# TODO: are these okay as globals?? Also, states is manually set up as the length, should we fix this? cfgrules = [] # String[][] -lr1trans = [] # String[][] theStack = [] # Node[] states = [] # State[] @@ -106,19 +93,15 @@ def checkSequence(tokens): curState = int(statesVisited[-1]) if getNext: - # print('*** GET NEXT') rulesToOutput.append(tokens[i].name + ' ' + tokens[i].lex) - newNode = Node([], tokens[i].name, tokens[i].lex) + newNode = Node(tokens[i].name, tokens[i].lex, []) theStack.append(newNode) # get the next tokens if i >= len(tokens) - 1: break i += 1 - # print('>>> NEXT TOKEN WE SEE: ' + tokens[i].name + ' ' + tokens[i].lex) elif theStack: - # print('*** DONT GET NEXT') - getNext = states[curState].trans(statesVisited, theStack[-1].name, rulesToOutput) curState = int(statesVisited[-1]) @@ -131,21 +114,22 @@ def checkSequence(tokens): lastRule = False break if lastRule: + result = Node(cfgrules[0][0], '', theStack) rulesToOutput.append(' '.join(cfgrules[0])) else: - raise ValueError('last rule not found') + raise ValueError('last rule not found', theStack) - return 
rulesToOutput + return result +# TODO: set this grammar up once instead of for each file # set up grammar from files and set up states def setUpGrammar(): - global cfgrules, lr1trans, theStack, states + global cfgrules, theStack, states + cfgrules = [] # String[][] + theStack = [] # Node[] + states = [] # State[] - # reset global vars (TODO: there has to be a better way???) - cfgrules = [] - lr1trans = [] - theStack = [] - states = [] + lr1trans = [] # String[][] # one big file with open('cfg/trans.txt', 'r') as f: @@ -186,6 +170,5 @@ def parse(tokens): result = checkSequence(tokens) except ValueError as err: return (None, err) - - # printNodeStack(theStack) - return (result, "success") + + return (result, 'success') diff --git a/Test.py b/Test.py index e92c827..ce6a25f 100644 --- a/Test.py +++ b/Test.py @@ -5,27 +5,32 @@ from os.path import isfile, join from Scanning import scan from Parsing import parse import Weeding +import AstBuilding def allFiles(testDir): - return [testDir + f for f in listdir(testDir) if isfile(join(testDir, f)) and f.startswith('Je')] + return [testDir + f for f in listdir(testDir) if isfile(join(testDir, f)) and f.startswith('J1')] def main(): - # All files in the test directory - testDirectory = "./Tests/" - testFiles = allFiles(testDirectory) + if sys.argv[1]: + testFiles = ["./Tests/" + sys.argv[1]] + else: + # All files in the test directory + testDirectory = "./Tests/" + testFiles = allFiles(testDirectory) print("**********************************************************") for f in testFiles: - # print(f) + print(f) + content = open(f, "r").read() # Scanning - content = open(f, "r").read() - (tokens, errorString) = scan(content) + (tokens, error) = scan(content) + # Error in Scanning if tokens is None: - # print("ERROR in Scanning: " + errorString) + print("ERROR in Scanning: " + error) print("**********************************************************") continue @@ -33,40 +38,53 @@ def main(): for token in tokens: if (token.name and 
token.lex): s += '(' + token.name + ',' + token.lex + '), ' - print(s) + # print(s) + # Weeding after scanning # No weeds if everything is good (weeds = None) weeds = Weeding.fileNameCheck(tokens, f) if weeds: + print("ERROR in Weeding after Scanning:") print(weeds) + print("**********************************************************") continue - - # Parsing - # print("Parsing starts") - - # try: - # (steps, errorString) = parse(tokens) - # except: - # print("Exception in Parsing") - # - # # Error in Parsing - # if steps is None: - # print("ERROR in Parsing: ", errorString) - # print("**********************************************************") - # continue + tree = None + try: + (tree, error) = parse(tokens) + except: + print("Exception in Parsing") # Error in Parsing - if steps is None: - # print("ERROR in Parsing: ", errorString) - # print("**********************************************************") + if tree is None: + print("ERROR in Parsing:") + print(error) + for i in error.args: + print(i) + print("**********************************************************") continue - print(f) - print("Succeeded") - # print("All Steps:") - # print(steps) + # AST Building + try: + (ast, error) = AstBuilding.astbuild(tree) + except: + print("Exception in AstBuilding") + + # Error in AstBuilding + if ast is None: + print("ERROR in AstBuilding: " + error) + print("**********************************************************") + continue + + # Weeding after AST + weeds = AstBuilding.weed(ast) + if weeds: + print("ERROR in Weeding on AST: " + weeds) + print("**********************************************************") + continue + + print("Succeeded") print("**********************************************************") -main() +main() \ No newline at end of file diff --git a/Weeding.py b/Weeding.py index 6f2f304..61c6adc 100644 --- a/Weeding.py +++ b/Weeding.py @@ -46,21 +46,21 @@ def extendCheck(tokens): # node: Node[] -def oneConstructor(node, insideClass): - success = False - if 
not insideClass: - if node.name == 'classDcl': - for c in node.children: - success = oneConstructor(c, True) - if success: - return True - elif insideClass: - if node.name == 'constructorDcl': - return True +# def oneConstructor(node, insideClass): +# success = False +# if not insideClass: +# if node.name == 'classDcl': +# for c in node.children: +# success = oneConstructor(c, True) +# if success: +# return True +# elif insideClass: +# if node.name == 'constructorDcl': +# return True # tree: Node[] -def weed(tree): - # Every class must contain at least one explicit constructor - if oneConstructor(tree, False) == False: - print('a class does not have an explicit constructor') +# def weed(tree): +# # Every class must contain at least one explicit constructor +# if oneConstructor(tree, False) == False: +# print('a class does not have an explicit constructor') diff --git a/utils.py b/utils.py deleted file mode 100644 index 623e5dd..0000000 --- a/utils.py +++ /dev/null @@ -1,49 +0,0 @@ -import string - -# ------- Utility functions - -# a: stack -def printStack(a): - print('['), - for i in a: - print(i + ', '), - print(']') - -# a: Node -def printNode(a): - print(a.name), - if (a.lex != ''): - print(a.lex), - - for i in a.children: - print(i.name), - - if a.children: - print('\n<children>') - for i in a.children: - printNode(i) - print('</children>') - else: - print('\n'), - -# a: stack<Node> -def printNodeStack(a): - for i in a: - printNode(i) - -# print all the states we have, with their respective -# rules and accepting lists -def printAllStates(): - j = 0 - for i in states: - print(j) - j += 1 - i.output() - -# tokens: Token[] -def printTokenList(tokens): - s = "All Tokens:\n" - for token in tokens: - if (token.name and token.lex): - s += '(' + token.name + ',' + token.lex + '), ' - print(s) \ No newline at end of file -- GitLab