From 61c703f45ed58689573a33a5b550d4dd933c5d91 Mon Sep 17 00:00:00 2001
From: Nicholas Robinson <nwrobins@edu.uwaterloo.ca>
Date: Wed, 12 Feb 2020 22:01:50 -0500
Subject: [PATCH] AstBuilding & Cleaning

- added AstBuilding.py
- new AstNodes.py that stores all Nodes and different types that we'll
create
- cleaned up Parsing.py heavily, now returns the parse tree
- cleaned up Test.py as best I could to accommodate everyone
- removed the unneeded file utils.py
- commented out my code in Weeding.py
---
 AstBuilding.py | 69 +++++++++++++++++++++++++++++++++++++++++++
 AstNodes.py    | 58 ++++++++++++++++++++++++++++++++++++
 Parsing.py     | 65 +++++++++++++++-------------------------
 Test.py        | 80 +++++++++++++++++++++++++++++++-------------------
 Weeding.py     | 30 +++++++++----------
 utils.py       | 49 -------------------------------
 6 files changed, 215 insertions(+), 136 deletions(-)
 create mode 100644 AstBuilding.py
 create mode 100644 AstNodes.py
 delete mode 100644 utils.py

diff --git a/AstBuilding.py b/AstBuilding.py
new file mode 100644
index 0000000..8a59878
--- /dev/null
+++ b/AstBuilding.py
@@ -0,0 +1,69 @@
+import string
+from AstNodes import *
+
+##################### Build AST ##########################################
+# go through parse tree
+# determine what nodes are going to be converted into specific type of node
+# condense tree (a->b->c => a->c)
+
+# node: Node (parse-tree node); returns the converted AST node for its subtree
+def astbuildNode(node):
+    # Pick the AST node type from the parse-tree node's name:
+    # 'classDcl' becomes a ClassDcl, anything else a plain Node copy
+    # that starts out with an empty child list.
+    if node.name != 'classDcl':
+        converted = Node(node.name, node.lex, [])
+    else:
+        converted = ClassDcl(node)
+
+    # Convert the children recursively, preserving their order. The list
+    # is attached after construction, so a ClassDcl's children attribute
+    # ends up holding converted nodes while the fields set in
+    # ClassDcl.__init__ still refer to the original parse-tree nodes.
+    converted.children = [astbuildNode(child) for child in node.children]
+
+    return converted
+
+# tree: Node (parse-tree root); returns (ast, 'success') or (None, error message)
+def astbuild(tree):
+    try:
+        ast = astbuildNode(tree)
+    except Exception:  # not bare: let KeyboardInterrupt/SystemExit propagate
+        return (None, 'Couldn\'t build AST')
+
+    return (ast, 'success')
+
+# ast: Node; returns this node's weed() message followed by its descendants'
+def weed(ast):
+    # Pre-order walk: the node's own message first, then each child subtree.
+    messages = [ast.weed()]
+    messages.extend(weed(child) for child in ast.children)
+    return ''.join(messages)
+
+##################### Tests ##########################################
+# Hand-built sample parse tree for one public class 'MyClass'; it is constructed at import time and only referenced by the commented-out call below.
+x = Node('start', '', [
+    Node('BOF', 'BOF', []),
+    Node('packageDcl', '', []),
+    Node('importDcls', '', []),
+    Node('topDcls', '', [
+        Node('topDcl', '', [
+            Node('classDcl', '', [
+                Node('classMod', '', [
+                    Node('PUBLIC', 'public', []),
+                ]),
+                Node('CLASS', 'class', []),
+                Node('ID', 'MyClass', []),
+                Node('superclass', '', []),
+                Node('superInterface', '', []),
+                Node('classBody', '', [
+                    Node('LBRACK', '(', []),
+                    Node('classBodyDcls', '', []),
+                    Node('RBRACK', ')', [])
+                ]),
+            ])
+        ]),
+        Node('topDcls', '', [])
+    ])
+])
+# print(astbuild(x))
\ No newline at end of file
diff --git a/AstNodes.py b/AstNodes.py
new file mode 100644
index 0000000..73c1b2d
--- /dev/null
+++ b/AstNodes.py
@@ -0,0 +1,58 @@
+import string
+import sys
+
+##################### Nodes ##########################################
+
+# name:     string -- symbol name of the node (e.g. 'classDcl', 'ID')
+# lex:      string -- lexeme text; the sample trees in this patch use '' for non-terminals
+# children: Node[] -- child nodes in order; [] for a leaf
+class Node():
+    def __init__(self, name, lex, children):
+        self.name     = name
+        self.lex      = lex
+        self.children = children
+    def __str__(self):
+        return printNodePretty(self)  # helper defined further down in this module
+    def weed(self):
+        return ''  # a plain node reports no weeding error; ClassDcl overrides this
+
+# node: Node -- a 'classDcl' parse-tree node
+class ClassDcl(Node):
+    def __init__(self, node):
+        # Generic fields are filled in by the base-class constructor.
+        Node.__init__(self, node.name, node.lex, node.children)
+        # Named fields are picked out by fixed child position.
+        parts               = node.children
+        self.classMod       = parts[0]
+        self.ID             = parts[2].lex
+        self.superClass     = parts[3]
+        self.superInterface = parts[4]
+        self.classBody      = parts[5]
+    def weed(self):
+        # Every class must contain at least one explicit constructor:
+        # a member of classBody's second child whose first child is named
+        # 'constructorDcl'.
+        constructors = [
+            dcl for dcl in self.classBody.children[1].children
+            if dcl.children and dcl.children[0].name == 'constructorDcl'
+        ]
+        if constructors:
+            return ''
+        return 'Class ' + self.ID + ' does not contain a constructor.'
+
+
+##################### Helpers ##########################################
+
+# node:   Node    -- root of the subtree to render
+# prefix: string  -- indentation text inherited from the ancestors
+# last:   boolean -- True when node is the final child of its parent
+def printNodePretty(node, prefix='', last=True):
+    # The final child closes its branch with '`- ' and pads its subtree with
+    # spaces; other children use '|- ' and keep the vertical bar going.
+    branch, pad = ('`- ', '   ') if last else ('|- ', '|  ')
+    pieces = [prefix + branch + node.name + ' ' + node.lex + '\n']
+    finalIdx = len(node.children) - 1
+    for idx, child in enumerate(node.children):
+        pieces.append(printNodePretty(child, prefix + pad, idx == finalIdx))
+    # One line per node, each terminated by a newline.
+    return ''.join(pieces)
\ No newline at end of file
diff --git a/Parsing.py b/Parsing.py
index 2500a8e..ac82a44 100644
--- a/Parsing.py
+++ b/Parsing.py
@@ -1,18 +1,9 @@
 import string
 import sys
 from Scanning import Token
+from AstNodes import Node
 
-##################### Node & State ##########################################
-
-# TODO: different types for nodes
-# children: Node[]
-# name:     string
-# lex:      string
-class Node():
-    def __init__(self, children, name, lex):
-        self.children = children
-        self.name     = name
-        self.lex      = lex
+##################### State ##########################################
 
 # A State has a list of rules, and a num indicating which state it is
 # num:   int
@@ -21,6 +12,12 @@ class State():
     def __init__(self):
         self.num   = -1
         self.rules = []
+    def __str__(self):
+        # __str__ must return the text rather than print it (printing returns
+        # None, which makes str(state) raise TypeError); num is an int, so
+        # it is converted explicitly before concatenation.
+        rules = ', '.join(str(r) for r in self.rules)
+        return 'state ' + str(self.num) + '\nrules: [' + rules + ']'
 
     # addRule simply adds the string rule to rules
     # rule: string
@@ -48,14 +45,13 @@ class State():
 
         if not ruleToUse:
             raise ValueError('No rule found for next=({}) in self.rules'.format(next),
-                self.rules, statesVisited, rulesToOutput)
+                theStack, rulesToOutput)
 
         # what type of rule is it? shift or reduce?
         if (ruleToUse[2] == 'shift'):
             self.goToState(statesVisited, ruleToUse)
             return True
         elif (ruleToUse[2] == 'reduce'):
-            # print('reduce')
             ruleUsed = int(ruleToUse[3])
             cfgrule  = cfgrules[ruleUsed]
             toPop    = len(cfgrule) - 1
@@ -65,29 +61,20 @@ class State():
             while (toPop != 0):
                 statesVisited.pop()
                 top = theStack.pop()
-                newChildren.insert(0, Node(top.children, top.name, top.lex))
+                newChildren.insert(0, Node(top.name, top.lex, top.children))
 
                 toPop -= 1
 
-            newNode = Node(newChildren, cfgrule[0], '')
+            newNode = Node(cfgrule[0], '', newChildren)
 
             theStack.append(newNode)
             return False
         else:
-            raise ValueError('rule neither shift nor reduce.')
-
-    def output(self):
-        print('state ' + self.num)
-        print('rules: ['),
-        for r in self.rules:
-            print(r + ', '),
-        print(']')
+            raise ValueError('rule neither shift nor reduce.', ruleToUse)
 
 ##################### Globals ###############################
 
-# TODO: are these okay as globals?? Also, states is manually set up as the length, should we fix this?
 cfgrules = [] # String[][]
-lr1trans = [] # String[][]
 theStack = [] # Node[]
 states   = [] # State[]
 
@@ -106,19 +93,15 @@ def checkSequence(tokens):
         curState = int(statesVisited[-1])
 
         if getNext:
-            # print('*** GET NEXT')
             rulesToOutput.append(tokens[i].name + ' ' + tokens[i].lex)
-            newNode = Node([], tokens[i].name, tokens[i].lex)
+            newNode = Node(tokens[i].name, tokens[i].lex, [])
             theStack.append(newNode)
 
             # get the next tokens
             if i >= len(tokens) - 1:
                 break
             i += 1
-            # print('>>> NEXT TOKEN WE SEE: ' + tokens[i].name + ' ' + tokens[i].lex)
         elif theStack:
-            # print('*** DONT GET NEXT')
-
             getNext = states[curState].trans(statesVisited, theStack[-1].name, rulesToOutput)
 
             curState = int(statesVisited[-1])
@@ -131,21 +114,22 @@ def checkSequence(tokens):
             lastRule = False
             break
     if lastRule:
+        result = Node(cfgrules[0][0], '', theStack)
         rulesToOutput.append(' '.join(cfgrules[0]))
     else:
-        raise ValueError('last rule not found')
+        raise ValueError('last rule not found', theStack)
 
-    return rulesToOutput
+    return result
 
+# TODO: set this grammar up once instead of for each file
 # set up grammar from files and set up states
 def setUpGrammar():
-    global cfgrules, lr1trans, theStack, states
+    global cfgrules, theStack, states
+    cfgrules = [] # String[][]
+    theStack = [] # Node[]
+    states   = [] # State[]
 
-    # reset global vars (TODO: there has to be a better way???)
-    cfgrules = []
-    lr1trans = []
-    theStack = []
-    states   = []
+    lr1trans = [] # String[][]
 
     # one big file
     with open('cfg/trans.txt', 'r') as f:
@@ -186,6 +170,5 @@ def parse(tokens):
         result = checkSequence(tokens)
     except ValueError as err:
         return (None, err)
-
-    # printNodeStack(theStack)
-    return (result, "success")
+        
+    return (result, 'success')
diff --git a/Test.py b/Test.py
index e92c827..ce6a25f 100644
--- a/Test.py
+++ b/Test.py
@@ -5,27 +5,32 @@ from os.path import isfile, join
 from Scanning import scan
 from Parsing import parse
 import Weeding
+import AstBuilding
 
 
 def allFiles(testDir):
-     return [testDir + f for f in listdir(testDir) if isfile(join(testDir, f)) and f.startswith('Je')]
+     return [join(testDir, f) for f in listdir(testDir) if isfile(join(testDir, f)) and f.startswith('J1')]  # join: same path the isfile check uses, valid without a trailing '/'
 
 def main():
 
-    # All files in the test directory
-    testDirectory = "./Tests/"
-    testFiles = allFiles(testDirectory)
+    if sys.argv[1]:
+        testFiles = ["./Tests/" + sys.argv[1]]
+    else:
+        # All files in the test directory
+        testDirectory = "./Tests/"
+        testFiles = allFiles(testDirectory)
     print("**********************************************************")
 
     for f in testFiles:
-        # print(f)
+        print(f)
+        content = open(f, "r").read()
 
         # Scanning
-        content = open(f, "r").read()
-        (tokens, errorString) = scan(content)
+        (tokens, error) = scan(content)
+
         # Error in Scanning
         if tokens is None:
-            # print("ERROR in Scanning: " + errorString)
+            print("ERROR in Scanning: " + error)
             print("**********************************************************")
             continue
 
@@ -33,40 +38,53 @@ def main():
         for token in tokens:
             if (token.name and token.lex):
                 s += '(' + token.name + ',' + token.lex + '), '
-        print(s)
+        # print(s)
 
+        # Weeding after scanning
         # No weeds if everything is good (weeds = None)
         weeds = Weeding.fileNameCheck(tokens, f)
         if weeds:
+            print("ERROR in Weeding after Scanning:")
             print(weeds)
+            print("**********************************************************")
             continue
 
-
-
         # Parsing
-        # print("Parsing starts")
-
-        # try:
-        #     (steps, errorString) = parse(tokens)
-        # except:
-        #     print("Exception in Parsing")
-        #
-        # # Error in Parsing
-        # if steps is None:
-        #     print("ERROR in Parsing: ", errorString)
-        #     print("**********************************************************")
-        #     continue
+        tree = None
+        try:
+            (tree, error) = parse(tokens)
+        except:
+            print("Exception in Parsing")
 
         # Error in Parsing
-        if steps is None:
-            # print("ERROR in Parsing: ", errorString)
-            # print("**********************************************************")
+        if tree is None:
+            print("ERROR in Parsing:")
+            print(error)
+            for i in error.args:
+                print(i)
+            print("**********************************************************")
             continue
-        print(f)
-        print("Succeeded")
-        # print("All Steps:")
-        # print(steps)
 
+        # AST Building
+        try:
+            (ast, error) = AstBuilding.astbuild(tree)
+        except:
+            print("Exception in AstBuilding")
+
+        # Error in AstBuilding
+        if ast is None:
+            print("ERROR in AstBuilding: " + error)
+            print("**********************************************************")
+            continue
+
+        # Weeding after AST
+        weeds = AstBuilding.weed(ast)
+        if weeds:
+            print("ERROR in Weeding on AST: " + weeds)
+            print("**********************************************************")
+            continue
+
+        print("Succeeded")
         print("**********************************************************")
 
-main()
+main()
\ No newline at end of file
diff --git a/Weeding.py b/Weeding.py
index 6f2f304..61c6adc 100644
--- a/Weeding.py
+++ b/Weeding.py
@@ -46,21 +46,21 @@ def extendCheck(tokens):
 
 
 # node: Node[]
-def oneConstructor(node, insideClass):
-    success = False
-    if not insideClass:
-        if node.name == 'classDcl':
-            for c in node.children:
-                success = oneConstructor(c, True)
-                if success:
-                    return True
-    elif insideClass:
-        if node.name == 'constructorDcl':
-            return True
+# def oneConstructor(node, insideClass):
+#     success = False
+#     if not insideClass:
+#         if node.name == 'classDcl':
+#             for c in node.children:
+#                 success = oneConstructor(c, True)
+#                 if success:
+#                     return True
+#     elif insideClass:
+#         if node.name == 'constructorDcl':
+#             return True
 
 
 # tree: Node[]
-def weed(tree):
-    # Every class must contain at least one explicit constructor
-    if oneConstructor(tree, False) == False:
-        print('a class does not have an explicit constructor')
+# def weed(tree):
+#     # Every class must contain at least one explicit constructor
+#     if oneConstructor(tree, False) == False:
+#         print('a class does not have an explicit constructor')
diff --git a/utils.py b/utils.py
deleted file mode 100644
index 623e5dd..0000000
--- a/utils.py
+++ /dev/null
@@ -1,49 +0,0 @@
-import string
-
-# ------- Utility functions
-
-# a: stack
-def printStack(a):
-    print('['),
-    for i in a:
-        print(i + ', '),
-    print(']')
-
-# a: Node
-def printNode(a):
-    print(a.name),
-    if (a.lex != ''):
-        print(a.lex),
-
-    for i in a.children:
-        print(i.name),
-    
-    if a.children:
-        print('\n<children>')
-        for i in a.children:
-            printNode(i)
-        print('</children>')
-    else:
-        print('\n'),
-
-# a: stack<Node>
-def printNodeStack(a):
-    for i in a:
-        printNode(i)
-
-# print all the states we have, with their respective
-#  rules and accepting lists
-def printAllStates():
-    j = 0
-    for i in states:
-        print(j)
-        j += 1
-        i.output()
-
-# tokens: Token[]
-def printTokenList(tokens):
-    s = "All Tokens:\n"
-    for token in tokens:
-        if (token.name and token.lex):
-            s += '(' + token.name + ',' + token.lex + '), '
-    print(s)
\ No newline at end of file
-- 
GitLab