cleaning up parser files

0f8fc3a2 · Nicholas Robinson · fdeaa95d · 0f8fc3a2 · 0f8fc3a2 · 0f8fc3a2
Commit 0f8fc3a2 authored 5 years ago by Nicholas Robinson
--- a/ParseTests/test.parse
+++ b/ParseTests/test.parse
--- a/Parsing.py
+++ b/Parsing.py
 import string
 import sys
+from utils import *

-# parses the scanned input
-
-##################### Rules ###############################
-
-cfgrules = [
-    'start BOF procedures EOF',
-    'procedures CLASS ID LBRACK dcl RBRACK',
-    'dcl INT ID ASSIGN NUM SEMICO',
-    'dcl'
-]
-
-lr1Trans = [
-    '0 BOF shift 1',
-    '1 procedures shift 2',
-    '1 CLASS shift 5',
-    '2 EOF shift 15',
-    '5 ID shift 6',
-    '6 LBRACK shift 7',
-    '7 INT shift 8',
-    '7 dcl shift 13',
-    '8 ID shift 9',
-    '9 ASSIGN shift 10',
-    '10 NUM shift 11',
-    '11 SEMICO shift 12',
-    '12 RBRACK reduce 2',
-    '13 RBRACK shift 14',
-    '14 EOF reduce 1'
-]
-
-# TODO: are these okay as globals??
+##################### Globals ###############################
+
+# TODO: put rules, trans, debug fns in different files
+# TODO: are these okay as globals? Also, the length of states is hard-coded (15, i.e. indices 0-14) instead of being derived from lr1trans, which shifts to a state 15 -- should we fix this?
+cfgrules = []
+lr1trans = []
 states   = [0] * 15 # State[]
 theStack = []       # stack<Node>

@@ -42,9 +19,9 @@ theStack = []       # stack<Node>
 # lex:       string
 class Node():
    def __init__(self, children, tokenType, lex):
-        self.children = children
+        self.children  = children
        self.tokenType = tokenType
-        self.lex = lex
+        self.lex       = lex

 # A State has a list of rules, and a list of accepting tokens
 # accepting: string[]
@@ -52,7 +29,7 @@ class Node():
 class State():
    def __init__(self):
        self.accepting = []
-        self.rules = []
+        self.rules     = []

    # addRule simply adds the string rule to rules, and adds the
    #   accepting token to accepting
@@ -90,10 +67,10 @@ class State():
            self.goToState(statesVisited, ruleToUse)
            return True
        elif (ruleSplit[2] == 'reduce'):
-            ruleUsed       = int(ruleSplit[3])
-            cfgrule        = cfgrules[ruleUsed]
+            ruleUsed          = int(ruleSplit[3])
+            cfgrule           = cfgrules[ruleUsed]
            rulesToOutput[0] += '\n' + cfgrule
-            toPop          = countWords(cfgrule) - 1
+            toPop             = countWords(cfgrule) - 1

            newChildren = [] # Node[]
            while (toPop != 0):
@@ -122,91 +99,8 @@ class State():
            print(i + ', '),
        print(']')

-##################### Helpers ##########################################
-
-# put input all in one line
-# s: string
-def formatInput(s):
-    line = ''
-    word = ''
-    output = 'BOF BOF'
-    return output
-
-# TODO: change parser so we don't have to use this function so much
-# split string into array of strings
-# e.g. '0 BOF shift 80' => ['0', 'BOF', 'shift', '80']
-# s: string
-def splitString(s):
-    words = []
-    isword = 0
-    for c in s:
-        if c in " \r\n\t": # whitespace
-            isword = 0
-        elif not isword:
-            words = words + [c]
-            isword = 1
-        else:
-            words[-1] = words[-1] + c
-    return words
-
-# count number of words in a string
-# s: string
-def countWords(s):
-    return len(splitString(s))
-
-# a: stack
-def printStack(a):
-    print('['),
-    for i in a:
-        print(i + ', '),
-    print(']')
-
-# a: Node
-def printNode(a):
-    print(a.tokenType),
-    if (a.lex != ''):
-        print(' ' + a.lex),
-
-    for i in reversed(a.children):
-        print(i.tokenType),
-    
-    if a.children:
-        print('\n<children>')
-        for i in reversed(a.children):
-            printNode(i)
-        print('</children>')
-    else:
-        print('\n'),
-
-# a: stack<Node>
-def printNodeStack(a):
-    for i in a:
-        printNode(i)
-
-# print all the states we have, with their respective
-#  rules and accepting lists
-def printAllStates():
-    j = 0
-    for i in states:
-        print(j)
-        j += 1
-        i.output()
-
-
 ##################### Main Functions ##########################################

-def getInput():
-    # Reading in test joos program
-    inputfile = sys.argv[1]
-    inputfile = "./ParseTests/" + inputfile
-
-    result = 'BOF BOF '
-    with open(inputfile) as f:
-        result += f.read().replace('\n', ' ')
-    result += ' EOF EOF'
-
-    return result.split()
-
 # input: string
 def checkSequence(input):
    statesVisited = []     # stack<int>
@@ -281,18 +175,41 @@ def checkSequence(input):
        else:
            print('ERROR')

-    printNodeStack(theStack)
+    # printNodeStack(theStack)
    return rulesToOutput[0][1:]

-def main():
+# read the file named by argv[1] (looked up under ./ParseTests/) and return its whitespace-separated words as a list, wrapped in 'BOF BOF' ... 'EOF EOF'
+def getInput():
+    # Reading in test joos program
+    inputfile = sys.argv[1] # bare file name; the directory is prepended on the next line
+    inputfile = "./ParseTests/" + inputfile
+
+    result = 'BOF BOF ' # NOTE(review): the doubled word is presumably a tokenType/lexeme pair -- confirm against checkSequence
+    with open(inputfile) as f:
+        result += f.read().replace('\n', ' ') # newlines become spaces so the file reads as one line
+    result += ' EOF EOF'
+
+    return result.split() # string[]: split on any run of whitespace
+
+def setUpGrammer(): # load the grammar files into the globals, then build one State per slot of states
+    global cfgrules, lr1trans # rebound below, so they must be declared global
+
+    with open('cfgrules', 'r') as f: # path is relative to the current working directory
+        cfgrules = f.read().splitlines() # one rule per line
+
+    with open('lr1trans', 'r') as f: # path is relative to the current working directory
+        lr1trans = f.read().splitlines() # one transition per line
+
    for i in range(len(states)):
        s = State()
        states[i] = s

-    for t in lr1Trans:
+    for t in lr1trans: # the first word of each transition is the index of the state that owns it
        transState = int(splitString(t)[0])
        states[transState].addRule(t)

+def main(): # entry point: load the grammar, read the input words, check them, print the result
+    setUpGrammer() # must run first: fills cfgrules, lr1trans and states
    toParse = getInput()
    result = checkSequence(toParse)
    print(result)

--- a/cfgrules
+++ b/cfgrules
+start BOF procedures EOF
+procedures CLASS ID LBRACK dcl RBRACK
+dcl INT ID ASSIGN NUM SEMICO
+dcl
\ No newline at end of file
--- a/lr1trans
+++ b/lr1trans
+0 BOF shift 1
+1 procedures shift 2
+1 CLASS shift 5
+2 EOF shift 15
+5 ID shift 6
+6 LBRACK shift 7
+7 INT shift 8
+7 dcl shift 13
+8 ID shift 9
+9 ASSIGN shift 10
+10 NUM shift 11
+11 SEMICO shift 12
+12 RBRACK reduce 2
+13 RBRACK shift 14
+14 EOF reduce 1
\ No newline at end of file
--- a/utils.py
+++ b/utils.py
+import string
+
+# ------- Utility functions
+
+# TODO: change parser so we don't have to use this function so much
+# split a string into the list of its whitespace-separated words
+# e.g. '0 BOF shift 80' => ['0', 'BOF', 'shift', '80']
+# s: string; returns string[] (empty when s contains no words)
+def splitString(s):
+    words = []
+    isword = 0 # 1 while the scan is inside a word, 0 otherwise
+    for c in s:
+        if c in " \r\n\t": # only space, CR, LF and tab count as whitespace here
+            isword = 0
+        elif not isword:
+            words = words + [c] # first character of a new word
+            isword = 1
+        else:
+            words[-1] = words[-1] + c # extend the word currently being read
+    return words
+
+# count the whitespace-separated words in a string (0 for an empty or all-whitespace string)
+# s: string; returns int
+def countWords(s):
+    return len(splitString(s)) # uses the same word boundaries as splitString
+
+# a: stack of strings; prints '[', then every element followed by ', ', then ']'
+def printStack(a):
+    print('['), # trailing comma is the Python 2 print-statement idiom for "no newline" -- NOTE(review): under Python 3 a newline is still printed
+    for i in a:
+        print(i + ', '), # each element is concatenated with a string, so it must itself be a string
+    print(']')
+
+# a: Node; prints a's tokenType and lex, the tokenTypes of its children, then each child recursively
+def printNode(a):
+    print(a.tokenType),
+    if (a.lex != ''): # lex is printed only when it is non-empty
+        print(' ' + a.lex),
+
+    for i in reversed(a.children): # children are listed last-to-first
+        print(i.tokenType),
+    
+    if a.children:
+        print('\n<children>') # the subtrees are bracketed by <children> ... </children>
+        for i in reversed(a.children): # same last-to-first order as the listing above
+            printNode(i)
+        print('</children>')
+    else:
+        print('\n'), # a node without children just ends its line
+
+# a: stack<Node>; prints every node with printNode, in iteration order
+def printNodeStack(a):
+    for i in a:
+        printNode(i) # prints the node and, recursively, its children
+
+# print every entry of states: its index, followed by whatever the
+#  entry's output() method prints (its rules and accepting lists)
+def printAllStates():
+    j = 0 # running index of the state being printed
+    for i in states: # NOTE(review): states is not defined or imported in the part of utils.py visible here (it lives in Parsing.py) -- confirm this does not raise NameError
+        print(j)
+        j += 1
+        i.output()