-
Nicholas Robinson authored
- condensed getting the last rule, no more need for copying the stack - children are now right way round, not reversed
Nicholas Robinson authored- condensed getting the last rule, no more need for copying the stack - children are now right way round, not reversed
Parsing.py 5.35 KiB
import string
import sys
from utils import *
from Scanning import Token
##################### Node & State ##########################################
# TODO: different types for nodes
# children: Node[]
# name: string
# lex: string
class Node():
def __init__(self, children, name, lex):
self.children = children
self.name = name
self.lex = lex
# A State has a list of rules, and a num indicating which state it is
# num: int
# rules: string[]
class State():
def __init__(self):
self.num = -1
self.rules = []
# addRule simply adds the string rule to rules
# rule: string
def addRule(self, i, rule):
self.num = i
self.rules.append(rule)
# go to next state depending on what states you've already visited
# and what rule you are going to use next
# statesVisited: int[]
# ruleToUse: string
def goToState(self, statesVisited, ruleToUse):
nextState = int(ruleToUse[3])
statesVisited.append(nextState)
# statesVisited: int[]
# next: string
# rulesToOutput: string
def trans(self, statesVisited, next, rulesToOutput):
ruleToUse = []
for r in self.rules:
if r[1] == next:
ruleToUse = r
break
if not ruleToUse:
raise ValueError('No rule found for next=({}) in self.rules'.format(next),
self.rules, statesVisited, rulesToOutput)
# what type of rule is it? shift or reduce?
if (ruleToUse[2] == 'shift'):
self.goToState(statesVisited, ruleToUse)
return True
elif (ruleToUse[2] == 'reduce'):
# print('reduce')
ruleUsed = int(ruleToUse[3])
cfgrule = cfgrules[ruleUsed]
toPop = len(cfgrule) - 1
rulesToOutput.append(' '.join(cfgrule))
newChildren = [] # Node[]
while (toPop != 0):
statesVisited.pop()
top = theStack.pop()
newChildren.insert(0, Node(top.children, top.name, top.lex))
toPop -= 1
newNode = Node(newChildren, cfgrule[0], '')
theStack.append(newNode)
return False
else:
raise ValueError('rule neither shift nor reduce.')
def output(self):
print('state ' + self.num)
print('rules: ['),
for r in self.rules:
print(r + ', '),
print(']')
##################### Globals ###############################
# TODO: are these okay as globals?? Also, states is manually set up as the length, should we fix this?
cfgrules = [] # String[][]
lr1trans = [] # String[][]
theStack = [] # Node[]
states = [] # State[]
##################### Main Functions ##########################################
# input: list of strings
def checkSequence(tokens):
statesVisited = [] # int[]
rulesToOutput = [] # string[]
getNext = False # boolean
i = 0 # int
statesVisited.append(0)
while 1:
curState = int(statesVisited[-1])
if getNext:
# print('*** GET NEXT')
rulesToOutput.append(tokens[i].name + ' ' + tokens[i].lex)
newNode = Node([], tokens[i].name, tokens[i].lex)
theStack.append(newNode)
# get the next tokens
if i >= len(tokens) - 1:
break
i += 1
# print('>>> NEXT TOKEN WE SEE: ' + tokens[i].name + ' ' + tokens[i].lex)
elif theStack:
# print('*** DONT GET NEXT')
getNext = states[curState].trans(statesVisited, theStack[-1].name, rulesToOutput)
curState = int(statesVisited[-1])
getNext = states[curState].trans(statesVisited, tokens[i].name, rulesToOutput)
lastRule = True
for index, token in enumerate(theStack):
if token.name != cfgrules[0][index+1]:
lastRule = False
break
if lastRule:
rulesToOutput.append(' '.join(cfgrules[0]))
else:
raise ValueError('last rule not found')
return rulesToOutput
# set up grammar from files and set up states
def setUpGrammar():
global cfgrules, lr1trans, theStack, states
# reset global vars (TODO: there has to be a better way???)
cfgrules = []
lr1trans = []
theStack = []
states = []
# one big file
with open('cfg/trans.txt', 'r') as f:
lines = f.read().splitlines()
i = int(lines[0]) + 1 # terminals
i += int(lines[i]) + 1 # non terminals
i += 1 # starting non terminal
# cfg rules
for r in range(int(lines[i])):
cfgrules.append(lines[i+r+1].split())
# num of states
i += int(lines[i]) + 1
states = [State() for _ in range(int(lines[i]))]
# transitions
i += 1
for t in range(int(lines[i])):
lr1trans.append(lines[i+t+1].split())
# states
for t in lr1trans:
transState = int(t[0])
states[transState].addRule(transState, t)
# main parse function
# tokens: Token[]
def parse(tokens):
setUpGrammar()
# add BOF & EOF to token list
tokens.insert(0, Token('BOF', 'BOF'))
tokens.append(Token('EOF', 'EOF'))
try:
result = checkSequence(tokens)
except ValueError as err:
return (None, err)
# printNodeStack(theStack)
return (result, "success")