Parsing.py 4.80 KiB
import string
import sys
from Scanning import Token
from ParseNode import Node
##################### State ##########################################
# A State has a list of rules, and a num indicating which state it is
# num: int
# rules: string[]
class State():
def __init__(self):
self.num = -1
self.rules = []
def __str__(self):
print('state ' + self.num)
print('rules: ['),
for r in self.rules:
print(r + ', '),
print(']')
# addRule simply adds the string rule to rules
# rule: string
def addRule(self, i, rule):
self.num = i
self.rules.append(rule)
# go to next state depending on what states you've already visited
# and what rule you are going to use next
# statesVisited: int[]
# ruleToUse: string
def goToState(self, statesVisited, ruleToUse):
nextState = int(ruleToUse[3])
statesVisited.append(nextState)
# statesVisited: int[]
# next: string
# rulesToOutput: string
def trans(self, statesVisited, next, rulesToOutput):
ruleToUse = []
for r in self.rules:
if r[1] == next:
ruleToUse = r
break
if not ruleToUse:
raise ValueError('No rule found for next=({}) in self.rules'.format(next),
theStack, rulesToOutput)
# what type of rule is it? shift or reduce?
if (ruleToUse[2] == 'shift'):
self.goToState(statesVisited, ruleToUse)
return True
elif (ruleToUse[2] == 'reduce'):
ruleUsed = int(ruleToUse[3])
cfgrule = cfgrules[ruleUsed]
toPop = len(cfgrule) - 1
rulesToOutput.append(' '.join(cfgrule))
newChildren = [] # Node[]
while (toPop != 0):
statesVisited.pop()
top = theStack.pop()
newChildren.insert(0, Node(top.name, top.lex, top.children))
toPop -= 1
newNode = Node(cfgrule[0], '', newChildren)
theStack.append(newNode)
return False
else:
raise ValueError('rule neither shift nor reduce.', ruleToUse)
##################### Globals ###############################
cfgrules = [] # String[][]
theStack = [] # Node[]
states = [] # State[]
##################### Main Functions ##########################################
# input: list of strings
def checkSequence(tokens):
statesVisited = [] # int[]
rulesToOutput = [] # string[]
getNext = False # boolean
i = 0 # int
statesVisited.append(0)
while 1:
curState = int(statesVisited[-1])
if getNext:
rulesToOutput.append(tokens[i].name + ' ' + tokens[i].lex)
newNode = Node(tokens[i].name, tokens[i].lex, [])
theStack.append(newNode)
# get the next tokens
if i >= len(tokens) - 1:
break
i += 1
elif theStack:
getNext = states[curState].trans(statesVisited, theStack[-1].name, rulesToOutput)
curState = int(statesVisited[-1])
getNext = states[curState].trans(statesVisited, tokens[i].name, rulesToOutput)
lastRule = True
for index, token in enumerate(theStack):
if token.name != cfgrules[0][index+1]:
lastRule = False
break
if lastRule:
result = Node(cfgrules[0][0], '', theStack)
rulesToOutput.append(' '.join(cfgrules[0]))
else:
raise ValueError('last rule not found', theStack)
return result
# TODO: set this grammar up once instead of for each file
# set up grammar from files and set up states
def setUpGrammar():
global cfgrules, theStack, states
cfgrules = [] # String[][]
theStack = [] # Node[]
states = [] # State[]
lr1trans = [] # String[][]
# one big file
with open('cfg/trans.txt', 'r') as f:
lines = f.read().splitlines()
i = int(lines[0]) + 1 # terminals
i += int(lines[i]) + 1 # non terminals
i += 1 # starting non terminal
# cfg rules
for r in range(int(lines[i])):
cfgrules.append(lines[i+r+1].split())
# num of states
i += int(lines[i]) + 1
states = [State() for _ in range(int(lines[i]))]
# transitions
i += 1
for t in range(int(lines[i])):
lr1trans.append(lines[i+t+1].split())
# states
for t in lr1trans:
transState = int(t[0])
states[transState].addRule(transState, t)
# main parse function
# tokens: Token[]
def parse(tokens):
setUpGrammar()
# add BOF & EOF to token list
tokens.insert(0, Token('BOF', 'BOF'))
tokens.append(Token('EOF', 'EOF'))
try:
result = checkSequence(tokens)
except ValueError as err:
return (None, err)
return (result, 'success')