Skip to content
Snippets Groups Projects
Commit ef21ba75 authored by Xun Yang's avatar Xun Yang
Browse files

scanning rough structure

parents
No related branches found
No related tags found
No related merge requests found
import re # regex
import string
# class ScanDFA():
# scans the input line by line, and char by char for each line
# explicitly recognize whitespace when scanning, but discard whitespace tokens at the end
################# Joos Token Names in 5 categories ##########################
# Names of the token kinds known to the Joos scanner, grouped by category.
# NOTE(review): JoosTransition returns 'LPAREN' and 'RPAREN', neither of which
# is a member of this set -- confirm whether 'L(' was meant to be 'LPAREN'.
JoosTokens = {
    # Literals
    'INT',
    # Operators
    '+',
    # Separators
    'L(',
    # Keywords
    'ID',
    '',
    'SQL',
    'Git',
    'Tableau',
    'SAS',
}
##################### Token ##########################################
# A Token is a pair (name, lexeme)
class Token:
    """A scanned token: a pair ``(name, lexeme)``.

    Attributes:
        name: the token kind (the DFA state in which the lexeme was accepted).
        lex: the lexeme, i.e. the input consumed to produce this token.
    """

    def __init__(self, name, lex):
        self.name = name
        self.lex = lex

    def __repr__(self):
        # Readable form for debugging scanner output.
        return "Token({!r}, {!r})".format(self.name, self.lex)
##################### Transition function ############################
# Pattern consulted by JoosTransition to decide whether the input is an 'ID'.
# NOTE(review): this matches a ddd-ddd-dddd digit group rather than an
# identifier, and SMM passes one character at a time, so it can never match
# there -- confirm the intended identifier pattern.
idRegex = re.compile(r'\d\d\d-\d\d\d-\d\d\d\d')


def JoosTransition(input, state):
    """Return the state reached from *state* on *input*.

    Args:
        input: the symbol being consumed (shadows the builtin ``input``; the
            name is kept because callers may pass it by keyword).
        state: the current DFA state.

    Returns:
        ``'LPAREN'``, ``'RPAREN'`` or ``'ID'`` when a transition out of
        ``'START'`` is defined for *input*; ``None`` when no transition is
        defined (including every state other than ``'START'``).
    """
    # Only transitions out of the start state exist so far.
    if state != 'START':
        return None
    if input == '(':
        return 'LPAREN'
    if input == ')':
        return 'RPAREN'
    if idRegex.fullmatch(input) is not None:
        return 'ID'
    return None
##################### Other DFA elements ##############################
######################### DFA #######################################
class DFA:
    """Deterministic finite automaton driven by a transition function.

    Attributes:
        states: collection of state names.
        alphabet: collection of input symbols.
        transition: callable invoked as ``transition(symbol, state)`` that
            returns the next state, or ``None`` when no transition is defined.
        start: the start state.
        accepting: collection of accepting states.
    """

    def __init__(self, states, alphabet, transition, start, accepting):
        self.states = states
        self.alphabet = alphabet
        self.transition = transition
        self.start = start
        self.accepting = accepting

    def recognize(self, input, state=None):
        """Return True if consuming *input* from *state* ends in an accepting state.

        Args:
            input: sequence of symbols to consume (may be empty).
            state: state to start from; defaults to ``self.start``.

        Returns:
            ``True`` when every symbol has a defined transition and the final
            state is in ``self.accepting``; ``False`` otherwise.
        """
        if state is None:
            state = self.start
        # Transition objects may expose an explicit ``isDefinedAt`` predicate;
        # plain functions signal "undefined" by returning None instead.
        is_defined = getattr(self.transition, 'isDefinedAt', None)
        for symbol in input:
            if is_defined is not None and not is_defined(symbol, state):
                return False
            # Argument order (symbol, state) matches JoosTransition and SMM.
            state = self.transition(symbol, state)
            if state is None:
                return False
        # All input consumed: accept only if we stopped in an accepting state.
        return state in self.accepting
# The scanner automaton for Joos: token names double as states and as the
# accepting set, and JoosTransition supplies the moves.
# NOTE(review): the start state 'START' is not a member of JoosTokens, so it
# is not in ``states`` -- confirm whether that is intended.
JoosDFA = DFA(
    states=JoosTokens,
    # TODO: add operand and separator characters to alphabet
    alphabet=(
        set(string.ascii_lowercase)
        | set(string.ascii_uppercase)
        | set(".;,{}()[]<>!+-*/=''\"\\")
    ),
    start='START',
    # DFA.__init__ names this parameter ``accepting`` (not ``accept``).
    accepting=JoosTokens,
    transition=JoosTransition,
)
################### Simplified Maximal Munch ###########################
def SMM(input, dfa):
    """Tokenize *input* with simplified maximal munch.

    Starting from ``dfa.start``, symbols are consumed for as long as
    ``dfa.transition(symbol, state)`` yields a next state (``None`` means no
    transition is defined). When the run stops, a token is emitted if the
    current state is accepting; otherwise an error is reported and scanning
    stops.

    Args:
        input: indexable sequence of symbols (e.g. a string).
        dfa: object exposing ``start``, ``accepting`` and
            ``transition(symbol, state)``.

    Returns:
        List of ``Token(state, lexeme)`` scanned before the end of input or
        the first error; each lexeme is the list of consumed symbols.
    """
    # list of tokens scanned
    scanned = []
    pos = 0
    end = len(input)
    while pos < end:
        state = dfa.start
        seen = []
        # Munch as far as the DFA allows from the current position.
        while pos < end:
            next_state = dfa.transition(input[pos], state)
            if next_state is None:
                break
            state = next_state
            seen.append(input[pos])
            pos += 1
        # Require progress so an accepting start state cannot emit empty
        # tokens forever.
        if seen and state in dfa.accepting:
            scanned.append(Token(state, seen))
        else:
            print("ERROR on Maximal Munch")
            # Nothing more can be consumed from here; stop instead of looping.
            break
    return scanned
################# Scan ################################################
def scan(input):
    """Scan *input* into Joos tokens, discarding whitespace and comments.

    Args:
        input: the program text to tokenize.

    Returns:
        List of tokens produced by ``SMM`` whose ``name`` is neither
        ``"WHITESPACE"`` nor ``"COMMENT"``.
    """
    tokens = SMM(input, JoosDFA)
    # TODO: handle edge cases (e.g. check int range, error on ++)
    # remove whitespace and comments
    # Build a list rather than a one-shot filter iterator so callers can
    # index, measure and re-iterate the result.
    return [t for t in tokens if t.name not in ("WHITESPACE", "COMMENT")]
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment