Skip to content
Snippets Groups Projects
Commit 6e9d22f8 authored by pycsham's avatar pycsham
Browse files

fixed some bugs for Scanning.py to make sure the scanner works on all J1 test...

fixed some bugs for Scanning.py to make sure the scanner works on all J1 test cases, with some minor exceptions
parent 4844d5d7
No related branches found
No related tags found
No related merge requests found
......@@ -75,7 +75,6 @@ wrongJavaKeyWordDict = {
'float',
'goto',
'long',
'new',
'private',
'strictfp',
'super',
......@@ -86,6 +85,7 @@ wrongJavaKeyWordDict = {
'transient',
'try',
'volatile',
'_'
}
# a set that contains unused operators or separators
......@@ -154,18 +154,25 @@ JoosDFATokens = set([
# Note: recognize keywords as ID, then convert them to different tokens later
def JoosTransition(input, state):
if (state == 'WHITESPACE'):
if (input == ' '):
if (input == ' ' or ord(input) == 9 or ord(input) == 12):
return 'WHITESPACE'
else:
return None
elif state == 'NEWLINE':
if input in ('\n', '\r'):
if input == '\n':
return 'NEWLINE'
else:
return None
if input == '\r':
return 'NEWLINEC'
return None
elif state == 'NEWLINEC':
if input == '\r' or ord(input) == 10:
return 'NEWLINEC'
if input == '\n':
return 'NEWLINE'
return None
elif (state == 'START'):
if (input.isalpha()):
if (input.isalpha() or input == '_' or input == '$'):
return 'ID'
if (input.isdigit()):
if (input == '0'):
......@@ -173,11 +180,13 @@ def JoosTransition(input, state):
return 'NUM'
# whitespace
if (input == ' '):
if (input == ' ' or ord(input) == 9 or ord(input) == 12):
return 'WHITESPACE'
# newline
if input in ('\n', '\r'):
if input == '\n':
return 'NEWLINE'
if input == '\r':
return 'NEWLINEC'
# operators
if (input == '='):
......@@ -235,7 +244,7 @@ def JoosTransition(input, state):
# dealing with compound names
if input == '.':
return 'HALFCOMP'
if (input.isalpha() or input.isdigit()):
if (input.isalpha() or input.isdigit() or input == '_' or input == '$'):
return 'ID'
return None
......@@ -245,6 +254,8 @@ def JoosTransition(input, state):
return 'COMPID'
if input == '*':
return 'IMPORTALL'
if input == '/':
return 'DIV'
return None
elif state == 'COMPID':
if input == '.':
......@@ -314,13 +325,25 @@ def JoosTransition(input, state):
return 'LCOMMENT'
return None
elif(state == 'LCOMMENT'):
if(input == '*'):
return 'LCOMMENT'
return None
elif(state == 'MULT'):
if(input == '/'):
if (input == '*'):
return 'LCOM2'
return 'LCOMMENT'
elif state == 'LCOM2':
if input == '/':
return 'RCOMMENT'
return None
if input == '*':
return 'LCOM3'
return 'LCOMMENT'
elif state == 'LCOM3':
if input == '/':
return 'RCOMMENT'
return 'LCOMMENT'
elif state == 'COMMENT':
if input == '\n':
return 'NEWLINE'
if input == '\r':
return 'NEWLINEC'
return 'COMMENT'
# Recognizing unused operators to be filtered later
elif(state == 'PERIOD'):
......@@ -343,7 +366,7 @@ def JoosTransition(input, state):
##################### Other DFA elements ##############################
#TODO: remove alphabets since it's unnecessary in our DFA implementation
specialChars = set(list(".;:,@{}()[]<>!?+-*/&|^%=''\"\\"))
JoosAccept = JoosDFATokens.union({'WHITESPACE', 'COMMENT', 'NEWLINE', 'LCOMMENT', 'RCOMMENT'})
JoosAccept = JoosDFATokens.union({'WHITESPACE', 'COMMENT', 'NEWLINE', 'LCOMMENT', 'RCOMMENT', 'NEWLINEC', 'LCOM2', 'LCOM3'})
JoosStates = JoosAccept.union({'START', 'PERIOD2', 'HALFCOMP'})
JoosAlphabet = set(string.ascii_lowercase).union(set(string.ascii_uppercase)).union(set(string.digits)).union(specialChars)
......@@ -373,6 +396,7 @@ def SMM(input, dfa):
seenInput = ""
state = dfa.start
while (input):
# print("input is: ", input[0])
if ord(input[0]) > 127:
return (None, "Not ASCII")
newState = dfa.transition(input[0], state)
......@@ -399,8 +423,6 @@ def scan(input):
if (tokens):
# Handle erroneous tokens (return None and error string)
# Preprocess for comment removal
endOfLineCommentFlag = False
multiLineCommentFlag = False
indexRange = len(tokens)
for index,token in enumerate(tokens):
......@@ -416,20 +438,11 @@ def scan(input):
elif token.name in wrongJavaOpDict:
return (None, "operator in Java but not in Joos")
# End of Line Comment flagging
elif token.name == 'COMMENT' and not multiLineCommentFlag:
endOfLineCommentFlag = True
elif token.name == 'LCOMMENT' and not endOfLineCommentFlag:
# Checking if the multi line comment has a closing tag
if token.name == 'LCOMMENT':
multiLineCommentFlag = True
elif token.name == 'RCOMMENT':
if token.name == 'RCOMMENT':
multiLineCommentFlag = False
elif token.name == 'NEWLINE':
endOfLineCommentFlag = False
if endOfLineCommentFlag:
token.name = 'COMMENT'
elif multiLineCommentFlag:
token.name = 'LCOMMENT'
......@@ -440,5 +453,5 @@ def scan(input):
idsToTokens(tokens)
# remove whitespace, newline characters and comments
tokens = list(filter(lambda t: t.name not in ("WHITESPACE", "COMMENT", 'LCOMMENT', 'RCOMMENT', 'NEWLINE'), tokens))
tokens = list(filter(lambda t: t.name not in ("WHITESPACE", "COMMENT", 'LCOMMENT', 'RCOMMENT', 'NEWLINE', 'NEWLINEC', 'LCOM2', 'LCOM3'), tokens))
return (tokens, "success")
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment