Skip to content
Snippets Groups Projects
Commit ecd5a07a authored by pycsham
Browse files

Made changes to scanner and grammar after going over around 3/4 of error...

Made changes to scanner and grammar after going over around 3/4 of error public test cases. (e.g. bitwise operations, integer range checking)
parent e0c1196b
No related branches found
No related tags found
No related merge requests found
...@@ -85,7 +85,11 @@ wrongJavaKeyWordDict = { ...@@ -85,7 +85,11 @@ wrongJavaKeyWordDict = {
'transient', 'transient',
'try', 'try',
'volatile', 'volatile',
'_' '_',
'this',
'super',
'Long',
'Float'
} }
# a set that contains unsed operators or seperators # a set that contains unsed operators or seperators
...@@ -126,9 +130,6 @@ JoosDFATokens = set([ ...@@ -126,9 +130,6 @@ JoosDFATokens = set([
'SUB', # - 'SUB', # -
'MULT', # * 'MULT', # *
'DIV', # / 'DIV', # /
'BITAND', # &
'BITOR', # |
'EXP', # ^
'MOD', # % 'MOD', # %
# Separators: # Separators:
...@@ -142,9 +143,8 @@ JoosDFATokens = set([ ...@@ -142,9 +143,8 @@ JoosDFATokens = set([
'COMMA', # , 'COMMA', # ,
'PERIOD', # . 'PERIOD', # .
# Unused Sep and Operators that we need to recognize # Unused Seperator (TODO: refactor this)
'ELLIPSIS', # ... 'ELLIPSIS', # ...
'ARROW', # ->
]) ])
...@@ -209,8 +209,6 @@ def JoosTransition(input, state): ...@@ -209,8 +209,6 @@ def JoosTransition(input, state):
return 'BITAND' return 'BITAND'
if (input == '|'): if (input == '|'):
return 'BITOR' return 'BITOR'
if (input == '^'):
return 'EXP'
if (input == '%'): if (input == '%'):
return 'MOD' return 'MOD'
...@@ -240,6 +238,31 @@ def JoosTransition(input, state): ...@@ -240,6 +238,31 @@ def JoosTransition(input, state):
if (input == '\''): if (input == '\''):
return 'LCHAR' return 'LCHAR'
# Handling all operators that are not allowed in Joos (some cases are handled elsewhere)
elif state == 'ADD':
if input == '+':
return 'OPDISCARD'
if input == '=':
return 'OPDISCARD'
return None
elif state == 'SUB':
if (input == '>'):
return 'OPDISCARD'
if input == '-':
return 'OPDISCARD'
if input == '=':
return 'OPDISCARD'
return None
elif state == 'MULT':
if input == '=':
return 'OPDISCARD'
return None
elif state == 'MOD':
if input == '=':
return 'OPDISCARD'
return None
elif (state == 'ID'): elif (state == 'ID'):
# dealing with compound names # dealing with compound names
if input == '.': if input == '.':
...@@ -267,6 +290,8 @@ def JoosTransition(input, state): ...@@ -267,6 +290,8 @@ def JoosTransition(input, state):
elif (state == 'NUM'): elif (state == 'NUM'):
if(input.isdigit()): if(input.isdigit()):
return 'NUM' return 'NUM'
if input == '.':
return 'FLOAT' # not accepted
return None return None
# string literal # string literal
...@@ -277,6 +302,8 @@ def JoosTransition(input, state): ...@@ -277,6 +302,8 @@ def JoosTransition(input, state):
return 'LITERALSTRING' return 'LITERALSTRING'
return 'LSTRING' return 'LSTRING'
elif (state == 'STRINGESC'): elif (state == 'STRINGESC'):
if input == 'u':
return 'UNICODE' #going to be discarded
return 'LSTRING' return 'LSTRING'
# char literal # char literal
...@@ -299,10 +326,14 @@ def JoosTransition(input, state): ...@@ -299,10 +326,14 @@ def JoosTransition(input, state):
elif (state == 'GT'): elif (state == 'GT'):
if (input == '='): if (input == '='):
return 'GE' return 'GE'
elif input == '>':
return 'OPDISCARD'
return None return None
elif (state == 'LT'): elif (state == 'LT'):
if (input == '='): if (input == '='):
return 'LE' return 'LE'
elif input == '<':
return 'OPDISCARD'
return None return None
elif (state == 'NOT'): elif (state == 'NOT'):
if (input == '='): if (input == '='):
...@@ -311,11 +342,16 @@ def JoosTransition(input, state): ...@@ -311,11 +342,16 @@ def JoosTransition(input, state):
elif (state == 'BITAND'): elif (state == 'BITAND'):
if (input == '&'): if (input == '&'):
return 'AND' return 'AND'
elif input == '=':
return 'OPDISCARD'
return None return None
elif (state == 'BITOR'): elif (state == 'BITOR'):
if (input == '|'): if (input == '|'):
return 'OR' return 'OR'
elif input == '=':
return 'OPDISCARD'
return None return None
# Comments # Comments
elif(state == 'DIV'): elif(state == 'DIV'):
...@@ -323,6 +359,8 @@ def JoosTransition(input, state): ...@@ -323,6 +359,8 @@ def JoosTransition(input, state):
return 'COMMENT' return 'COMMENT'
elif (input == '*'): elif (input == '*'):
return 'LCOMMENT' return 'LCOMMENT'
elif input == '=':
return 'OPDISCARD'
return None return None
elif(state == 'LCOMMENT'): elif(state == 'LCOMMENT'):
if (input == '*'): if (input == '*'):
...@@ -354,12 +392,14 @@ def JoosTransition(input, state): ...@@ -354,12 +392,14 @@ def JoosTransition(input, state):
if(input == '.'): if(input == '.'):
return 'ELLIPSIS' return 'ELLIPSIS'
return None return None
elif (state == 'SUB'):
if (input == '>'): # Handling hexidecimal literals (error case)
return 'ARROW' elif state == 'ZERO':
return None if input == 'x':
return 'HEXLITERAL'
elif input == '.':
return 'FLOAT'
else: else:
return None return None
...@@ -367,7 +407,7 @@ def JoosTransition(input, state): ...@@ -367,7 +407,7 @@ def JoosTransition(input, state):
#TODO: remove alphabets since it's unecessary in our DFA implementation #TODO: remove alphabets since it's unecessary in our DFA implementation
specialChars = set(list(".;:,@{}()[]<>!?+-*/&|^%=''\"\\")) specialChars = set(list(".;:,@{}()[]<>!?+-*/&|^%=''\"\\"))
JoosAccept = JoosDFATokens.union({'WHITESPACE', 'COMMENT', 'NEWLINE', 'LCOMMENT', 'RCOMMENT', 'NEWLINEC', 'LCOM2', 'LCOM3'}) JoosAccept = JoosDFATokens.union({'WHITESPACE', 'COMMENT', 'NEWLINE', 'LCOMMENT', 'RCOMMENT', 'NEWLINEC', 'LCOM2', 'LCOM3'})
JoosStates = JoosAccept.union({'START', 'PERIOD2', 'HALFCOMP'}) JoosStates = JoosAccept.union({'START', 'PERIOD2', 'HALFCOMP', 'HEXLITERAL', 'OPDISCARD', 'BITAND', 'BITOR', 'FLOAT', 'UNICODE'})
JoosAlphabet = set(string.ascii_lowercase).union(set(string.ascii_uppercase)).union(set(string.digits)).union(specialChars) JoosAlphabet = set(string.ascii_lowercase).union(set(string.ascii_uppercase)).union(set(string.digits)).union(specialChars)
######################### DFA ####################################### ######################### DFA #######################################
...@@ -431,12 +471,25 @@ def scan(input): ...@@ -431,12 +471,25 @@ def scan(input):
if index < indexRange-1: if index < indexRange-1:
if tokens[index+1].name == 'NUM': if tokens[index+1].name == 'NUM':
return (None, "wrong integer literal: starts with 0") return (None, "wrong integer literal: starts with 0")
# Checking integer range (does not cover all edge cases)
elif token.name == 'NUM' and index > 0 and tokens[index-1].name == 'SUB' and int(token.lex) > 2147483648:
return (None, "integer too small")
elif token.name == 'NUM' and int(token.lex) > 2147483647:
return (None, "interger too large")
# dealing with keywords in Java but not in Joos # dealing with keywords in Java but not in Joos
elif token.name == 'ID' and token.lex in wrongJavaKeyWordDict: elif token.name == 'ID' and token.lex in wrongJavaKeyWordDict:
return (None, "keyword in Java but not in Joos") return (None, "keyword in Java but not in Joos")
# dealing with operators and seperators in Java but not in Joos # dealing with operators and seperators in Java but not in Joos
elif token.name in wrongJavaOpDict: elif token.name in wrongJavaOpDict:
return (None, "operator in Java but not in Joos") return (None, "operator in Java but not in Joos")
# Checking wrong keywords in compIDs
elif token.name == 'COMPID':
temp = token.lex.split('.')
if temp[0] in wrongJavaKeyWordDict:
return (None, "wrong keyword in comp id")
# Checking if the multi line comment has a closing tag # Checking if the multi line comment has a closing tag
if token.name == 'LCOMMENT': if token.name == 'LCOMMENT':
......
64 63
EOF EOF
BOF BOF
INTERFACE INTERFACE
...@@ -24,7 +24,6 @@ PUBLIC ...@@ -24,7 +24,6 @@ PUBLIC
RETURN RETURN
SHORT SHORT
STATIC STATIC
THIS
VOID VOID
WHILE WHILE
ID ID
...@@ -160,7 +159,7 @@ primaryNoArrayAccess ...@@ -160,7 +159,7 @@ primaryNoArrayAccess
arrayID arrayID
methodID methodID
start start
200 199
start BOF packageDcl importDcls topDcls EOF start BOF packageDcl importDcls topDcls EOF
packageDcl PACKAGE name SEMICO packageDcl PACKAGE name SEMICO
packageDcl PACKAGE ID SEMICO packageDcl PACKAGE ID SEMICO
...@@ -300,7 +299,6 @@ arrayCreationExpr NEW ID LSQRBRACK RSQRBRACK ...@@ -300,7 +299,6 @@ arrayCreationExpr NEW ID LSQRBRACK RSQRBRACK
primary arrayAccess primary arrayAccess
primary primaryNoArrayAccess primary primaryNoArrayAccess
primaryNoArrayAccess literal primaryNoArrayAccess literal
primaryNoArrayAccess THIS
primaryNoArrayAccess LPAREN expr RPAREN primaryNoArrayAccess LPAREN expr RPAREN
primaryNoArrayAccess classInstanceCreate primaryNoArrayAccess classInstanceCreate
primaryNoArrayAccess fieldAccess primaryNoArrayAccess fieldAccess
......
Source diff could not be displayed: it is too large. Options to address this: view the blob.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment