diff --git a/Scanning.py b/Scanning.py index 9bc615ff837102d29fc4ca8057bc957d70c33705..0328c0b9be4e83f231b05fad3f2cf7870ec71ff5 100644 --- a/Scanning.py +++ b/Scanning.py @@ -61,29 +61,29 @@ def idsToTokens(tokens): # a set that contains keywords that are in Java but not in Joos wrongJavaKeyWordDict = { - 'assert', - 'break', - 'case', - 'catch', - 'const', - 'continue', - 'default', - 'do', - 'double', - 'enum', - 'finally', - 'float', - 'goto', - 'long', - 'private', - 'strictfp', - 'super', - 'switch', - 'synchronized', - 'throw', - 'throws', - 'transient', - 'try', + 'assert', + 'break', + 'case', + 'catch', + 'const', + 'continue', + 'default', + 'do', + 'double', + 'enum', + 'finally', + 'float', + 'goto', + 'long', + 'private', + 'strictfp', + 'super', + 'switch', + 'synchronized', + 'throw', + 'throws', + 'transient', + 'try', 'volatile', '_', 'this', @@ -92,7 +92,7 @@ wrongJavaKeyWordDict = { 'Float' } -# a set that contains unsed operators or seperators +# a set that contains unsed operators or seperators wrongJavaOpDict = { 'ELLIPSIS', 'ARROW' @@ -103,13 +103,13 @@ wrongJavaOpDict = { ################# Joos DFA Tokens ################################### JoosDFATokens = set([ # Identifiers or Keywords (keywords are mapped to their token name in idToTokenDict above) - 'ID', + 'ID', # Literals and names (note: 'this' is considered as a keyword) 'NUM', # number (excludes 0) - 'ZERO', # 0 + 'ZERO', # 0 'LITERALBOOL', # true or false - 'LITERALCHAR', # character e.g. 'c', includes escape characters? + 'LITERALCHAR', # character e.g. 'c', includes escape characters? 'LITERALSTRING', # string e.g. "hello", includes escape sequences 'NULL', # null 'COMPID', # compound ids e.g. java.util.vectors @@ -170,7 +170,7 @@ def JoosTransition(input, state): if input == '\n': return 'NEWLINE' return None - + elif (state == 'START'): if (input.isalpha() or input == '_' or input == '$'): return 'ID' @@ -231,7 +231,7 @@ def JoosTransition(input, state): return 'COMMA' if (input == '.'): return 'PERIOD' - + # literals if (input == '\"'): return 'LSTRING' @@ -262,7 +262,7 @@ def JoosTransition(input, state): return 'OPDISCARD' return None - + elif (state == 'ID'): # dealing with compound names if input == '.': @@ -270,7 +270,7 @@ def JoosTransition(input, state): if (input.isalpha() or input.isdigit() or input == '_' or input == '$'): return 'ID' return None - + # Compound names elif state == 'HALFCOMP': if input.isalpha(): @@ -286,7 +286,7 @@ def JoosTransition(input, state): if input.isalpha() or input.isdigit(): return 'COMPID' return None - + elif (state == 'NUM'): if(input.isdigit()): return 'NUM' @@ -304,6 +304,8 @@ def JoosTransition(input, state): elif (state == 'STRINGESC'): if input == 'u': return 'UNICODE' #going to be discarded + if input not in ('n', 'r', 't', 'v', '\\','\'', '"', '?', 'b' ): + return None return 'LSTRING' # char literal @@ -312,6 +314,8 @@ def JoosTransition(input, state): return 'CHARESC' return 'CHAREND' elif (state == 'CHARESC'): + if input not in ('n', 'r', 't', 'v', '\\','\'', '"', '?', 'b' ): + return None return 'CHAREND' elif (state == 'CHAREND'): if (input == '\''): @@ -352,7 +356,7 @@ def JoosTransition(input, state): return 'OPDISCARD' return None - + # Comments elif(state == 'DIV'): if (input == '/'): @@ -392,14 +396,14 @@ def JoosTransition(input, state): if(input == '.'): return 'ELLIPSIS' return None - + # Handling hexidecimal literals (error case) elif state == 'ZERO': if input == 'x': return 'HEXLITERAL' elif input == '.': return 'FLOAT' - + else: return None @@ -460,7 +464,7 @@ def scan(input): # Handling error in munching if (tokens is None): return (None, "Error on Scanning character: " + str(result[1])) - + if (tokens): # Handle erroneous tokens (return None and error string) multiLineCommentFlag = False @@ -493,14 +497,14 @@ def scan(input): return (None, "wrong keyword in comp id") if i is not 0 and (t == 'Class' or t == 'class'): return (None, "wrong keyword in comp id") - + # Checking if the multi line comment has a closing tag if token.name == 'LCOMMENT': multiLineCommentFlag = True if token.name == 'RCOMMENT': multiLineCommentFlag = False - - + + # Checking for missing closing */ for multi line comments if multiLineCommentFlag is True: