diff --git a/Scanning.py b/Scanning.py index fe9181a06e474290998fb64d54d2f1c7d021ef59..38a7e833ab6adc6a73addb0a853ccfc4ef41b6f5 100644 --- a/Scanning.py +++ b/Scanning.py @@ -58,8 +58,8 @@ def idsToTokens(tokens): t.name = idToTokenDict.get(t.lex) -# a set that contains keywords that are in java but not in joos -wrongKeyWordDict = { +# a set that contains keywords that are in Java but not in Joos +wrongJavaKeyWordDict = { 'assert', 'break', 'case', @@ -88,7 +88,6 @@ wrongKeyWordDict = { 'volatile', } - ######################## DFA Stuff ################################### ################# Joos DFA Tokens ################################### @@ -97,12 +96,12 @@ JoosDFATokens = set([ 'ID', # Literals and names (note: 'this' is considered as a keyword) - 'NUM', # number + 'NUM', # number (excludes 0) + 'ZERO', # 0 'LITERALBOOL', # true or false 'LITERALCHAR', # character e.g. 'c', includes escape characters? 'LITERALSTRING', # string e.g. "hello", includes escape sequences 'NULL', # null - 'COMPOUND', # compound name e.g. System.io.print # Operators: 'ASSIGN', # = @@ -140,6 +139,10 @@ JoosDFATokens = set([ 'ELLIPSIS', # ... 'AT', # @ 'DOUBLECO', # :: + + # Paragraph Comments: + 'LCOMMENT', # /* or /** + 'RCOMMENT', # */ ]) ##################### Transition function ############################ @@ -148,7 +151,7 @@ JoosDFATokens = set([ # Note: recognize keywords as ID, then convert them to different tokens later def JoosTransition(input, state): if (state == 'WHITESPACE'): - if (input in (' ', '\n')): + if (input in (' ', '\n', '\r')): return 'WHITESPACE' else: return None @@ -157,13 +160,14 @@ def JoosTransition(input, state): if (input.isalpha()): return 'ID' if (input.isdigit()): + if (input == '0'): + return 'ZERO' return 'NUM' # whitespace and comments - if (input == ' '): - return 'WHITESPACE' - if (input == '\n'): + if (input in (' ', '\n', '\r')): return 'WHITESPACE' + # operators if (input == '='): @@ -228,6 +232,11 @@ def JoosTransition(input, state): return 'ID' return None + elif (state == 'NUM'): + if(input.isdigit()): + return 'NUM' + return None + # string literal elif (state == 'LSTRING'): if (input == '\\'): @@ -280,7 +289,7 @@ def JoosTransition(input, state): return 'OR' return None - # length 2/3 seperators + # length 2-3 seperators elif(state == 'COLON'): if (input == ':'): return 'DOUBLECO' @@ -293,6 +302,23 @@ def JoosTransition(input, state): if(input == '.'): return 'ELLIPSIS' return None + + # Comments + elif(state == 'DIV'): + if (input == '/'): + return 'COMMENT' + elif (input == '*'): + return 'LCOMMENT' + return None + elif(state == 'LCOMMENT'): + if(input == '*'): + return 'LCOMMENT' + return None + elif(state == 'MULT'): + if(input == '/'): + return 'RCOMMENT' + return None + else: return None @@ -338,30 +364,44 @@ def SMM(input, dfa): if (state in dfa.accept): scanned.append(Token(state, seenInput)) else: - print(ord(input), "ERROR on Maximal Munch") - break + return (None, ord(input[0])) - return scanned + return (scanned, "success") ################# Scan ################################################ def scan(input): - tokens = SMM(input, JoosDFA) + result = SMM(input, JoosDFA) + tokens = result[0] + # Handling error in munching + if (tokens is None): + return (None, "Error on Scanning character: " + str(result[1])) + if (tokens): - # TODO: handle edge cases (e.g. check int range) - # TODO: handle cases where keywords in Java but not in joos appear (a false program) - + # Handle erroneous tokens (return None and error string) + # Preprocess for comment removal + commentFlag = False + indexRange = len(tokens) + for index,token in enumerate(tokens): + # dealing with numbers that start with 0 (e.g. 09) + if token.name == 'ZERO': + if index < indexRange-1: + if tokens[index+1].name == 'NUM': + return (None, "wrong integer literal: starts with 0") + # dealing with keywords in Java but not in Joos + elif token.name == 'ID' and token.lex in wrongJavaKeyWordDict: + return (None, "keyword in Java but not in Joos") + + # Comment flagging + elif token.name == 'COMMENT': + commentFlag = True + if commentFlag: + token.name = 'COMMENT' + idsToTokens(tokens) # remove whitespace and comments tokens = filter(lambda t: t.name not in ("WHITESPACE", "COMMENT"), tokens) - return tokens - -# TODO: -# check range for digits -# handle string literals -# escape sequences? -# compound names scanning tokenizing ? + return (tokens, "success") -# is 00 valid in java? -# We might not need alphabet for the DFA (alphabet is imply by the transition rules) +################# Removing Multiline Comments ################################################ diff --git a/TestScan.py b/TestScan.py index 9860b3331eb682155ad487979889fe40321d9ae3..389de740b671361c2f61010d978f4638abb26f34 100644 --- a/TestScan.py +++ b/TestScan.py @@ -2,20 +2,31 @@ import sys from Scanning import scan -# Lines of Tokens -tlines = [] +def main(): + # Lines of Tokens + tlines = [] -inputfile = sys.argv[1] -inputfile = "./Tests/" + inputfile -f = open(inputfile, "r") -lines = f.readlines() + # Reading in test joos program + inputfile = sys.argv[1] + inputfile = "./Tests/" + inputfile + f = open(inputfile, "r") + lines = f.readlines() -for line in lines: - tlines.append(scan(line)) + # Scanning line by line + for index, line in enumerate(lines): + result = scan(line) + # Error in Scanning + if result[0] is None: + print("Error in Scanning at line " + str(index) + ": " + result[1]); + return None + tlines.append(result[0]) -for tline in tlines: - s = "" - for token in tline: - if (token.name and token.lex): - s += '(' + token.name + ',' + token.lex + '), ' - print(s) + + for tline in tlines: + s = "" + for token in tline: + if (token.name and token.lex): + s += '(' + token.name + ',' + token.lex + '), ' + print(s) + +main() \ No newline at end of file diff --git a/Tests/ErrorJavaTokenNotJoos.joos b/Tests/ErrorJavaTokenNotJoos.joos new file mode 100644 index 0000000000000000000000000000000000000000..0a42dfaaf0554b705cc86551098a33ddbb97cbc4 --- /dev/null +++ b/Tests/ErrorJavaTokenNotJoos.joos @@ -0,0 +1,5 @@ +public class WrongKeyword { + public static void main(String[] args) { + switch (); + } +} \ No newline at end of file diff --git a/Tests/add2num.joos b/Tests/add2num.joos index 868f317eed121db16d2727fd371ed7b7fe9fd1c3..cb4b16d6432fa68a9c6b7c5823ef20eaef0ab27f 100644 --- a/Tests/add2num.joos +++ b/Tests/add2num.joos @@ -3,6 +3,7 @@ public class AddTwoIntegers { int one = 10; int two = 20; + int he234slk = 30; int i,j; int sum = one + two; if (two > 45) { diff --git a/Tests/comments.joos b/Tests/comments.joos new file mode 100644 index 0000000000000000000000000000000000000000..cd458f8aab50b982b4e5dc6c22a829258b7d3fb5 --- /dev/null +++ b/Tests/comments.joos @@ -0,0 +1,6 @@ +public class Comments { + public static void main(String[] args) { + int four = 4; // ignore these stuff + // ignore this line + } +} \ No newline at end of file diff --git a/Tests/empty.joos b/Tests/empty.joos new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/Tests/errorNumber.joos b/Tests/errorNumber.joos new file mode 100644 index 0000000000000000000000000000000000000000..087fd5613595e7a802552a3047c07fcb86e04ad5 --- /dev/null +++ b/Tests/errorNumber.joos @@ -0,0 +1,5 @@ +public class OneInteger { + public static void main(String[] args) { + int two = 099; + } +} \ No newline at end of file diff --git a/Tests/errorOperatorsInJavaNotJoos.joos b/Tests/errorOperatorsInJavaNotJoos.joos new file mode 100644 index 0000000000000000000000000000000000000000..4511fabdbd10d7ef9f71965c23f257b4fbcfc58a --- /dev/null +++ b/Tests/errorOperatorsInJavaNotJoos.joos @@ -0,0 +1,5 @@ +public class WrongOperator { + public static void main(String[] args) { + ~ + } +} \ No newline at end of file