Added error handling for scanning and removal of end-of-line comments

7e35acd1 · pycsham · 614a62a2 · 7e35acd1 · 7e35acd1 · 7e35acd1
Commit 7e35acd1 authored 5 years ago by pycsham
--- a/Scanning.py
+++ b/Scanning.py
@@ -58,8 +58,8 @@ def idsToTokens(tokens):
                t.name = idToTokenDict.get(t.lex)
-# a set that contains keywords that are in java but not in joos
+# a set that contains keywords that are in Java but not in Joos
-wrongKeyWordDict = {
+wrongJavaKeyWordDict = {
    'assert', 
    'break', 
    'case', 
@@ -88,7 +88,6 @@ wrongKeyWordDict = {
    'volatile',
 }
 ######################## DFA Stuff ###################################
 #################  Joos DFA Tokens ###################################
@@ -97,12 +96,12 @@ JoosDFATokens = set([
    'ID',             
    # Literals and names (note: 'this' is considered as a keyword)
-    'NUM',            # number
+    'NUM',            # number (excludes 0)
+    'ZERO',           # 0 
    'LITERALBOOL',    # true or false
    'LITERALCHAR',    # character e.g. 'c', includes escape characters? 
    'LITERALSTRING',  # string e.g. "hello", includes escape sequences
    'NULL',           # null
-    'COMPOUND',       # compound name e.g. System.io.print
    # Operators:
    'ASSIGN',         # =
@@ -140,6 +139,10 @@ JoosDFATokens = set([
    'ELLIPSIS',       # ...
    'AT',             # @
    'DOUBLECO',       # ::
+    # Paragraph Comments:
+    'LCOMMENT',       # /* or /**
+    'RCOMMENT',       # */
    ])
 ##################### Transition function ############################
@@ -148,7 +151,7 @@ JoosDFATokens = set([
 # Note: recognize keywords as ID, then convert them to different tokens later
 def JoosTransition(input, state):
    if (state == 'WHITESPACE'):
-        if (input in (' ', '\n')):
+        if (input in (' ', '\n', '\r')):
            return 'WHITESPACE'
        else:
            return None
@@ -157,13 +160,14 @@ def JoosTransition(input, state):
        if (input.isalpha()):
            return 'ID'
        if (input.isdigit()):
+            if (input == '0'):
+                return 'ZERO'
            return 'NUM'
        # whitespace and comments
-        if (input == ' '):
+        if (input in (' ', '\n', '\r')):
-            return 'WHITESPACE'
-        if (input == '\n'):
            return 'WHITESPACE'
        # operators
        if (input == '='):
@@ -228,6 +232,11 @@ def JoosTransition(input, state):
            return 'ID'
        return None
+    elif (state == 'NUM'):
+        if(input.isdigit()):
+            return 'NUM'
+        return None
    # string literal
    elif (state == 'LSTRING'):
        if (input == '\\'):
@@ -280,7 +289,7 @@ def JoosTransition(input, state):
            return 'OR'
        return None
-    # length 2/3 seperators
+    # length 2-3 separators
    elif(state == 'COLON'):
        if (input == ':'):
            return 'DOUBLECO'
@@ -293,6 +302,23 @@ def JoosTransition(input, state):
        if(input == '.'):
            return 'ELLIPSIS'
        return None
+    # Comments
+    elif(state == 'DIV'):
+        if (input == '/'):
+            return 'COMMENT'
+        elif (input == '*'):
+            return 'LCOMMENT'
+        return None
+    elif(state == 'LCOMMENT'):
+        if(input == '*'):
+            return 'LCOMMENT'
+        return None
+    elif(state == 'MULT'):
+        if(input == '/'):
+            return 'RCOMMENT'
+        return None
    else:
        return None
@@ -338,30 +364,44 @@ def SMM(input, dfa):
        if (state in dfa.accept):
            scanned.append(Token(state, seenInput))
        else:
-            print(ord(input), "ERROR on Maximal Munch")
+            return (None, ord(input[0]))
-            break
-    return scanned
+    return (scanned, "success")
 ################# Scan ################################################
 def scan(input):
-    tokens = SMM(input, JoosDFA)
+    result = SMM(input, JoosDFA)
+    tokens = result[0]
+    # Handling error in munching
+    if (tokens is None):
+        return (None, "Error on Scanning character: " + str(result[1]))
    if (tokens):
-        # TODO: handle edge cases (e.g. check int range)
+        # Handle erroneous tokens (return None and error string)
-        # TODO: handle cases where keywords in Java but not in joos appear (a false program)
+        # Preprocess for comment removal
+        commentFlag = False
+        indexRange = len(tokens)
+        for index,token in enumerate(tokens):
+            # dealing with numbers that start with 0 (e.g. 09)
+            if token.name == 'ZERO':
+                if index < indexRange-1:
+                    if tokens[index+1].name == 'NUM':
+                        return (None, "wrong integer literal: starts with 0")
+            # dealing with keywords in Java but not in Joos
+            elif token.name == 'ID' and token.lex in wrongJavaKeyWordDict:
+                return (None, "keyword in Java but not in Joos")
+            # Comment flagging
+            elif token.name == 'COMMENT':
+                commentFlag = True
+            if commentFlag:
+                token.name = 'COMMENT'
        idsToTokens(tokens)
        # remove whitespace and comments
        tokens = filter(lambda t: t.name not in ("WHITESPACE", "COMMENT"), tokens)
-    return tokens
+    return (tokens, "success")
-# TODO:
-# check range for digits
-# handle string literals
-# escape sequences? 
-# compound names scanning tokenizing ?
-# is 00 valid in java?
+################# Removing Multiline Comments ################################################
-# We might not need alphabet for the DFA (alphabet is imply by the transition rules)
--- a/TestScan.py
+++ b/TestScan.py
@@ -2,20 +2,31 @@ import sys
 from Scanning import scan
-# Lines of Tokens
+def main():
-tlines = []
+    # Lines of Tokens
+    tlines = []
-inputfile = sys.argv[1]
+    # Reading in test joos program
-inputfile = "./Tests/" + inputfile
+    inputfile = sys.argv[1]
-f = open(inputfile, "r")
+    inputfile = "./Tests/" + inputfile
-lines = f.readlines()
+    f = open(inputfile, "r")
+    lines = f.readlines()
-for line in lines:
+    # Scanning line by line
-    tlines.append(scan(line))
+    for index, line in enumerate(lines):
+        result = scan(line)
+        # Error in Scanning
+        if result[0] is None:
+            print("Error in Scanning at line " + str(index) + ": " + result[1]); 
+            return None
+        tlines.append(result[0])
-for tline in tlines:
-    s = ""
+    for tline in tlines:
-    for token in tline:
+        s = ""
-        if (token.name and token.lex):
+        for token in tline:
-            s += '(' + token.name + ',' + token.lex + '), '
+            if (token.name and token.lex):
-    print(s)
+                s += '(' + token.name + ',' + token.lex + '), '
+        print(s)
+main()
\ No newline at end of file
--- a/Tests/ErrorJavaTokenNotJoos.joos
+++ b/Tests/ErrorJavaTokenNotJoos.joos
+public class WrongKeyword {
+    public static void main(String[] args) { 
+        switch ();
+    }
+}
\ No newline at end of file
--- a/Tests/add2num.joos
+++ b/Tests/add2num.joos
@@ -3,6 +3,7 @@ public class AddTwoIntegers {
        int one = 10;
        int two = 20;
+        int he234slk = 30;
        int i,j;
        int sum = one + two;
        if (two > 45) {

--- a/Tests/comments.joos
+++ b/Tests/comments.joos
+public class Comments {
+    public static void main(String[] args) { 
+        int four = 4; // ignore these stuff
+        // ignore this line
+    }
+}
\ No newline at end of file
--- a/Tests/empty.joos
+++ b/Tests/empty.joos
--- a/Tests/errorNumber.joos
+++ b/Tests/errorNumber.joos
+public class OneInteger {
+    public static void main(String[] args) { 
+        int two = 099;
+    }
+}
\ No newline at end of file
--- a/Tests/errorOperatorsInJavaNotJoos.joos
+++ b/Tests/errorOperatorsInJavaNotJoos.joos
+public class WrongOperator {
+    public static void main(String[] args) { 
+        ~
+    }
+}
\ No newline at end of file