Skip to content
Snippets Groups Projects
Commit 7e35acd1 authored by pycsham's avatar pycsham
Browse files

Added error handling for Scanning. Removing end of line comments

parent 614a62a2
No related branches found
No related tags found
No related merge requests found
...@@ -58,8 +58,8 @@ def idsToTokens(tokens): ...@@ -58,8 +58,8 @@ def idsToTokens(tokens):
t.name = idToTokenDict.get(t.lex) t.name = idToTokenDict.get(t.lex)
# a set that contains keywords that are in java but not in joos # a set that contains keywords that are in Java but not in Joos
wrongKeyWordDict = { wrongJavaKeyWordDict = {
'assert', 'assert',
'break', 'break',
'case', 'case',
...@@ -88,7 +88,6 @@ wrongKeyWordDict = { ...@@ -88,7 +88,6 @@ wrongKeyWordDict = {
'volatile', 'volatile',
} }
######################## DFA Stuff ################################### ######################## DFA Stuff ###################################
################# Joos DFA Tokens ################################### ################# Joos DFA Tokens ###################################
...@@ -97,12 +96,12 @@ JoosDFATokens = set([ ...@@ -97,12 +96,12 @@ JoosDFATokens = set([
'ID', 'ID',
# Literals and names (note: 'this' is considered as a keyword) # Literals and names (note: 'this' is considered as a keyword)
'NUM', # number 'NUM', # number (excludes 0)
'ZERO', # 0
'LITERALBOOL', # true or false 'LITERALBOOL', # true or false
'LITERALCHAR', # character e.g. 'c', includes escape characters? 'LITERALCHAR', # character e.g. 'c', includes escape characters?
'LITERALSTRING', # string e.g. "hello", includes escape sequences 'LITERALSTRING', # string e.g. "hello", includes escape sequences
'NULL', # null 'NULL', # null
'COMPOUND', # compound name e.g. System.io.print
# Operators: # Operators:
'ASSIGN', # = 'ASSIGN', # =
...@@ -140,6 +139,10 @@ JoosDFATokens = set([ ...@@ -140,6 +139,10 @@ JoosDFATokens = set([
'ELLIPSIS', # ... 'ELLIPSIS', # ...
'AT', # @ 'AT', # @
'DOUBLECO', # :: 'DOUBLECO', # ::
# Paragraph Comments:
'LCOMMENT', # /* or /**
'RCOMMENT', # */
]) ])
##################### Transition function ############################ ##################### Transition function ############################
...@@ -148,7 +151,7 @@ JoosDFATokens = set([ ...@@ -148,7 +151,7 @@ JoosDFATokens = set([
# Note: recognize keywords as ID, then convert them to different tokens later # Note: recognize keywords as ID, then convert them to different tokens later
def JoosTransition(input, state): def JoosTransition(input, state):
if (state == 'WHITESPACE'): if (state == 'WHITESPACE'):
if (input in (' ', '\n')): if (input in (' ', '\n', '\r')):
return 'WHITESPACE' return 'WHITESPACE'
else: else:
return None return None
...@@ -157,13 +160,14 @@ def JoosTransition(input, state): ...@@ -157,13 +160,14 @@ def JoosTransition(input, state):
if (input.isalpha()): if (input.isalpha()):
return 'ID' return 'ID'
if (input.isdigit()): if (input.isdigit()):
if (input == '0'):
return 'ZERO'
return 'NUM' return 'NUM'
# whitespace and comments # whitespace and comments
if (input == ' '): if (input in (' ', '\n', '\r')):
return 'WHITESPACE'
if (input == '\n'):
return 'WHITESPACE' return 'WHITESPACE'
# operators # operators
if (input == '='): if (input == '='):
...@@ -228,6 +232,11 @@ def JoosTransition(input, state): ...@@ -228,6 +232,11 @@ def JoosTransition(input, state):
return 'ID' return 'ID'
return None return None
elif (state == 'NUM'):
if(input.isdigit()):
return 'NUM'
return None
# string literal # string literal
elif (state == 'LSTRING'): elif (state == 'LSTRING'):
if (input == '\\'): if (input == '\\'):
...@@ -280,7 +289,7 @@ def JoosTransition(input, state): ...@@ -280,7 +289,7 @@ def JoosTransition(input, state):
return 'OR' return 'OR'
return None return None
# length 2/3 separators # length 2-3 separators
elif(state == 'COLON'): elif(state == 'COLON'):
if (input == ':'): if (input == ':'):
return 'DOUBLECO' return 'DOUBLECO'
...@@ -293,6 +302,23 @@ def JoosTransition(input, state): ...@@ -293,6 +302,23 @@ def JoosTransition(input, state):
if(input == '.'): if(input == '.'):
return 'ELLIPSIS' return 'ELLIPSIS'
return None return None
# Comments
elif(state == 'DIV'):
if (input == '/'):
return 'COMMENT'
elif (input == '*'):
return 'LCOMMENT'
return None
elif(state == 'LCOMMENT'):
if(input == '*'):
return 'LCOMMENT'
return None
elif(state == 'MULT'):
if(input == '/'):
return 'RCOMMENT'
return None
else: else:
return None return None
...@@ -338,30 +364,44 @@ def SMM(input, dfa): ...@@ -338,30 +364,44 @@ def SMM(input, dfa):
if (state in dfa.accept): if (state in dfa.accept):
scanned.append(Token(state, seenInput)) scanned.append(Token(state, seenInput))
else: else:
print(ord(input), "ERROR on Maximal Munch") return (None, ord(input[0]))
break
return scanned return (scanned, "success")
################# Scan ################################################ ################# Scan ################################################
def scan(input): def scan(input):
tokens = SMM(input, JoosDFA) result = SMM(input, JoosDFA)
tokens = result[0]
# Handling error in munching
if (tokens is None):
return (None, "Error on Scanning character: " + str(result[1]))
if (tokens): if (tokens):
# TODO: handle edge cases (e.g. check int range) # Handle erroneous tokens (return None and error string)
# TODO: handle cases where keywords in Java but not in joos appear (a false program) # Preprocess for comment removal
commentFlag = False
indexRange = len(tokens)
for index,token in enumerate(tokens):
# dealing with numbers that start with 0 (e.g. 09)
if token.name == 'ZERO':
if index < indexRange-1:
if tokens[index+1].name == 'NUM':
return (None, "wrong integer literal: starts with 0")
# dealing with keywords in Java but not in Joos
elif token.name == 'ID' and token.lex in wrongJavaKeyWordDict:
return (None, "keyword in Java but not in Joos")
# Comment flagging
elif token.name == 'COMMENT':
commentFlag = True
if commentFlag:
token.name = 'COMMENT'
idsToTokens(tokens) idsToTokens(tokens)
# remove whitespace and comments # remove whitespace and comments
tokens = filter(lambda t: t.name not in ("WHITESPACE", "COMMENT"), tokens) tokens = filter(lambda t: t.name not in ("WHITESPACE", "COMMENT"), tokens)
return tokens return (tokens, "success")
# TODO:
# check range for digits
# handle string literals
# escape sequences?
# compound names scanning tokenizing ?
# is 00 valid in java? ################# Removing Multiline Comments ################################################
# We might not need alphabet for the DFA (alphabet is implied by the transition rules)
...@@ -2,20 +2,31 @@ import sys ...@@ -2,20 +2,31 @@ import sys
from Scanning import scan from Scanning import scan
# Lines of Tokens def main():
tlines = [] # Lines of Tokens
tlines = []
inputfile = sys.argv[1] # Reading in test joos program
inputfile = "./Tests/" + inputfile inputfile = sys.argv[1]
f = open(inputfile, "r") inputfile = "./Tests/" + inputfile
lines = f.readlines() f = open(inputfile, "r")
lines = f.readlines()
for line in lines: # Scanning line by line
tlines.append(scan(line)) for index, line in enumerate(lines):
result = scan(line)
# Error in Scanning
if result[0] is None:
print("Error in Scanning at line " + str(index) + ": " + result[1]);
return None
tlines.append(result[0])
for tline in tlines:
s = "" for tline in tlines:
for token in tline: s = ""
if (token.name and token.lex): for token in tline:
s += '(' + token.name + ',' + token.lex + '), ' if (token.name and token.lex):
print(s) s += '(' + token.name + ',' + token.lex + '), '
print(s)
main()
\ No newline at end of file
public class WrongKeyword {
public static void main(String[] args) {
switch ();
}
}
\ No newline at end of file
...@@ -3,6 +3,7 @@ public class AddTwoIntegers { ...@@ -3,6 +3,7 @@ public class AddTwoIntegers {
int one = 10; int one = 10;
int two = 20; int two = 20;
int he234slk = 30;
int i,j; int i,j;
int sum = one + two; int sum = one + two;
if (two > 45) { if (two > 45) {
......
public class Comments {
public static void main(String[] args) {
int four = 4; // ignore these stuff
// ignore this line
}
}
\ No newline at end of file
public class OneInteger {
public static void main(String[] args) {
int two = 099;
}
}
\ No newline at end of file
public class WrongOperator {
public static void main(String[] args) {
~
}
}
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment