diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..bec372b02f1c16c7e6e750b11b3abf66b6c60db2
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+*.swp
+__pycache__
diff --git a/README.md b/README.md
index df4eef18b4268eaf484ad626d30cdf8cb9007f02..68891b1136fdb0d967c0d9b1966bc00f4e806a36 100644
--- a/README.md
+++ b/README.md
@@ -1,8 +1,8 @@
 # Maze World - Assignment 2
-Assignment code for course ECE 493 T25 at the University of Waterloo in Spring 2019.
-(*Code designed and created by Sriram Ganapathi Subramanian and Mark Crowley, 2019*)
+Assignment code for course ECE 493 T25 at the University of Waterloo in Spring 2020.
+(*Code designed and created by Sriram Ganapathi Subramanian and Mark Crowley, 2020*)
 
-**Due Date:** July 30 11:59pm submitted as PDF and code to LEARN dropbox.
+**Due Date:** TBD. Submit as a PDF and code to the LEARN dropbox.
 
 **Collaboration:** You can discuss solutions and help to work out the code. But each person *must do their own work*. All code and writing will be cross-checked against each other and against internet databases for cheating. 
 
diff --git a/maze_env.py b/maze_env.py
index f7791d88c2cbad7bb43e0098e9fc2271f43ce392..e349b0cbd0b66f6f39d5dd8efef197da11db5a6f 100644
--- a/maze_env.py
+++ b/maze_env.py
@@ -28,7 +28,6 @@ class Maze(tk.Tk, object):
         self.title('maze')
         self.geometry('{0}x{1}'.format(MAZE_H * UNIT, MAZE_W * UNIT))
         self.build_shape_maze(agentXY, goalXY, walls, pits)
-        #self.build_maze()
 
     def build_shape_maze(self,agentXY,goalXY, walls,pits):
         self.canvas = tk.Canvas(self, bg='white',
@@ -92,7 +91,6 @@ class Maze(tk.Tk, object):
         if(value == 0):
             return self.canvas.coords(self.agent)
         else:
-            #Reset Agent
             if(resetAgent):
                 self.canvas.delete(self.agent)
                 self.agent = self.canvas.create_rectangle(origin[0] - 15, origin[1] - 15,
@@ -108,19 +106,16 @@ class Maze(tk.Tk, object):
                 reward = 1
                 done = True
                 nextstate = 'terminal'
-            #elif nextstate in [self.canvas.coords(self.pit1), self.canvas.coords(self.pit2)]:
             elif nextstate in [self.canvas.coords(w) for w in self.wallblocks]:
                 reward = -0.3
                 done = False
                 nextstate = currstate
                 reverse=True
-                #print("Wall penalty:{}".format(reward))
             elif nextstate in [self.canvas.coords(w) for w in self.pitblocks]:
                 reward = -10
                 done = True
                 nextstate = 'terminal'
                 reverse=False
-                #print("Wall penalty:{}".format(reward))
             else:
                 reward = -0.1
                 done = False
@@ -146,14 +141,12 @@ class Maze(tk.Tk, object):
         self.canvas.move(self.agent, base_action[0], base_action[1])  # move agent
 
         s_ = self.canvas.coords(self.agent)  # next state
-        #print("s_.coords:{}({})".format(self.canvas.coords(self.agent),type(self.canvas.coords(self.agent))))
-        #print("s_:{}({})".format(s_, type(s_)))
 
         # call the reward function
         reward, done, reverse = self.computeReward(s, action, s_)
         if(reverse):
             self.canvas.move(self.agent, -base_action[0], -base_action[1])  # move agent back
-            s_ = self.canvas.coords(self.agent)  
+            s_ = self.canvas.coords(self.agent)
 
         return s_, reward, done
 
diff --git a/run_main.py b/run_main.py
index 5df0eb4dc6418168fd0f943af4b3d4b54624709d..3a20d41ef2b525bbb76aa1232018f13c183a8f82 100644
--- a/run_main.py
+++ b/run_main.py
@@ -11,9 +11,9 @@ def debug(debuglevel, msg, **kwargs):
     if debuglevel <= DEBUG:
         if 'printNow' in kwargs:
             if kwargs['printNow']:
-                print(msg) 
+                print(msg)
         else:
-            print(msg) 
+            print(msg)
 
 
 def plot_rewards(experiments):
@@ -30,44 +30,39 @@ def plot_rewards(experiments):
     plt.ylabel("Return", fontsize=18)
     plt.tick_params(axis='both', which='major',
                     labelsize=14)
-#    plt.axis([0, 1100, 0, 1100000])
     plt.show()
 
 def update(env, RL, data, episodes=50):
     global_reward = np.zeros(episodes)
     data['global_reward']=global_reward
 
-    for episode in range(episodes):  
+    for episode in range(episodes):
         t=0
         # initial state
         if episode == 0:
             state = env.reset(value = 0)
         else:
             state = env.reset()
-       
+
         debug(2,'state(ep:{},t:{})={}'.format(episode, t, state))
 
         # RL choose action based on state
         action = RL.choose_action(str(state))
         while True:
             # fresh env
-            #if(t<5000 and (showRender or (episode % renderEveryNth)==0)):
             if(showRender or (episode % renderEveryNth)==0):
                 env.render(sim_speed)
 
-
             # RL take action and get next state and reward
             state_, reward, done = env.step(action)
             global_reward[episode] += reward
             debug(2,'state(ep:{},t:{})={}'.format(episode, t, state))
             debug(2,'reward_{}=  total return_t ={} Mean50={}'.format(reward, global_reward[episode],np.mean(global_reward[-50:])))
-            
 
             # RL learn from this transition
             # and determine next state and action
             state, action =  RL.learn(str(state), action, reward, str(state_))
 
-
             # break while loop when end of this episode
             if done:
                 break
@@ -91,7 +86,7 @@ if __name__ == "__main__":
     printEveryNth=1
     do_plot_rewards=True
 
-    #Exmaple Full Run, you may need to run longer
+    #Example Full Run, you may need to run longer
     #showRender=False
     #episodes=2000
     #renderEveryNth=10000
@@ -106,21 +101,25 @@
         datafile = sys.argv[3]
 
 
-    #All Tasks
+    # Task Specifications (agentXY and goalXY apply to all tasks)
+
     agentXY=[0,0]
     goalXY=[4,4]
 
-    #Task 1
-    wall_shape=np.array([[7,7],[4,6]])
-    pits=np.array([[6,3],[2,6]])
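+    # Only the last wall_shape/pits assignment below takes effect; comment out the tasks you are not running.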
+    # Task 1
+    wall_shape=np.array([[2,2],[3,6]])
+    pits=np.array([[6,3],[1,4]])
 
-    #Task 2
-    #wall_shape=np.array([[5,2],[4,2],[3,2],[3,3],[3,4],[3,5],[3,6],[4,6],[5,6]])
-    #pits=[]
+    # Task 2
+    wall_shape=np.array([[6,2],[5,2],[4,2],[3,2],[2,2],[6,3],[6,4],[6,5],
+        [2,3],[2,4],[2,5]])
+    pits=[]
 
-    #Task 3
-    #wall_shape=np.array([[7,4],[7,3],[6,3],[6,2],[5,2],[4,2],[3,2],[3,3],[3,4],[3,5],[3,6],[4,6],[5,6]])
-    #pits=np.array([[1,3],[0,5], [7,7]])
+    # Task 3
+    wall_shape=np.array([[6,3],[6,2],[5,2],[4,2],[3,2],[3,3],
+        [3,4],[3,5],[3,6],[4,6],[5,6],[5,7],[7,3]])
+    pits=np.array([[1,3],[0,5], [7,7], [8,5]])
 
     env1 = Maze(agentXY,goalXY,wall_shape, pits)
     RL1 = rlalg1(actions=list(range(env1.n_actions)))
diff --git a/task1.png b/task1.png
index 96a9bea77e26fe9dde7521f0a256b8c15ccc18fb..b8183ee0cae8f207ef60fa440c1ab6ef0192ad83 100644
Binary files a/task1.png and b/task1.png differ
diff --git a/task2.png b/task2.png
index 32eefec8c7fd180f577fcf8fb2685a1f7b26f114..53946251800bb545e6bca8eba686775f7d856fdb 100644
Binary files a/task2.png and b/task2.png differ
diff --git a/task3.png b/task3.png
index 106e40d32a7c29c368fbd8172416961fe9b68557..532bbe2ee799d57874a20b45c86c1bca8ee4b60d 100644
Binary files a/task3.png and b/task3.png differ