Commit 1c5c0e18 authored by Shamak Dutta

initial changes

parent 66a34752
*.swp
__pycache__
# Maze World - Assignment 2
- Assignment code for course ECE 493 T25 at the University of Waterloo in Spring 2019.
- (*Code designed and created by Sriram Ganapathi Subramanian and Mark Crowley, 2019*)
+ Assignment code for course ECE 493 T25 at the University of Waterloo in Spring 2020.
+ (*Code designed and created by Sriram Ganapathi Subramanian and Mark Crowley, 2020*)
- **Due Date:** July 30 11:59pm submitted as PDF and code to LEARN dropbox.
+ **Due Date:** TBD: submitted as PDF and code to LEARN dropbox.
**Collaboration:** You can discuss solutions and help to work out the code. But each person *must do their own work*. All code and writing will be cross-checked against each other and against internet databases for cheating.
@@ -28,7 +28,6 @@ class Maze(tk.Tk, object):
self.title('maze')
self.geometry('{0}x{1}'.format(MAZE_H * UNIT, MAZE_W * UNIT))
self.build_shape_maze(agentXY, goalXY, walls, pits)
- #self.build_maze()
def build_shape_maze(self,agentXY,goalXY, walls,pits):
self.canvas = tk.Canvas(self, bg='white',
@@ -92,7 +91,6 @@ class Maze(tk.Tk, object):
if(value == 0):
return self.canvas.coords(self.agent)
else:
- #Reset Agent
if(resetAgent):
self.canvas.delete(self.agent)
self.agent = self.canvas.create_rectangle(origin[0] - 15, origin[1] - 15,
@@ -108,19 +106,16 @@ class Maze(tk.Tk, object):
reward = 1
done = True
nextstate = 'terminal'
- #elif nextstate in [self.canvas.coords(self.pit1), self.canvas.coords(self.pit2)]:
elif nextstate in [self.canvas.coords(w) for w in self.wallblocks]:
reward = -0.3
done = False
nextstate = currstate
reverse=True
#print("Wall penalty:{}".format(reward))
elif nextstate in [self.canvas.coords(w) for w in self.pitblocks]:
reward = -10
done = True
nextstate = 'terminal'
reverse=False
#print("Wall penalty:{}".format(reward))
else:
reward = -0.1
done = False
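For readers skimming this hunk, the reward logic it shows boils down to four outcomes. The summary below is only an illustration of that scheme; the `REWARD_SCHEME` name and dict layout are not part of the assignment code:

```python
# Illustrative summary of the computeReward() branches shown above:
# outcome -> (reward, episode done, bounce the agent back to its previous cell)
REWARD_SCHEME = {
    'reached goal':  ( 1.0,  True,  False),   # nextstate becomes 'terminal'
    'hit a wall':    (-0.3,  False, True),    # reverse=True undoes the move
    'fell into pit': (-10.0, True,  False),   # nextstate becomes 'terminal'
    'ordinary step': (-0.1,  False, False),   # small per-step penalty
}
```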
@@ -146,14 +141,12 @@ class Maze(tk.Tk, object):
self.canvas.move(self.agent, base_action[0], base_action[1]) # move agent
s_ = self.canvas.coords(self.agent) # next state
#print("s_.coords:{}({})".format(self.canvas.coords(self.agent),type(self.canvas.coords(self.agent))))
#print("s_:{}({})".format(s_, type(s_)))
# call the reward function
reward, done, reverse = self.computeReward(s, action, s_)
if(reverse):
self.canvas.move(self.agent, -base_action[0], -base_action[1]) # move agent back
s_ = self.canvas.coords(self.agent)
s_ = self.canvas.coords(self.agent)
return s_, reward, done
@@ -11,9 +11,9 @@ def debug(debuglevel, msg, **kwargs):
if debuglevel <= DEBUG:
if 'printNow' in kwargs:
if kwargs['printNow']:
- print(msg)
+ print(msg)
else:
- print(msg)
+ print(msg)
def plot_rewards(experiments):
@@ -30,44 +30,39 @@ def plot_rewards(experiments):
plt.ylabel("Return", fontsize=18)
plt.tick_params(axis='both', which='major',
labelsize=14)
- # plt.axis([0, 1100, 0, 1100000])
plt.show()
def update(env, RL, data, episodes=50):
global_reward = np.zeros(episodes)
data['global_reward']=global_reward
- for episode in range(episodes):
+ for episode in range(episodes):
t=0
# initial state
if episode == 0:
state = env.reset(value = 0)
else:
state = env.reset()
debug(2,'state(ep:{},t:{})={}'.format(episode, t, state))
# RL choose action based on state
action = RL.choose_action(str(state))
while True:
# fresh env
- #if(t<5000 and (showRender or (episode % renderEveryNth)==0)):
if(showRender or (episode % renderEveryNth)==0):
env.render(sim_speed)
# RL take action and get next state and reward
state_, reward, done = env.step(action)
global_reward[episode] += reward
debug(2,'state(ep:{},t:{})={}'.format(episode, t, state))
debug(2,'reward_{}= total return_t ={} Mean50={}'.format(reward, global_reward[episode],np.mean(global_reward[-50:])))
# RL learn from this transition
# and determine next state and action
state, action = RL.learn(str(state), action, reward, str(state_))
# break while loop when end of this episode
if done:
break
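The training loop above only relies on an agent object that exposes `choose_action(state)` and a `learn(s, a, r, s_)` method returning the next state and next action; the concrete algorithm behind `rlalg1` is what the assignment asks you to implement. Purely as a sketch of that interface, and not as the assignment's solution, a minimal tabular Q-learning agent could look like this (the class name and hyperparameter values are illustrative assumptions):

```python
import numpy as np

class QLearningSketch:
    """Illustrative agent matching the choose_action/learn interface used by update()."""
    def __init__(self, actions, alpha=0.1, gamma=0.9, epsilon=0.1):
        self.actions = actions                      # e.g. list(range(env.n_actions))
        self.alpha, self.gamma, self.epsilon = alpha, gamma, epsilon
        self.q = {}                                 # state string -> array of action values

    def _ensure(self, s):
        if s not in self.q:
            self.q[s] = np.zeros(len(self.actions))

    def choose_action(self, s):
        self._ensure(s)
        if np.random.rand() < self.epsilon:         # explore
            return int(np.random.choice(self.actions))
        return int(np.argmax(self.q[s]))            # exploit

    def learn(self, s, a, r, s_):
        self._ensure(s)
        if s_ != 'terminal':
            self._ensure(s_)
            target = r + self.gamma * np.max(self.q[s_])
        else:
            target = r                              # terminal transition: no bootstrapping
        self.q[s][a] += self.alpha * (target - self.q[s][a])
        # update() expects the next state and the next action back
        next_action = self.choose_action(s_) if s_ != 'terminal' else a
        return s_, next_action
```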
@@ -91,7 +86,7 @@ if __name__ == "__main__":
printEveryNth=1
do_plot_rewards=True
- #Exmaple Full Run, you may need to run longer
+ #Example Full Run, you may need to run longer
#showRender=False
#episodes=2000
#renderEveryNth=10000
@@ -106,21 +101,24 @@ if __name__ == "__main__":
datafile = sys.argv[3]
- #All Tasks
+ # Task Specifications
agentXY=[0,0]
goalXY=[4,4]
- #Task 1
- wall_shape=np.array([[7,7],[4,6]])
- pits=np.array([[6,3],[2,6]])
+ # Task 1
+ wall_shape=np.array([[2,2],[3,6]])
+ pits=np.array([[6,3],[1,4]])
- #Task 2
- #wall_shape=np.array([[5,2],[4,2],[3,2],[3,3],[3,4],[3,5],[3,6],[4,6],[5,6]])
- #pits=[]
+ # Task 2
+ wall_shape=np.array([[6,2],[5,2],[4,2],[3,2],[2,2],[6,3],[6,4],[6,5],
+ [2,3],[2,4],[2,5]])
+ pits=[]
- #Task 3
- #wall_shape=np.array([[7,4],[7,3],[6,3],[6,2],[5,2],[4,2],[3,2],[3,3],[3,4],[3,5],[3,6],[4,6],[5,6]])
- #pits=np.array([[1,3],[0,5], [7,7]])
+ # Task 3
+ wall_shape=np.array([[6,3],[6,3],[6,2],[5,2],[4,2],[3,2],[3,3],
+ [3,4],[3,5],[3,6],[4,6],[5,6],[5,7],[7,3]])
+ pits=np.array([[1,3],[0,5], [7,7], [8,5]])
env1 = Maze(agentXY,goalXY,wall_shape, pits)
RL1 = rlalg1(actions=list(range(env1.n_actions)))
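Since `Maze` subclasses `tk.Tk`, one plausible way to drive training is through Tk's event loop. The wiring below is only a sketch of how `env1`, `RL1`, and `update()` might be combined; the `data1` name, the 100 ms delay, and the assumption that `episodes` is already defined earlier in the script are illustrative, not part of the commit:

```python
# Hypothetical main-block wiring (not from the commit): schedule the training
# loop inside Tk's event loop so the maze window stays responsive.
data1 = {}
env1.after(100, update, env1, RL1, data1, episodes)  # calls update(env1, RL1, data1, episodes) after 100 ms
env1.mainloop()                                      # keep the Tk window alive while training runs
```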
task1.png: binary image replaced (256 KiB → 5.75 KiB)
task2.png: binary image replaced (257 KiB → 5.68 KiB)
task3.png: binary image replaced (262 KiB → 6.21 KiB)