diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..bec372b02f1c16c7e6e750b11b3abf66b6c60db2 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +*.swp +__pycache__ diff --git a/README.md b/README.md index df4eef18b4268eaf484ad626d30cdf8cb9007f02..68891b1136fdb0d967c0d9b1966bc00f4e806a36 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,8 @@ # Maze World - Assignment 2 -Assignment code for course ECE 493 T25 at the University of Waterloo in Spring 2019. -(*Code designed and created by Sriram Ganapathi Subramanian and Mark Crowley, 2019*) +Assignment code for course ECE 493 T25 at the University of Waterloo in Spring 2020. +(*Code designed and created by Sriram Ganapathi Subramanian and Mark Crowley, 2020*) -**Due Date:** July 30 11:59pm submitted as PDF and code to LEARN dropbox. +**Due Date:** TBD: submitted as PDF and code to LEARN dropbox. **Collaboration:** You can discuss solutions and help to work out the code. But each person *must do their own work*. All code and writing will be cross-checked against each other and against internet databases for cheating. 
diff --git a/maze_env.py b/maze_env.py index f7791d88c2cbad7bb43e0098e9fc2271f43ce392..e349b0cbd0b66f6f39d5dd8efef197da11db5a6f 100644 --- a/maze_env.py +++ b/maze_env.py @@ -28,7 +28,6 @@ class Maze(tk.Tk, object): self.title('maze') self.geometry('{0}x{1}'.format(MAZE_H * UNIT, MAZE_W * UNIT)) self.build_shape_maze(agentXY, goalXY, walls, pits) - #self.build_maze() def build_shape_maze(self,agentXY,goalXY, walls,pits): self.canvas = tk.Canvas(self, bg='white', @@ -92,7 +91,6 @@ class Maze(tk.Tk, object): if(value == 0): return self.canvas.coords(self.agent) else: - #Reset Agent if(resetAgent): self.canvas.delete(self.agent) self.agent = self.canvas.create_rectangle(origin[0] - 15, origin[1] - 15, @@ -108,19 +106,16 @@ class Maze(tk.Tk, object): reward = 1 done = True nextstate = 'terminal' - #elif nextstate in [self.canvas.coords(self.pit1), self.canvas.coords(self.pit2)]: elif nextstate in [self.canvas.coords(w) for w in self.wallblocks]: reward = -0.3 done = False nextstate = currstate reverse=True - #print("Wall penalty:{}".format(reward)) elif nextstate in [self.canvas.coords(w) for w in self.pitblocks]: reward = -10 done = True nextstate = 'terminal' reverse=False - #print("Wall penalty:{}".format(reward)) else: reward = -0.1 done = False @@ -146,14 +141,12 @@ class Maze(tk.Tk, object): self.canvas.move(self.agent, base_action[0], base_action[1]) # move agent s_ = self.canvas.coords(self.agent) # next state - #print("s_.coords:{}({})".format(self.canvas.coords(self.agent),type(self.canvas.coords(self.agent)))) - #print("s_:{}({})".format(s_, type(s_))) # call the reward function reward, done, reverse = self.computeReward(s, action, s_) if(reverse): self.canvas.move(self.agent, -base_action[0], -base_action[1]) # move agent back - s_ = self.canvas.coords(self.agent) + s_ = self.canvas.coords(self.agent) return s_, reward, done diff --git a/run_main.py b/run_main.py index 5df0eb4dc6418168fd0f943af4b3d4b54624709d..3a20d41ef2b525bbb76aa1232018f13c183a8f82 100644 
--- a/run_main.py +++ b/run_main.py @@ -11,9 +11,9 @@ def debug(debuglevel, msg, **kwargs): if debuglevel <= DEBUG: if 'printNow' in kwargs: if kwargs['printNow']: - print(msg) + print(msg) else: - print(msg) + print(msg) def plot_rewards(experiments): @@ -30,44 +30,39 @@ def plot_rewards(experiments): plt.ylabel("Return", fontsize=18) plt.tick_params(axis='both', which='major', labelsize=14) -# plt.axis([0, 1100, 0, 1100000]) plt.show() def update(env, RL, data, episodes=50): global_reward = np.zeros(episodes) data['global_reward']=global_reward - for episode in range(episodes): + for episode in range(episodes): t=0 # initial state if episode == 0: state = env.reset(value = 0) else: state = env.reset() - + debug(2,'state(ep:{},t:{})={}'.format(episode, t, state)) # RL choose action based on state action = RL.choose_action(str(state)) while True: # fresh env - #if(t<5000 and (showRender or (episode % renderEveryNth)==0)): if(showRender or (episode % renderEveryNth)==0): env.render(sim_speed) - # RL take action and get next state and reward state_, reward, done = env.step(action) global_reward[episode] += reward debug(2,'state(ep:{},t:{})={}'.format(episode, t, state)) debug(2,'reward_{}= total return_t ={} Mean50={}'.format(reward, global_reward[episode],np.mean(global_reward[-50:]))) - # RL learn from this transition # and determine next state and action state, action = RL.learn(str(state), action, reward, str(state_)) - # break while loop when end of this episode if done: break @@ -91,7 +86,7 @@ if __name__ == "__main__": printEveryNth=1 do_plot_rewards=True - #Exmaple Full Run, you may need to run longer + #Example Full Run, you may need to run longer #showRender=False #episodes=2000 #renderEveryNth=10000 @@ -106,21 +101,24 @@ if __name__ == "__main__": datafile = sys.argv[3] - #All Tasks + # Task Specifications + agentXY=[0,0] goalXY=[4,4] - #Task 1 - wall_shape=np.array([[7,7],[4,6]]) - pits=np.array([[6,3],[2,6]]) + # Task 1 + 
wall_shape=np.array([[2,2],[3,6]]) +    pits=np.array([[6,3],[1,4]]) - #Task 2 - #wall_shape=np.array([[5,2],[4,2],[3,2],[3,3],[3,4],[3,5],[3,6],[4,6],[5,6]]) - #pits=[] + # Task 2 + wall_shape=np.array([[6,2],[5,2],[4,2],[3,2],[2,2],[6,3],[6,4],[6,5], + [2,3],[2,4],[2,5]]) + pits=[] - #Task 3 - #wall_shape=np.array([[7,4],[7,3],[6,3],[6,2],[5,2],[4,2],[3,2],[3,3],[3,4],[3,5],[3,6],[4,6],[5,6]]) - #pits=np.array([[1,3],[0,5], [7,7]]) + # Task 3 + wall_shape=np.array([[6,3],[6,2],[5,2],[4,2],[3,2],[3,3], + [3,4],[3,5],[3,6],[4,6],[5,6],[5,7],[7,3]]) + pits=np.array([[1,3],[0,5], [7,7], [8,5]]) env1 = Maze(agentXY,goalXY,wall_shape, pits) RL1 = rlalg1(actions=list(range(env1.n_actions))) diff --git a/task1.png b/task1.png index 96a9bea77e26fe9dde7521f0a256b8c15ccc18fb..b8183ee0cae8f207ef60fa440c1ab6ef0192ad83 100644 Binary files a/task1.png and b/task1.png differ diff --git a/task2.png b/task2.png index 32eefec8c7fd180f577fcf8fb2685a1f7b26f114..53946251800bb545e6bca8eba686775f7d856fdb 100644 Binary files a/task2.png and b/task2.png differ diff --git a/task3.png b/task3.png index 106e40d32a7c29c368fbd8172416961fe9b68557..532bbe2ee799d57874a20b45c86c1bca8ee4b60d 100644 Binary files a/task3.png and b/task3.png differ