Commit 1c5c0e18 authored by Shamak Dutta

initial changes

parent 66a34752
*.swp
__pycache__
# Maze World - Assignment 2
-Assignment code for course ECE 493 T25 at the University of Waterloo in Spring 2019.
+Assignment code for course ECE 493 T25 at the University of Waterloo in Spring 2020.
-(*Code designed and created by Sriram Ganapathi Subramanian and Mark Crowley, 2019*)
+(*Code designed and created by Sriram Ganapathi Subramanian and Mark Crowley, 2020*)
-**Due Date:** July 30 11:59pm submitted as PDF and code to LEARN dropbox.
+**Due Date:** TBD: submitted as PDF and code to LEARN dropbox.
**Collaboration:** You can discuss solutions and help to work out the code. But each person *must do their own work*. All code and writing will be cross-checked against each other and against internet databases for cheating.
...
@@ -28,7 +28,6 @@ class Maze(tk.Tk, object):
        self.title('maze')
        self.geometry('{0}x{1}'.format(MAZE_H * UNIT, MAZE_W * UNIT))
        self.build_shape_maze(agentXY, goalXY, walls, pits)
-        #self.build_maze()

    def build_shape_maze(self,agentXY,goalXY, walls,pits):
        self.canvas = tk.Canvas(self, bg='white',
@@ -92,7 +91,6 @@ class Maze(tk.Tk, object):
        if(value == 0):
            return self.canvas.coords(self.agent)
        else:
-            #Reset Agent
            if(resetAgent):
                self.canvas.delete(self.agent)
                self.agent = self.canvas.create_rectangle(origin[0] - 15, origin[1] - 15,
@@ -108,19 +106,16 @@ class Maze(tk.Tk, object):
            reward = 1
            done = True
            nextstate = 'terminal'
-        #elif nextstate in [self.canvas.coords(self.pit1), self.canvas.coords(self.pit2)]:
        elif nextstate in [self.canvas.coords(w) for w in self.wallblocks]:
            reward = -0.3
            done = False
            nextstate = currstate
            reverse=True
-            #print("Wall penalty:{}".format(reward))
        elif nextstate in [self.canvas.coords(w) for w in self.pitblocks]:
            reward = -10
            done = True
            nextstate = 'terminal'
            reverse=False
-            #print("Wall penalty:{}".format(reward))
        else:
            reward = -0.1
            done = False
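For quick reference, the reward scheme kept by this hunk is: +1 for reaching the goal (episode ends), -0.3 for walking into a wall (the move is reversed), -10 for stepping into a pit (episode ends), and -0.1 for every other step. A minimal sketch of the same branching, where goal_cells, wall_cells and pit_cells are hypothetical stand-ins for the canvas-coordinate lookups in the real environment:

```python
# Sketch only: mirrors the branching in computeReward above, returning
# (reward, done, reverse, nextstate). Not the assignment's API.
def compute_reward_sketch(currstate, nextstate, goal_cells, wall_cells, pit_cells):
    if nextstate in goal_cells:        # reached the goal: terminal, reward +1
        return 1.0, True, False, 'terminal'
    if nextstate in wall_cells:        # hit a wall: small penalty, undo the move
        return -0.3, False, True, currstate
    if nextstate in pit_cells:         # fell into a pit: large penalty, episode ends
        return -10.0, True, False, 'terminal'
    return -0.1, False, False, nextstate   # ordinary step cost
```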
@@ -146,14 +141,12 @@ class Maze(tk.Tk, object):
        self.canvas.move(self.agent, base_action[0], base_action[1])  # move agent
        s_ = self.canvas.coords(self.agent)  # next state
-        #print("s_.coords:{}({})".format(self.canvas.coords(self.agent),type(self.canvas.coords(self.agent))))
-        #print("s_:{}({})".format(s_, type(s_)))

        # call the reward function
        reward, done, reverse = self.computeReward(s, action, s_)
        if(reverse):
            self.canvas.move(self.agent, -base_action[0], -base_action[1])  # move agent back
            s_ = self.canvas.coords(self.agent)

        return s_, reward, done
...
@@ -11,9 +11,9 @@ def debug(debuglevel, msg, **kwargs):
    if debuglevel <= DEBUG:
        if 'printNow' in kwargs:
            if kwargs['printNow']:
                print(msg)
        else:
            print(msg)
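The debug helper above prints a message only when its level is within the module-level DEBUG threshold, and an explicit printNow=False suppresses it even then. A small self-contained usage sketch (the DEBUG value here is an assumption for illustration):

```python
DEBUG = 1  # assumed module-level verbosity threshold, as referenced above

def debug(debuglevel, msg, **kwargs):
    # Same behaviour as the helper in the hunk above.
    if debuglevel <= DEBUG:
        if 'printNow' in kwargs:
            if kwargs['printNow']:
                print(msg)
        else:
            print(msg)

debug(1, "shown: level 1 <= DEBUG")                 # printed
debug(2, "hidden: level 2 > DEBUG")                 # skipped
debug(1, "suppressed explicitly", printNow=False)   # skipped
```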

def plot_rewards(experiments):
@@ -30,44 +30,39 @@ def plot_rewards(experiments):
    plt.ylabel("Return", fontsize=18)
    plt.tick_params(axis='both', which='major',
                    labelsize=14)
-    # plt.axis([0, 1100, 0, 1100000])
    plt.show()

def update(env, RL, data, episodes=50):
    global_reward = np.zeros(episodes)
    data['global_reward']=global_reward

    for episode in range(episodes):
        t=0
        # initial state
        if episode == 0:
            state = env.reset(value = 0)
        else:
            state = env.reset()
        debug(2,'state(ep:{},t:{})={}'.format(episode, t, state))

        # RL choose action based on state
        action = RL.choose_action(str(state))

        while True:
            # fresh env
-            #if(t<5000 and (showRender or (episode % renderEveryNth)==0)):
            if(showRender or (episode % renderEveryNth)==0):
                env.render(sim_speed)

            # RL take action and get next state and reward
            state_, reward, done = env.step(action)
            global_reward[episode] += reward
            debug(2,'state(ep:{},t:{})={}'.format(episode, t, state))
            debug(2,'reward_{}= total return_t ={} Mean50={}'.format(reward, global_reward[episode],np.mean(global_reward[-50:])))

            # RL learn from this transition
            # and determine next state and action
            state, action = RL.learn(str(state), action, reward, str(state_))

            # break while loop when end of this episode
            if done:
                break
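The update() loop above only assumes two methods on the RL agent object: choose_action(state_string) returning an action index, and learn(s, a, r, s_) returning the (state, action) pair to continue from. A minimal tabular Q-learning sketch of that interface is shown below; it is illustrative only (not the rlalg1 the assignment expects), and the lr/gamma/epsilon values are assumptions:

```python
import numpy as np
from collections import defaultdict

class QLearningSketch:
    """Illustrative agent matching the interface used by update():
    choose_action(state_str) -> action, learn(s, a, r, s_) -> (next state, next action)."""

    def __init__(self, actions, lr=0.1, gamma=0.9, epsilon=0.1):
        self.actions = list(actions)            # e.g. list(range(env.n_actions))
        self.lr, self.gamma, self.epsilon = lr, gamma, epsilon
        self.q = defaultdict(lambda: np.zeros(len(self.actions)))  # state_str -> Q-values

    def choose_action(self, s):
        if np.random.uniform() < self.epsilon:                  # epsilon-greedy exploration
            return int(np.random.choice(self.actions))
        qs = self.q[s]
        return int(np.random.choice(np.flatnonzero(qs == qs.max())))  # greedy, ties broken randomly

    def learn(self, s, a, r, s_):
        # Q-learning target: bootstrap from the best next action unless terminal.
        target = r if s_ == 'terminal' else r + self.gamma * self.q[s_].max()
        self.q[s][a] += self.lr * (target - self.q[s][a])
        return s_, self.choose_action(s_)                       # hand back next state/action
```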
@@ -91,7 +86,7 @@ if __name__ == "__main__":
    printEveryNth=1
    do_plot_rewards=True

-    #Exmaple Full Run, you may need to run longer
+    #Example Full Run, you may need to run longer
    #showRender=False
    #episodes=2000
    #renderEveryNth=10000
@@ -106,21 +101,24 @@ if __name__ == "__main__":
    datafile = sys.argv[3]

-    #All Tasks
+    # Task Specifications
    agentXY=[0,0]
    goalXY=[4,4]

-    #Task 1
+    # Task 1
-    wall_shape=np.array([[7,7],[4,6]])
+    wall_shape=np.array([[2,2],[3,6]])
-    pits=np.array([[6,3],[2,6]])
+    pits=np.array([[6,3],[1,4]])

-    #Task 2
+    # Task 2
-    #wall_shape=np.array([[5,2],[4,2],[3,2],[3,3],[3,4],[3,5],[3,6],[4,6],[5,6]])
+    wall_shape=np.array([[6,2],[5,2],[4,2],[3,2],[2,2],[6,3],[6,4],[6,5],
+                         [2,3],[2,4],[2,5]])
-    #pits=[]
+    pits=[]

-    #Task 3
+    # Task 3
-    #wall_shape=np.array([[7,4],[7,3],[6,3],[6,2],[5,2],[4,2],[3,2],[3,3],[3,4],[3,5],[3,6],[4,6],[5,6]])
+    wall_shape=np.array([[6,3],[6,3],[6,2],[5,2],[4,2],[3,2],[3,3],
+                         [3,4],[3,5],[3,6],[4,6],[5,6],[5,7],[7,3]])
-    #pits=np.array([[1,3],[0,5], [7,7]])
+    pits=np.array([[1,3],[0,5], [7,7], [8,5]])

    env1 = Maze(agentXY,goalXY,wall_shape, pits)
    RL1 = rlalg1(actions=list(range(env1.n_actions)))
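Note that the three task blocks above all rebind the same wall_shape and pits names, so only the last assignment (Task 3) actually reaches Maze(). A purely illustrative alternative (not part of the assignment code; the TASKS dict and task_id selector are hypothetical) is to keep the layouts together and pick one:

```python
import numpy as np

# Hypothetical layout table collecting the three task specifications above.
TASKS = {
    1: {'walls': np.array([[2,2],[3,6]]),
        'pits':  np.array([[6,3],[1,4]])},
    2: {'walls': np.array([[6,2],[5,2],[4,2],[3,2],[2,2],[6,3],[6,4],[6,5],
                           [2,3],[2,4],[2,5]]),
        'pits':  []},
    3: {'walls': np.array([[6,3],[6,3],[6,2],[5,2],[4,2],[3,2],[3,3],
                           [3,4],[3,5],[3,6],[4,6],[5,6],[5,7],[7,3]]),
        'pits':  np.array([[1,3],[0,5],[7,7],[8,5]])},
}

task_id = 1  # hypothetical selector; could be read from sys.argv instead
wall_shape, pits = TASKS[task_id]['walls'], TASKS[task_id]['pits']
env1 = Maze(agentXY, goalXY, wall_shape, pits)   # agentXY/goalXY as defined above
```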
...
task1.png replaced (256 KiB → 5.75 KiB)
task2.png replaced (257 KiB → 5.68 KiB)
task3.png replaced (262 KiB → 6.21 KiB)