Commit 1c5c0e18 authored by Shamak Dutta

initial changes

parent 66a34752
*.swp
__pycache__
# Maze World - Assignment 2
- Assignment code for course ECE 493 T25 at the University of Waterloo in Spring 2019.
- (*Code designed and created by Sriram Ganapathi Subramanian and Mark Crowley, 2019*)
+ Assignment code for course ECE 493 T25 at the University of Waterloo in Spring 2020.
+ (*Code designed and created by Sriram Ganapathi Subramanian and Mark Crowley, 2020*)
- **Due Date:** July 30 11:59pm submitted as PDF and code to LEARN dropbox.
+ **Due Date:** TBD: submitted as PDF and code to LEARN dropbox.
**Collaboration:** You can discuss solutions and help to work out the code. But each person *must do their own work*. All code and writing will be cross-checked against each other and against internet databases for cheating.
@@ -28,7 +28,6 @@ class Maze(tk.Tk, object):
self.title('maze')
self.geometry('{0}x{1}'.format(MAZE_H * UNIT, MAZE_W * UNIT))
self.build_shape_maze(agentXY, goalXY, walls, pits)
- #self.build_maze()
def build_shape_maze(self,agentXY,goalXY, walls,pits):
self.canvas = tk.Canvas(self, bg='white',
@@ -92,7 +91,6 @@ class Maze(tk.Tk, object):
if(value == 0):
return self.canvas.coords(self.agent)
else:
- #Reset Agent
if(resetAgent):
self.canvas.delete(self.agent)
self.agent = self.canvas.create_rectangle(origin[0] - 15, origin[1] - 15,
@@ -108,19 +106,16 @@ class Maze(tk.Tk, object):
reward = 1
done = True
nextstate = 'terminal'
- #elif nextstate in [self.canvas.coords(self.pit1), self.canvas.coords(self.pit2)]:
elif nextstate in [self.canvas.coords(w) for w in self.wallblocks]:
reward = -0.3
done = False
nextstate = currstate
reverse=True
#print("Wall penalty:{}".format(reward))
elif nextstate in [self.canvas.coords(w) for w in self.pitblocks]:
reward = -10
done = True
nextstate = 'terminal'
reverse=False
#print("Wall penalty:{}".format(reward))
else:
reward = -0.1
done = False
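For readers skimming this hunk, the reward logic it shows boils down to four outcomes. The summary below is only an illustration of that scheme; the `REWARD_SCHEME` name and dict layout are not part of the assignment code:

```python
# Illustrative summary of the computeReward() branches shown above:
# outcome -> (reward, episode done, bounce the agent back to its previous cell)
REWARD_SCHEME = {
    'reached goal':  ( 1.0,  True,  False),   # nextstate becomes 'terminal'
    'hit a wall':    (-0.3,  False, True),    # reverse=True undoes the move
    'fell into pit': (-10.0, True,  False),   # nextstate becomes 'terminal'
    'ordinary step': (-0.1,  False, False),   # small per-step penalty
}
```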
@@ -146,14 +141,12 @@ class Maze(tk.Tk, object):
self.canvas.move(self.agent, base_action[0], base_action[1]) # move agent
s_ = self.canvas.coords(self.agent) # next state
#print("s_.coords:{}({})".format(self.canvas.coords(self.agent),type(self.canvas.coords(self.agent))))
#print("s_:{}({})".format(s_, type(s_)))
# call the reward function
reward, done, reverse = self.computeReward(s, action, s_)
if(reverse):
self.canvas.move(self.agent, -base_action[0], -base_action[1]) # move agent back
s_ = self.canvas.coords(self.agent)
s_ = self.canvas.coords(self.agent)
return s_, reward, done
@@ -11,9 +11,9 @@ def debug(debuglevel, msg, **kwargs):
if debuglevel <= DEBUG:
if 'printNow' in kwargs:
if kwargs['printNow']:
- print(msg)
+ print(msg)
else:
- print(msg)
+ print(msg)
def plot_rewards(experiments):
@@ -30,44 +30,39 @@ def plot_rewards(experiments):
plt.ylabel("Return", fontsize=18)
plt.tick_params(axis='both', which='major',
labelsize=14)
- # plt.axis([0, 1100, 0, 1100000])
plt.show()
def update(env, RL, data, episodes=50):
global_reward = np.zeros(episodes)
data['global_reward']=global_reward
- for episode in range(episodes):
+ for episode in range(episodes):
t=0
# initial state
if episode == 0:
state = env.reset(value = 0)
else:
state = env.reset()
debug(2,'state(ep:{},t:{})={}'.format(episode, t, state))
# RL choose action based on state
action = RL.choose_action(str(state))
while True:
# fresh env
- #if(t<5000 and (showRender or (episode % renderEveryNth)==0)):
if(showRender or (episode % renderEveryNth)==0):
env.render(sim_speed)
# RL take action and get next state and reward
state_, reward, done = env.step(action)
global_reward[episode] += reward
debug(2,'state(ep:{},t:{})={}'.format(episode, t, state))
debug(2,'reward_{}= total return_t ={} Mean50={}'.format(reward, global_reward[episode],np.mean(global_reward[-50:])))
# RL learn from this transition
# and determine next state and action
state, action = RL.learn(str(state), action, reward, str(state_))
# break while loop when end of this episode
if done:
break
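The training loop above only relies on an agent object that exposes `choose_action(state)` and a `learn(s, a, r, s_)` method returning the next state and next action; the concrete algorithm behind `rlalg1` is what the assignment asks you to implement. Purely as a sketch of that interface, and not as the assignment's solution, a minimal tabular Q-learning agent could look like this (the class name and hyperparameter values are illustrative assumptions):

```python
import numpy as np

class QLearningSketch:
    """Illustrative agent matching the choose_action/learn interface used by update()."""
    def __init__(self, actions, alpha=0.1, gamma=0.9, epsilon=0.1):
        self.actions = actions                      # e.g. list(range(env.n_actions))
        self.alpha, self.gamma, self.epsilon = alpha, gamma, epsilon
        self.q = {}                                 # state string -> array of action values

    def _ensure(self, s):
        if s not in self.q:
            self.q[s] = np.zeros(len(self.actions))

    def choose_action(self, s):
        self._ensure(s)
        if np.random.rand() < self.epsilon:         # explore
            return int(np.random.choice(self.actions))
        return int(np.argmax(self.q[s]))            # exploit

    def learn(self, s, a, r, s_):
        self._ensure(s)
        if s_ != 'terminal':
            self._ensure(s_)
            target = r + self.gamma * np.max(self.q[s_])
        else:
            target = r                              # terminal transition: no bootstrapping
        self.q[s][a] += self.alpha * (target - self.q[s][a])
        # update() expects the next state and the next action back
        next_action = self.choose_action(s_) if s_ != 'terminal' else a
        return s_, next_action
```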
@@ -91,7 +86,7 @@ if __name__ == "__main__":
printEveryNth=1
do_plot_rewards=True
- #Exmaple Full Run, you may need to run longer
+ #Example Full Run, you may need to run longer
#showRender=False
#episodes=2000
#renderEveryNth=10000
@@ -106,21 +101,24 @@ if __name__ == "__main__":
datafile = sys.argv[3]
- #All Tasks
+ # Task Specifications
agentXY=[0,0]
goalXY=[4,4]
- #Task 1
- wall_shape=np.array([[7,7],[4,6]])
- pits=np.array([[6,3],[2,6]])
+ # Task 1
+ wall_shape=np.array([[2,2],[3,6]])
+ pits=np.array([[6,3],[1,4]])
- #Task 2
- #wall_shape=np.array([[5,2],[4,2],[3,2],[3,3],[3,4],[3,5],[3,6],[4,6],[5,6]])
- #pits=[]
+ # Task 2
+ wall_shape=np.array([[6,2],[5,2],[4,2],[3,2],[2,2],[6,3],[6,4],[6,5],
+ [2,3],[2,4],[2,5]])
+ pits=[]
- #Task 3
- #wall_shape=np.array([[7,4],[7,3],[6,3],[6,2],[5,2],[4,2],[3,2],[3,3],[3,4],[3,5],[3,6],[4,6],[5,6]])
- #pits=np.array([[1,3],[0,5], [7,7]])
+ # Task 3
+ wall_shape=np.array([[6,3],[6,3],[6,2],[5,2],[4,2],[3,2],[3,3],
+ [3,4],[3,5],[3,6],[4,6],[5,6],[5,7],[7,3]])
+ pits=np.array([[1,3],[0,5], [7,7], [8,5]])
env1 = Maze(agentXY,goalXY,wall_shape, pits)
RL1 = rlalg1(actions=list(range(env1.n_actions)))
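Since `Maze` subclasses `tk.Tk`, one plausible way to drive training is through Tk's event loop. The wiring below is only a sketch of how `env1`, `RL1`, and `update()` might be combined; the `data1` name, the 100 ms delay, and the assumption that `episodes` is already defined earlier in the script are illustrative, not part of the commit:

```python
# Hypothetical main-block wiring (not from the commit): schedule the training
# loop inside Tk's event loop so the maze window stays responsive.
data1 = {}
env1.after(100, update, env1, RL1, data1, episodes)  # calls update(env1, RL1, data1, episodes) after 100 ms
env1.mainloop()                                      # keep the Tk window alive while training runs
```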
task1.png: binary image replaced (256 KiB → 5.75 KiB)
task2.png: binary image replaced (257 KiB → 5.68 KiB)
task3.png: binary image replaced (262 KiB → 6.21 KiB)