wise-lab / wise-move · Commits

Commit 7dc47bce
authored Jan 25, 2019 by Jae Young Lee
Fix the bug of sometimes getting -100000 or 10000 rewards.
parent 4a9327bd

Showing 9 changed files with 75 additions and 89 deletions (+75 −89)
backends/controller_base.py                          +3   −5
backends/kerasrl_learner.py                          +10  −13
config.json                                          +1   −1
env/env_base.py                                      +1   −1
env/simple_intersection/road_geokinemetry.py         +1   −1
env/simple_intersection/simple_intersection_env.py   +3   −1
options/options_loader.py                            +0   −5
options/simple_intersection/maneuver_base.py         +2   −2
options/simple_intersection/maneuvers.py             +54  −60
backends/controller_base.py

@@ -65,7 +65,7 @@ class ControllerBase(PolicyBase):
             total_reward += self.current_node.high_level_extra_reward  # TODO for info
-            return observation, total_reward, self.env.termination_condition, info
+            return observation, total_reward, terminal, info

    # TODO: Looks generic. Move to an intermediate class/highlevel manager so that base class can be clean
    ''' Executes one step of current node. Sets node_terminal_state_reached flag if node termination condition
...
@@ -76,9 +76,7 @@ class ControllerBase(PolicyBase):
     def low_level_step_current_node(self):
         u_ego = self.current_node.low_level_policy(self.current_node.get_reduced_features_tuple())
         feature, R, terminal, info = self.current_node.step(u_ego)
         self.node_terminal_state_reached = terminal
-        return self.env.get_features_tuple(
-        ), R, self.env.termination_condition, info
+        return self.env.get_features_tuple(), R, self.env.termination_condition, info
backends/kerasrl_learner.py

@@ -317,6 +317,7 @@ class DQNLearner(LearnerBase):
             nb_steps_warmup=self.nb_steps_warmup,
             target_model_update=self.target_model_update,
             policy=policy,
             test_policy=test_policy,
+            enable_dueling_network=True)

         agent.compile(Adam(lr=self.lr), metrics=['mae'])
...
...
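For orientation, below is a minimal keras-rl construction sketch with the dueling head enabled, mirroring the call in the hunk above. The model, memory sizes, and action count are placeholders for illustration, not values taken from this repository.

from keras.models import Sequential
from keras.layers import Flatten, Dense
from keras.optimizers import Adam
from rl.agents.dqn import DQNAgent
from rl.memory import SequentialMemory
from rl.policy import EpsGreedyQPolicy, GreedyQPolicy

nb_actions = 5  # placeholder: number of high-level options
model = Sequential([Flatten(input_shape=(1, 10)),
                    Dense(64, activation='relu'),
                    Dense(nb_actions, activation='linear')])

agent = DQNAgent(model=model, nb_actions=nb_actions,
                 memory=SequentialMemory(limit=100000, window_length=1),
                 nb_steps_warmup=1000, target_model_update=1e-2,
                 policy=EpsGreedyQPolicy(), test_policy=GreedyQPolicy(),
                 enable_dueling_network=True)  # the flag this hunk turns on
agent.compile(Adam(lr=1e-3), metrics=['mae'])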
@@ -327,6 +328,8 @@ class DQNLearner(LearnerBase):
               env,
               nb_steps=1000000,
               visualize=False,
+              verbose=1,
+              log_interval=10000,
               nb_max_episode_steps=200,
               tensorboard=False,
               model_checkpoints=False,
...
@@ -346,7 +349,8 @@ class DQNLearner(LearnerBase):
             env,
             nb_steps=nb_steps,
             visualize=visualize,
-            verbose=1,
+            verbose=verbose,
+            log_interval=log_interval,
             nb_max_episode_steps=nb_max_episode_steps,
             callbacks=callbacks)
...
@@ -479,21 +483,13 @@ class RestrictedGreedyQPolicy(GreedyQPolicy):
             Selection action
         """
         assert q_values.ndim == 1
         nb_actions = q_values.shape[0]
-        restricted_q_values = list()
-
-        for i in range(0, nb_actions):
-            if q_values[i] != -np.inf:
-                restricted_q_values.append(q_values[i])
-
-        # every q_value is -np.inf (this sometimes inevitably happens within the fit and test functions
-        # of kerasrl at the terminal stage as they force to call forward in Kerasrl-learner which calls this function.
-        # In this case, we choose a policy randomly.
-        if len(restricted_q_values) < 1:
-            action = np.random.random_integers(0, nb_actions - 1)
+        if np.max(q_values) == -np.inf:
+            # every q_value is -np.inf, we choose action = 0
+            action = 0
         else:
-            action = np.argmax(restricted_q_values)
+            action = np.argmax(q_values)

         return action
...
...
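As a side note, here is a small standalone sketch (not part of the commit) of the new selection rule in RestrictedGreedyQPolicy: invalid options are assumed to be masked to -np.inf, argmax runs over the raw Q-vector, and the all-minus-infinity corner case that keras-rl can hit at the terminal stage falls back to action 0 instead of a random index. The old random fallback could return an index whose Q-value was -np.inf, which appears to be related to the spurious -100000/10000 rewards the commit message mentions.

import numpy as np

def select_action_after_fix(q_values):
    # Sketch of the post-commit behaviour: invalid options are masked to -inf,
    # argmax runs over the raw vector, and the all -inf corner case maps to action 0.
    assert q_values.ndim == 1
    if np.max(q_values) == -np.inf:
        return 0
    return int(np.argmax(q_values))

print(select_action_after_fix(np.array([-np.inf, 0.3, -np.inf])))  # -> 1
print(select_action_after_fix(np.full(4, -np.inf)))                # -> 0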
@@ -521,6 +517,7 @@ class DQNAgentOverOptions(DQNAgent):
     def __get_invalid_node_indices(self):
         """Returns a list of option indices that are invalid according to
         initiation conditions."""
         invalid_node_indices = list()
         for index, option_alias in enumerate(self.low_level_policy_aliases):
             # TODO: Locate reset_maneuver to another place as this is a "get" function.
config.json

@@ -3,7 +3,7 @@
        "wait": "Wait",
        "follow": "Follow",
        "stop": "Stop",
-       "immediatestop": "ImmediateStop",
+       "changelane": "ChangeLane",
        "keeplane": "KeepLane"
    },
env/env_base.py

@@ -157,7 +157,7 @@ class EpisodicEnvBase(GymCompliantEnvBase):
         if self._terminate_in_goal and self.goal_achieved:
             return True

-        return self.violation_happened and self._LTL_preconditions_enable
+        return self.violation_happened

     @property
     def goal_achieved(self):
env/simple_intersection/road_geokinemetry.py

@@ -93,7 +93,7 @@ vlanes = Route(
     [-vwidth - 5.0 - intersection_voffset, -vwidth - intersection_voffset], 35,
     [-4.0, 4.0])

 intersection_width = vlanes.n_lanes * vlanes.width
 intersection_height = hlanes.n_lanes * hlanes.width

 intersection_width_w_offset = intersection_width + 2 * intersection_hoffset
env/simple_intersection/simple_intersection_env.py

@@ -51,7 +51,9 @@ class SimpleIntersectionEnv(RoadEnv, EpisodicEnvBase):
     #: The weight vector to calculate the cost. In the maneuver, cost_weights
     # can be set to a specific value which may be different than the default.
-    cost_weights = (1.0, 0.25, 0.1, 1.0, 100.0, 0.1, 0.25, 0.1)
+    # TODO: check _cost_weights in both here and ManeuverBase. The _cost_weights has to be substituted to here, but it doesn't sometimes.
+    _cost_weights = (10.0 * 1e-3, 10.0 * 1e-3, 0.25 * 1e-3, 1.0 * 1e-3,
+                     100.0 * 1e-3, 0.1 * 1e-3, 0.05 * 1e-3, 0.1 * 1e-3)

     #TODO: Move this to constants
     # The empirical min and max of each term in the cost vector, which is used to normalize the values
options/options_loader.py

@@ -180,11 +180,6 @@ class OptionsGraph:
                 print("\nWarning: the trained low-level policy of \"" + key +
                       "\" does not exists; the manual policy will be used.\n")

-        # setting the cost weights for high-level policy training.
-        # TODO: this shouldn't be initialized here, but within ManeuverBase class (e.g. make some flag indicating high-level training and set the weights)...
-        maneuver._cost_weights = (100.0 * 1e-3, 10.0 * 1e-3, 0.25 * 1e-3, 1.0 * 1e-3,
-                                  100.0 * 1e-3, 0.1 * 1e-3, 0.25 * 1e-3, 0.1 * 1e-3)
-
         if self.config["method"] == "mcts":
             maneuver.timeout = np.inf
options/simple_intersection/maneuver_base.py

@@ -30,7 +30,7 @@ class ManeuverBase(EpisodicEnvBase):
     # as a negative reward, so a cost will be summed up to the reward
     # with subtraction.
     _cost_weights = (10.0 * 1e-3, 10.0 * 1e-3, 0.25 * 1e-3, 1.0 * 1e-3,
-                     100.0 * 1e-3, 0.1 * 1e-3, 0.25 * 1e-3, 0.1 * 1e-3)
+                     100.0 * 1e-3, 0.1 * 1e-3, 0.05 * 1e-3, 0.1 * 1e-3)

     _extra_r_terminal = None
     _extra_r_on_timeout = None
...
@@ -38,7 +38,7 @@ class ManeuverBase(EpisodicEnvBase):
     #: the flag being False when _cost_weights is used without
     # modification; If True, then the action parts of _cost_weights
     # are increased for some edge cases (see the step method).
-    _extra_action_weights_flag = True
+    _extra_action_weights_flag = False

     #: the extra weights on the actions added to _cost_weights
     # for some edge cases when _extra_action_weights_flag = True.
...
...
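The comment in the first hunk above says each weighted cost term enters the reward with a negative sign. A toy illustration of that bookkeeping follows, assuming a zero base reward for the step; only the weight tuple comes from the diff, the cost magnitudes are invented.

# Toy example only: weights from the new _cost_weights, cost magnitudes made up.
cost_weights = (10.0 * 1e-3, 10.0 * 1e-3, 0.25 * 1e-3, 1.0 * 1e-3,
                100.0 * 1e-3, 0.1 * 1e-3, 0.05 * 1e-3, 0.1 * 1e-3)
cost_terms = (2.0, 1.0, 0.5, 0.0, 0.1, 3.0, 0.2, 0.4)  # hypothetical per-step costs

weighted_cost = sum(w * c for w, c in zip(cost_weights, cost_terms))
reward = -weighted_cost  # the weighted cost is subtracted from the step reward
print(round(reward, 6))  # approximately -0.040475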
options/simple_intersection/maneuvers.py

@@ -15,11 +15,10 @@ class KeepLane(ManeuverBase):
         self._target_lane = self.env.ego.APs['lane']

     def _init_LTL_preconditions(self):
         self._LTL_preconditions.append(
             LTLProperty("G ( not veh_ahead )", 0))

-        # self._LTL_preconditions.append(
-        #     LTLProperty("G ( not stopped_now )", 100,
-        #                 self._enable_low_level_training_properties))
+        self._LTL_preconditions.append(
+            LTLProperty("G ( not stopped_now )", 100,
+                        self._enable_low_level_training_properties))

         self._LTL_preconditions.append(
             LTLProperty(
...
@@ -37,6 +36,7 @@ class KeepLane(ManeuverBase):
         self.env._terminate_in_goal = False
         self.env._reward_in_goal = None
         self._enable_low_level_training_properties = True
+        self._extra_action_weights_flag = True

     def generate_validation_scenario(self):
         self.generate_scenario(
...
@@ -65,7 +65,7 @@ class KeepLane(ManeuverBase):
         return False


-class ImmediateStop(ManeuverBase):
+class Halt(ManeuverBase):

     _terminate_in_goal = True
     _reward_in_goal = None
...
@@ -102,6 +102,7 @@ class ImmediateStop(ManeuverBase):
         self.env._reward_in_goal = None
         self._reward_in_goal = 200
         self._enable_low_level_training_properties = True
+        self._extra_action_weights_flag = True

     def generate_validation_scenario(self):
         self._ego_pos_range = (rd.hlanes.start_pos, rd.hlanes.end_pos)
...
@@ -147,14 +148,11 @@ class Stop(ManeuverBase):
         self._target_lane = self.env.ego.APs['lane']

     def _init_LTL_preconditions(self):
-        #self._LTL_preconditions.append(
-        #    LTLProperty("G ( not has_stopped_in_stop_region )",
-        #                self._penalty(self._reward_in_goal)))
-
         self._LTL_preconditions.append(
-            LTLProperty("G ( not has_stopped_in_stop_region )", -150,
-                        not self._enable_low_level_training_properties))
+            LTLProperty("G ( not has_stopped_in_stop_region )",
+                        self._penalty(self._reward_in_goal),
+                        not self._enable_low_level_training_properties))

         # before_intersection rather than "before_but_close_to_stop_region or in_stop_region"?
         self._LTL_preconditions.append(
             LTLProperty(
                 "G ( (before_but_close_to_stop_region or in_stop_region) U has_stopped_in_stop_region )",
...
@@ -191,6 +189,7 @@ class Stop(ManeuverBase):
         self._reward_in_goal = 200
         self._penalty_in_violation = 150
         self._enable_low_level_training_properties = True
+        self._extra_action_weights_flag = True

     def _low_level_manual_policy(self):
         return self.env.aggressive_driving_policy(EGO_INDEX)
...
@@ -234,27 +233,21 @@ class Wait(ManeuverBase):
     def _init_LTL_preconditions(self):

         self._LTL_preconditions.append(
             LTLProperty(
                 "G ( (in_stop_region and stopped_now) and not (highest_priority and intersection_is_clear))",
-                None,
+                0,
                 not self._enable_low_level_training_properties))  # not available in low-level training...

-        #LTLProperty(
-        #    "G ( (in_stop_region and stopped_now) U (highest_priority and intersection_is_clear))", 0,
-        #    not self._enable_low_level_training_properties)) # not available in low-level training...

-        #self._LTL_preconditions.append(
-        #    LTLProperty("G ( not (in_intersection and highest_priority) )",
-        #                self._penalty(self._reward_in_goal)))
+        self._LTL_preconditions.append(
+            LTLProperty("G ( not (in_intersection and highest_priority) )",
+                        self._penalty(self._reward_in_goal)))

-        # self._LTL_preconditions.append(
-        #     LTLProperty(
-        #         "G ( in_stop_region U (highest_priority and intersection_is_clear) )", 150, self._enable_low_level_training_properties))
+        self._LTL_preconditions.append(
+            LTLProperty(
+                "G ( in_stop_region U (highest_priority and intersection_is_clear) )",
+                150, self._enable_low_level_training_properties))

-        # self._LTL_preconditions.append(
-        #     LTLProperty(
-        #         "G ( (lane and target_lane) or (not lane and not target_lane) )",
-        #         100, self._enable_low_level_training_properties))
+        self._LTL_preconditions.append(
+            LTLProperty(
+                "G ( (lane and target_lane) or (not lane and not target_lane) )",
+                100, self._enable_low_level_training_properties))

     def _init_param(self):
         self._v_ref = 0  #if self._enable_low_level_training_properties else rd.speed_limit
...
@@ -324,7 +317,7 @@ class ChangeLane(ManeuverBase):
     _violation_penalty_in_low_level_training = None

-    # high_level_extra_reward = -1000000
+    high_level_extra_reward = -50

     def _init_param(self):
         self._v_ref = rd.speed_limit
...
@@ -332,13 +325,17 @@ class ChangeLane(ManeuverBase):
     def _init_LTL_preconditions(self):
         self._LTL_preconditions.append(
             LTLProperty("G ( on_route and not over_speed_limit )",
                         self._violation_penalty_in_low_level_training,
                         self._enable_low_level_training_properties))

         self._LTL_preconditions.append(
             LTLProperty("G ( not stopped_now )", 100,
                         self._enable_low_level_training_properties))

+        self._LTL_preconditions.append(
+            LTLProperty("G ( not in_intersection and not in_stop_region )", None,
+                        not self._enable_low_level_training_properties))  # activated only for the high-level case.

     @property
     def goal_achieved(self):
...
@@ -346,9 +343,9 @@ class ChangeLane(ManeuverBase):
         APs = self.env.ego.APs
         on_other_lane = APs['lane'] == self._target_lane
         achieved_y_displacement = np.sign(ego.y) * \
             (ego.y - rd.hlanes.centres[APs['target_lane']]) >= - self.min_y_distance
         return on_other_lane and APs['on_route'] and \
                achieved_y_displacement and APs['parallel_to_lane']

     def _low_level_manual_policy(self):
         return self.env.aggressive_driving_policy(EGO_INDEX)
...
@@ -366,6 +363,7 @@ class ChangeLane(ManeuverBase):
         self._reward_in_goal = 200
         self._violation_penalty_in_low_level_training = 150
         self._enable_low_level_training_properties = True
+        self._extra_action_weights_flag = True

     def generate_validation_scenario(self):
         self.generate_scenario(
...
@@ -396,20 +394,17 @@ class Follow(ManeuverBase):
             LTLProperty("G ( veh_ahead )", self._penalty_for_out_of_range))

         self._LTL_preconditions.append(
             LTLProperty(
                 "G ( (lane and target_lane) or (not lane and not target_lane) )",
                 self._penalty_for_change_lane))

         # self._LTL_preconditions.append(
         #    LTLProperty("G ( not stopped_now U veh_ahead_stopped_now)", 200,
         #                 self._enable_low_level_training_properties))

         self._LTL_preconditions.append(
             LTLProperty("G ( not veh_ahead_too_close )",
                         self._penalty_for_out_of_range,
                         self._enable_low_level_training_properties))

-        self._LTL_preconditions.append(
-            LTLProperty("G ( not in_stop_region)", 0,
-                        not self._enable_low_level_training_properties))

     def generate_learning_scenario(self):
         self.generate_scenario(
...
@@ -422,6 +417,7 @@ class Follow(ManeuverBase):
         self._penalty_for_out_of_range = 200
         self._penalty_for_change_lane = 170
         self._enable_low_level_training_properties = True
+        self._extra_action_weights_flag = True

     def generate_validation_scenario(self):
         self.generate_learning_scenario()
...
@@ -434,12 +430,12 @@ class Follow(ManeuverBase):
     def _set_v_ref(self):
         #if self._enable_low_level_training_properties:
         self._target_veh_i, _ = self.env.get_V2V_distance()
         if self._target_veh_i is not None:
             self._v_ref = self.env.vehs[self._target_veh_i].v
         else:
             self._v_ref = 0
         #else:
         #    self._v_ref = rd.speed_limit
...
@@ -448,19 +444,17 @@ class Follow(ManeuverBase):
     @property
     def extra_termination_condition(self):
-        APs = self.env.ego.APs
+        # APs = self.env.ego.APs

         if self._target_veh_i is None:
             return False
-        elif not self._enable_low_level_training_properties:  # activated only for the high-level training.
-            if (APs['in_stop_region'] or APs['before_but_close_to_stop_region']) \
-                    and (self.env.vehs[self._target_veh_i].APs['in_intersection'] or
-                         self.env.vehs[self._target_veh_i].x > 0):
-                return True
-            else:
-                return False
+        #elif not self._enable_low_level_training_properties: # activated only for the high-level training.
+        #    if (APs['in_stop_region'] or APs['before_but_close_to_stop_region']) \
+        #            and (self.env.vehs[self._target_veh_i].APs['in_intersection'] or
+        #                 self.env.vehs[self._target_veh_i].x > 0):
+        #        return True
+        #    else:
+        #        return False

         return False

     def _features_dim_reduction(self, features_tuple):