From f443e3820c74aa4661f8a1f83db5c24efc216452 Mon Sep 17 00:00:00 2001
From: Jaeyoung Lee <jaeyoung.lee@uwaterloo.ca>
Date: Wed, 6 Feb 2019 13:46:44 -0500
Subject: [PATCH] Wait (and the others slightly) improved.

---
 options/simple_intersection/maneuvers.py | 43 +++++++++++++-----------
 1 file changed, 24 insertions(+), 19 deletions(-)

diff --git a/options/simple_intersection/maneuvers.py b/options/simple_intersection/maneuvers.py
index e995a19..2cf2e7f 100644
--- a/options/simple_intersection/maneuvers.py
+++ b/options/simple_intersection/maneuvers.py
@@ -172,7 +172,7 @@ class Stop(ManeuverBase):
                 self._penalty_in_violation))
 
         self._LTL_preconditions.append(
-            LTLProperty("G ( not stopped_now U in_stop_region )", 100,
+            LTLProperty("G ( not stopped_now U in_stop_region )", 200,
                         self._enable_low_level_training_properties))
 
         self._LTL_preconditions.append(
@@ -228,7 +228,7 @@ class Stop(ManeuverBase):
 
         elif (rd.speed_limit / 5 < self._v_ref) and \
                 (self.env.ego.v < self._v_ref / 2) and self.env.ego.acc < 0:
-            self._extra_r_terminal = -100
+            self._extra_r_terminal = -200
             return True
 
         else:
@@ -241,13 +241,13 @@ class Stop(ManeuverBase):
 class Wait(ManeuverBase):
 
     _reward_in_goal = None
-    _terminate_in_goal = True
+    _terminate_in_goal = False
 
     def _init_LTL_preconditions(self):
-
         self._LTL_preconditions.append(
-            LTLProperty("G ( (in_stop_region and stopped_now) U (highest_priority and intersection_is_clear))",
-                        None, not self._enable_low_level_training_properties))  # not available in low-level training...
+            LTLProperty(
+                "G ( (in_stop_region and has_stopped_in_stop_region) U (highest_priority and intersection_is_clear))",
+                None, not self._enable_low_level_training_properties))  # not available in low-level training...
 
         self._LTL_preconditions.append(
             LTLProperty("G ( not (in_intersection and highest_priority and intersection_is_clear) )",
@@ -255,13 +255,13 @@ class Wait(ManeuverBase):
 
         self._LTL_preconditions.append(
             LTLProperty(
-                "G ( in_stop_region U (highest_priority and intersection_is_clear) )", 150,
+                "G ( in_stop_region U (highest_priority and intersection_is_clear) )", 200,
                 self._enable_low_level_training_properties))
 
-        self._LTL_preconditions.append(
-            LTLProperty(
-                "G ( (lane and target_lane) or (not lane and not target_lane) )",
-                150, self._enable_low_level_training_properties))
+        #self._LTL_preconditions.append(
+        #    LTLProperty(
+        #        "G ( (lane and target_lane) or (not lane and not target_lane) )",
+        #        150, self._enable_low_level_training_properties))
 
     def _init_param(self):
         self._update_param()
@@ -270,8 +270,10 @@ class Wait(ManeuverBase):
     def _update_param(self):
         if self.env.ego.APs['highest_priority'] and self.env.ego.APs['intersection_is_clear']:
             self._v_ref = rd.speed_limit
+            self._extra_action_weights_flag = True
         else:
             self._v_ref = 0
+            self._extra_action_weights_flag = False
 
     def generate_learning_scenario(self):
         n_others = 0 if np.random.rand() <= 0 else np.random.randint(1, 4)
@@ -298,17 +300,20 @@ class Wait(ManeuverBase):
         self.env.ego.waited_count = np.random.randint(min_waited_count, max_waited_count + 21)
 
         self.env.init_APs(False)
+        self.env._terminate_in_goal = False
         self._reward_in_goal = 200
         self._enable_low_level_training_properties = True
-        self._extra_action_weights_flag = True
+        self._extra_action_weights_flag = False
 
     @property
     def extra_termination_condition(self):
         if self._enable_low_level_training_properties:  # activated only for the low-level training.
-            if self.env.ego.APs['highest_priority'] and self.env.ego.APs['intersection_is_clear'] \
-                    and np.random.rand() <= 0.1 and self.env.ego.v <= self._v_ref / 10 \
-                    and self.env.ego.acc < 0:
-                self._extra_r_terminal = - 100
+            if self.env.ego.APs['highest_priority'] and \
+               self.env.ego.APs['intersection_is_clear'] and \
+               np.random.rand() <= 0.25 and \
+               self.env.ego.v <= self._v_ref / 10 and \
+               self.env.ego.acc < 0:
+                self._extra_r_terminal = -200
                 return True
             else:
                 self._extra_r_terminal = None
@@ -449,6 +454,7 @@ class ChangeLane(ManeuverBase):
         self._violation_penalty_in_low_level_training = 150
         self._enable_low_level_training_properties = True
         self._extra_action_weights_flag = True
+        self.env._terminate_in_goal = False
 
     def generate_validation_scenario(self):
         self.generate_scenario(
@@ -460,6 +466,8 @@ class ChangeLane(ManeuverBase):
         self._reward_in_goal = 200
         self._violation_penalty_in_low_level_training = 150
         self._enable_low_level_training_properties = True
+        self.env._terminate_in_goal = False
+
 
     @staticmethod
     def _features_dim_reduction(features_tuple):
@@ -511,9 +519,6 @@ class Follow(ManeuverBase):
         self._enable_low_level_training_properties = True
         self._extra_action_weights_flag = True
 
-    def generate_validation_scenario(self):
-        self.generate_learning_scenario()
-
     def _init_param(self):
         self._set_v_ref()
 
-- 
GitLab