diff --git a/backends/controller_base.py b/backends/controller_base.py
index 885f941c680612f6efbbdc94df26963e83bdefaf..634756b5174c0f782764b31942abb1478bf75ce0 100644
--- a/backends/controller_base.py
+++ b/backends/controller_base.py
@@ -56,8 +56,7 @@ class ControllerBase(PolicyBase):
         total_reward = 0
         self.node_terminal_state_reached = False
         while not self.node_terminal_state_reached:
-            observation, reward, terminal, info = self.low_level_step_current_node(
-            )
+            observation, reward, terminal, info = self.low_level_step_current_node()
             if visualize_low_level_steps:
                 self.env.render()
             total_reward += reward
diff --git a/backends/trained_policies/0.1mil_weights/changelane_weights_actor.h5f b/backends/trained_policies/0.1mil_weights/changelane_weights_actor.h5f
deleted file mode 100644
index 739298c51cfa43433ff21b4962cbba17170ee7a9..0000000000000000000000000000000000000000
Binary files a/backends/trained_policies/0.1mil_weights/changelane_weights_actor.h5f and /dev/null differ
diff --git a/backends/trained_policies/0.1mil_weights/changelane_weights_critic.h5f b/backends/trained_policies/0.1mil_weights/changelane_weights_critic.h5f
deleted file mode 100644
index 681b967fa6dbd6a2aaf46413891ca7f9229efeeb..0000000000000000000000000000000000000000
Binary files a/backends/trained_policies/0.1mil_weights/changelane_weights_critic.h5f and /dev/null differ
diff --git a/backends/trained_policies/0.1mil_weights/follow_weights_actor.h5f b/backends/trained_policies/0.1mil_weights/follow_weights_actor.h5f
deleted file mode 100644
index fa417a1dc40b3f5e953e687315f960f2b1ad3e92..0000000000000000000000000000000000000000
Binary files a/backends/trained_policies/0.1mil_weights/follow_weights_actor.h5f and /dev/null differ
diff --git a/backends/trained_policies/0.1mil_weights/follow_weights_critic.h5f b/backends/trained_policies/0.1mil_weights/follow_weights_critic.h5f
deleted file mode 100644
index 0105de348166a4cbaa29998174f5e89e82b3b86e..0000000000000000000000000000000000000000
Binary files a/backends/trained_policies/0.1mil_weights/follow_weights_critic.h5f and /dev/null differ
diff --git a/backends/trained_policies/0.1mil_weights/keeplane_weights_actor.h5f b/backends/trained_policies/0.1mil_weights/keeplane_weights_actor.h5f
deleted file mode 100644
index 639fdd5b1889a604f6a61b4ad11b18ee9f703ea1..0000000000000000000000000000000000000000
Binary files a/backends/trained_policies/0.1mil_weights/keeplane_weights_actor.h5f and /dev/null differ
diff --git a/backends/trained_policies/0.1mil_weights/keeplane_weights_critic.h5f b/backends/trained_policies/0.1mil_weights/keeplane_weights_critic.h5f
deleted file mode 100644
index bc791095159a0f5656381a1c2458bb5cab227c4c..0000000000000000000000000000000000000000
Binary files a/backends/trained_policies/0.1mil_weights/keeplane_weights_critic.h5f and /dev/null differ
diff --git a/backends/trained_policies/0.1mil_weights/stop_weights_actor.h5f b/backends/trained_policies/0.1mil_weights/stop_weights_actor.h5f
deleted file mode 100644
index ee1d2af63e8812fc52a977d1db63342ffb5b0148..0000000000000000000000000000000000000000
Binary files a/backends/trained_policies/0.1mil_weights/stop_weights_actor.h5f and /dev/null differ
diff --git a/backends/trained_policies/0.1mil_weights/stop_weights_critic.h5f b/backends/trained_policies/0.1mil_weights/stop_weights_critic.h5f
deleted file mode 100644
index ad2e374269254b3fe6151f484ba04b5b9c29af91..0000000000000000000000000000000000000000
Binary files a/backends/trained_policies/0.1mil_weights/stop_weights_critic.h5f and /dev/null differ
diff --git a/backends/trained_policies/0.1mil_weights/wait_weights_actor.h5f b/backends/trained_policies/0.1mil_weights/wait_weights_actor.h5f
deleted file mode 100644
index a861c76ca25171a7644cb2fe3f75772fe7c5d6ae..0000000000000000000000000000000000000000
Binary files a/backends/trained_policies/0.1mil_weights/wait_weights_actor.h5f and /dev/null differ
diff --git a/backends/trained_policies/0.1mil_weights/wait_weights_critic.h5f b/backends/trained_policies/0.1mil_weights/wait_weights_critic.h5f
deleted file mode 100644
index b20d0e3209408f46f31f2c46e6b6b51ce23b2b65..0000000000000000000000000000000000000000
Binary files a/backends/trained_policies/0.1mil_weights/wait_weights_critic.h5f and /dev/null differ
diff --git a/backends/trained_policies/changelane/changelane_weights_actor.h5f b/backends/trained_policies/changelane/changelane_weights_actor.h5f
index 0a7ae9e96102f399e64dc83c4cc77e9e3b7a44cd..3f6e76aa921b07e851ab4eac8045e74c0c28e848 100644
Binary files a/backends/trained_policies/changelane/changelane_weights_actor.h5f and b/backends/trained_policies/changelane/changelane_weights_actor.h5f differ
diff --git a/backends/trained_policies/changelane/changelane_weights_critic.h5f b/backends/trained_policies/changelane/changelane_weights_critic.h5f
index 40ca3a83088d8984002bc0a4546020174e9cd431..b53dfd8f69aa96544b986a2de8c506b20162358f 100644
Binary files a/backends/trained_policies/changelane/changelane_weights_critic.h5f and b/backends/trained_policies/changelane/changelane_weights_critic.h5f differ
diff --git a/backends/trained_policies/follow/follow_weights_actor.h5f b/backends/trained_policies/follow/follow_weights_actor.h5f
index bcb0af7c5012216d05eb966550600c28a3bc322c..0f4bf96684f16bf8fc8db62dd56a301f5a96b62f 100644
Binary files a/backends/trained_policies/follow/follow_weights_actor.h5f and b/backends/trained_policies/follow/follow_weights_actor.h5f differ
diff --git a/backends/trained_policies/follow/follow_weights_critic.h5f b/backends/trained_policies/follow/follow_weights_critic.h5f
index f6258b53c8589db6a04382245c31f20d7f0915dc..2f51be39f37e0956887e930d969caf9ea421e0c2 100644
Binary files a/backends/trained_policies/follow/follow_weights_critic.h5f and b/backends/trained_policies/follow/follow_weights_critic.h5f differ
diff --git a/backends/trained_policies/keeplane/keeplane_weights_actor.h5f b/backends/trained_policies/keeplane/keeplane_weights_actor.h5f
index d9224714ec7717274033cb3f778800e6cacb690f..8efaf8b74e3fc805bbb373c3e24d3584df7ccf0c 100644
Binary files a/backends/trained_policies/keeplane/keeplane_weights_actor.h5f and b/backends/trained_policies/keeplane/keeplane_weights_actor.h5f differ
diff --git a/backends/trained_policies/keeplane/keeplane_weights_critic.h5f b/backends/trained_policies/keeplane/keeplane_weights_critic.h5f
index e5b4bf77af956cdae29a45518d768738fe647b6c..d7a70181897d2632e8c43446ee15c6dde69f294c 100644
Binary files a/backends/trained_policies/keeplane/keeplane_weights_critic.h5f and b/backends/trained_policies/keeplane/keeplane_weights_critic.h5f differ
diff --git a/backends/trained_policies/stop/stop_weights_actor.h5f b/backends/trained_policies/stop/stop_weights_actor.h5f
index 7ef9b12a24464c9c9c4d0ca0bd625b6d1875efb9..1f89ca2104f5801eb243b0898c3f050f91761f77 100644
Binary files a/backends/trained_policies/stop/stop_weights_actor.h5f and b/backends/trained_policies/stop/stop_weights_actor.h5f differ
diff --git a/backends/trained_policies/stop/stop_weights_critic.h5f b/backends/trained_policies/stop/stop_weights_critic.h5f
index 41a357a5e68aac591dea24edb47f5339960c0c90..12d7d202761bfafba4f6e9d564c154926bbc72a8 100644
Binary files a/backends/trained_policies/stop/stop_weights_critic.h5f and b/backends/trained_policies/stop/stop_weights_critic.h5f differ
diff --git a/options/simple_intersection/maneuver_base.py b/options/simple_intersection/maneuver_base.py
index 78f7a63a67dd19ce93ea0f08e5ae0c9749616f3f..1d9b45cfe36b0efce5f508e722b36bd89ea0f974 100644
--- a/options/simple_intersection/maneuver_base.py
+++ b/options/simple_intersection/maneuver_base.py
@@ -29,7 +29,7 @@ class ManeuverBase(EpisodicEnvBase):
     # _extra_action_weights_flag = True); note that a cost is defined
     # as a negative reward, so a cost will be summed up to the reward
     # with subtraction.
-    _cost_weights = (1.0 * 1e-3, 1.0 * 1e-3, 0.25 * 1e-3, 1.0 * 1e-3,
+    _cost_weights = (10.0 * 1e-3, 10.0 * 1e-3, 0.25 * 1e-3, 1.0 * 1e-3,
                      100.0 * 1e-3, 0.1 * 1e-3, 0.25 * 1e-3, 0.1 * 1e-3)
 
     _extra_r_terminal = None
@@ -292,9 +292,8 @@ class ManeuverBase(EpisodicEnvBase):
         raise NotImplemented(self.__class__.__name__ +
                              ".generate_learning_scenario is not implemented.")
 
-    def generate_validation_scenario(
-            self
-    ):  # Override this method in the subclass if some customization is needed.
+    # Override this method in the subclass if some customization is needed.
+    def generate_validation_scenario(self):
         self.generate_learning_scenario()
         self._enable_low_level_training_properties = False
diff --git a/options/simple_intersection/maneuvers.py b/options/simple_intersection/maneuvers.py
index 7b6ccf4ac243ff0be013a8164a3263a45ae895d1..748fb0ea0c1dd0743f00e1b683986e7186a31e2a 100644
--- a/options/simple_intersection/maneuvers.py
+++ b/options/simple_intersection/maneuvers.py
@@ -16,30 +16,51 @@ class KeepLane(ManeuverBase):
     def _init_LTL_preconditions(self):
         self._LTL_preconditions.append(LTLProperty("G ( not veh_ahead )", 0))
 
-        self._LTL_preconditions.append(
-            LTLProperty("G ( not stopped_now )", 200,
-                        self._enable_low_level_training_properties))
+        #self._LTL_preconditions.append(
+        #    LTLProperty("G ( not stopped_now )", 100,
+        #                self._enable_low_level_training_properties))
 
         self._LTL_preconditions.append(
             LTLProperty(
                 "G ( (lane and target_lane) or (not lane and not target_lane) )",
-                200, self._enable_low_level_training_properties))
+                100, self._enable_low_level_training_properties))
 
     def generate_learning_scenario(self):
         self.generate_scenario(
             enable_LTL_preconditions=False,
-            ego_pos_range=(rd.hlanes.start_pos, rd.hlanes.end_pos),
+            ego_pos_range=(rd.intersection_width_w_offset, rd.hlanes.end_pos),
             ego_perturb_lim=(rd.hlanes.width / 4, np.pi / 6),
+            v_max_multiplier=0.75,
             ego_heading_towards_lane_centre=True)
         # the goal reward and termination is led by the SimpleIntersectionEnv
-        self.env._terminate_in_goal = True
-        self.env._reward_in_goal = 200
+        self.env._terminate_in_goal = False
+        self.env._reward_in_goal = None
         self._enable_low_level_training_properties = True
 
+    def generate_validation_scenario(self):
+        self.generate_scenario(
+            enable_LTL_preconditions=False,
+            ego_pos_range=(rd.hlanes.start_pos, 0),
+            ego_perturb_lim=(rd.hlanes.width / 4, np.pi / 6),
+            ego_heading_towards_lane_centre=True)
+        # the goal reward and termination is led by the SimpleIntersectionEnv
+        self.env._terminate_in_goal = False
+        self.env._reward_in_goal = None
+
     @staticmethod
     def _features_dim_reduction(features_tuple):
-        return extract_ego_features(features_tuple, 'pos_near_stop_region',
-                                    'v', 'v_ref', 'e_y', 'psi', 'theta', 'acc',
-                                    'psi_dot')
+        return extract_ego_features(features_tuple, 'v', 'v_ref', 'e_y', 'psi', 'v tan(psi/L)', 'theta', 'lane', 'acc', 'psi_dot')
+
+    @property
+    def extra_termination_condition(self):
+        if self._enable_low_level_training_properties:  # activated only for the low-level training.
+            if (self.env.ego.v < self._v_ref / 5) and self.env.ego.acc < 0:
+                self._extra_r_terminal = -100
+                return True
+            else:
+                self._extra_r_terminal = None
+                return False
+
+        return False
 
 
 class Stop(ManeuverBase):
@@ -47,28 +68,30 @@ class Stop(ManeuverBase):
 
     _terminate_in_goal = True
     _reward_in_goal = None
 
+    _penalty_in_violation = None
+
     def _init_param(self):
         self._set_v_ref()
         self._target_lane = self.env.ego.APs['lane']
 
     def _init_LTL_preconditions(self):
-        self._LTL_preconditions.append(
-            LTLProperty("G ( not has_stopped_in_stop_region )",
-                        self._penalty(self._reward_in_goal)))
+        #self._LTL_preconditions.append(
+        #    LTLProperty("G ( not has_stopped_in_stop_region )",
+        #                self._penalty(self._reward_in_goal)))
 
         self._LTL_preconditions.append(
             LTLProperty(
                 "G ( (before_but_close_to_stop_region or in_stop_region) U has_stopped_in_stop_region )",
-                0))
+                self._penalty_in_violation))
 
         self._LTL_preconditions.append(
-            LTLProperty("G ( not stopped_now U in_stop_region )", 200,
+            LTLProperty("G ( not stopped_now U in_stop_region )", 100,
                         self._enable_low_level_training_properties))
 
         self._LTL_preconditions.append(
             LTLProperty(
                 "G ( (lane and target_lane) or (not lane and not target_lane) )",
-                200, self._enable_low_level_training_properties))
+                100, self._enable_low_level_training_properties))
 
     def _update_param(self):
         self._set_v_ref()
@@ -89,8 +112,10 @@ class Stop(ManeuverBase):
             ego_pos_range=(rd.hlanes.near_stop_region,
                            -rd.intersection_width_w_offset / 2),
             ego_perturb_lim=(rd.hlanes.width / 4, np.pi / 6),
+            v_max_multiplier=0.75,
             ego_heading_towards_lane_centre=True)
         self._reward_in_goal = 200
+        self._penalty_in_violation = 150
         self._enable_low_level_training_properties = True
 
     def _low_level_manual_policy(self):
@@ -99,8 +124,32 @@ class Stop(ManeuverBase):
 
     @staticmethod
     def _features_dim_reduction(features_tuple):
         return extract_ego_features(features_tuple, 'pos_near_stop_region',
-                                    'v', 'v_ref', 'e_y', 'psi', 'theta', 'acc',
-                                    'psi_dot', 'not_in_stop_region')
+                                    'v', 'v_ref', 'e_y', 'psi', 'v tan(psi/L)', 'theta', 'lane', 'acc',
+                                    'psi_dot', 'pos_stop_region', 'not_in_stop_region')
+
+    @property
+    def extra_termination_condition(self):
+        if self.env.ego.APs['has_stopped_in_stop_region']:
+            if self._reward_in_goal is not None:
+                self._extra_r_terminal = self._reward_in_goal
+                self._extra_r_terminal *= np.exp(- pow(self.env.ego.theta, 2)
+                                                 - pow(self.env.ego.y - rd.hlanes.centres[self._target_lane], 2)
+                                                 - 0.25*pow(self.env.ego.psi, 2))
+            else:
+                self._extra_r_terminal = None
+            return True
+
+        elif self._enable_low_level_training_properties:  # activated only for the low-level training.
+            if (rd.speed_limit / 5 < self._v_ref) and \
+               (self.env.ego.v < self._v_ref / 2) and self.env.ego.acc < 0:
+                self._extra_r_terminal = -100
+                return True
+
+            else:
+                self._extra_r_terminal = None
+                return False
+
+        return False
 
 
 class Wait(ManeuverBase):
@@ -111,32 +160,38 @@ class Wait(ManeuverBase):
 
     def _init_LTL_preconditions(self):
         self._LTL_preconditions.append(
             LTLProperty(
-                "G ( (in_stop_region and stopped_now) U highest_priority )",
-                0))
+                "G ( (in_stop_region and stopped_now) U (highest_priority and intersection_is_clear))", 0,
+                not self._enable_low_level_training_properties))  # not available in low-level training...
 
         self._LTL_preconditions.append(
             LTLProperty("G ( not (in_intersection and highest_priority) )",
                         self._penalty(self._reward_in_goal)))
 
+        self._LTL_preconditions.append(
+            LTLProperty(
+                "G ( in_stop_region U (highest_priority and intersection_is_clear) )", 150,
+                self._enable_low_level_training_properties))
+
+        self._LTL_preconditions.append(
+            LTLProperty(
+                "G ( (lane and target_lane) or (not lane and not target_lane) )",
+                100, self._enable_low_level_training_properties))
+
     def _init_param(self):
-        ego = self.env.ego
-        self._v_ref = rd.speed_limit if self.env.ego.APs[
-            'highest_priority'] else 0
-        self._target_lane = ego.APs['lane']
-        self._ego_stop_count = 0
+        self._update_param()
+        self._target_lane = self.env.ego.APs['lane']
 
     def _update_param(self):
-        if self.env.ego.APs['highest_priority']:
-            self._v_ref = rd.speed_limit
-        if self._enable_low_level_training_properties:
-            if self.env.n_others_with_higher_priority == 0:
-                self._ego_stop_count += 1
+        if self.env.ego.APs['highest_priority'] and self.env.ego.APs['intersection_is_clear']:
+            self._v_ref = rd.speed_limit / 5
+        else:
+            self._v_ref = 0
 
     def generate_learning_scenario(self):
-        n_others = np.random.randint(0, 3)
+
+        n_others = 0 if np.random.rand() <= 0 else np.random.randint(1, 4)
+
         self.generate_scenario(
             enable_LTL_preconditions=True,
-            timeout=62,
             n_others_range=(n_others, n_others),
             ego_pos_range=rd.hlanes.stop_region,
             n_others_stopped_in_stop_region=n_others,
@@ -145,32 +200,46 @@ class Wait(ManeuverBase):
             ego_heading_towards_lane_centre=True)
 
         max_waited_count = 0
+        min_waited_count = 1
         for veh in self.env.vehs[1:]:
             max_waited_count = max(max_waited_count, veh.waited_count)
+            min_waited_count = min(min_waited_count, veh.waited_count)
+
+        min_waited_count = min(min_waited_count, max_waited_count)
 
         self._extra_action_weights_flag = False
-        self.env.ego.waited_count = np.random.randint(0, max_waited_count + 21)
+
+        if np.random.rand() <= 0.2:
+            self.env.ego.waited_count = np.random.randint(0, min_waited_count+1)
+        else:
+            self.env.ego.waited_count = np.random.randint(min_waited_count, max_waited_count + 21)
+
        self.env.init_APs(False)
 
         self._reward_in_goal = 200
-        self._extra_r_on_timeout = -200
         self._enable_low_level_training_properties = True
-        self._ego_stop_count = 0
+
+    def generate_validation_scenario(self):
+        super().generate_validation_scenario()
+        #self._enable_low_level_training_properties = True
 
     @property
     def extra_termination_condition(self):
         if self._enable_low_level_training_properties:  # activated only for the low-level training.
-            if self._ego_stop_count >= 50:
-                self._extra_r_terminal = -200
+            if self.env.ego.APs['highest_priority'] and self.env.ego.APs['intersection_is_clear'] \
+                    and np.random.rand() <= 0.05 and self.env.ego.v <= self._v_ref / 10:
+                self._extra_r_terminal = - 100
                 return True
             else:
                 self._extra_r_terminal = None
                 return False
 
+        return False
+
     @staticmethod
     def _features_dim_reduction(features_tuple):
         return extract_ego_features(
-            features_tuple, 'v', 'v_ref', 'psi', 'theta', 'acc', 'psi_dot',
+            features_tuple, 'v', 'v_ref', 'e_y', 'psi', 'v tan(psi/L)', 'theta', 'lane', 'acc', 'psi_dot',
             'pos_stop_region', 'intersection_is_clear', 'highest_priority')
@@ -195,10 +264,10 @@ class ChangeLane(ManeuverBase):
             LTLProperty("G ( on_route and not over_speed_limit )",
                         self._violation_penalty_in_low_level_training,
                         self._enable_low_level_training_properties))
+
         self._LTL_preconditions.append(
             LTLProperty("G ( not stopped_now )",
-                        self._violation_penalty_in_low_level_training,
-                        self._enable_low_level_training_properties))
+                        100, self._enable_low_level_training_properties))
 
     @property
     def goal_achieved(self):
@@ -217,12 +286,25 @@ class ChangeLane(ManeuverBase):
         self.generate_scenario(
             enable_LTL_preconditions=False,
             timeout=15,
-            ego_pos_range=(rd.hlanes.start_pos, rd.hlanes.end_pos),
+            ego_pos_range=(rd.intersection_width_w_offset, rd.hlanes.end_pos),
             ego_lane=np.random.choice([0, 1]),
-            ego_perturb_lim=(rd.hlanes.width / 5, np.pi / 6))
+            ego_perturb_lim=(rd.hlanes.width / 4, np.pi / 6),
+            v_max_multiplier=0.75)
 
+        # print('our range was %s, %s, ego at %s' % (before_intersection, after_intersection, self.env.ego.x))
         self._reward_in_goal = 200
-        self._violation_penalty_in_low_level_training = 200
+        self._violation_penalty_in_low_level_training = 150
         self._enable_low_level_training_properties = True
 
+    def generate_validation_scenario(self):
+        self.generate_scenario(
+            enable_LTL_preconditions=False,
+            ego_pos_range=(rd.hlanes.start_pos, rd.hlanes.end_pos),
+            ego_perturb_lim=(rd.hlanes.width / 4, np.pi / 6))
+
+        # print('our range was %s, %s, ego at %s' % (before_intersection, after_intersection, self.env.ego.x))
+        self._reward_in_goal = 200
+        self._violation_penalty_in_low_level_training = 150
+        self._enable_low_level_training_properties = True
+
     # TODO: It is not a good idea to specify features by numbers, as the list
@@ -237,49 +319,77 @@ class ChangeLane(ManeuverBase):
 class Follow(ManeuverBase):
 
     _target_veh_i = None
-    _penalty_for_out_of_follow_range = None
+    _penalty_for_out_of_range = None
+    _penalty_for_change_lane = None
 
     def _init_LTL_preconditions(self):
         self._LTL_preconditions.append(
-            LTLProperty("G ( veh_ahead )",
-                        self._penalty_for_out_of_follow_range))
+            LTLProperty("G ( veh_ahead )", self._penalty_for_out_of_range))
 
         self._LTL_preconditions.append(
             LTLProperty(
                 "G ( (lane and target_lane) or (not lane and not target_lane) )",
-                self._penalty_for_out_of_follow_range))
+                self._penalty_for_change_lane))
+
+        #self._LTL_preconditions.append(
+        #    LTLProperty("G ( not stopped_now U veh_ahead_stopped_now)", 200,
+        #                self._enable_low_level_training_properties))
 
         self._LTL_preconditions.append(
-            LTLProperty("G ( not stopped_now U veh_ahead_stopped_now)", 200,
+            LTLProperty("G ( not veh_ahead_too_close )", self._penalty_for_out_of_range,
                         self._enable_low_level_training_properties))
 
         self._LTL_preconditions.append(
-            LTLProperty("G ( not veh_ahead_too_close )", 200,
-                        self._enable_low_level_training_properties))
+            LTLProperty("G ( not in_stop_region)", 0, not self._enable_low_level_training_properties))
 
     def generate_learning_scenario(self):
         self.generate_scenario(
             enable_LTL_preconditions=False,
             n_others_range=(1, 1),
-            ego_perturb_lim=(rd.hlanes.width / 2, np.pi / 4),
+            v_max_multiplier=0.75,
+            ego_perturb_lim=(0, 0),
             veh_ahead_scenario=True)
         self.env._terminate_in_goal = False
-        self._penalty_for_out_of_follow_range = 200
+        self._penalty_for_out_of_range = 200
+        self._penalty_for_change_lane = 170
         self._enable_low_level_training_properties = True
 
+    def generate_validation_scenario(self):
+        self.generate_learning_scenario()
+
     def _update_param(self):
         self._target_veh_i, _ = self.env.get_V2V_distance()
 
+        if self._target_veh_i is not None:
+            self._v_ref = self.env.vehs[self._target_veh_i].v
+
     def _low_level_manual_policy(self):
         return self.env.aggressive_driving_policy(EGO_INDEX)
 
+    @property
+    def extra_termination_condition(self):
+        APs = self.env.ego.APs
+
+        if self._target_veh_i is None:
+            return False
+
+        elif not self._enable_low_level_training_properties:  # activated only for the high-level training.
+            if (APs['in_stop_region'] or APs['before_but_close_to_stop_region']) \
+                and (self.env.vehs[self._target_veh_i].APs['in_intersection'] or
+                     self.env.vehs[self._target_veh_i].x > 0):
+                return True
+            else:
+                return False
+
+        return False
+
     def _features_dim_reduction(self, features_tuple):
         ego_features = extract_ego_features(
-            features_tuple, 'v', 'v_ref', 'e_y', 'psi', 'v tan(psi/L)',
-            'theta', 'lane', 'e_y,lane', 'acc', 'psi_dot')
+            features_tuple, 'pos_near_stop_region', 'v', 'v_ref', 'e_y', 'psi', 'v tan(psi/L)',
+            'theta', 'lane', 'acc', 'psi_dot')
+
         if self._target_veh_i is not None:
             return ego_features + extract_other_veh_features(
-                features_tuple, self._target_veh_i, 'rel_x', 'rel_y', 'v',
-                'acc')
+                features_tuple, self._target_veh_i, 'rel_x', 'rel_y', 'v', 'acc')
        else:
             return ego_features + (0.0, 0.0, 0.0, 0.0)
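
Note (not part of the patch): a minimal, self-contained Python sketch of the extra_termination_condition pattern that the maneuver hunks above add. During low-level training an episode is cut short with an extra penalty when the ego vehicle is well below its reference speed and still decelerating. The SimpleEgo class, the standalone function, and the numeric values are hypothetical stand-ins that mirror the KeepLane hunk; the real implementation lives on ManeuverBase subclasses and reads the environment state.

class SimpleEgo:
    """Hypothetical stand-in for the ego-vehicle state used by the maneuvers above."""

    def __init__(self, v, acc):
        self.v = v      # current speed (m/s)
        self.acc = acc  # current acceleration (m/s^2)


def keeplane_extra_termination(ego, v_ref, low_level_training=True):
    """Sketch of the KeepLane-style extra termination check: during low-level
    training, terminate early with an extra penalty (-100) when the ego is
    below v_ref / 5 and still decelerating; otherwise keep the episode running."""
    if low_level_training:
        if ego.v < v_ref / 5 and ego.acc < 0:
            return True, -100.0   # terminate, with an extra terminal reward (a penalty)
        return False, None
    return False, None


if __name__ == '__main__':
    ego = SimpleEgo(v=1.0, acc=-0.5)                        # assumed values
    terminate, extra_r = keeplane_extra_termination(ego, v_ref=10.0)
    print(terminate, extra_r)                               # True -100.0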