diff --git a/backends/controller_base.py b/backends/controller_base.py
index ad3185664f1eab6d506a84117af20de75cbad57c..015a12c8768269003eb6d620b0b0c22d0930dba1 100644
--- a/backends/controller_base.py
+++ b/backends/controller_base.py
@@ -51,7 +51,7 @@ class ControllerBase(PolicyBase):
             Returns state at end of node execution, total reward, epsiode_termination_flag, info
         '''
 
-    # TODO: this is never called when you test high-level policy rather than train...
+    # TODO: this is never called when you TEST the high-level policy (without MCTS) rather than train it...
     def step_current_node(self, visualize_low_level_steps=False):
         total_reward = 0
         self.node_terminal_state_reached = False
diff --git a/backends/trained_policies/follow/follow_weights_actor.h5f b/backends/trained_policies/follow/follow_weights_actor.h5f
index 0f4bf96684f16bf8fc8db62dd56a301f5a96b62f..2ac985cb51323be7472bb64862d167335aa36e9d 100644
Binary files a/backends/trained_policies/follow/follow_weights_actor.h5f and b/backends/trained_policies/follow/follow_weights_actor.h5f differ
diff --git a/backends/trained_policies/follow/follow_weights_critic.h5f b/backends/trained_policies/follow/follow_weights_critic.h5f
index 2f51be39f37e0956887e930d969caf9ea421e0c2..4b713c8bc9cc34336c7cbb9dc8948f29a089eb1c 100644
Binary files a/backends/trained_policies/follow/follow_weights_critic.h5f and b/backends/trained_policies/follow/follow_weights_critic.h5f differ
diff --git a/backends/trained_policies/highlevel/highlevel_weights.h5f b/backends/trained_policies/highlevel/highlevel_weights.h5f
index b3c5347197f637d893b80c7258a6504d4368c59b..758edd04dfb997f8bc019986abdc55c291294497 100644
Binary files a/backends/trained_policies/highlevel/highlevel_weights.h5f and b/backends/trained_policies/highlevel/highlevel_weights.h5f differ
diff --git a/env/simple_intersection/simple_intersection_env.py b/env/simple_intersection/simple_intersection_env.py
index a2faa92c4b66e6d7d29e55c0b09b255903f3a863..62a85ce711b980f3e968ecb6fc75514c5c7cde04 100644
--- a/env/simple_intersection/simple_intersection_env.py
+++ b/env/simple_intersection/simple_intersection_env.py
@@ -273,7 +273,9 @@ class SimpleIntersectionEnv(RoadEnv, EpisodicEnvBase):
         # stopped_car_scenario = bool(np.random.randint(0, 1)) TODO: this scenario may not work
         n_others_stopped_in_stop_region = np.random.randint(
             0, min(3, n_others - stopped_car_scenario))
-        veh_ahead_scenario = bool(np.random.randint(0, 1)) or veh_ahead_scenario
+        veh_ahead_scenario = bool(np.random.randint(0, 2)) or veh_ahead_scenario
+        # Uncomment the line below to generate veh_ahead_scenario more often.
+        # veh_ahead_scenario = np.random.rand() <= 0.7 or veh_ahead_scenario
 
         if n_others_stopped_in_stop_region > min(
                 n_others - stopped_car_scenario, 3):
diff --git a/options/simple_intersection/maneuvers.py b/options/simple_intersection/maneuvers.py
index 67c8318d6c57801566d7693a60ceb2212dcbe64e..b6625b21ff53d3e13456b30ca2421a98cec458f2 100644
--- a/options/simple_intersection/maneuvers.py
+++ b/options/simple_intersection/maneuvers.py
@@ -152,7 +152,6 @@ class Stop(ManeuverBase):
             LTLProperty("G ( not has_stopped_in_stop_region )",
                         self._penalty(self._reward_in_goal),
                         not self._enable_low_level_training_properties))
-        # before_intersection rather than "before_but_close_to_stop_region or in_stop_region"?
         self._LTL_preconditions.append(
             LTLProperty(
                 "G ( (before_but_close_to_stop_region or in_stop_region) U has_stopped_in_stop_region )",
@@ -385,22 +384,29 @@ class ChangeLane(ManeuverBase):
 
 class Follow(ManeuverBase):
 
+    _reward_in_goal = None
+    _target_veh_i = None
     _penalty_for_out_of_range = None
     _penalty_for_change_lane = None
 
     def _init_LTL_preconditions(self):
         self._LTL_preconditions.append(
-            LTLProperty("G ( veh_ahead )", self._penalty_for_out_of_range))
+            LTLProperty("G ( veh_ahead U (in_stop_region or before_but_close_to_stop_region ) )", self._penalty_for_out_of_range))
 
         self._LTL_preconditions.append(
             LTLProperty(
                 "G ( (lane and target_lane) or (not lane and not target_lane) )",
                 self._penalty_for_change_lane))
 
-        # self._LTL_preconditions.append(
-        #     LTLProperty("G ( not stopped_now U veh_ahead_stopped_now)", 200,
-        #                 self._enable_low_level_training_properties))
+        self._LTL_preconditions.append(
+            LTLProperty(
+                "G ( not (has_stopped_in_stop_region and in_stop_region ) )",
+                None))
+
+        #self._LTL_preconditions.append(
+        #    LTLProperty("G ( not stopped_now U (veh_ahead_stopped_now or (in_stop_region and stopped_now )) )", 200,
+        #                self._enable_low_level_training_properties))
 
         self._LTL_preconditions.append(
             LTLProperty("G ( not veh_ahead_too_close )",
                         self._penalty_for_out_of_range,
@@ -408,14 +414,14 @@ class Follow(ManeuverBase):
 
     def generate_learning_scenario(self):
         self.generate_scenario(
-            enable_LTL_preconditions=False,
-            n_others_range=(1, 1),
+            n_others_range=(1, 6),
             v_max_multiplier=0.75,
             ego_perturb_lim=(0, 0),
             veh_ahead_scenario=True)
         self.env._terminate_in_goal = False
         self._penalty_for_out_of_range = 200
-        self._penalty_for_change_lane = 170
+        self._penalty_for_change_lane = 200
+        self._reward_in_goal = 200
         self._enable_low_level_training_properties = True
         self._extra_action_weights_flag = True
@@ -433,11 +439,20 @@ class Follow(ManeuverBase):
         self._target_veh_i, _ = self.env.get_V2V_distance()
 
         if self._target_veh_i is not None:
-            self._v_ref = self.env.vehs[self._target_veh_i].v
+            if self.env.ego.APs['before_intersection'] and \
+                    not self.env.vehs[self._target_veh_i].APs['before_intersection']:
+                x = self.env.ego.x
+                if rd.hlanes.near_stop_region >= x:
+                    self._v_ref = rd.speed_limit
+                elif rd.hlanes.near_stop_region < x <= rd.hlanes.stop_region_centre:
+                    self._v_ref = -(rd.speed_limit / abs(rd.hlanes.near_stop_region)) * (
+                        x - rd.hlanes.stop_region_centre)
+                elif x > rd.hlanes.stop_region_centre:
+                    self._v_ref = 0
+            else:
+                self._v_ref = self.env.vehs[self._target_veh_i].v
         else:
             self._v_ref = 0
-        #else:
-        #    self._v_ref = rd.speed_limit
 
     def _low_level_manual_policy(self):
         return self.env.aggressive_driving_policy(EGO_INDEX)
@@ -449,18 +464,29 @@ class Follow(ManeuverBase):
         if self._target_veh_i is None:
             return False
-        #elif not self._enable_low_level_training_properties:  # activated only for the high-level training.
-        #    if (APs['in_stop_region'] or APs['before_but_close_to_stop_region']) \
-        #       and (self.env.vehs[self._target_veh_i].APs['in_intersection'] or
-        #            self.env.vehs[self._target_veh_i].x > 0):
-        #        return True
-        #    else:
+        if self._enable_low_level_training_properties:  # activated only for the low-level training.
+            if self.env.ego.APs['has_stopped_in_stop_region'] and \
+                    self.env.ego.APs['in_stop_region']:
+                self._extra_r_terminal = None
+                return True
+
+            elif (rd.speed_limit / 5 < self._v_ref) and \
+                    (self.env.ego.v < self._v_ref / 2) and \
+                    self.env.ego.acc < 0 and \
+                    not self.env.ego.APs['veh_ahead_stopped_now']:
+                self._extra_r_terminal = -100
+                return True
+
+            else:
+                self._extra_r_terminal = None
+                return False
+
         return False
 
     def _features_dim_reduction(self, features_tuple):
         ego_features = extract_ego_features(
             features_tuple, 'pos_near_stop_region', 'v', 'v_ref', 'e_y', 'psi', 'v tan(psi/L)',
-            'theta', 'lane', 'acc', 'psi_dot')
+            'theta', 'lane', 'acc', 'psi_dot', 'pos_stop_region', 'not_in_stop_region')
 
         if self._target_veh_i is not None:
             return ego_features + extract_other_veh_features(