diff --git a/backends/controller_base.py b/backends/controller_base.py index 015a12c8768269003eb6d620b0b0c22d0930dba1..4ef9b55ee179ecc5dbd0c83c940fe72c41b0e0d5 100644 --- a/backends/controller_base.py +++ b/backends/controller_base.py @@ -52,6 +52,8 @@ class ControllerBase(PolicyBase): Returns state at end of node execution, total reward, epsiode_termination_flag, info ''' # TODO: this is never called when you TEST high-level policy (w/o MCTS) rather than train... + # (make an integrated interface between testing and training, and between the high- and low-level + # methods, with and without MCTS.) def step_current_node(self, visualize_low_level_steps=False): total_reward = 0 self.node_terminal_state_reached = False diff --git a/options/simple_intersection/maneuvers.py b/options/simple_intersection/maneuvers.py index cda94f8113ea94b49c5c1f549172161f645071e8..3a99a45378b8d00c9eb4778404300f748a0ef6e9 100644 --- a/options/simple_intersection/maneuvers.py +++ b/options/simple_intersection/maneuvers.py @@ -52,6 +52,16 @@ class KeepLane(ManeuverBase): def _features_dim_reduction(features_tuple): return extract_ego_features(features_tuple, 'v', 'v_ref', 'e_y', 'psi', 'v tan(psi/L)', 'theta', 'lane', 'acc', 'psi_dot') + @property + def initiation_condition(self): + """A virtual function (property) from ManeuverBase. + As KeepLane is a default maneuver, it has to be activated to be chosen at any time, state, and condition + (refer to initiation_condition of ManeuverBase for the usual case). + :returns: True. + """ + + return True + @property def extra_termination_condition(self): if self._enable_low_level_training_properties: # activated only for the low-level training. @@ -383,6 +393,7 @@ class ChangeLane(ManeuverBase): 'psi_dot') +# TODO: In the low-level training of Follow, sometimes the initial state doesn't satisfy its initiation condition. 
class Follow(ManeuverBase): _reward_in_goal = None @@ -422,7 +433,6 @@ class Follow(ManeuverBase): self.env._terminate_in_goal = False self._penalty_for_out_of_range = 200 self._penalty_for_change_lane = 200 - self._reward_in_goal = 200 self._enable_low_level_training_properties = True self._extra_action_weights_flag = True @@ -436,7 +446,6 @@ class Follow(ManeuverBase): self._set_v_ref() def _set_v_ref(self): - #if self._enable_low_level_training_properties: self._target_veh_i, _ = self.env.get_V2V_distance() if self._target_veh_i is not None: @@ -460,23 +469,18 @@ class Follow(ManeuverBase): @property def extra_termination_condition(self): - # APs = self.env.ego.APs - if self._target_veh_i is None: return False if self._enable_low_level_training_properties: # activated only for the low-level training. - if self.env.ego.APs['has_stopped_in_stop_region'] and \ - self.env.ego.APs['in_stop_region']: - self._extra_r_terminal = None - return True - - elif (rd.speed_limit / 5 < self._v_ref) and \ + APs = self.env.ego.APs + if (rd.speed_limit / 5 < self._v_ref) and \ (self.env.ego.v < self._v_ref / 2) and \ self.env.ego.acc < 0 and \ - not self.env.ego.APs['veh_ahead_stopped_now']: - self._extra_r_terminal = -100 - return True + not APs['veh_ahead_stopped_now'] and \ + not APs['in_stop_region']: + self._extra_r_terminal = -100 + return True else: self._extra_r_terminal = None @@ -493,4 +497,4 @@ class Follow(ManeuverBase): return ego_features + extract_other_veh_features( features_tuple, self._target_veh_i, 'rel_x', 'rel_y', 'v', 'acc') else: - return ego_features + (0.0, 0.0, 0.0, 0.0) \ No newline at end of file + return ego_features + (0.0, 0.0, 0.0, 0.0)