diff --git a/backends/controller_base.py b/backends/controller_base.py
index 634756b5174c0f782764b31942abb1478bf75ce0..16170366e002e44d714c67331e4ef6093f6fbe97 100644
--- a/backends/controller_base.py
+++ b/backends/controller_base.py
@@ -51,7 +51,7 @@ class ControllerBase(PolicyBase):
         Returns state at end of node execution, total reward, epsiode_termination_flag, info
         '''
-
+    # TODO: this is never called when you test high-level policy rather than train...
     def step_current_node(self, visualize_low_level_steps=False):
         total_reward = 0
         self.node_terminal_state_reached = False
@@ -59,6 +59,7 @@ class ControllerBase(PolicyBase):
             observation, reward, terminal, info = self.low_level_step_current_node()
             if visualize_low_level_steps:
                 self.env.render()
+            # TODO: make the total_reward discounted....
             total_reward += reward
 
         total_reward += self.current_node.high_level_extra_reward
diff --git a/backends/kerasrl_learner.py b/backends/kerasrl_learner.py
index a98dc928bec3d63e0a3975f85144e98671710b95..a24c4daa9bd45c548e3278597547ef768db6698a 100644
--- a/backends/kerasrl_learner.py
+++ b/backends/kerasrl_learner.py
@@ -269,9 +269,12 @@ class DQNLearner(LearnerBase):
         """
         model = Sequential()
         model.add(Flatten(input_shape=(1, ) + self.input_shape))
-        model.add(Dense(32))
+        #model.add(Dense(64))
+        model.add(Dense(64))
         model.add(Activation('relu'))
-        model.add(Dense(32))
+        model.add(Dense(64))
+        model.add(Activation('relu'))
+        model.add(Dense(64))
         model.add(Activation('relu'))
         model.add(Dense(self.nb_actions))
         model.add(Activation('linear'))
@@ -435,6 +438,7 @@ class DQNAgentOverOptions(DQNAgent):
         initiation conditions."""
         invalid_node_indices = list()
         for index, option_alias in enumerate(self.low_level_policy_aliases):
+            # TODO: Locate reset_maneuver to another place as this is a "get" function.
             self.low_level_policies[option_alias].reset_maneuver()
             if not self.low_level_policies[option_alias].initiation_condition:
                 invalid_node_indices.append(index)
diff --git a/backends/manual_policy.py b/backends/manual_policy.py
index 6e59db13b580d3762507f6fe286431cbb7b02ceb..3876b1d97dd2754309689d9b229d6fd6cb7ab7a6 100644
--- a/backends/manual_policy.py
+++ b/backends/manual_policy.py
@@ -30,6 +30,7 @@ class ManualPolicy(ControllerBase):
         new_node = None
         if self.low_level_policies[self.current_node].termination_condition:
             for next_node in self.adj[self.current_node]:
+                self.low_level_policies[next_node].reset_maneuver()
                 if self.low_level_policies[next_node].initiation_condition:
                     new_node = next_node
                     break  # change current_node to the highest priority next node
diff --git a/backends/trained_policies/highlevel/highlevel_weights.h5f b/backends/trained_policies/highlevel/highlevel_weights.h5f
index 6dcc7ba4718d82e329f833ff23a8c68595309c61..b3c5347197f637d893b80c7258a6504d4368c59b 100644
Binary files a/backends/trained_policies/highlevel/highlevel_weights.h5f and b/backends/trained_policies/highlevel/highlevel_weights.h5f differ
diff --git a/backends/trained_policies/highlevel/highlevel_weights_772.h5f b/backends/trained_policies/highlevel/highlevel_weights_772.h5f
deleted file mode 100644
index 7b986c74005a62b1dc11cbdd3022105ec5317d37..0000000000000000000000000000000000000000
Binary files a/backends/trained_policies/highlevel/highlevel_weights_772.h5f and /dev/null differ
diff --git a/backends/trained_policies/immediatestop/immediatestop_weights_actor.h5f b/backends/trained_policies/immediatestop/immediatestop_weights_actor.h5f
new file mode 100644
index 0000000000000000000000000000000000000000..3ade0ae38ec13eb1040c1a4dc8a91390d9ed6752
Binary files /dev/null and b/backends/trained_policies/immediatestop/immediatestop_weights_actor.h5f differ
diff --git a/backends/trained_policies/immediatestop/immediatestop_weights_critic.h5f b/backends/trained_policies/immediatestop/immediatestop_weights_critic.h5f
new file mode 100644
index 0000000000000000000000000000000000000000..2b97e53475d094a51c29cc8ed82f5bdc47f9ea3f
Binary files /dev/null and b/backends/trained_policies/immediatestop/immediatestop_weights_critic.h5f differ
diff --git a/backends/trained_policies/left/left_weights_actor.h5f b/backends/trained_policies/left/left_weights_actor.h5f
new file mode 100644
index 0000000000000000000000000000000000000000..3f6e76aa921b07e851ab4eac8045e74c0c28e848
Binary files /dev/null and b/backends/trained_policies/left/left_weights_actor.h5f differ
diff --git a/backends/trained_policies/left/left_weights_critic.h5f b/backends/trained_policies/left/left_weights_critic.h5f
new file mode 100644
index 0000000000000000000000000000000000000000..b53dfd8f69aa96544b986a2de8c506b20162358f
Binary files /dev/null and b/backends/trained_policies/left/left_weights_critic.h5f differ
diff --git a/backends/trained_policies/right/right_weights_actor.h5f b/backends/trained_policies/right/right_weights_actor.h5f
new file mode 100644
index 0000000000000000000000000000000000000000..3f6e76aa921b07e851ab4eac8045e74c0c28e848
Binary files /dev/null and b/backends/trained_policies/right/right_weights_actor.h5f differ
diff --git a/backends/trained_policies/right/right_weights_critic.h5f b/backends/trained_policies/right/right_weights_critic.h5f
new file mode 100644
index 0000000000000000000000000000000000000000..b53dfd8f69aa96544b986a2de8c506b20162358f
Binary files /dev/null and b/backends/trained_policies/right/right_weights_critic.h5f differ
diff --git a/backends/trained_policies/wait/wait_weights_actor.h5f b/backends/trained_policies/wait/wait_weights_actor.h5f
deleted file mode 100644
index 00703d9cc1931bc31083f9a77575598a030f92c3..0000000000000000000000000000000000000000
Binary files a/backends/trained_policies/wait/wait_weights_actor.h5f and /dev/null differ
diff --git a/backends/trained_policies/wait/wait_weights_critic.h5f b/backends/trained_policies/wait/wait_weights_critic.h5f
deleted file mode 100644
index 330e90c08e129f18a05e2a54882f8bb9fffc1957..0000000000000000000000000000000000000000
Binary files a/backends/trained_policies/wait/wait_weights_critic.h5f and /dev/null differ
diff --git a/config.json b/config.json
index b8d9b1c65aeebae2c3a2ccf40f6f9c6b4b6de70d..5e40c774e0957492c3db2f94b74dfde642b396a2 100644
--- a/config.json
+++ b/config.json
@@ -3,7 +3,7 @@
         "wait": "Wait",
         "follow": "Follow",
         "stop": "Stop",
-        "changelane": "ChangeLane",
+        "immediatestop": "ImmediateStop",
         "keeplane": "KeepLane"
     },
diff --git a/env/simple_intersection/features.py b/env/simple_intersection/features.py
index 2b70e51fa7357378e061ea599566ae0fea1dc5b2..2c422c6a2ffa97d16d47dd118306e8d289db2789 100644
--- a/env/simple_intersection/features.py
+++ b/env/simple_intersection/features.py
@@ -174,6 +174,6 @@ class Features(object):
 
         # Add buffer features to make a fixed length feature vector
         for i in range(MAX_NUM_VEHICLES - len(self.other_vehs)):
-            feature += (0.0, 0.0, 0.0, 0.0, -1)
+            feature += (0.0, 0.0, 0.0, 0.0, -1.0)
 
         return feature
diff --git a/env/simple_intersection/simple_intersection_env.py b/env/simple_intersection/simple_intersection_env.py
index 1d61b91ad2a9300a4cbf657bbaf1a8c80dd9347e..3855aea8e699fd521c89b8b4b51039572307e970 100644
--- a/env/simple_intersection/simple_intersection_env.py
+++ b/env/simple_intersection/simple_intersection_env.py
@@ -1156,12 +1156,13 @@ class SimpleIntersectionEnv(RoadEnv, EpisodicEnvBase):
 
         Returns True if the environment has terminated
         """
-        model_checks_violated = (self._LTL_preconditions_enable and \
-            self.current_model_checking_result())
+        model_checks_violated = self._LTL_preconditions_enable and \
+                                self.current_model_checking_result()
         reached_goal = self._terminate_in_goal and self.goal_achieved
         self._check_collisions()
         self._check_ego_theta_out_of_range()
         terminated = self.termination_condition
+
         return model_checks_violated or reached_goal or terminated
 
     @property
@@ -1181,7 +1182,7 @@ class SimpleIntersectionEnv(RoadEnv, EpisodicEnvBase):
 
         return (self.ego.x >= rd.hlanes.end_pos) and \
             not self.collision_happened and \
-            not self.ego.APs['over_speed_limit']
+            (self.ego.v <= 1.1*rd.speed_limit)
 
     def reset(self):
         """Gym compliant reset function.
@@ -1229,7 +1230,6 @@ class SimpleIntersectionEnv(RoadEnv, EpisodicEnvBase):
             self.window.dispatch_events()
 
             # Text information about ego vehicle's states
-            # Right now, we are only training one option (Stop)
             info = "Ego Attributes:" + get_APs(
                 self, EGO_INDEX, 'in_stop_region',
                 'has_entered_stop_region', 'has_stopped_in_stop_region',
diff --git a/high_level_policy_main.py b/high_level_policy_main.py
index 3c5a0f3c787d7937618af5a76816372e5688a2d2..8204c7e2a3d641780a17d71723ee4bde7f6dec57 100644
--- a/high_level_policy_main.py
+++ b/high_level_policy_main.py
@@ -11,7 +11,7 @@ def high_level_policy_training(nb_steps=25000,
                                load_weights=False,
                                training=True,
                                testing=True,
-                               nb_episodes_for_test=10,
+                               nb_episodes_for_test=20,
                                max_nb_steps=100,
                                visualize=False,
                                tensorboard=False,
@@ -63,8 +63,7 @@ def high_level_policy_training(nb_steps=25000,
         agent.save_model(save_path)
 
     if testing:
-        options.set_controller_policy(agent.predict)
-        agent.test_model(options, nb_episodes=nb_episodes_for_test)
+        high_level_policy_testing(nb_episodes_for_test=nb_episodes_for_test)
 
     return agent
 
@@ -228,7 +227,6 @@ if __name__ == "__main__":
             load_weights=args.load_weights,
             save_path=args.save_file,
             tensorboard=args.tensorboard,
-            nb_episodes_for_test=20,
             visualize=args.visualize)
 
     if args.test:
diff --git a/options/options_loader.py b/options/options_loader.py
index f58554f80af5c0d85909776ab4251ddfe75b4cfb..bce1a6cf1b7d885fe256a2a8748264f9eadcf8cc 100644
--- a/options/options_loader.py
+++ b/options/options_loader.py
@@ -1,4 +1,5 @@
 import json
+import os  # for os.path.isfile
 from .simple_intersection.maneuvers import *
 from .simple_intersection.mcts_maneuvers import *
 from backends import RLController, DDPGLearner, MCTSLearner, OnlineMCTSController, ManualPolicy
@@ -155,19 +156,34 @@ class OptionsGraph:
     # TODO: error handling
     def load_trained_low_level_policies(self):
         for key, maneuver in self.maneuvers.items():
-            agent = DDPGLearner(
-                input_shape=(maneuver.get_reduced_feature_length(), ),
-                nb_actions=2,
-                gamma=0.99,
-                nb_steps_warmup_critic=200,
-                nb_steps_warmup_actor=200,
-                lr=1e-3)
-            agent.load_model("backends/trained_policies/" + key + "/" + key +
-                             "_weights.h5f")
-            maneuver.set_low_level_trained_policy(agent.predict)
-            maneuver._cost_weights = (20.0 * 1e-3, 1.0 * 1e-3, 0.25 * 1e-3,
-                                      1.0 * 1e-3, 100.0 * 1e-3, 0.1 * 1e-3,
-                                      0.25 * 1e-3, 0.1 * 1e-3)
+            trained_policy_path = "backends/trained_policies/" + key + "/"
+            critic_file_exists = os.path.isfile(trained_policy_path + key + "_weights_critic.h5f")
+            actor_file_exists = os.path.isfile(trained_policy_path + key + "_weights_actor.h5f")
+
+            if actor_file_exists and critic_file_exists:
+                agent = DDPGLearner(
+                    input_shape=(maneuver.get_reduced_feature_length(),),
+                    nb_actions=2,
+                    gamma=0.99,
+                    nb_steps_warmup_critic=200,
+                    nb_steps_warmup_actor=200,
+                    lr=1e-3)
+                agent.load_model(trained_policy_path + key + "_weights.h5f")
+                maneuver.set_low_level_trained_policy(agent.predict)
+
+            elif not critic_file_exists and actor_file_exists:
+                print("\n Warning: unable to load the low-level policy of \"" + key +
+                      "\". The critic weights file has to be located in the same " +
+                      "directory as the actor weights file; the manual policy will be used instead.\n")
+
+            else:
+                print("\n Warning: the trained low-level policy of \"" + key +
+                      "\" does not exist; the manual policy will be used.\n")
+
+            # setting the cost weights for high-level policy training.
+            # TODO: this shouldn't be initialized here, but within the ManeuverBase class (e.g.
+            # add a flag indicating high-level training and set the weights accordingly)...
+            maneuver._cost_weights = (100.0 * 1e-3, 10.0 * 1e-3, 0.25 * 1e-3, 1.0 * 1e-3,
+                                      100.0 * 1e-3, 0.1 * 1e-3, 0.25 * 1e-3, 0.1 * 1e-3)
 
         if self.config["method"] == "mcts":
             maneuver.timeout = np.inf
diff --git a/options/simple_intersection/maneuver_base.py b/options/simple_intersection/maneuver_base.py
index 1d9b45cfe36b0efce5f508e722b36bd89ea0f974..275e654c3c79590f3985da2f7afaadb458bf5906 100644
--- a/options/simple_intersection/maneuver_base.py
+++ b/options/simple_intersection/maneuver_base.py
@@ -20,7 +20,7 @@ class ManeuverBase(EpisodicEnvBase):
     learning_mode = 'training'
 
     #: timeout (i.e., time horizon for termination)
-    # By default, the time-out horizon is 1 as in Paxton et. al (2017).
+    # By default, the time-out horizon is 1.
     timeout = 1
 
     #: the option specific weight vector for cost of driving, which is
@@ -153,8 +153,7 @@ class ManeuverBase(EpisodicEnvBase):
                 # in this case, no additional reward by Default
                 # (i.e., self._extra_r_terminal = None by default).
                 self._terminal_reward_superposition(self._extra_r_terminal)
-                info[
-                    'maneuver_termination_reason'] = 'extra_termination_condition'
+                info['maneuver_termination_reason'] = 'extra_termination_condition'
         if self.timeout_happened:
             if self._give_reward_on_timeout:
                 # in this case, no additional reward by Default
diff --git a/options/simple_intersection/maneuvers.py b/options/simple_intersection/maneuvers.py
index 748fb0ea0c1dd0743f00e1b683986e7186a31e2a..570b4acc558cfc6cf3ebdc015419ed64af006817 100644
--- a/options/simple_intersection/maneuvers.py
+++ b/options/simple_intersection/maneuvers.py
@@ -16,9 +16,11 @@ class KeepLane(ManeuverBase):
     def _init_LTL_preconditions(self):
         self._LTL_preconditions.append(LTLProperty("G ( not veh_ahead )", 0))
 
+        #self._LTL_preconditions.append(
         #    LTLProperty("G ( not stopped_now )", 100,
         #                self._enable_low_level_training_properties))
+
         self._LTL_preconditions.append(
             LTLProperty(
                 "G ( (lane and target_lane) or (not lane and not target_lane) )",
@@ -63,6 +65,76 @@ class KeepLane(ManeuverBase):
         return False
 
 
+class ImmediateStop(ManeuverBase):
+
+    _terminate_in_goal = True
+    _reward_in_goal = None
+
+    _penalty_in_violation = None
+    _ego_pos_range = (rd.intersection_width_w_offset, rd.hlanes.end_pos)
+
+    def _init_param(self):
+        self._v_ref = 0 if self._enable_low_level_training_properties else rd.speed_limit
+        self._target_lane = self.env.ego.APs['lane']
+
+    def _init_LTL_preconditions(self):
+
+        self._LTL_preconditions.append(
+            LTLProperty(
+                "G ( (veh_ahead and before_but_close_to_stop_region) U highest_priority )",
+                None, not self._enable_low_level_training_properties))
+
+        self._LTL_preconditions.append(
+            LTLProperty("G ( not stopped_now )", self._penalty(self._reward_in_goal),
+                        not self._enable_low_level_training_properties))
+
+        self._LTL_preconditions.append(
+            LTLProperty(
+                "G ( (lane and target_lane) or (not lane and not target_lane) )",
+                100, self._enable_low_level_training_properties))
+
+    def generate_learning_scenario(self):
+        self.generate_scenario(
+            ego_pos_range=self._ego_pos_range,
+            ego_perturb_lim=(rd.hlanes.width / 4, np.pi / 6),
+            ego_heading_towards_lane_centre=True)
+        self.env._terminate_in_goal = False
+        self.env._reward_in_goal = None
+        self._reward_in_goal = 200
+        self._enable_low_level_training_properties = True
+
+    def generate_validation_scenario(self):
+        self._ego_pos_range = (rd.hlanes.start_pos, rd.hlanes.end_pos)
+        self.generate_learning_scenario()
+
+    def _low_level_manual_policy(self):
+        return self.env.aggressive_driving_policy(EGO_INDEX)
+
+    @staticmethod
+    def _features_dim_reduction(features_tuple):
+        return extract_ego_features(features_tuple, 'v', 'v_ref', 'e_y', 'psi',
+                                    'v tan(psi/L)', 'theta', 'lane', 'acc', 'psi_dot')
+
+    @property
+    def extra_termination_condition(self):
+        if self._enable_low_level_training_properties:  # activated only for the low-level training.
+            if self.env.ego.APs['stopped_now']:
+                if self._reward_in_goal is not None:
+                    self._extra_r_terminal = self._reward_in_goal
+                    self._extra_r_terminal *= np.exp(- pow(self.env.ego.theta, 2)
+                                                     - pow(self.env.ego.y - rd.hlanes.centres[self._target_lane], 2)
+                                                     - 0.25 * pow(self.env.ego.psi, 2))
+                else:
+                    self._extra_r_terminal = None
+                return True
+
+            else:
+                self._extra_r_terminal = None
+                return False
+
+        return False
+
+
 class Stop(ManeuverBase):
 
     _terminate_in_goal = True
@@ -79,6 +151,10 @@ class Stop(ManeuverBase):
         #     LTLProperty("G ( not has_stopped_in_stop_region )",
         #                 self._penalty(self._reward_in_goal)))
 
+        self._LTL_preconditions.append(
+            LTLProperty("G ( not has_stopped_in_stop_region )", -150,
+                        not self._enable_low_level_training_properties))
+
         self._LTL_preconditions.append(
             LTLProperty(
                 "G ( (before_but_close_to_stop_region or in_stop_region) U has_stopped_in_stop_region )",
@@ -98,13 +174,11 @@
     def _set_v_ref(self):
         self._v_ref = rd.speed_limit
+        #if self._enable_low_level_training_properties:
         x = self.env.ego.x
-        if x <= rd.hlanes.near_stop_region:
-            self._v_ref = rd.speed_limit
-        elif x <= rd.hlanes.stop_region_centre:
-            self._v_ref = -(rd.speed_limit / abs(rd.hlanes.near_stop_region)
-                            ) * (x - rd.hlanes.stop_region_centre)
-        else:
+        if rd.hlanes.near_stop_region < x <= rd.hlanes.stop_region_centre:
+            self._v_ref = -(rd.speed_limit / abs(rd.hlanes.near_stop_region)) * (x - rd.hlanes.stop_region_centre)
+        elif x > rd.hlanes.stop_region_centre:
             self._v_ref = 0
 
     def generate_learning_scenario(self):
@@ -129,18 +203,18 @@
 
     @property
     def extra_termination_condition(self):
-        if self.env.ego.APs['has_stopped_in_stop_region']:
-            if self._reward_in_goal is not None:
-                self._extra_r_terminal = self._reward_in_goal
-                self._extra_r_terminal *= np.exp(- pow(self.env.ego.theta, 2)
-                                                 - pow(self.env.ego.y - rd.hlanes.centres[self._target_lane], 2)
-                                                 - 0.25*pow(self.env.ego.psi, 2))
-            else:
-                self._extra_r_terminal = None
-            return True
+        if self._enable_low_level_training_properties:  # activated only for the low-level training.
+            if self.env.ego.APs['has_stopped_in_stop_region']:
+                if self._reward_in_goal is not None:
+                    self._extra_r_terminal = self._reward_in_goal
+                    self._extra_r_terminal *= np.exp(- pow(self.env.ego.theta, 2)
+                                                     - pow(self.env.ego.y - rd.hlanes.centres[self._target_lane], 2)
+                                                     - 0.25 * pow(self.env.ego.psi, 2))
+                else:
+                    self._extra_r_terminal = None
+                return True
 
-        elif self._enable_low_level_training_properties:  # activated only for the low-level training.
-            if (rd.speed_limit / 5 < self._v_ref) and \
+            elif (rd.speed_limit / 5 < self._v_ref) and \
                 (self.env.ego.v < self._v_ref / 2) and self.env.ego.acc < 0:
                 self._extra_r_terminal = -100
                 return True
@@ -154,93 +228,91 @@
 
 class Wait(ManeuverBase):
 
-    _terminate_in_goal = True
     _reward_in_goal = None
+    _terminate_in_goal = True
 
     def _init_LTL_preconditions(self):
-        self._LTL_preconditions.append(
-            LTLProperty(
-                "G ( (in_stop_region and stopped_now) U (highest_priority and intersection_is_clear))", 0,
-                not self._enable_low_level_training_properties))  # not available in low-level training...
-
-        self._LTL_preconditions.append(
-            LTLProperty("G ( not (in_intersection and highest_priority) )",
-                        self._penalty(self._reward_in_goal)))
 
         self._LTL_preconditions.append(
-            LTLProperty(
-                "G ( in_stop_region U (highest_priority and intersection_is_clear) )", 150, self._enable_low_level_training_properties))
-        self._LTL_preconditions.append(
-            LTLProperty(
-                "G ( (lane and target_lane) or (not lane and not target_lane) )",
-                100, self._enable_low_level_training_properties))
+            LTLProperty(
+                "G ( (in_stop_region and stopped_now) and not (highest_priority and intersection_is_clear))", 0,
+                not self._enable_low_level_training_properties))  # not available in low-level training...
 
-    def _init_param(self):
-        self._update_param()
-        self._target_lane = self.env.ego.APs['lane']
+        #LTLProperty(
+        #    "G ( (in_stop_region and stopped_now) U (highest_priority and intersection_is_clear))", 0,
+        #    not self._enable_low_level_training_properties))  # not available in low-level training...
 
-    def _update_param(self):
-        if self.env.ego.APs['highest_priority'] and self.env.ego.APs['intersection_is_clear']:
-            self._v_ref = rd.speed_limit / 5
-        else:
-            self._v_ref = 0
+        #self._LTL_preconditions.append(
+        #    LTLProperty("G ( not (in_intersection and highest_priority) )",
+        #                self._penalty(self._reward_in_goal)))
 
-    def generate_learning_scenario(self):
+        #self._LTL_preconditions.append(
+        #    LTLProperty(
+        #        "G ( in_stop_region U (highest_priority and intersection_is_clear) )", 150, self._enable_low_level_training_properties))
 
-        n_others = 0 if np.random.rand() <= 0 else np.random.randint(1, 4)
+        #self._LTL_preconditions.append(
+        #    LTLProperty(
+        #        "G ( (lane and target_lane) or (not lane and not target_lane) )",
+        #        100, self._enable_low_level_training_properties))
 
-        self.generate_scenario(
-            enable_LTL_preconditions=True,
-            n_others_range=(n_others, n_others),
-            ego_pos_range=rd.hlanes.stop_region,
-            n_others_stopped_in_stop_region=n_others,
-            ego_v_upper_lim=0,
-            ego_perturb_lim=(rd.hlanes.width / 4, np.pi / 6),
-            ego_heading_towards_lane_centre=True)
+    def _init_param(self):
+        self._v_ref = 0  #if self._enable_low_level_training_properties else rd.speed_limit
+        self._target_lane = self.env.ego.APs['lane']
 
-        max_waited_count = 0
-        min_waited_count = 1
-        for veh in self.env.vehs[1:]:
-            max_waited_count = max(max_waited_count, veh.waited_count)
-            min_waited_count = min(min_waited_count, veh.waited_count)
+    def _low_level_manual_policy(self):
+        return (0, 0)  # Do nothing during "Wait" but just wait until the highest priority is given.
 
-        min_waited_count = min(min_waited_count, max_waited_count)
+# @staticmethod
+# def _features_dim_reduction(features_tuple):
+#     return extract_ego_features(
+#         features_tuple, 'v', 'v_ref', 'e_y', 'psi', 'v tan(psi/L)', 'theta', 'lane', 'acc', 'psi_dot',
+#         'pos_stop_region', 'intersection_is_clear', 'highest_priority')
 
-        self._extra_action_weights_flag = False
-        if np.random.rand() <= 0.2:
-            self.env.ego.waited_count = np.random.randint(0, min_waited_count+1)
-        else:
-            self.env.ego.waited_count = np.random.randint(min_waited_count, max_waited_count + 21)
+class Left(ManeuverBase):
 
-        self.env.init_APs(False)
+    min_y_distance = rd.hlanes.width / 4
 
-        self._reward_in_goal = 200
-        self._enable_low_level_training_properties = True
+    _terminate_in_goal = True
+    _reward_in_goal = None
 
-    def generate_validation_scenario(self):
-        super().generate_validation_scenario()
-        #self._enable_low_level_training_properties = True
+    def _init_param(self):
+        self._v_ref = rd.speed_limit
+        self._target_lane = False
+        self._terminate_in_goal = True
 
     @property
-    def extra_termination_condition(self):
-        if self._enable_low_level_training_properties:  # activated only for the low-level training.
-            if self.env.ego.APs['highest_priority'] and self.env.ego.APs['intersection_is_clear'] \
-                    and np.random.rand() <= 0.05 and self.env.ego.v <= self._v_ref / 10:
-                self._extra_r_terminal = - 100
-                return True
-            else:
-                self._extra_r_terminal = None
-                return False
+    def goal_achieved(self):
+        ego = self.env.ego
+        APs = self.env.ego.APs
+        on_other_lane = APs['lane'] == self._target_lane
+        achieved_y_displacement = np.sign(ego.y) * \
+            (ego.y - rd.hlanes.centres[APs['target_lane']]) >= - self.min_y_distance
+        return on_other_lane and APs['on_route'] and \
+            achieved_y_displacement and APs['parallel_to_lane']
 
-        return False
+    @property
+    def extra_initiation_condition(self):
+        return self.env.ego.APs['lane']
 
     @staticmethod
     def _features_dim_reduction(features_tuple):
-        return extract_ego_features(
-            features_tuple, 'v', 'v_ref', 'e_y', 'psi', 'v tan(psi/L)', 'theta', 'lane', 'acc', 'psi_dot',
-            'pos_stop_region', 'intersection_is_clear', 'highest_priority')
+        return extract_ego_features(features_tuple, 'v', 'v_ref', 'e_y', 'psi',
+                                    'v tan(psi/L)', 'theta', 'lane', 'acc',
+                                    'psi_dot')
+
+
+class Right(Left):
+
+    def _init_param(self):
+        self._v_ref = rd.speed_limit
+        self._target_lane = True
+        self._terminate_in_goal = True
+
+    @property
+    def extra_initiation_condition(self):
+        return not self.env.ego.APs['lane']
 
 
 class ChangeLane(ManeuverBase):
@@ -252,12 +324,11 @@ class ChangeLane(ManeuverBase):
 
     _violation_penalty_in_low_level_training = None
 
-    high_level_extra_reward = -20
+    # high_level_extra_reward = -1000000
 
     def _init_param(self):
         self._v_ref = rd.speed_limit
         self._target_lane = not self.env.ego.APs['lane']
-        self._terminate_in_goal = True
 
     def _init_LTL_preconditions(self):
         self._LTL_preconditions.append(
@@ -307,8 +378,6 @@ class ChangeLane(ManeuverBase):
             self._violation_penalty_in_low_level_training = 150
             self._enable_low_level_training_properties = True
 
-    # TODO: It is not a good idea to specify features by numbers, as the list
-    # of features is ever changing. We should specify them by strings.
     @staticmethod
     def _features_dim_reduction(features_tuple):
         return extract_ego_features(features_tuple, 'v', 'v_ref', 'e_y', 'psi',
@@ -357,11 +426,22 @@ class Follow(ManeuverBase):
     def generate_validation_scenario(self):
         self.generate_learning_scenario()
 
+    def _init_param(self):
+        self._set_v_ref()
+
     def _update_param(self):
-        self._target_veh_i, _ = self.env.get_V2V_distance()
+        self._set_v_ref()
 
-        if self._target_veh_i is not None:
-            self._v_ref = self.env.vehs[self._target_veh_i].v
+    def _set_v_ref(self):
+        #if self._enable_low_level_training_properties:
+        self._target_veh_i, _ = self.env.get_V2V_distance()
+
+        if self._target_veh_i is not None:
+            self._v_ref = self.env.vehs[self._target_veh_i].v
+        else:
+            self._v_ref = 0
+        #else:
+        #    self._v_ref = rd.speed_limit
 
     def _low_level_manual_policy(self):
         return self.env.aggressive_driving_policy(EGO_INDEX)
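
Note on the "# TODO: make the total_reward discounted...." comment added in backends/controller_base.py: below is a minimal, self-contained sketch (not part of the patch above) of the discounted accumulation the TODO asks for. The discount factor and the reward list are assumed for illustration; in step_current_node the per-step rewards would come from low_level_step_current_node() rather than a list.

# Sketch only: discounted accumulation of low-level rewards, as the TODO in
# ControllerBase.step_current_node suggests. `gamma` and the reward sequence
# below are illustrative values, not taken from the repository.
def discounted_return(rewards, gamma=0.99):
    total, discount = 0.0, 1.0
    for r in rewards:
        total += discount * r  # reward at step t is weighted by gamma**t
        discount *= gamma
    return total

if __name__ == "__main__":
    # e.g., five low-level steps each yielding reward 1.0
    print(discounted_return([1.0] * 5))  # ~4.90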
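
On the TODO added in options/options_loader.py about moving the cost-weight initialization into ManeuverBase: one possible shape is sketched below, assuming a hypothetical high_level_training flag that does not exist in the repository. The two weight tuples are the ones removed from and added to options_loader.py in this diff; the class name and everything other than _cost_weights is illustrative only.

# Sketch of the ManeuverBase-side alternative hinted at by the TODO: a flag
# chooses between the weights used for low-level training and the heavier
# weights used while training the high-level policy.
class ManeuverBaseSketch:
    _cost_weights_low_level = (20.0 * 1e-3, 1.0 * 1e-3, 0.25 * 1e-3, 1.0 * 1e-3,
                               100.0 * 1e-3, 0.1 * 1e-3, 0.25 * 1e-3, 0.1 * 1e-3)
    _cost_weights_high_level = (100.0 * 1e-3, 10.0 * 1e-3, 0.25 * 1e-3, 1.0 * 1e-3,
                                100.0 * 1e-3, 0.1 * 1e-3, 0.25 * 1e-3, 0.1 * 1e-3)

    def __init__(self, high_level_training=False):
        # pick the weight vector once, inside the maneuver itself, instead of
        # overwriting it from the options loader
        self._cost_weights = (self._cost_weights_high_level if high_level_training
                              else self._cost_weights_low_level)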