diff --git a/backends/trained_policies/wait/wait_weights_actor.h5f b/backends/trained_policies/wait/wait_weights_actor.h5f
new file mode 100644
index 0000000000000000000000000000000000000000..df6c51f2e9a39202430b13bf31428b9d88f261c8
Binary files /dev/null and b/backends/trained_policies/wait/wait_weights_actor.h5f differ
diff --git a/backends/trained_policies/wait/wait_weights_critic.h5f b/backends/trained_policies/wait/wait_weights_critic.h5f
new file mode 100644
index 0000000000000000000000000000000000000000..93b370ddc75f75f14acd0a708e8200c36cc1f550
Binary files /dev/null and b/backends/trained_policies/wait/wait_weights_critic.h5f differ
diff --git a/env/simple_intersection/simple_intersection_env.py b/env/simple_intersection/simple_intersection_env.py
index 5e3355bc8d3b80df7d294bc0b5c774b34b6125bd..58ef9a3a5b9d7a36e9ca75f12d81f9ce7b238fa2 100644
--- a/env/simple_intersection/simple_intersection_env.py
+++ b/env/simple_intersection/simple_intersection_env.py
@@ -53,9 +53,8 @@ class SimpleIntersectionEnv(RoadEnv, EpisodicEnvBase):
     # can be set to a specific value which may be different than the default.
     # TODO: check _cost_weights in both here and ManeuverBase. The _cost_weights has to be substituted to here, but it doesn't sometimes.
     # TODO: set a functionality of setting _cost_weights for low and high level training separately.
-    _cost_weights = (0, 0, 0, 0, 0, 0, 0, 0)
-    #(10.0 * 1e-3, 10.0 * 1e-3, 0.25 * 1e-3, 1.0 * 1e-3,
-    #100.0 * 1e-3, 0.1 * 1e-3, 0.05 * 1e-3, 0.1 * 1e-3)
+    _cost_weights = (10.0 * 1e-3, 10.0 * 1e-3, 0.25 * 1e-3, 1.0 * 1e-3,
+                     100.0 * 1e-3, 0.1 * 1e-3, 0.05 * 1e-3, 0.1 * 1e-3)
 
     #TODO: Move this to constants
     # The empirical min and max of each term in the cost vector, which is used to normalize the values
diff --git a/high_level_policy_main.py b/high_level_policy_main.py
index 8204c7e2a3d641780a17d71723ee4bde7f6dec57..3d69046c7fba8ffe196ecc757827c2f9910e73e0 100644
--- a/high_level_policy_main.py
+++ b/high_level_policy_main.py
@@ -47,7 +47,8 @@ def high_level_policy_training(nb_steps=25000,
         nb_actions=options.get_number_of_nodes(),
         target_model_update=1e-3,
         delta_clip=100,
-        low_level_policies=options.maneuvers)
+        low_level_policies=options.maneuvers,
+        gamma=1)
 
     if load_weights:
         agent.load_model(save_path)
@@ -77,7 +78,8 @@ def high_level_policy_testing(nb_episodes_for_test=100,
     agent = DQNLearner(
         input_shape=(50, ),
         nb_actions=options.get_number_of_nodes(),
-        low_level_policies=options.maneuvers)
+        low_level_policies=options.maneuvers,
+        gamma=1)
 
     if pretrained:
         trained_agent_file = "backends/trained_policies/highlevel/" + trained_agent_file
@@ -99,7 +101,8 @@ def evaluate_high_level_policy(nb_episodes_for_test=100,
     agent = DQNLearner(
         input_shape=(50, ),
         nb_actions=options.get_number_of_nodes(),
-        low_level_policies=options.maneuvers)
+        low_level_policies=options.maneuvers,
+        gamma=1)
 
     if pretrained:
         trained_agent_file = "backends/trained_policies/highlevel/" + trained_agent_file
diff --git a/highlevel_weights.h5f b/highlevel_weights.h5f
new file mode 100644
index 0000000000000000000000000000000000000000..6102e5db24557b9ce5ddc012bb211c75edffc1e2
Binary files /dev/null and b/highlevel_weights.h5f differ
diff --git a/options/simple_intersection/maneuver_base.py b/options/simple_intersection/maneuver_base.py
index e43b300a1a16dd7fa689e9c0a2692d81b4af221d..c680f334dae91839b45a7a8580b8c5b85639f707 100644
--- a/options/simple_intersection/maneuver_base.py
+++ b/options/simple_intersection/maneuver_base.py
@@ -30,9 +30,8 @@ class ManeuverBase(EpisodicEnvBase):
     # as a negative reward, so a cost will be summed up to the reward
     # with subtraction.
     # TODO: remove or to provide additional functionality, keep _cost_weights in ManeuverBase here (see other TODOs in simple_intersection_env regarding _cost_weights).
-    _cost_weights = (0, 0, 0, 0, 0, 0, 0, 0)
-    #(10.0 * 1e-3, 10.0 * 1e-3, 0.25 * 1e-3, 1.0 * 1e-3,
-    #100.0 * 1e-3, 0.1 * 1e-3, 0.05 * 1e-3, 0.1 * 1e-3)
+    _cost_weights = (10.0 * 1e-3, 10.0 * 1e-3, 0.25 * 1e-3, 1.0 * 1e-3,
+                     100.0 * 1e-3, 0.1 * 1e-3, 0.05 * 1e-3, 0.1 * 1e-3)
 
     _extra_r_terminal = None
     _extra_r_on_timeout = None
diff --git a/options/simple_intersection/maneuvers.py b/options/simple_intersection/maneuvers.py
index 3a99a45378b8d00c9eb4778404300f748a0ef6e9..0723489afb28e9c72216759bf0efa96e23b92717 100644
--- a/options/simple_intersection/maneuvers.py
+++ b/options/simple_intersection/maneuvers.py
@@ -243,34 +243,81 @@ class Wait(ManeuverBase):
 
     def _init_LTL_preconditions(self):
         self._LTL_preconditions.append(
-            LTLProperty("G ( (in_stop_region and stopped_now) and not (highest_priority and intersection_is_clear))",
+            LTLProperty("G ( (in_stop_region and stopped_now) U (highest_priority and intersection_is_clear))",
                         None, not self._enable_low_level_training_properties))  # not available in low-level training...
 
         self._LTL_preconditions.append(
-            LTLProperty("G ( not (in_intersection and highest_priority) )",
+            LTLProperty("G ( not (in_intersection and highest_priority and intersection_is_clear) )",
                         self._penalty(self._reward_in_goal)))
 
         self._LTL_preconditions.append(
             LTLProperty(
-                "G ( in_stop_region U (highest_priority and intersection_is_clear) )", 150, self._enable_low_level_training_properties))
+                "G ( in_stop_region U (highest_priority and intersection_is_clear) )", 150,
+                self._enable_low_level_training_properties))
 
         self._LTL_preconditions.append(
             LTLProperty(
                 "G ( (lane and target_lane) or (not lane and not target_lane) )",
-                100, self._enable_low_level_training_properties))
+                150, self._enable_low_level_training_properties))
 
     def _init_param(self):
-        self._v_ref = 0 #if self._enable_low_level_training_properties else rd.speed_limit
+        self._update_param()
         self._target_lane = self.env.ego.APs['lane']
 
-    def _low_level_manual_policy(self):
-        return (0, 0)  # Do nothing during "Wait" but just wait until the highest priority is given.
+    def _update_param(self):
+        if self.env.ego.APs['highest_priority'] and self.env.ego.APs['intersection_is_clear']:
+            self._v_ref = rd.speed_limit
+        else:
+            self._v_ref = 0
 
-#    @staticmethod
-#    def _features_dim_reduction(features_tuple):
-#        return extract_ego_features(
-#            features_tuple, 'v', 'v_ref', 'e_y', 'psi', 'v tan(psi/L)', 'theta', 'lane', 'acc', 'psi_dot',
-#            'pos_stop_region', 'intersection_is_clear', 'highest_priority')
+    def generate_learning_scenario(self):
+        n_others = 0 if np.random.rand() <= 0 else np.random.randint(1, 4)
+        self.generate_scenario(
+            n_others_range=(n_others, n_others),
+            ego_pos_range=rd.hlanes.stop_region,
+            n_others_stopped_in_stop_region=n_others,
+            ego_v_upper_lim=0,
+            ego_perturb_lim=(rd.hlanes.width / 4, np.pi / 6),
+            ego_heading_towards_lane_centre=True)
+
+        max_waited_count = 0
+        min_waited_count = 1
+        for veh in self.env.vehs[1:]:
+            max_waited_count = max(max_waited_count, veh.waited_count)
+            min_waited_count = min(min_waited_count, veh.waited_count)
+
+        min_waited_count = min(min_waited_count, max_waited_count)
+        self._extra_action_weights_flag = False
+
+        if np.random.rand() <= 0.5:
+            self.env.ego.waited_count = np.random.randint(0, min_waited_count + 1)
+        else:
+            self.env.ego.waited_count = np.random.randint(min_waited_count, max_waited_count + 21)
+
+        self.env.init_APs(False)
+        self._reward_in_goal = 200
+        self._enable_low_level_training_properties = True
+        self._extra_action_weights_flag = True
+
+    @property
+    def extra_termination_condition(self):
+        if self._enable_low_level_training_properties:  # activated only for the low-level training.
+            if self.env.ego.APs['highest_priority'] and self.env.ego.APs['intersection_is_clear'] \
+                    and np.random.rand() <= 0.1 and self.env.ego.v <= self._v_ref / 10 \
+                    and self.env.ego.acc < 0:
+                self._extra_r_terminal = -100
+                return True
+            else:
+                self._extra_r_terminal = None
+                return False
+
+        return False
+
+    @staticmethod
+    def _features_dim_reduction(features_tuple):
+        return extract_ego_features(
+            features_tuple, 'v', 'v_ref', 'e_y', 'psi', 'v tan(psi/L)', 'theta', 'lane', 'acc', 'psi_dot',
+            'pos_stop_region', 'intersection_is_clear', 'highest_priority')
 
 
 class Left(ManeuverBase):
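Note on the Wait maneuver change above: the new _update_param switches the reference velocity from 0 to the road speed limit once the ego vehicle both holds the highest priority and sees a clear intersection. A minimal standalone sketch of that rule, outside the diff, assuming a stand-in speed_limit constant and a plain dict of atomic propositions in place of the repository's rd.speed_limit and env.ego.APs:

    # Illustrative sketch only; speed_limit and the dict-based APs are assumptions,
    # not the repository's rd.speed_limit or env.ego.APs objects.
    speed_limit = 11.2  # assumed speed limit in m/s

    def wait_reference_velocity(aps):
        # Hold v_ref at 0 while waiting; release to the speed limit only when the
        # ego both has the highest priority and observes a clear intersection.
        if aps['highest_priority'] and aps['intersection_is_clear']:
            return speed_limit
        return 0.0

    print(wait_reference_velocity({'highest_priority': True, 'intersection_is_clear': False}))  # 0.0
    print(wait_reference_velocity({'highest_priority': True, 'intersection_is_clear': True}))   # 11.2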