Commit 558f2efd authored by Jae Young Lee

Merge branch 'retraining_wait_maneuver' into 'master'

Retraining wait maneuver

See merge request !4
parents 062ad4ff 074d2f49
@@ -270,17 +270,17 @@ class DQNLearner(LearnerBase):
         Returns: Keras Model object of actor
         """
         model = Sequential()
         model.add(Flatten(input_shape=(1, ) + self.input_shape))
-        model.add(Dense(64))
-        model.add(Activation('relu'))
-        model.add(Dense(64))
-        model.add(Activation('relu'))
-        model.add(Dense(64))
-        model.add(Activation('relu'))
+        model.add(Dense(64, activation='relu'))
+        model.add(Dense(64, activation='relu'))
+        model.add(Dense(64, activation='relu'))
+        model.add(Dense(64, activation='relu'))
+        model.add(Dense(64, activation='relu'))
+        model.add(Dense(64, activation='tanh'))
         model.add(Dense(self.nb_actions))
-        model.add(Activation('linear'))
-        # print(model.summary())
+        print(model.summary())
         return model
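A note on the rebuilt actor network above: the explicit Activation layers are folded into the Dense calls, the hidden stack grows from three 64-unit ReLU layers to six 64-unit layers (five ReLU, one tanh), and the output Dense(self.nb_actions) no longer needs an Activation because Keras' Dense defaults to a linear activation, so the Q-value head is still linear. Below is a minimal standalone sketch of the same construction, assuming Keras 2.x as used with keras-rl; input_shape and nb_actions are placeholder values, not the project's.

# Standalone sketch of the new actor network (assumption: Keras 2.x / keras-rl style).
from keras.models import Sequential
from keras.layers import Dense, Flatten

input_shape = (10,)   # placeholder for self.input_shape
nb_actions = 5        # placeholder for self.nb_actions

model = Sequential()
model.add(Flatten(input_shape=(1,) + input_shape))
for _ in range(5):
    model.add(Dense(64, activation='relu'))   # five ReLU hidden layers
model.add(Dense(64, activation='tanh'))       # final hidden layer uses tanh
model.add(Dense(nb_actions))                  # linear Q-value head (Keras default)
model.summary()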
@@ -555,4 +555,4 @@ class DQNAgentOverOptions(DQNAgent):

         for node_index in invalid_node_indices:
             q_values[node_index] = -np.inf
-        return q_values
\ No newline at end of file
+        return q_values
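The unchanged context lines above show how DQNAgentOverOptions keeps invalid options from being selected: their Q-values are overwritten with -np.inf before the greedy step. The snippet below is illustrative only (not the project's code; the array values and indices are made up) and shows why that mask is sufficient.

import numpy as np

q_values = np.array([0.7, 1.2, 0.3, 0.9])
invalid_node_indices = [1, 3]          # hypothetical invalid options
for node_index in invalid_node_indices:
    q_values[node_index] = -np.inf     # masked entries can never be the argmax

print(int(np.argmax(q_values)))        # -> 0, even though index 1 had the highest raw value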
No preview for this file type
File added
File added
@@ -48,6 +48,7 @@ def high_level_policy_training(nb_steps=25000,
                          target_model_update=1e-3,
                          delta_clip=100,
                          low_level_policies=options.maneuvers)
+                          #gamma=1)

     if load_weights:
         agent.load_model(save_path)
...
@@ -31,7 +31,7 @@ class ManeuverBase(EpisodicEnvBase):
     # with subtraction.
     # TODO: remove or to provide additional functionality, keep _cost_weights in ManeuverBase here (see other TODOs in simple_intersection_env regarding _cost_weights).
     _cost_weights = (10.0 * 1e-3, 10.0 * 1e-3, 0.25 * 1e-3, 1.0 * 1e-3,
                      100.0 * 1e-3, 0.1 * 1e-3, 0.05 * 1e-3, 0.1 * 1e-3)

     _extra_r_terminal = None
     _extra_r_on_timeout = None
...
@@ -50,13 +50,16 @@ class KeepLane(ManeuverBase):

     @staticmethod
     def _features_dim_reduction(features_tuple):
-        return extract_ego_features(features_tuple, 'v', 'v_ref', 'e_y', 'psi', 'v tan(psi/L)', 'theta', 'lane', 'acc', 'psi_dot')
+        return extract_ego_features(features_tuple, 'v', 'v_ref', 'e_y',
+                                    'psi', 'v tan(psi/L)', 'theta', 'lane',
+                                    'acc', 'psi_dot')

     @property
     def initiation_condition(self):
         """a virtual function (property) from ManeuverBase.

-        As KeepLane is a default maneuver, it has to be activated to be chosen at any time, state, and condition
-        (refer to initiation_condition of ManeuverBase for the usual case.
+        As KeepLane is a default maneuver, it has to be activated to be
+        chosen at any time, state, and condition (refer to
+        initiation_condition of ManeuverBase for the usual case).

         :returns True.
         """
@@ -243,35 +246,107 @@ class Wait(ManeuverBase):

     def _init_LTL_preconditions(self):
         self._LTL_preconditions.append(
-            LTLProperty("G ( (in_stop_region and stopped_now) and not (highest_priority and intersection_is_clear))",
+            LTLProperty("G ( (in_stop_region and stopped_now) U (highest_priority and intersection_is_clear))",
                         None, not self._enable_low_level_training_properties))  # not available in low-level training...

         self._LTL_preconditions.append(
-            LTLProperty("G ( not (in_intersection and highest_priority) )",
+            LTLProperty("G ( not (in_intersection and highest_priority and intersection_is_clear) )",
                         self._penalty(self._reward_in_goal)))

         self._LTL_preconditions.append(
             LTLProperty(
-                "G ( in_stop_region U (highest_priority and intersection_is_clear) )", 150, self._enable_low_level_training_properties))
+                "G ( in_stop_region U (highest_priority and intersection_is_clear) )", 150,
+                self._enable_low_level_training_properties))

         self._LTL_preconditions.append(
             LTLProperty(
                 "G ( (lane and target_lane) or (not lane and not target_lane) )",
-                100, self._enable_low_level_training_properties))
+                150, self._enable_low_level_training_properties))

     def _init_param(self):
-        self._v_ref = 0 #if self._enable_low_level_training_properties else rd.speed_limit
+        self._update_param()
         self._target_lane = self.env.ego.APs['lane']

+    def _update_param(self):
+        if self.env.ego.APs['highest_priority'] and self.env.ego.APs['intersection_is_clear']:
+            self._v_ref = rd.speed_limit
+        else:
+            self._v_ref = 0
+
+    def generate_learning_scenario(self):
+        n_others = 0 if np.random.rand() <= 0 else np.random.randint(1, 4)
+        self.generate_scenario(
+            n_others_range=(n_others, n_others),
+            ego_pos_range=rd.hlanes.stop_region,
+            n_others_stopped_in_stop_region=n_others,
+            ego_v_upper_lim=0,
+            ego_perturb_lim=(rd.hlanes.width / 4, np.pi / 6),
+            ego_heading_towards_lane_centre=True)
+
+        max_waited_count = 0
+        min_waited_count = 1
+        for veh in self.env.vehs[1:]:
+            max_waited_count = max(max_waited_count, veh.waited_count)
+            min_waited_count = min(min_waited_count, veh.waited_count)
+        min_waited_count = min(min_waited_count, max_waited_count)
+
+        self._extra_action_weights_flag = False
+        if np.random.rand() <= 0.5:
+            self.env.ego.waited_count = np.random.randint(0, min_waited_count+1)
+        else:
+            self.env.ego.waited_count = np.random.randint(min_waited_count, max_waited_count + 21)
+
+        self.env.init_APs(False)
+
+        self._reward_in_goal = 200
+        self._enable_low_level_training_properties = True
+        self._extra_action_weights_flag = True
+
+    @property
+    def extra_termination_condition(self):
+        if self._enable_low_level_training_properties:  # activated only for the low-level training.
+            if self.env.ego.APs['highest_priority'] and self.env.ego.APs['intersection_is_clear'] \
+                    and np.random.rand() <= 0.1 and self.env.ego.v <= self._v_ref / 10 \
+                    and self.env.ego.acc < 0:
+                self._extra_r_terminal = -100
+                return True
+            else:
+                self._extra_r_terminal = None
+                return False
+
+        return False
+
+    @staticmethod
+    def _features_dim_reduction(features_tuple):
+        return extract_ego_features(
+            features_tuple, 'v', 'v_ref', 'e_y', 'psi', 'v tan(psi/L)', 'theta', 'lane', 'acc', 'psi_dot',
+            'pos_stop_region', 'intersection_is_clear', 'highest_priority')
+
+
+class ManualWait(ManeuverBase):
+    _reward_in_goal = None
+    _terminate_in_goal = True
+
+    def _init_LTL_preconditions(self):
+        self._LTL_preconditions.append(
+            LTLProperty("G ( (in_stop_region and stopped_now) and not (highest_priority and intersection_is_clear))",
+                        None, not self._enable_low_level_training_properties))  # not available in low-level training...
+
+        self._LTL_preconditions.append(
+            LTLProperty("G ( in_stop_region U (highest_priority and intersection_is_clear) )",
+                        150, self._enable_low_level_training_properties))
+
+        self._LTL_preconditions.append(
+            LTLProperty("G ( (lane and target_lane) or (not lane and not target_lane) )",
+                        100, self._enable_low_level_training_properties))
+
+    def _init_param(self):
+        self._v_ref = 0
+        self._target_lane = self.env.ego.APs['lane']
+
+    def _low_level_manual_policy(self):
+        return (0, 0)  # Do nothing during "Wait" but just wait until the highest priority is given.
+
-    # @staticmethod
-    # def _features_dim_reduction(features_tuple):
-    #     return extract_ego_features(
-    #         features_tuple, 'v', 'v_ref', 'e_y', 'psi', 'v tan(psi/L)', 'theta', 'lane', 'acc', 'psi_dot',
-    #         'pos_stop_region', 'intersection_is_clear', 'highest_priority')

 class Left(ManeuverBase):
...
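On the reworked Wait precondition above: in LTL, "p U q" (until) holds only if p stays true at every step up to some step where q becomes true, so "G ( (in_stop_region and stopped_now) U (highest_priority and intersection_is_clear) )" lets the ego stop waiting only once it holds the highest priority with a clear intersection, replacing the old conjunction, which simply required the ego to remain stopped while that combination never held. The sketch below is a simplified finite-trace check for this shape of property; it is illustrative only, not the project's LTLProperty implementation, and the predicates and dict-shaped states are hypothetical.

# Simplified finite-trace check for "G ( p U q )" (illustrative sketch only).
# On a finite prefix, the only detectable violation is a step where neither p
# nor q holds: q has not yet discharged the obligation and p is already broken.
def violates_G_p_until_q(trace, p, q):
    """trace: iterable of states; p, q: predicates over a single state."""
    for state in trace:
        if not p(state) and not q(state):
            return True     # (p U q) is false at this position, so G(...) is violated
    return False            # no violation detected on this prefix

# Hypothetical usage with states mirroring the atomic propositions in the diff:
trace = [
    {'in_stop_region': True, 'stopped_now': True,
     'highest_priority': False, 'intersection_is_clear': False},
    {'in_stop_region': True, 'stopped_now': True,
     'highest_priority': True, 'intersection_is_clear': True},
]
p = lambda s: s['in_stop_region'] and s['stopped_now']
q = lambda s: s['highest_priority'] and s['intersection_is_clear']
print(violates_G_p_until_q(trace, p, q))   # -> False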