Commit 70ad9bf5 authored by Jae Young Lee

Successfully trained all low-level policies except the Wait maneuver.

Each low-level policy was retrained with improved LTL conditions and rewards, parts of which are designed to encourage exploration (to prevent the vehicle from staying stopped all the time); see the sketch below.
parent f2171d2c
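The pattern referred to above appears repeatedly in this diff: an LTL precondition that is active only during low-level training, combined with an extra termination penalty that ends an episode early when the ego vehicle is nearly stopped. The following sketch is illustrative only; the class name, constructor signature, and numeric values are assumptions modelled on the KeepLane changes, not the repository's actual API.

# Illustrative sketch (not the repository's API): mimics the KeepLane pattern of a
# training-only LTL precondition plus an early-termination penalty for idling.
class KeepLaneSketch:
    def __init__(self, v_ref, enable_low_level_training_properties=True):
        self._v_ref = v_ref
        self._enable_low_level_training_properties = enable_low_level_training_properties
        self._extra_r_terminal = None
        # (formula, violation penalty, active flag) tuples standing in for LTLProperty objects.
        self._LTL_preconditions = [
            ("G ( not stopped_now )", 200, self._enable_low_level_training_properties),
        ]

    def extra_termination_condition(self, v, acc):
        # End the episode with a penalty if the ego is nearly stopped and still
        # decelerating, so stalled rollouts terminate early instead of wasting steps.
        if self._enable_low_level_training_properties:
            if v < self._v_ref / 5 and acc < 0:
                self._extra_r_terminal = -100  # hypothetical value, taken from the KeepLane diff
                return True
        self._extra_r_terminal = None
        return False

KeepLane and Stop apply variants of this check with different velocity thresholds, while Wait uses a related condition tied to intersection priority.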
@@ -16,30 +16,51 @@ class KeepLane(ManeuverBase):
def _init_LTL_preconditions(self):
self._LTL_preconditions.append(LTLProperty("G ( not veh_ahead )", 0))
self._LTL_preconditions.append(
LTLProperty("G ( not stopped_now )", 200,
self._enable_low_level_training_properties))
#self._LTL_preconditions.append(
# LTLProperty("G ( not stopped_now )", 100,
# self._enable_low_level_training_properties))
self._LTL_preconditions.append(
LTLProperty(
"G ( (lane and target_lane) or (not lane and not target_lane) )",
200, self._enable_low_level_training_properties))
100, self._enable_low_level_training_properties))
def generate_learning_scenario(self):
self.generate_scenario(
enable_LTL_preconditions=False,
ego_pos_range=(rd.hlanes.start_pos, rd.hlanes.end_pos),
ego_pos_range=(rd.intersection_width_w_offset, rd.hlanes.end_pos),
ego_perturb_lim=(rd.hlanes.width / 4, np.pi / 6),
v_max_multiplier=0.75,
ego_heading_towards_lane_centre=True)
# the goal reward and termination is led by the SimpleIntersectionEnv
self.env._terminate_in_goal = True
self.env._reward_in_goal = 200
self.env._terminate_in_goal = False
self.env._reward_in_goal = None
self._enable_low_level_training_properties = True
def generate_validation_scenario(self):
self.generate_scenario(
enable_LTL_preconditions=False,
ego_pos_range=(rd.hlanes.start_pos, 0),
ego_perturb_lim=(rd.hlanes.width / 4, np.pi / 6),
ego_heading_towards_lane_centre=True)
# the goal reward and termination is led by the SimpleIntersectionEnv
self.env._terminate_in_goal = False
self.env._reward_in_goal = None
@staticmethod
def _features_dim_reduction(features_tuple):
return extract_ego_features(features_tuple, 'pos_near_stop_region',
'v', 'v_ref', 'e_y', 'psi', 'theta', 'acc',
'psi_dot')
return extract_ego_features(features_tuple, 'v', 'v_ref', 'e_y', 'psi', 'v tan(psi/L)', 'theta', 'lane', 'acc', 'psi_dot')
@property
def extra_termination_condition(self):
if self._enable_low_level_training_properties: # activated only for the low-level training.
if (self.env.ego.v < self._v_ref / 5) and self.env.ego.acc < 0:
self._extra_r_terminal = -100
return True
else:
self._extra_r_terminal = None
return False
return False
class Stop(ManeuverBase):
@@ -47,28 +68,30 @@ class Stop(ManeuverBase):
_terminate_in_goal = True
_reward_in_goal = None
_penalty_in_violation = None
def _init_param(self):
self._set_v_ref()
self._target_lane = self.env.ego.APs['lane']
def _init_LTL_preconditions(self):
self._LTL_preconditions.append(
LTLProperty("G ( not has_stopped_in_stop_region )",
self._penalty(self._reward_in_goal)))
#self._LTL_preconditions.append(
# LTLProperty("G ( not has_stopped_in_stop_region )",
# self._penalty(self._reward_in_goal)))
self._LTL_preconditions.append(
LTLProperty(
"G ( (before_but_close_to_stop_region or in_stop_region) U has_stopped_in_stop_region )",
0))
self._penalty_in_violation))
self._LTL_preconditions.append(
LTLProperty("G ( not stopped_now U in_stop_region )", 200,
LTLProperty("G ( not stopped_now U in_stop_region )", 100,
self._enable_low_level_training_properties))
self._LTL_preconditions.append(
LTLProperty(
"G ( (lane and target_lane) or (not lane and not target_lane) )",
200, self._enable_low_level_training_properties))
100, self._enable_low_level_training_properties))
def _update_param(self):
self._set_v_ref()
@@ -89,8 +112,10 @@ class Stop(ManeuverBase):
ego_pos_range=(rd.hlanes.near_stop_region,
-rd.intersection_width_w_offset / 2),
ego_perturb_lim=(rd.hlanes.width / 4, np.pi / 6),
v_max_multiplier=0.75,
ego_heading_towards_lane_centre=True)
self._reward_in_goal = 200
self._penalty_in_violation = 150
self._enable_low_level_training_properties = True
def _low_level_manual_policy(self):
@@ -99,8 +124,32 @@ class Stop(ManeuverBase):
@staticmethod
def _features_dim_reduction(features_tuple):
return extract_ego_features(features_tuple, 'pos_near_stop_region',
'v', 'v_ref', 'e_y', 'psi', 'theta', 'acc',
'psi_dot', 'not_in_stop_region')
'v', 'v_ref', 'e_y', 'psi', 'v tan(psi/L)', 'theta', 'lane', 'acc',
'psi_dot', 'pos_stop_region', 'not_in_stop_region')
@property
def extra_termination_condition(self):
if self.env.ego.APs['has_stopped_in_stop_region']:
if self._reward_in_goal is not None:
self._extra_r_terminal = self._reward_in_goal
self._extra_r_terminal *= np.exp(- pow(self.env.ego.theta, 2)
- pow(self.env.ego.y - rd.hlanes.centres[self._target_lane], 2)
- 0.25*pow(self.env.ego.psi, 2))
else:
self._extra_r_terminal = None
return True
elif self._enable_low_level_training_properties: # activated only for the low-level training.
if (rd.speed_limit / 5 < self._v_ref) and \
(self.env.ego.v < self._v_ref / 2) and self.env.ego.acc < 0:
self._extra_r_terminal = -100
return True
else:
self._extra_r_terminal = None
return False
return False
class Wait(ManeuverBase):
@@ -111,32 +160,38 @@ class Wait(ManeuverBase):
def _init_LTL_preconditions(self):
self._LTL_preconditions.append(
LTLProperty(
"G ( (in_stop_region and stopped_now) U highest_priority )",
0))
"G ( (in_stop_region and stopped_now) U (highest_priority and intersection_is_clear))", 0,
not self._enable_low_level_training_properties)) # not available in low-level training...
self._LTL_preconditions.append(
LTLProperty("G ( not (in_intersection and highest_priority) )",
self._penalty(self._reward_in_goal)))
self._LTL_preconditions.append(
LTLProperty(
"G ( in_stop_region U (highest_priority and intersection_is_clear) )", 150, self._enable_low_level_training_properties))
self._LTL_preconditions.append(
LTLProperty(
"G ( (lane and target_lane) or (not lane and not target_lane) )",
100, self._enable_low_level_training_properties))
def _init_param(self):
ego = self.env.ego
self._v_ref = rd.speed_limit if self.env.ego.APs[
'highest_priority'] else 0
self._target_lane = ego.APs['lane']
self._ego_stop_count = 0
self._update_param()
self._target_lane = self.env.ego.APs['lane']
def _update_param(self):
if self.env.ego.APs['highest_priority']:
self._v_ref = rd.speed_limit
if self._enable_low_level_training_properties:
if self.env.n_others_with_higher_priority == 0:
self._ego_stop_count += 1
if self.env.ego.APs['highest_priority'] and self.env.ego.APs['intersection_is_clear']:
self._v_ref = rd.speed_limit / 5
else:
self._v_ref = 0
def generate_learning_scenario(self):
n_others = np.random.randint(0, 3)
n_others = 0 if np.random.rand() <= 0 else np.random.randint(1, 4)
self.generate_scenario(
enable_LTL_preconditions=True,
timeout=62,
n_others_range=(n_others, n_others),
ego_pos_range=rd.hlanes.stop_region,
n_others_stopped_in_stop_region=n_others,
@@ -145,32 +200,46 @@ class Wait(ManeuverBase):
ego_heading_towards_lane_centre=True)
max_waited_count = 0
min_waited_count = 1
for veh in self.env.vehs[1:]:
max_waited_count = max(max_waited_count, veh.waited_count)
min_waited_count = min(min_waited_count, veh.waited_count)
min_waited_count = min(min_waited_count, max_waited_count)
self._extra_action_weights_flag = False
self.env.ego.waited_count = np.random.randint(0, max_waited_count + 21)
if np.random.rand() <= 0.2:
self.env.ego.waited_count = np.random.randint(0, min_waited_count+1)
else:
self.env.ego.waited_count = np.random.randint(min_waited_count, max_waited_count + 21)
self.env.init_APs(False)
self._reward_in_goal = 200
self._extra_r_on_timeout = -200
self._enable_low_level_training_properties = True
self._ego_stop_count = 0
def generate_validation_scenario(self):
super().generate_validation_scenario()
#self._enable_low_level_training_properties = True
@property
def extra_termination_condition(self):
if self._enable_low_level_training_properties: # activated only for the low-level training.
if self._ego_stop_count >= 50:
self._extra_r_terminal = -200
if self.env.ego.APs['highest_priority'] and self.env.ego.APs['intersection_is_clear'] \
and np.random.rand() <= 0.05 and self.env.ego.v <= self._v_ref / 10:
self._extra_r_terminal = - 100
return True
else:
self._extra_r_terminal = None
return False
return False
@staticmethod
def _features_dim_reduction(features_tuple):
return extract_ego_features(
features_tuple, 'v', 'v_ref', 'psi', 'theta', 'acc', 'psi_dot',
features_tuple, 'v', 'v_ref', 'e_y', 'psi', 'v tan(psi/L)', 'theta', 'lane', 'acc', 'psi_dot',
'pos_stop_region', 'intersection_is_clear', 'highest_priority')
@@ -195,10 +264,10 @@ class ChangeLane(ManeuverBase):
LTLProperty("G ( on_route and not over_speed_limit )",
self._violation_penalty_in_low_level_training,
self._enable_low_level_training_properties))
self._LTL_preconditions.append(
LTLProperty("G ( not stopped_now )",
self._violation_penalty_in_low_level_training,
self._enable_low_level_training_properties))
100, self._enable_low_level_training_properties))
@property
def goal_achieved(self):
@@ -217,12 +286,25 @@ class ChangeLane(ManeuverBase):
self.generate_scenario(
enable_LTL_preconditions=False,
timeout=15,
ego_pos_range=(rd.hlanes.start_pos, rd.hlanes.end_pos),
ego_pos_range=(rd.intersection_width_w_offset, rd.hlanes.end_pos),
ego_lane=np.random.choice([0, 1]),
ego_perturb_lim=(rd.hlanes.width / 5, np.pi / 6))
ego_perturb_lim=(rd.hlanes.width / 4, np.pi / 6),
v_max_multiplier=0.75)
# print('our range was %s, %s, ego at %s' % (before_intersection, after_intersection, self.env.ego.x))
self._reward_in_goal = 200
self._violation_penalty_in_low_level_training = 200
self._violation_penalty_in_low_level_training = 150
self._enable_low_level_training_properties = True
def generate_validation_scenario(self):
self.generate_scenario(
enable_LTL_preconditions=False,
ego_pos_range=(rd.hlanes.start_pos, rd.hlanes.end_pos),
ego_perturb_lim=(rd.hlanes.width / 4, np.pi / 6))
# print('our range was %s, %s, ego at %s' % (before_intersection, after_intersection, self.env.ego.x))
self._reward_in_goal = 200
self._violation_penalty_in_low_level_training = 150
self._enable_low_level_training_properties = True
# TODO: It is not a good idea to specify features by numbers, as the list
@@ -237,49 +319,77 @@ class ChangeLane(ManeuverBase):
class Follow(ManeuverBase):
_target_veh_i = None
_penalty_for_out_of_follow_range = None
_penalty_for_out_of_range = None
_penalty_for_change_lane = None
def _init_LTL_preconditions(self):
self._LTL_preconditions.append(
LTLProperty("G ( veh_ahead )",
self._penalty_for_out_of_follow_range))
LTLProperty("G ( veh_ahead )", self._penalty_for_out_of_range))
self._LTL_preconditions.append(
LTLProperty(
"G ( (lane and target_lane) or (not lane and not target_lane) )",
self._penalty_for_out_of_follow_range))
self._penalty_for_change_lane))
#self._LTL_preconditions.append(
# LTLProperty("G ( not stopped_now U veh_ahead_stopped_now)", 200,
# self._enable_low_level_training_properties))
self._LTL_preconditions.append(
LTLProperty("G ( not stopped_now U veh_ahead_stopped_now)", 200,
LTLProperty("G ( not veh_ahead_too_close )", self._penalty_for_out_of_range,
self._enable_low_level_training_properties))
self._LTL_preconditions.append(
LTLProperty("G ( not veh_ahead_too_close )", 200,
self._enable_low_level_training_properties))
LTLProperty("G ( not in_stop_region)", 0, not self._enable_low_level_training_properties))
def generate_learning_scenario(self):
self.generate_scenario(
enable_LTL_preconditions=False,
n_others_range=(1, 1),
ego_perturb_lim=(rd.hlanes.width / 2, np.pi / 4),
v_max_multiplier=0.75,
ego_perturb_lim=(0, 0),
veh_ahead_scenario=True)
self.env._terminate_in_goal = False
self._penalty_for_out_of_follow_range = 200
self._penalty_for_out_of_range = 200
self._penalty_for_change_lane = 170
self._enable_low_level_training_properties = True
def generate_validation_scenario(self):
self.generate_learning_scenario()
def _update_param(self):
self._target_veh_i, _ = self.env.get_V2V_distance()
if self._target_veh_i is not None:
self._v_ref = self.env.vehs[self._target_veh_i].v
def _low_level_manual_policy(self):
return self.env.aggressive_driving_policy(EGO_INDEX)
@property
def extra_termination_condition(self):
APs = self.env.ego.APs
if self._target_veh_i is None:
return False
elif not self._enable_low_level_training_properties: # activated only for the high-level training.
if (APs['in_stop_region'] or APs['before_but_close_to_stop_region']) \
and (self.env.vehs[self._target_veh_i].APs['in_intersection'] or
self.env.vehs[self._target_veh_i].x > 0):
return True
else:
return False
return False
def _features_dim_reduction(self, features_tuple):
ego_features = extract_ego_features(
features_tuple, 'v', 'v_ref', 'e_y', 'psi', 'v tan(psi/L)',
'theta', 'lane', 'e_y,lane', 'acc', 'psi_dot')
features_tuple, 'pos_near_stop_region', 'v', 'v_ref', 'e_y', 'psi', 'v tan(psi/L)',
'theta', 'lane', 'acc', 'psi_dot')
if self._target_veh_i is not None:
return ego_features + extract_other_veh_features(
features_tuple, self._target_veh_i, 'rel_x', 'rel_y', 'v',
'acc')
features_tuple, self._target_veh_i, 'rel_x', 'rel_y', 'v', 'acc')
else:
return ego_features + (0.0, 0.0, 0.0, 0.0)