diff --git a/backends/controller_base.py b/backends/controller_base.py
index 885f941c680612f6efbbdc94df26963e83bdefaf..634756b5174c0f782764b31942abb1478bf75ce0 100644
--- a/backends/controller_base.py
+++ b/backends/controller_base.py
@@ -56,8 +56,7 @@ class ControllerBase(PolicyBase):
         total_reward = 0
         self.node_terminal_state_reached = False
         while not self.node_terminal_state_reached:
-            observation, reward, terminal, info = self.low_level_step_current_node(
-            )
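+            # take one low-level step under the current option's policy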
+            observation, reward, terminal, info = self.low_level_step_current_node()
             if visualize_low_level_steps:
                 self.env.render()
             total_reward += reward
diff --git a/backends/trained_policies/0.1mil_weights/changelane_weights_actor.h5f b/backends/trained_policies/0.1mil_weights/changelane_weights_actor.h5f
deleted file mode 100644
index 739298c51cfa43433ff21b4962cbba17170ee7a9..0000000000000000000000000000000000000000
Binary files a/backends/trained_policies/0.1mil_weights/changelane_weights_actor.h5f and /dev/null differ
diff --git a/backends/trained_policies/0.1mil_weights/changelane_weights_critic.h5f b/backends/trained_policies/0.1mil_weights/changelane_weights_critic.h5f
deleted file mode 100644
index 681b967fa6dbd6a2aaf46413891ca7f9229efeeb..0000000000000000000000000000000000000000
Binary files a/backends/trained_policies/0.1mil_weights/changelane_weights_critic.h5f and /dev/null differ
diff --git a/backends/trained_policies/0.1mil_weights/follow_weights_actor.h5f b/backends/trained_policies/0.1mil_weights/follow_weights_actor.h5f
deleted file mode 100644
index fa417a1dc40b3f5e953e687315f960f2b1ad3e92..0000000000000000000000000000000000000000
Binary files a/backends/trained_policies/0.1mil_weights/follow_weights_actor.h5f and /dev/null differ
diff --git a/backends/trained_policies/0.1mil_weights/follow_weights_critic.h5f b/backends/trained_policies/0.1mil_weights/follow_weights_critic.h5f
deleted file mode 100644
index 0105de348166a4cbaa29998174f5e89e82b3b86e..0000000000000000000000000000000000000000
Binary files a/backends/trained_policies/0.1mil_weights/follow_weights_critic.h5f and /dev/null differ
diff --git a/backends/trained_policies/0.1mil_weights/keeplane_weights_actor.h5f b/backends/trained_policies/0.1mil_weights/keeplane_weights_actor.h5f
deleted file mode 100644
index 639fdd5b1889a604f6a61b4ad11b18ee9f703ea1..0000000000000000000000000000000000000000
Binary files a/backends/trained_policies/0.1mil_weights/keeplane_weights_actor.h5f and /dev/null differ
diff --git a/backends/trained_policies/0.1mil_weights/keeplane_weights_critic.h5f b/backends/trained_policies/0.1mil_weights/keeplane_weights_critic.h5f
deleted file mode 100644
index bc791095159a0f5656381a1c2458bb5cab227c4c..0000000000000000000000000000000000000000
Binary files a/backends/trained_policies/0.1mil_weights/keeplane_weights_critic.h5f and /dev/null differ
diff --git a/backends/trained_policies/0.1mil_weights/stop_weights_actor.h5f b/backends/trained_policies/0.1mil_weights/stop_weights_actor.h5f
deleted file mode 100644
index ee1d2af63e8812fc52a977d1db63342ffb5b0148..0000000000000000000000000000000000000000
Binary files a/backends/trained_policies/0.1mil_weights/stop_weights_actor.h5f and /dev/null differ
diff --git a/backends/trained_policies/0.1mil_weights/stop_weights_critic.h5f b/backends/trained_policies/0.1mil_weights/stop_weights_critic.h5f
deleted file mode 100644
index ad2e374269254b3fe6151f484ba04b5b9c29af91..0000000000000000000000000000000000000000
Binary files a/backends/trained_policies/0.1mil_weights/stop_weights_critic.h5f and /dev/null differ
diff --git a/backends/trained_policies/0.1mil_weights/wait_weights_actor.h5f b/backends/trained_policies/0.1mil_weights/wait_weights_actor.h5f
deleted file mode 100644
index a861c76ca25171a7644cb2fe3f75772fe7c5d6ae..0000000000000000000000000000000000000000
Binary files a/backends/trained_policies/0.1mil_weights/wait_weights_actor.h5f and /dev/null differ
diff --git a/backends/trained_policies/0.1mil_weights/wait_weights_critic.h5f b/backends/trained_policies/0.1mil_weights/wait_weights_critic.h5f
deleted file mode 100644
index b20d0e3209408f46f31f2c46e6b6b51ce23b2b65..0000000000000000000000000000000000000000
Binary files a/backends/trained_policies/0.1mil_weights/wait_weights_critic.h5f and /dev/null differ
diff --git a/backends/trained_policies/changelane/changelane_weights_actor.h5f b/backends/trained_policies/changelane/changelane_weights_actor.h5f
index 0a7ae9e96102f399e64dc83c4cc77e9e3b7a44cd..3f6e76aa921b07e851ab4eac8045e74c0c28e848 100644
Binary files a/backends/trained_policies/changelane/changelane_weights_actor.h5f and b/backends/trained_policies/changelane/changelane_weights_actor.h5f differ
diff --git a/backends/trained_policies/changelane/changelane_weights_critic.h5f b/backends/trained_policies/changelane/changelane_weights_critic.h5f
index 40ca3a83088d8984002bc0a4546020174e9cd431..b53dfd8f69aa96544b986a2de8c506b20162358f 100644
Binary files a/backends/trained_policies/changelane/changelane_weights_critic.h5f and b/backends/trained_policies/changelane/changelane_weights_critic.h5f differ
diff --git a/backends/trained_policies/follow/follow_weights_actor.h5f b/backends/trained_policies/follow/follow_weights_actor.h5f
index bcb0af7c5012216d05eb966550600c28a3bc322c..0f4bf96684f16bf8fc8db62dd56a301f5a96b62f 100644
Binary files a/backends/trained_policies/follow/follow_weights_actor.h5f and b/backends/trained_policies/follow/follow_weights_actor.h5f differ
diff --git a/backends/trained_policies/follow/follow_weights_critic.h5f b/backends/trained_policies/follow/follow_weights_critic.h5f
index f6258b53c8589db6a04382245c31f20d7f0915dc..2f51be39f37e0956887e930d969caf9ea421e0c2 100644
Binary files a/backends/trained_policies/follow/follow_weights_critic.h5f and b/backends/trained_policies/follow/follow_weights_critic.h5f differ
diff --git a/backends/trained_policies/keeplane/keeplane_weights_actor.h5f b/backends/trained_policies/keeplane/keeplane_weights_actor.h5f
index d9224714ec7717274033cb3f778800e6cacb690f..8efaf8b74e3fc805bbb373c3e24d3584df7ccf0c 100644
Binary files a/backends/trained_policies/keeplane/keeplane_weights_actor.h5f and b/backends/trained_policies/keeplane/keeplane_weights_actor.h5f differ
diff --git a/backends/trained_policies/keeplane/keeplane_weights_critic.h5f b/backends/trained_policies/keeplane/keeplane_weights_critic.h5f
index e5b4bf77af956cdae29a45518d768738fe647b6c..d7a70181897d2632e8c43446ee15c6dde69f294c 100644
Binary files a/backends/trained_policies/keeplane/keeplane_weights_critic.h5f and b/backends/trained_policies/keeplane/keeplane_weights_critic.h5f differ
diff --git a/backends/trained_policies/stop/stop_weights_actor.h5f b/backends/trained_policies/stop/stop_weights_actor.h5f
index 7ef9b12a24464c9c9c4d0ca0bd625b6d1875efb9..1f89ca2104f5801eb243b0898c3f050f91761f77 100644
Binary files a/backends/trained_policies/stop/stop_weights_actor.h5f and b/backends/trained_policies/stop/stop_weights_actor.h5f differ
diff --git a/backends/trained_policies/stop/stop_weights_critic.h5f b/backends/trained_policies/stop/stop_weights_critic.h5f
index 41a357a5e68aac591dea24edb47f5339960c0c90..12d7d202761bfafba4f6e9d564c154926bbc72a8 100644
Binary files a/backends/trained_policies/stop/stop_weights_critic.h5f and b/backends/trained_policies/stop/stop_weights_critic.h5f differ
diff --git a/options/simple_intersection/maneuver_base.py b/options/simple_intersection/maneuver_base.py
index 78f7a63a67dd19ce93ea0f08e5ae0c9749616f3f..1d9b45cfe36b0efce5f508e722b36bd89ea0f974 100644
--- a/options/simple_intersection/maneuver_base.py
+++ b/options/simple_intersection/maneuver_base.py
@@ -29,7 +29,7 @@ class ManeuverBase(EpisodicEnvBase):
     # _extra_action_weights_flag = True); note that a cost is defined
     # as a negative reward, so a cost will be summed up to the reward
     # with subtraction.
-    _cost_weights = (1.0 * 1e-3, 1.0 * 1e-3, 0.25 * 1e-3, 1.0 * 1e-3,
+    _cost_weights = (10.0 * 1e-3, 10.0 * 1e-3, 0.25 * 1e-3, 1.0 * 1e-3,
                      100.0 * 1e-3, 0.1 * 1e-3, 0.25 * 1e-3, 0.1 * 1e-3)
 
     _extra_r_terminal = None
@@ -292,9 +292,8 @@ class ManeuverBase(EpisodicEnvBase):
         raise NotImplementedError(self.__class__.__name__ +
                                   ".generate_learning_scenario is not implemented.")
 
-    def generate_validation_scenario(
-            self
-    ):  # Override this method in the subclass if some customization is needed.
+    # Override this method in the subclass if some customization is needed.
+    def generate_validation_scenario(self):
         self.generate_learning_scenario()
         self._enable_low_level_training_properties = False
 
diff --git a/options/simple_intersection/maneuvers.py b/options/simple_intersection/maneuvers.py
index 7b6ccf4ac243ff0be013a8164a3263a45ae895d1..748fb0ea0c1dd0743f00e1b683986e7186a31e2a 100644
--- a/options/simple_intersection/maneuvers.py
+++ b/options/simple_intersection/maneuvers.py
@@ -16,30 +16,51 @@ class KeepLane(ManeuverBase):
 
     def _init_LTL_preconditions(self):
         self._LTL_preconditions.append(LTLProperty("G ( not veh_ahead )", 0))
-        self._LTL_preconditions.append(
-            LTLProperty("G ( not stopped_now )", 200,
-                        self._enable_low_level_training_properties))
+        #self._LTL_preconditions.append(
+        #    LTLProperty("G ( not stopped_now )", 100,
+        #                self._enable_low_level_training_properties))
         self._LTL_preconditions.append(
             LTLProperty(
                 "G ( (lane and target_lane) or (not lane and not target_lane) )",
-                200, self._enable_low_level_training_properties))
+                100, self._enable_low_level_training_properties))
 
     def generate_learning_scenario(self):
         self.generate_scenario(
             enable_LTL_preconditions=False,
-            ego_pos_range=(rd.hlanes.start_pos, rd.hlanes.end_pos),
+            ego_pos_range=(rd.intersection_width_w_offset, rd.hlanes.end_pos),
             ego_perturb_lim=(rd.hlanes.width / 4, np.pi / 6),
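+            # v_max_multiplier presumably caps the sampled speeds at 75% of
+            # v_max (assumed semantics; the parameter is not defined in this diff)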
+            v_max_multiplier=0.75,
             ego_heading_towards_lane_centre=True)
         # the goal reward and termination is led by the SimpleIntersectionEnv
-        self.env._terminate_in_goal = True
-        self.env._reward_in_goal = 200
+        self.env._terminate_in_goal = False
+        self.env._reward_in_goal = None
         self._enable_low_level_training_properties = True
 
+    def generate_validation_scenario(self):
+        self.generate_scenario(
+            enable_LTL_preconditions=False,
+            ego_pos_range=(rd.hlanes.start_pos, 0),
+            ego_perturb_lim=(rd.hlanes.width / 4, np.pi / 6),
+            ego_heading_towards_lane_centre=True)
+        # goal reward and termination are handled by the SimpleIntersectionEnv
+        self.env._terminate_in_goal = False
+        self.env._reward_in_goal = None
+
     @staticmethod
     def _features_dim_reduction(features_tuple):
-        return extract_ego_features(features_tuple, 'pos_near_stop_region',
-                                    'v', 'v_ref', 'e_y', 'psi', 'theta', 'acc',
-                                    'psi_dot')
+        return extract_ego_features(features_tuple, 'v', 'v_ref', 'e_y',
+                                    'psi', 'v tan(psi/L)', 'theta', 'lane',
+                                    'acc', 'psi_dot')
+
+    @property
+    def extra_termination_condition(self):
+        if self._enable_low_level_training_properties:  # only active during low-level training
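+            # terminate with a penalty if the ego has slowed to a crawl
+            # (below v_ref / 5) while still decelerating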
+            if (self.env.ego.v < self._v_ref / 5) and self.env.ego.acc < 0:
+                self._extra_r_terminal = -100
+                return True
+            else:
+                self._extra_r_terminal = None
+                return False
+
+        return False
 
 
 class Stop(ManeuverBase):
@@ -47,28 +68,30 @@ class Stop(ManeuverBase):
     _terminate_in_goal = True
     _reward_in_goal = None
 
+    _penalty_in_violation = None
+
     def _init_param(self):
         self._set_v_ref()
         self._target_lane = self.env.ego.APs['lane']
 
     def _init_LTL_preconditions(self):
-        self._LTL_preconditions.append(
-            LTLProperty("G ( not has_stopped_in_stop_region )",
-                        self._penalty(self._reward_in_goal)))
+        #self._LTL_preconditions.append(
+        #    LTLProperty("G ( not has_stopped_in_stop_region )",
+        #                self._penalty(self._reward_in_goal)))
 
         self._LTL_preconditions.append(
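+            # staying before or in the stop region until stopped;
+            # violations cost self._penalty_in_violation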
             LTLProperty(
                 "G ( (before_but_close_to_stop_region or in_stop_region) U has_stopped_in_stop_region )",
-                0))
+                self._penalty_in_violation))
 
         self._LTL_preconditions.append(
-            LTLProperty("G ( not stopped_now U in_stop_region )", 200,
+            LTLProperty("G ( not stopped_now U in_stop_region )", 100,
                         self._enable_low_level_training_properties))
 
         self._LTL_preconditions.append(
             LTLProperty(
                 "G ( (lane and target_lane) or (not lane and not target_lane) )",
-                200, self._enable_low_level_training_properties))
+                100, self._enable_low_level_training_properties))
 
     def _update_param(self):
         self._set_v_ref()
@@ -89,8 +112,10 @@ class Stop(ManeuverBase):
             ego_pos_range=(rd.hlanes.near_stop_region,
                            -rd.intersection_width_w_offset / 2),
             ego_perturb_lim=(rd.hlanes.width / 4, np.pi / 6),
+            v_max_multiplier=0.75,
             ego_heading_towards_lane_centre=True)
         self._reward_in_goal = 200
+        self._penalty_in_violation = 150
         self._enable_low_level_training_properties = True
 
     def _low_level_manual_policy(self):
@@ -99,8 +124,32 @@ class Stop(ManeuverBase):
     @staticmethod
     def _features_dim_reduction(features_tuple):
         return extract_ego_features(features_tuple, 'pos_near_stop_region',
-                                    'v', 'v_ref', 'e_y', 'psi', 'theta', 'acc',
-                                    'psi_dot', 'not_in_stop_region')
+                                    'v', 'v_ref', 'e_y', 'psi',
+                                    'v tan(psi/L)', 'theta', 'lane', 'acc',
+                                    'psi_dot', 'pos_stop_region',
+                                    'not_in_stop_region')
+
+    @property
+    def extra_termination_condition(self):
+        if self.env.ego.APs['has_stopped_in_stop_region']:
+            if self._reward_in_goal is not None:
+                self._extra_r_terminal = self._reward_in_goal
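+                # discount the goal reward by a Gaussian-like factor of the
+                # pose errors (theta, offset from the lane centre, and psi)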
+                self._extra_r_terminal *= np.exp(
+                    - pow(self.env.ego.theta, 2)
+                    - pow(self.env.ego.y - rd.hlanes.centres[self._target_lane], 2)
+                    - 0.25 * pow(self.env.ego.psi, 2))
+            else:
+                self._extra_r_terminal = None
+            return True
+
+        elif self._enable_low_level_training_properties:  # only active during low-level training
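+            # penalize braking to below half of v_ref while v_ref is still
+            # well above a crawl (rd.speed_limit / 5)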
+            if (rd.speed_limit / 5 < self._v_ref) and \
+                    (self.env.ego.v < self._v_ref / 2) and self.env.ego.acc < 0:
+                self._extra_r_terminal = -100
+                return True
+
+            else:
+                self._extra_r_terminal = None
+                return False
+
+        return False
 
 
 class Wait(ManeuverBase):
@@ -111,32 +160,38 @@ class Wait(ManeuverBase):
     def _init_LTL_preconditions(self):
         self._LTL_preconditions.append(
             LTLProperty(
-                "G ( (in_stop_region and stopped_now) U highest_priority )",
-                0))
+                "G ( (in_stop_region and stopped_now) U (highest_priority and intersection_is_clear))", 0,
+                not self._enable_low_level_training_properties))  # not available in low-level training...
 
         self._LTL_preconditions.append(
             LTLProperty("G ( not (in_intersection and highest_priority) )",
                         self._penalty(self._reward_in_goal)))
 
+        self._LTL_preconditions.append(
+            LTLProperty(
+                "G ( in_stop_region U (highest_priority and intersection_is_clear) )", 150, self._enable_low_level_training_properties))
+
+        self._LTL_preconditions.append(
+            LTLProperty(
+                "G ( (lane and target_lane) or (not lane and not target_lane) )",
+                100, self._enable_low_level_training_properties))
+
     def _init_param(self):
-        ego = self.env.ego
-        self._v_ref = rd.speed_limit if self.env.ego.APs[
-            'highest_priority'] else 0
-        self._target_lane = ego.APs['lane']
-        self._ego_stop_count = 0
+        self._update_param()
+        self._target_lane = self.env.ego.APs['lane']
 
     def _update_param(self):
-        if self.env.ego.APs['highest_priority']:
-            self._v_ref = rd.speed_limit
-            if self._enable_low_level_training_properties:
-                if self.env.n_others_with_higher_priority == 0:
-                    self._ego_stop_count += 1
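+        # creep forward at a fifth of the speed limit once the ego has
+        # priority and a clear intersection; otherwise hold at zero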
+        if self.env.ego.APs['highest_priority'] \
+                and self.env.ego.APs['intersection_is_clear']:
+            self._v_ref = rd.speed_limit / 5
+        else:
+            self._v_ref = 0
 
     def generate_learning_scenario(self):
-        n_others = np.random.randint(0, 3)
+        n_others = np.random.randint(1, 4)  # always at least one other vehicle
         self.generate_scenario(
             enable_LTL_preconditions=True,
-            timeout=62,
             n_others_range=(n_others, n_others),
             ego_pos_range=rd.hlanes.stop_region,
             n_others_stopped_in_stop_region=n_others,
@@ -145,32 +200,46 @@ class Wait(ManeuverBase):
             ego_heading_towards_lane_centre=True)
 
         max_waited_count = 0
+        min_waited_count = 1
         for veh in self.env.vehs[1:]:
             max_waited_count = max(max_waited_count, veh.waited_count)
+            min_waited_count = min(min_waited_count, veh.waited_count)
+
+        min_waited_count = min(min_waited_count, max_waited_count)
 
         self._extra_action_weights_flag = False
-        self.env.ego.waited_count = np.random.randint(0, max_waited_count + 21)
+
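+        # ~20% of episodes start the ego with a waited_count at or below the
+        # others' minimum; the rest start at or above it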
+        if np.random.rand() <= 0.2:
+            self.env.ego.waited_count = np.random.randint(
+                0, min_waited_count + 1)
+        else:
+            self.env.ego.waited_count = np.random.randint(
+                min_waited_count, max_waited_count + 21)
+
         self.env.init_APs(False)
 
         self._reward_in_goal = 200
-        self._extra_r_on_timeout = -200
         self._enable_low_level_training_properties = True
-        self._ego_stop_count = 0
+
+    def generate_validation_scenario(self):
+        super().generate_validation_scenario()
+        #self._enable_low_level_training_properties = True
 
     @property
     def extra_termination_condition(self):
         if self._enable_low_level_training_properties:  # activated only for the low-level training.
-            if self._ego_stop_count >= 50:
-                self._extra_r_terminal = -200
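+            # once the ego may proceed (highest priority, clear intersection)
+            # but is still almost stopped, terminate with a penalty with 5%
+            # probability per step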
+            if self.env.ego.APs['highest_priority'] \
+                    and self.env.ego.APs['intersection_is_clear'] \
+                    and np.random.rand() <= 0.05 \
+                    and self.env.ego.v <= self._v_ref / 10:
+                self._extra_r_terminal = -100
                 return True
             else:
                 self._extra_r_terminal = None
                 return False
 
+        return False
+
     @staticmethod
     def _features_dim_reduction(features_tuple):
         return extract_ego_features(
-            features_tuple, 'v', 'v_ref', 'psi', 'theta', 'acc', 'psi_dot',
+            features_tuple, 'v', 'v_ref', 'e_y', 'psi', 'v tan(psi/L)',
+            'theta', 'lane', 'acc', 'psi_dot',
             'pos_stop_region', 'intersection_is_clear', 'highest_priority')
 
 
@@ -195,10 +264,10 @@ class ChangeLane(ManeuverBase):
             LTLProperty("G ( on_route and not over_speed_limit )",
                         self._violation_penalty_in_low_level_training,
                         self._enable_low_level_training_properties))
+
         self._LTL_preconditions.append(
             LTLProperty("G ( not stopped_now )",
-                        self._violation_penalty_in_low_level_training,
-                        self._enable_low_level_training_properties))
+                        100, self._enable_low_level_training_properties))
 
     @property
     def goal_achieved(self):
@@ -217,12 +286,25 @@ class ChangeLane(ManeuverBase):
         self.generate_scenario(
             enable_LTL_preconditions=False,
             timeout=15,
-            ego_pos_range=(rd.hlanes.start_pos, rd.hlanes.end_pos),
+            ego_pos_range=(rd.intersection_width_w_offset, rd.hlanes.end_pos),
             ego_lane=np.random.choice([0, 1]),
-            ego_perturb_lim=(rd.hlanes.width / 5, np.pi / 6))
+            ego_perturb_lim=(rd.hlanes.width / 4, np.pi / 6),
+            v_max_multiplier=0.75)
+
         # print('our range was %s, %s, ego at %s' % (before_intersection, after_intersection, self.env.ego.x))
         self._reward_in_goal = 200
-        self._violation_penalty_in_low_level_training = 200
+        self._violation_penalty_in_low_level_training = 150
+        self._enable_low_level_training_properties = True
+
+    def generate_validation_scenario(self):
+        self.generate_scenario(
+            enable_LTL_preconditions=False,
+            ego_pos_range=(rd.hlanes.start_pos, rd.hlanes.end_pos),
+            ego_perturb_lim=(rd.hlanes.width / 4, np.pi / 6))
+
+        self._reward_in_goal = 200
+        self._violation_penalty_in_low_level_training = 150
         self._enable_low_level_training_properties = True
 
     # TODO: It is not a good idea to specify features by numbers, as the list
@@ -237,49 +319,77 @@ class ChangeLane(ManeuverBase):
 class Follow(ManeuverBase):
 
     _target_veh_i = None
-    _penalty_for_out_of_follow_range = None
+    _penalty_for_out_of_range = None
+    _penalty_for_change_lane = None
 
     def _init_LTL_preconditions(self):
         self._LTL_preconditions.append(
-            LTLProperty("G ( veh_ahead )",
-                        self._penalty_for_out_of_follow_range))
+            LTLProperty("G ( veh_ahead )", self._penalty_for_out_of_range))
 
         self._LTL_preconditions.append(
             LTLProperty(
                 "G ( (lane and target_lane) or (not lane and not target_lane) )",
-                self._penalty_for_out_of_follow_range))
+                self._penalty_for_change_lane))
+
+        #self._LTL_preconditions.append(
+        #    LTLProperty("G ( not stopped_now U veh_ahead_stopped_now)", 200,
+        #                self._enable_low_level_training_properties))
 
         self._LTL_preconditions.append(
-            LTLProperty("G ( not stopped_now U veh_ahead_stopped_now)", 200,
+            LTLProperty("G ( not veh_ahead_too_close )", self._penalty_for_out_of_range,
                         self._enable_low_level_training_properties))
 
         self._LTL_preconditions.append(
-            LTLProperty("G ( not veh_ahead_too_close )", 200,
-                        self._enable_low_level_training_properties))
+            LTLProperty("G ( not in_stop_region)", 0, not self._enable_low_level_training_properties))
 
     def generate_learning_scenario(self):
         self.generate_scenario(
             enable_LTL_preconditions=False,
             n_others_range=(1, 1),
-            ego_perturb_lim=(rd.hlanes.width / 2, np.pi / 4),
+            v_max_multiplier=0.75,
+            ego_perturb_lim=(0, 0),
             veh_ahead_scenario=True)
         self.env._terminate_in_goal = False
-        self._penalty_for_out_of_follow_range = 200
+        self._penalty_for_out_of_range = 200
+        self._penalty_for_change_lane = 170
         self._enable_low_level_training_properties = True
 
+    def generate_validation_scenario(self):
+        self.generate_learning_scenario()
+
     def _update_param(self):
         self._target_veh_i, _ = self.env.get_V2V_distance()
 
+        if self._target_veh_i is not None:
+            self._v_ref = self.env.vehs[self._target_veh_i].v
+
     def _low_level_manual_policy(self):
         return self.env.aggressive_driving_policy(EGO_INDEX)
 
+    @property
+    def extra_termination_condition(self):
+        APs = self.env.ego.APs
+
+        if self._target_veh_i is None:
+            return False
+
+        elif not self._enable_low_level_training_properties:  # only active during high-level training
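+            # stop following once the ego nears the stop region while the
+            # target vehicle has entered or passed the intersection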
+            if (APs['in_stop_region'] or APs['before_but_close_to_stop_region']) \
+                    and (self.env.vehs[self._target_veh_i].APs['in_intersection']
+                         or self.env.vehs[self._target_veh_i].x > 0):
+                return True
+            else:
+                return False
+
+        return False
+
     def _features_dim_reduction(self, features_tuple):
         ego_features = extract_ego_features(
-            features_tuple, 'v', 'v_ref', 'e_y', 'psi', 'v tan(psi/L)',
-            'theta', 'lane', 'e_y,lane', 'acc', 'psi_dot')
+            features_tuple, 'pos_near_stop_region', 'v', 'v_ref', 'e_y',
+            'psi', 'v tan(psi/L)', 'theta', 'lane', 'acc', 'psi_dot')
+
         if self._target_veh_i is not None:
             return ego_features + extract_other_veh_features(
-                features_tuple, self._target_veh_i, 'rel_x', 'rel_y', 'v',
-                'acc')
+                features_tuple, self._target_veh_i, 'rel_x', 'rel_y', 'v', 'acc')
         else:
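+            # no vehicle ahead: zero-pad the four other-vehicle features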
             return ego_features + (0.0, 0.0, 0.0, 0.0)