diff --git a/backends/controller_base.py b/backends/controller_base.py
index 634756b5174c0f782764b31942abb1478bf75ce0..16170366e002e44d714c67331e4ef6093f6fbe97 100644
--- a/backends/controller_base.py
+++ b/backends/controller_base.py
@@ -51,7 +51,7 @@ class ControllerBase(PolicyBase):
     
     Returns state at end of node execution, total reward, episode_termination_flag, info
     '''
-
+    # TODO: this is never called when testing the high-level policy, only during training...
     def step_current_node(self, visualize_low_level_steps=False):
         total_reward = 0
         self.node_terminal_state_reached = False
@@ -59,6 +59,7 @@ class ControllerBase(PolicyBase):
             observation, reward, terminal, info = self.low_level_step_current_node()
             if visualize_low_level_steps:
                 self.env.render()
+            # TODO: make total_reward discounted; see the sketch after this file's diff.
             total_reward += reward
 
         total_reward += self.current_node.high_level_extra_reward
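
The TODO above asks for a discounted sum. A minimal sketch of what that could look like, assuming a `gamma` hyperparameter and with `low_level_step` standing in for `self.low_level_step_current_node()`; names and defaults are illustrative, not the repository's:

```python
# Hedged sketch only: accumulate gamma^t * r_t instead of a plain sum.
def discounted_node_return(low_level_step, gamma=0.99, max_steps=100):
    total_reward = 0.0
    discount = 1.0
    for _ in range(max_steps):
        observation, reward, terminal, info = low_level_step()
        total_reward += discount * reward
        discount *= gamma  # later rewards are weighted by gamma^t
        if terminal:
            break
    return total_reward
```
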
diff --git a/backends/kerasrl_learner.py b/backends/kerasrl_learner.py
index a98dc928bec3d63e0a3975f85144e98671710b95..a24c4daa9bd45c548e3278597547ef768db6698a 100644
--- a/backends/kerasrl_learner.py
+++ b/backends/kerasrl_learner.py
@@ -269,9 +269,11 @@ class DQNLearner(LearnerBase):
         """
         model = Sequential()
         model.add(Flatten(input_shape=(1, ) + self.input_shape))
-        model.add(Dense(32))
+        model.add(Dense(64))
         model.add(Activation('relu'))
-        model.add(Dense(32))
+        model.add(Dense(64))
+        model.add(Activation('relu'))
+        model.add(Dense(64))
         model.add(Activation('relu'))
         model.add(Dense(self.nb_actions))
         model.add(Activation('linear'))
@@ -435,6 +438,7 @@ class DQNAgentOverOptions(DQNAgent):
         initiation conditions."""
         invalid_node_indices = list()
         for index, option_alias in enumerate(self.low_level_policy_aliases):
+            # TODO: move this reset_maneuver call elsewhere; a "get" function should not mutate state.
             self.low_level_policies[option_alias].reset_maneuver()
             if not self.low_level_policies[option_alias].initiation_condition:
                 invalid_node_indices.append(index)
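
For reference, the Q-network after this change, reconstructed as a standalone sketch in the Keras 2 Sequential API this file already uses (`input_shape` and `nb_actions` are the learner's attributes):

```python
from keras.models import Sequential
from keras.layers import Activation, Dense, Flatten

def build_q_network(input_shape, nb_actions):
    # Three Dense(64) + ReLU blocks replace the previous two Dense(32) blocks.
    model = Sequential()
    model.add(Flatten(input_shape=(1,) + input_shape))  # keras-rl prepends a window axis
    for _ in range(3):
        model.add(Dense(64))
        model.add(Activation('relu'))
    model.add(Dense(nb_actions))
    model.add(Activation('linear'))  # one raw Q-value per option
    return model
```
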
diff --git a/backends/manual_policy.py b/backends/manual_policy.py
index 6e59db13b580d3762507f6fe286431cbb7b02ceb..3876b1d97dd2754309689d9b229d6fd6cb7ab7a6 100644
--- a/backends/manual_policy.py
+++ b/backends/manual_policy.py
@@ -30,6 +30,7 @@ class ManualPolicy(ControllerBase):
         new_node = None
         if self.low_level_policies[self.current_node].termination_condition:
             for next_node in self.adj[self.current_node]:
+                self.low_level_policies[next_node].reset_maneuver()
                 if self.low_level_policies[next_node].initiation_condition:
                     new_node = next_node
                     break  # change current_node to the highest priority next node
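
The fix above resets each candidate maneuver before querying its initiation condition, since the condition is evaluated on freshly initialized maneuver state. A hedged sketch of the transition rule, with `policies` and `adj` standing in for the controller's `low_level_policies` and adjacency list:

```python
def choose_next_node(current, policies, adj):
    """Return the next node once the current maneuver terminates."""
    if not policies[current].termination_condition:
        return current
    for candidate in adj[current]:
        # Reset first: initiation_condition needs fresh maneuver state.
        policies[candidate].reset_maneuver()
        if policies[candidate].initiation_condition:
            return candidate  # adj is assumed ordered by priority
    return current
```
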
diff --git a/backends/trained_policies/highlevel/highlevel_weights.h5f b/backends/trained_policies/highlevel/highlevel_weights.h5f
index 6dcc7ba4718d82e329f833ff23a8c68595309c61..b3c5347197f637d893b80c7258a6504d4368c59b 100644
Binary files a/backends/trained_policies/highlevel/highlevel_weights.h5f and b/backends/trained_policies/highlevel/highlevel_weights.h5f differ
diff --git a/backends/trained_policies/highlevel/highlevel_weights_772.h5f b/backends/trained_policies/highlevel/highlevel_weights_772.h5f
deleted file mode 100644
index 7b986c74005a62b1dc11cbdd3022105ec5317d37..0000000000000000000000000000000000000000
Binary files a/backends/trained_policies/highlevel/highlevel_weights_772.h5f and /dev/null differ
diff --git a/backends/trained_policies/immediatestop/immediatestop_weights_actor.h5f b/backends/trained_policies/immediatestop/immediatestop_weights_actor.h5f
new file mode 100644
index 0000000000000000000000000000000000000000..3ade0ae38ec13eb1040c1a4dc8a91390d9ed6752
Binary files /dev/null and b/backends/trained_policies/immediatestop/immediatestop_weights_actor.h5f differ
diff --git a/backends/trained_policies/immediatestop/immediatestop_weights_critic.h5f b/backends/trained_policies/immediatestop/immediatestop_weights_critic.h5f
new file mode 100644
index 0000000000000000000000000000000000000000..2b97e53475d094a51c29cc8ed82f5bdc47f9ea3f
Binary files /dev/null and b/backends/trained_policies/immediatestop/immediatestop_weights_critic.h5f differ
diff --git a/backends/trained_policies/left/left_weights_actor.h5f b/backends/trained_policies/left/left_weights_actor.h5f
new file mode 100644
index 0000000000000000000000000000000000000000..3f6e76aa921b07e851ab4eac8045e74c0c28e848
Binary files /dev/null and b/backends/trained_policies/left/left_weights_actor.h5f differ
diff --git a/backends/trained_policies/left/left_weights_critic.h5f b/backends/trained_policies/left/left_weights_critic.h5f
new file mode 100644
index 0000000000000000000000000000000000000000..b53dfd8f69aa96544b986a2de8c506b20162358f
Binary files /dev/null and b/backends/trained_policies/left/left_weights_critic.h5f differ
diff --git a/backends/trained_policies/right/right_weights_actor.h5f b/backends/trained_policies/right/right_weights_actor.h5f
new file mode 100644
index 0000000000000000000000000000000000000000..3f6e76aa921b07e851ab4eac8045e74c0c28e848
Binary files /dev/null and b/backends/trained_policies/right/right_weights_actor.h5f differ
diff --git a/backends/trained_policies/right/right_weights_critic.h5f b/backends/trained_policies/right/right_weights_critic.h5f
new file mode 100644
index 0000000000000000000000000000000000000000..b53dfd8f69aa96544b986a2de8c506b20162358f
Binary files /dev/null and b/backends/trained_policies/right/right_weights_critic.h5f differ
diff --git a/backends/trained_policies/wait/wait_weights_actor.h5f b/backends/trained_policies/wait/wait_weights_actor.h5f
deleted file mode 100644
index 00703d9cc1931bc31083f9a77575598a030f92c3..0000000000000000000000000000000000000000
Binary files a/backends/trained_policies/wait/wait_weights_actor.h5f and /dev/null differ
diff --git a/backends/trained_policies/wait/wait_weights_critic.h5f b/backends/trained_policies/wait/wait_weights_critic.h5f
deleted file mode 100644
index 330e90c08e129f18a05e2a54882f8bb9fffc1957..0000000000000000000000000000000000000000
Binary files a/backends/trained_policies/wait/wait_weights_critic.h5f and /dev/null differ
diff --git a/config.json b/config.json
index b8d9b1c65aeebae2c3a2ccf40f6f9c6b4b6de70d..5e40c774e0957492c3db2f94b74dfde642b396a2 100644
--- a/config.json
+++ b/config.json
@@ -3,7 +3,7 @@
 		"wait": "Wait",
 		"follow": "Follow",
 		"stop": "Stop",
-		"changelane": "ChangeLane",
+		"immediatestop": "ImmediateStop",
 		"keeplane": "KeepLane"
 	},
 
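
A sketch of how an alias map like the one above is typically consumed when building the options graph. The top-level key name `"nodes"` and the class registry are assumptions for illustration, not the repository's actual loader (which lives in options/options_loader.py):

```python
import json

# Assumed structure; the real config.json may nest this differently.
config = json.loads('{"nodes": {"wait": "Wait", "immediatestop": "ImmediateStop"}}')

class Wait: pass
class ImmediateStop: pass

registry = {"Wait": Wait, "ImmediateStop": ImmediateStop}
# Map each lowercase alias to an instance of the named maneuver class.
maneuvers = {alias: registry[name]() for alias, name in config["nodes"].items()}
print(maneuvers)
```
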
diff --git a/env/simple_intersection/features.py b/env/simple_intersection/features.py
index 2b70e51fa7357378e061ea599566ae0fea1dc5b2..2c422c6a2ffa97d16d47dd118306e8d289db2789 100644
--- a/env/simple_intersection/features.py
+++ b/env/simple_intersection/features.py
@@ -174,6 +174,6 @@ class Features(object):
 
         # Add buffer features to make a fixed length feature vector
         for i in range(MAX_NUM_VEHICLES - len(self.other_vehs)):
-            feature += (0.0, 0.0, 0.0, 0.0, -1)
+            feature += (0.0, 0.0, 0.0, 0.0, -1.0)
 
         return feature
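
The one-character fix above keeps the padding homogeneous: every buffer slot is now all floats. A standalone sketch, with the slot layout and `MAX_NUM_VEHICLES` value assumed for illustration:

```python
MAX_NUM_VEHICLES = 4  # illustrative; the real constant lives in this module

def pad_other_vehicle_features(feature, n_other_vehs):
    # Each absent vehicle contributes four zeroed entries plus a -1.0
    # sentinel, keeping the tuple fixed-length and uniformly float-typed.
    for _ in range(MAX_NUM_VEHICLES - n_other_vehs):
        feature += (0.0, 0.0, 0.0, 0.0, -1.0)
    return feature

assert all(isinstance(x, float) for x in pad_other_vehicle_features((), 0))
```
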
diff --git a/env/simple_intersection/simple_intersection_env.py b/env/simple_intersection/simple_intersection_env.py
index 1d61b91ad2a9300a4cbf657bbaf1a8c80dd9347e..3855aea8e699fd521c89b8b4b51039572307e970 100644
--- a/env/simple_intersection/simple_intersection_env.py
+++ b/env/simple_intersection/simple_intersection_env.py
@@ -1156,12 +1156,13 @@ class SimpleIntersectionEnv(RoadEnv, EpisodicEnvBase):
         Returns True if the environment has terminated
         """
 
-        model_checks_violated = (self._LTL_preconditions_enable and \
-            self.current_model_checking_result())
+        model_checks_violated = self._LTL_preconditions_enable and \
+                                self.current_model_checking_result()
         reached_goal = self._terminate_in_goal and self.goal_achieved
         self._check_collisions()
         self._check_ego_theta_out_of_range()
         terminated = self.termination_condition
+
         return model_checks_violated or reached_goal or terminated
 
     @property
@@ -1181,7 +1182,7 @@ class SimpleIntersectionEnv(RoadEnv, EpisodicEnvBase):
 
         return (self.ego.x >= rd.hlanes.end_pos) and \
                not self.collision_happened and \
-               not self.ego.APs['over_speed_limit']
+               (self.ego.v <= 1.1 * rd.speed_limit)
 
     def reset(self):
         """Gym compliant reset function.
@@ -1229,7 +1230,6 @@ class SimpleIntersectionEnv(RoadEnv, EpisodicEnvBase):
         self.window.dispatch_events()
 
         # Text information about ego vehicle's states
-        # Right now, we are only training one option (Stop)
         info = "Ego Attributes:" + get_APs(
             self, EGO_INDEX, 'in_stop_region',
             'has_entered_stop_region', 'has_stopped_in_stop_region',
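
The goal test above now tolerates up to 10% over the speed limit instead of consulting the `over_speed_limit` AP. As a standalone predicate (a hedged sketch; `end_pos` and `speed_limit` mirror `rd.hlanes.end_pos` and `rd.speed_limit`):

```python
def goal_achieved(ego_x, ego_v, collided, end_pos, speed_limit, tol=1.1):
    # Past the end of the horizontal lanes, collision-free, and at most
    # 10% above the speed limit.
    return ego_x >= end_pos and not collided and ego_v <= tol * speed_limit
```
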
diff --git a/high_level_policy_main.py b/high_level_policy_main.py
index 3c5a0f3c787d7937618af5a76816372e5688a2d2..8204c7e2a3d641780a17d71723ee4bde7f6dec57 100644
--- a/high_level_policy_main.py
+++ b/high_level_policy_main.py
@@ -11,7 +11,7 @@ def high_level_policy_training(nb_steps=25000,
                                load_weights=False,
                                training=True,
                                testing=True,
-                               nb_episodes_for_test=10,
+                               nb_episodes_for_test=20,
                                max_nb_steps=100,
                                visualize=False,
                                tensorboard=False,
@@ -63,8 +63,7 @@ def high_level_policy_training(nb_steps=25000,
         agent.save_model(save_path)
 
     if testing:
-        options.set_controller_policy(agent.predict)
-        agent.test_model(options, nb_episodes=nb_episodes_for_test)
+        high_level_policy_testing(nb_episodes_for_test=nb_episodes_for_test)
 
     return agent
 
@@ -228,7 +227,6 @@ if __name__ == "__main__":
             load_weights=args.load_weights,
             save_path=args.save_file,
             tensorboard=args.tensorboard,
-            nb_episodes_for_test=20,
             visualize=args.visualize)
 
     if args.test:
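
After this change the testing branch re-enters through `high_level_policy_testing`, which reloads the saved weights rather than reusing the in-memory agent. A hedged sketch of the resulting flow; the two function parameters are stand-ins for the training and testing entry points in this file:

```python
def train_then_test(training_fn, testing_fn, save_path,
                    nb_steps=25000, nb_episodes_for_test=20):
    agent = training_fn(nb_steps=nb_steps, save_path=save_path)
    # Testing goes through the saved weights, exercising the same code
    # path as a standalone test run.
    testing_fn(nb_episodes_for_test=nb_episodes_for_test)
    return agent
```
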
diff --git a/options/options_loader.py b/options/options_loader.py
index f58554f80af5c0d85909776ab4251ddfe75b4cfb..bce1a6cf1b7d885fe256a2a8748264f9eadcf8cc 100644
--- a/options/options_loader.py
+++ b/options/options_loader.py
@@ -1,4 +1,5 @@
 import json
+import os  # for os.path.isfile
 from .simple_intersection.maneuvers import *
 from .simple_intersection.mcts_maneuvers import *
 from backends import RLController, DDPGLearner, MCTSLearner, OnlineMCTSController, ManualPolicy
@@ -155,19 +156,34 @@ class OptionsGraph:
     # TODO: error handling
     def load_trained_low_level_policies(self):
         for key, maneuver in self.maneuvers.items():
-            agent = DDPGLearner(
-                input_shape=(maneuver.get_reduced_feature_length(), ),
-                nb_actions=2,
-                gamma=0.99,
-                nb_steps_warmup_critic=200,
-                nb_steps_warmup_actor=200,
-                lr=1e-3)
-            agent.load_model("backends/trained_policies/" + key + "/" + key +
-                             "_weights.h5f")
-            maneuver.set_low_level_trained_policy(agent.predict)
-            maneuver._cost_weights = (20.0 * 1e-3, 1.0 * 1e-3, 0.25 * 1e-3,
-                                      1.0 * 1e-3, 100.0 * 1e-3, 0.1 * 1e-3,
-                                      0.25 * 1e-3, 0.1 * 1e-3)
+            trained_policy_path = "backends/trained_policies/" + key + "/"
+            critic_file_exists = os.path.isfile(trained_policy_path + key + "_weights_critic.h5f")
+            actor_file_exists = os.path.isfile(trained_policy_path + key + "_weights_actor.h5f")
+
+            if actor_file_exists and critic_file_exists:
+                agent = DDPGLearner(
+                    input_shape=(maneuver.get_reduced_feature_length(),),
+                    nb_actions=2,
+                    gamma=0.99,
+                    nb_steps_warmup_critic=200,
+                    nb_steps_warmup_actor=200,
+                    lr=1e-3)
+                agent.load_model(trained_policy_path + key + "_weights.h5f")
+                maneuver.set_low_level_trained_policy(agent.predict)
+
+            elif actor_file_exists and not critic_file_exists:
+                print("\n Warning: unable to load the low-level policy of \"" + key +
+                      "\": the critic weights file must be in the same directory " +
+                      "as the actor weights file; the manual policy will be used instead.\n")
+
+            else:
+                print("\n Warning: no trained low-level policy exists for \"" + key +
+                      "\"; the manual policy will be used instead.\n")
+
+            # Set the cost weights used for high-level policy training.
+            # TODO: initialize this in ManeuverBase (e.g., via a high-level-training flag), not here.
+            maneuver._cost_weights = (100.0 * 1e-3, 10.0 * 1e-3, 0.25 * 1e-3, 1.0 * 1e-3,
+                                      100.0 * 1e-3, 0.1 * 1e-3, 0.25 * 1e-3, 0.1 * 1e-3)
 
             if self.config["method"] == "mcts":
                 maneuver.timeout = np.inf
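
A condensed sketch of the guarded loading introduced above. keras-rl's DDPG weight I/O expands a path like `<key>_weights.h5f` into `<key>_weights_actor.h5f` and `<key>_weights_critic.h5f`, which is why both files are probed against the same stem; `make_agent` is an assumed factory wrapping the DDPGLearner construction shown in the diff:

```python
import os

def try_load_low_level_policy(key, maneuver, make_agent,
                              root="backends/trained_policies"):
    stem = os.path.join(root, key, key + "_weights")
    has_actor = os.path.isfile(stem + "_actor.h5f")
    has_critic = os.path.isfile(stem + "_critic.h5f")
    if has_actor and has_critic:
        agent = make_agent()
        agent.load_model(stem + ".h5f")  # expands to the _actor/_critic pair
        maneuver.set_low_level_trained_policy(agent.predict)
        return True
    print('Warning: incomplete or missing weights for "%s"; '
          'falling back to the manual policy.' % key)
    return False
```
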
diff --git a/options/simple_intersection/maneuver_base.py b/options/simple_intersection/maneuver_base.py
index 1d9b45cfe36b0efce5f508e722b36bd89ea0f974..275e654c3c79590f3985da2f7afaadb458bf5906 100644
--- a/options/simple_intersection/maneuver_base.py
+++ b/options/simple_intersection/maneuver_base.py
@@ -20,7 +20,7 @@ class ManeuverBase(EpisodicEnvBase):
     learning_mode = 'training'
 
     #: timeout (i.e., time horizon for termination)
-    # By default, the time-out horizon is 1 as in Paxton et. al (2017).
+    # By default, the time-out horizon is 1.
     timeout = 1
 
     #: the option specific weight vector for cost of driving, which is
@@ -153,8 +153,7 @@ class ManeuverBase(EpisodicEnvBase):
                 # in this case, no additional reward by Default
                 # (i.e., self._extra_r_terminal = None by default).
                 self._terminal_reward_superposition(self._extra_r_terminal)
-                info[
-                    'maneuver_termination_reason'] = 'extra_termination_condition'
+                info['maneuver_termination_reason'] = 'extra_termination_condition'
             if self.timeout_happened:
                 if self._give_reward_on_timeout:
                     # in this case, no additional reward by Default
diff --git a/options/simple_intersection/maneuvers.py b/options/simple_intersection/maneuvers.py
index 748fb0ea0c1dd0743f00e1b683986e7186a31e2a..570b4acc558cfc6cf3ebdc015419ed64af006817 100644
--- a/options/simple_intersection/maneuvers.py
+++ b/options/simple_intersection/maneuvers.py
@@ -16,9 +16,11 @@ class KeepLane(ManeuverBase):
 
     def _init_LTL_preconditions(self):
         self._LTL_preconditions.append(LTLProperty("G ( not veh_ahead )", 0))
+
         #self._LTL_preconditions.append(
         #    LTLProperty("G ( not stopped_now )", 100,
         #                self._enable_low_level_training_properties))
+
         self._LTL_preconditions.append(
             LTLProperty(
                 "G ( (lane and target_lane) or (not lane and not target_lane) )",
@@ -63,6 +65,76 @@ class KeepLane(ManeuverBase):
         return False
 
 
+class ImmediateStop(ManeuverBase):
+
+    _terminate_in_goal = True
+    _reward_in_goal = None
+
+    _penalty_in_violation = None
+    _ego_pos_range = (rd.intersection_width_w_offset, rd.hlanes.end_pos)
+
+    def _init_param(self):
+        self._v_ref = 0 if self._enable_low_level_training_properties else rd.speed_limit
+        self._target_lane = self.env.ego.APs['lane']
+
+    def _init_LTL_preconditions(self):
+
+        self._LTL_preconditions.append(
+            LTLProperty(
+                "G ( (veh_ahead and before_but_close_to_stop_region) U highest_priority )",
+                None, not self._enable_low_level_training_properties))
+
+        self._LTL_preconditions.append(
+            LTLProperty("G ( not stopped_now )", self._penalty(self._reward_in_goal),
+                        not self._enable_low_level_training_properties))
+
+        self._LTL_preconditions.append(
+            LTLProperty(
+                "G ( (lane and target_lane) or (not lane and not target_lane) )",
+                100, self._enable_low_level_training_properties))
+
+    def generate_learning_scenario(self):
+        self.generate_scenario(
+            ego_pos_range=self._ego_pos_range,
+            ego_perturb_lim=(rd.hlanes.width / 4, np.pi / 6),
+            ego_heading_towards_lane_centre=True)
+        self.env._terminate_in_goal = False
+        self.env._reward_in_goal = None
+        self._reward_in_goal = 200
+        self._enable_low_level_training_properties = True
+
+    def generate_validation_scenario(self):
+        self._ego_pos_range = (rd.hlanes.start_pos, rd.hlanes.end_pos)
+        self.generate_learning_scenario()
+
+    def _low_level_manual_policy(self):
+        return self.env.aggressive_driving_policy(EGO_INDEX)
+
+    @staticmethod
+    def _features_dim_reduction(features_tuple):
+        return extract_ego_features(features_tuple, 'v', 'v_ref', 'e_y', 'psi',
+                                    'v tan(psi/L)', 'theta', 'lane', 'acc', 'psi_dot')
+
+    @property
+    def extra_termination_condition(self):
+        if self._enable_low_level_training_properties:  # active only during low-level training.
+            if self.env.ego.APs['stopped_now']:
+                if self._reward_in_goal is not None:
+                    self._extra_r_terminal = self._reward_in_goal
+                    self._extra_r_terminal *= np.exp(- pow(self.env.ego.theta, 2)
+                                                     - pow(self.env.ego.y - rd.hlanes.centres[self._target_lane], 2)
+                                                     - 0.25 * pow(self.env.ego.psi, 2))
+                else:
+                    self._extra_r_terminal = None
+                return True
+
+            else:
+                self._extra_r_terminal = None
+                return False
+
+        return False
+
+
 class Stop(ManeuverBase):
 
     _terminate_in_goal = True
@@ -79,6 +151,10 @@ class Stop(ManeuverBase):
         #    LTLProperty("G ( not has_stopped_in_stop_region )",
         #                self._penalty(self._reward_in_goal)))
 
+        self._LTL_preconditions.append(
+            LTLProperty("G ( not has_stopped_in_stop_region )", -150,
+                        not self._enable_low_level_training_properties))
+
         self._LTL_preconditions.append(
             LTLProperty(
                 "G ( (before_but_close_to_stop_region or in_stop_region) U has_stopped_in_stop_region )",
@@ -98,13 +174,10 @@
 
     def _set_v_ref(self):
         self._v_ref = rd.speed_limit
         x = self.env.ego.x
-        if x <= rd.hlanes.near_stop_region:
-            self._v_ref = rd.speed_limit
-        elif x <= rd.hlanes.stop_region_centre:
-            self._v_ref = -(rd.speed_limit / abs(rd.hlanes.near_stop_region)
-                            ) * (x - rd.hlanes.stop_region_centre)
-        else:
+        if rd.hlanes.near_stop_region < x <= rd.hlanes.stop_region_centre:
+            self._v_ref = -(rd.speed_limit / abs(rd.hlanes.near_stop_region)) * (x - rd.hlanes.stop_region_centre)
+        elif x > rd.hlanes.stop_region_centre:
             self._v_ref = 0
 
     def generate_learning_scenario(self):
@@ -129,18 +203,18 @@ class Stop(ManeuverBase):
 
     @property
     def extra_termination_condition(self):
-        if self.env.ego.APs['has_stopped_in_stop_region']:
-            if self._reward_in_goal is not None:
-                self._extra_r_terminal = self._reward_in_goal
-                self._extra_r_terminal *= np.exp(- pow(self.env.ego.theta, 2)
-                                                 - pow(self.env.ego.y - rd.hlanes.centres[self._target_lane], 2)
-                                                 - 0.25*pow(self.env.ego.psi, 2))
-            else:
-                self._extra_r_terminal = None
-            return True
+        if self._enable_low_level_training_properties:  # active only during low-level training.
+            if self.env.ego.APs['has_stopped_in_stop_region']:
+                if self._reward_in_goal is not None:
+                    self._extra_r_terminal = self._reward_in_goal
+                    self._extra_r_terminal *= np.exp(- pow(self.env.ego.theta, 2)
+                                                     - pow(self.env.ego.y - rd.hlanes.centres[self._target_lane], 2)
+                                                     - 0.25 * pow(self.env.ego.psi, 2))
+                else:
+                    self._extra_r_terminal = None
+                return True
 
-        elif self._enable_low_level_training_properties:  # activated only for the low-level training.
-            if (rd.speed_limit / 5 < self._v_ref) and \
+            elif (rd.speed_limit / 5 < self._v_ref) and \
                     (self.env.ego.v < self._v_ref / 2) and self.env.ego.acc < 0:
                 self._extra_r_terminal = -100
                 return True
@@ -154,93 +228,91 @@ class Stop(ManeuverBase):
 
 class Wait(ManeuverBase):
 
-    _terminate_in_goal = True
     _reward_in_goal = None
+    _terminate_in_goal = True
 
     def _init_LTL_preconditions(self):
-        self._LTL_preconditions.append(
-            LTLProperty(
-                "G ( (in_stop_region and stopped_now) U (highest_priority and intersection_is_clear))", 0,
-                not self._enable_low_level_training_properties))  # not available in low-level training...
-
-        self._LTL_preconditions.append(
-            LTLProperty("G ( not (in_intersection and highest_priority) )",
-                        self._penalty(self._reward_in_goal)))
 
         self._LTL_preconditions.append(
-            LTLProperty(
-                "G ( in_stop_region U (highest_priority and intersection_is_clear) )", 150, self._enable_low_level_training_properties))
 
-        self._LTL_preconditions.append(
-            LTLProperty(
-                "G ( (lane and target_lane) or (not lane and not target_lane) )",
-                100, self._enable_low_level_training_properties))
+            LTLProperty(
+                "G ( (in_stop_region and stopped_now) and not (highest_priority and intersection_is_clear))", 0,
+                not self._enable_low_level_training_properties))  # not available during low-level training
 
-    def _init_param(self):
-        self._update_param()
-        self._target_lane = self.env.ego.APs['lane']
+        #LTLProperty(
+        #    "G ( (in_stop_region and stopped_now) U (highest_priority and intersection_is_clear))", 0,
+        #    not self._enable_low_level_training_properties))  # not available in low-level training...
 
-    def _update_param(self):
-        if self.env.ego.APs['highest_priority'] and self.env.ego.APs['intersection_is_clear']:
-            self._v_ref = rd.speed_limit / 5
-        else:
-            self._v_ref = 0
+        #self._LTL_preconditions.append(
+        #    LTLProperty("G ( not (in_intersection and highest_priority) )",
+        #                self._penalty(self._reward_in_goal)))
 
-    def generate_learning_scenario(self):
+        #self._LTL_preconditions.append(
+        #    LTLProperty(
+        #        "G ( in_stop_region U (highest_priority and intersection_is_clear) )", 150, self._enable_low_level_training_properties))
 
-        n_others = 0 if np.random.rand() <= 0 else np.random.randint(1, 4)
+        #self._LTL_preconditions.append(
+        #    LTLProperty(
+        #        "G ( (lane and target_lane) or (not lane and not target_lane) )",
+        #        100, self._enable_low_level_training_properties))
 
-        self.generate_scenario(
-            enable_LTL_preconditions=True,
-            n_others_range=(n_others, n_others),
-            ego_pos_range=rd.hlanes.stop_region,
-            n_others_stopped_in_stop_region=n_others,
-            ego_v_upper_lim=0,
-            ego_perturb_lim=(rd.hlanes.width / 4, np.pi / 6),
-            ego_heading_towards_lane_centre=True)
+    def _init_param(self):
+        self._v_ref = 0  # alternative: 0 if self._enable_low_level_training_properties else rd.speed_limit
+        self._target_lane = self.env.ego.APs['lane']
 
-        max_waited_count = 0
-        min_waited_count = 1
-        for veh in self.env.vehs[1:]:
-            max_waited_count = max(max_waited_count, veh.waited_count)
-            min_waited_count = min(min_waited_count, veh.waited_count)
+    def _low_level_manual_policy(self):
+        return (0, 0)  # Do nothing during "Wait"; just wait until the ego has the highest priority.
 
-        min_waited_count = min(min_waited_count, max_waited_count)
+#    @staticmethod
+#    def _features_dim_reduction(features_tuple):
+#        return extract_ego_features(
+#            features_tuple, 'v', 'v_ref', 'e_y', 'psi', 'v tan(psi/L)', 'theta', 'lane', 'acc', 'psi_dot',
+#            'pos_stop_region', 'intersection_is_clear', 'highest_priority')
 
-        self._extra_action_weights_flag = False
 
-        if np.random.rand() <= 0.2:
-            self.env.ego.waited_count = np.random.randint(0, min_waited_count+1)
-        else:
-            self.env.ego.waited_count = np.random.randint(min_waited_count, max_waited_count + 21)
+class Left(ManeuverBase):
 
-        self.env.init_APs(False)
+    min_y_distance = rd.hlanes.width / 4
 
-        self._reward_in_goal = 200
-        self._enable_low_level_training_properties = True
+    _terminate_in_goal = True
+    _reward_in_goal = None
 
-    def generate_validation_scenario(self):
-        super().generate_validation_scenario()
-        #self._enable_low_level_training_properties = True
+    def _init_param(self):
+        self._v_ref = rd.speed_limit
+        self._target_lane = False
+        self._terminate_in_goal = True
 
     @property
-    def extra_termination_condition(self):
-        if self._enable_low_level_training_properties:  # activated only for the low-level training.
-            if self.env.ego.APs['highest_priority'] and self.env.ego.APs['intersection_is_clear'] \
-                    and np.random.rand() <= 0.05 and self.env.ego.v <= self._v_ref / 10:
-                self._extra_r_terminal = - 100
-                return True
-            else:
-                self._extra_r_terminal = None
-                return False
+    def goal_achieved(self):
+        ego = self.env.ego
+        APs = self.env.ego.APs
+        on_other_lane = APs['lane'] == self._target_lane
+        achieved_y_displacement = np.sign(ego.y) * \
+                                  (ego.y - rd.hlanes.centres[APs['target_lane']]) >= - self.min_y_distance
+        return on_other_lane and APs['on_route'] and \
+               achieved_y_displacement and APs['parallel_to_lane']
 
-        return False
+    @property
+    def extra_initiation_condition(self):
+        return self.env.ego.APs['lane']
 
     @staticmethod
     def _features_dim_reduction(features_tuple):
-        return extract_ego_features(
-            features_tuple, 'v', 'v_ref', 'e_y', 'psi', 'v tan(psi/L)', 'theta', 'lane', 'acc', 'psi_dot',
-            'pos_stop_region', 'intersection_is_clear', 'highest_priority')
+        return extract_ego_features(features_tuple, 'v', 'v_ref', 'e_y', 'psi',
+                                    'v tan(psi/L)', 'theta', 'lane', 'acc',
+                                    'psi_dot')
+
+
+class Right(Left):
+
+    def _init_param(self):
+        self._v_ref = rd.speed_limit
+        self._target_lane = True
+        self._terminate_in_goal = True
+
+    @property
+    def extra_initiation_condition(self):
+        return not self.env.ego.APs['lane']
 
 
 class ChangeLane(ManeuverBase):
@@ -252,12 +324,11 @@ class ChangeLane(ManeuverBase):
 
     _violation_penalty_in_low_level_training = None
 
-    high_level_extra_reward = -20
+    # high_level_extra_reward = -1000000
 
     def _init_param(self):
         self._v_ref = rd.speed_limit
         self._target_lane = not self.env.ego.APs['lane']
-        self._terminate_in_goal = True
 
     def _init_LTL_preconditions(self):
         self._LTL_preconditions.append(
@@ -307,8 +378,6 @@ class ChangeLane(ManeuverBase):
         self._violation_penalty_in_low_level_training = 150
         self._enable_low_level_training_properties = True
 
-    # TODO: It is not a good idea to specify features by numbers, as the list
-    # of features is ever changing. We should specify them by strings.
     @staticmethod
     def _features_dim_reduction(features_tuple):
         return extract_ego_features(features_tuple, 'v', 'v_ref', 'e_y', 'psi',
@@ -357,11 +426,22 @@ class Follow(ManeuverBase):
     def generate_validation_scenario(self):
         self.generate_learning_scenario()
 
+    def _init_param(self):
+        self._set_v_ref()
+
     def _update_param(self):
-        self._target_veh_i, _ = self.env.get_V2V_distance()
+        self._set_v_ref()
 
-        if self._target_veh_i is not None:
-            self._v_ref = self.env.vehs[self._target_veh_i].v
+    def _set_v_ref(self):
+        # NOTE: a disabled variant gated this logic on
+        # self._enable_low_level_training_properties, falling back to
+        # self._v_ref = rd.speed_limit in the high-level case.
+        self._target_veh_i, _ = self.env.get_V2V_distance()
+
+        if self._target_veh_i is not None:
+            self._v_ref = self.env.vehs[self._target_veh_i].v
+        else:
+            self._v_ref = 0
 
     def _low_level_manual_policy(self):
         return self.env.aggressive_driving_policy(EGO_INDEX)
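
Both `ImmediateStop` and `Stop` above scale their goal reward by a Gaussian in heading, lateral offset, and steering angle, so sloppier stops earn less. Factored out as a standalone helper (argument names mirror the ego attributes used in the diff):

```python
import numpy as np

def shaped_terminal_reward(reward_in_goal, theta, y, lane_centre, psi):
    # exp(-theta^2 - (y - centre)^2 - 0.25 * psi^2): equals reward_in_goal
    # for a perfectly aligned, centred stop, decaying smoothly with error.
    return reward_in_goal * np.exp(-theta ** 2
                                   - (y - lane_centre) ** 2
                                   - 0.25 * psi ** 2)
```
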