diff --git a/backends/controller_base.py b/backends/controller_base.py index 885f941c680612f6efbbdc94df26963e83bdefaf..ad3185664f1eab6d506a84117af20de75cbad57c 100644 --- a/backends/controller_base.py +++ b/backends/controller_base.py @@ -51,21 +51,21 @@ class ControllerBase(PolicyBase): Returns state at end of node execution, total reward, epsiode_termination_flag, info ''' - + # TODO: this is never called when you test high-level policy rather than train... def step_current_node(self, visualize_low_level_steps=False): total_reward = 0 self.node_terminal_state_reached = False while not self.node_terminal_state_reached: - observation, reward, terminal, info = self.low_level_step_current_node( - ) + observation, reward, terminal, info = self.low_level_step_current_node() if visualize_low_level_steps: self.env.render() + # TODO: make the total_reward discounted.... total_reward += reward total_reward += self.current_node.high_level_extra_reward # TODO for info - return observation, total_reward, self.env.termination_condition, info + return observation, total_reward, terminal, info # TODO: Looks generic. Move to an intermediate class/highlevel manager so that base class can be clean ''' Executes one step of current node. Sets node_terminal_state_reached flag if node termination condition @@ -76,9 +76,7 @@ class ControllerBase(PolicyBase): def low_level_step_current_node(self): - u_ego = self.current_node.low_level_policy( - self.current_node.get_reduced_features_tuple()) + u_ego = self.current_node.low_level_policy(self.current_node.get_reduced_features_tuple()) feature, R, terminal, info = self.current_node.step(u_ego) self.node_terminal_state_reached = terminal - return self.env.get_features_tuple( - ), R, self.env.termination_condition, info + return self.env.get_features_tuple(), R, self.env.termination_condition, info diff --git a/backends/kerasrl_learner.py b/backends/kerasrl_learner.py index a98dc928bec3d63e0a3975f85144e98671710b95..0da901bd6f80a145b2d8980110a85105017af0f4 100644 --- a/backends/kerasrl_learner.py +++ b/backends/kerasrl_learner.py @@ -8,7 +8,7 @@ from keras.callbacks import TensorBoard from rl.agents import DDPGAgent, DQNAgent from rl.memory import SequentialMemory from rl.random import OrnsteinUhlenbeckProcess -from rl.policy import BoltzmannQPolicy, MaxBoltzmannQPolicy +from rl.policy import GreedyQPolicy, EpsGreedyQPolicy, MaxBoltzmannQPolicy from rl.callbacks import ModelIntervalCheckpoint @@ -229,6 +229,7 @@ class DQNLearner(LearnerBase): model=None, policy=None, memory=None, + test_policy=None, **kwargs): """The constructor which sets the properties of the class. @@ -236,8 +237,8 @@ class DQNLearner(LearnerBase): input_shape: Shape of observation space, e.g (10,); nb_actions: number of values in action space; model: Keras Model of actor which takes observation as input and outputs actions. Uses default if not given - policy: KerasRL Policy. Uses default SequentialMemory if not given - memory: KerasRL Memory. Uses default BoltzmannQPolicy if not given + policy: KerasRL Policy. Uses default RestrictedEpsGreedyQPolicy if not given + memory: KerasRL Memory. 
Uses default SequentialMemory if not given **kwargs: other optional key-value arguments with defaults defined in property_defaults """ super(DQNLearner, self).__init__(input_shape, nb_actions, **kwargs) @@ -255,12 +256,14 @@ class DQNLearner(LearnerBase): model = self.get_default_model() if policy is None: policy = self.get_default_policy() + if test_policy is None: + test_policy = self.get_default_test_policy() if memory is None: memory = self.get_default_memory() self.low_level_policies = low_level_policies - self.agent_model = self.create_agent(model, policy, memory) + self.agent_model = self.create_agent(model, policy, memory, test_policy) def get_default_model(self): """Creates the default model. @@ -269,9 +272,11 @@ class DQNLearner(LearnerBase): """ model = Sequential() model.add(Flatten(input_shape=(1, ) + self.input_shape)) - model.add(Dense(32)) + model.add(Dense(64)) + model.add(Activation('relu')) + model.add(Dense(64)) model.add(Activation('relu')) - model.add(Dense(32)) + model.add(Dense(64)) model.add(Activation('relu')) model.add(Dense(self.nb_actions)) model.add(Activation('linear')) @@ -280,7 +285,10 @@ class DQNLearner(LearnerBase): return model def get_default_policy(self): - return MaxBoltzmannQPolicy(eps=0.3) + return RestrictedEpsGreedyQPolicy(0.3) + + def get_default_test_policy(self): + return RestrictedGreedyQPolicy() def get_default_memory(self): """Creates the default memory model. @@ -291,7 +299,7 @@ class DQNLearner(LearnerBase): limit=self.mem_size, window_length=self.mem_window_length) return memory - def create_agent(self, model, policy, memory): + def create_agent(self, model, policy, memory, test_policy): """Creates a KerasRL DDPGAgent with given components. Args: @@ -309,6 +317,7 @@ class DQNLearner(LearnerBase): nb_steps_warmup=self.nb_steps_warmup, target_model_update=self.target_model_update, policy=policy, + test_policy=test_policy, enable_dueling_network=True) agent.compile(Adam(lr=self.lr), metrics=['mae']) @@ -319,6 +328,8 @@ class DQNLearner(LearnerBase): env, nb_steps=1000000, visualize=False, + verbose=1, + log_interval=10000, nb_max_episode_steps=200, tensorboard=False, model_checkpoints=False, @@ -338,7 +349,8 @@ class DQNLearner(LearnerBase): env, nb_steps=nb_steps, visualize=visualize, - verbose=1, + verbose=verbose, + log_interval=log_interval, nb_max_episode_steps=nb_max_episode_steps, callbacks=callbacks) @@ -410,6 +422,82 @@ class DQNLearner(LearnerBase): return relevant +class RestrictedEpsGreedyQPolicy(EpsGreedyQPolicy): + """Implement the restricted epsilon-greedy policy + + Restricted Eps Greedy policy. + This policy ensures that it never chooses an action whose value is -inf. + + """ + + def __init__(self, eps=.1): + super(RestrictedEpsGreedyQPolicy, self).__init__(eps) + + def select_action(self, q_values): + """Return the selected action + + # Arguments + q_values (np.ndarray): List of the estimations of Q for each action + + # Returns + Selected action + """ + assert q_values.ndim == 1 + nb_actions = q_values.shape[0] + index = list() + + for i in range(0, nb_actions): + if q_values[i] != -np.inf: + index.append(i) + + # the case where every q_value is -np.inf sometimes inevitably happens within the fit and test functions + # of keras-rl at the terminal stage, as they force a call to forward in the Keras-RL learner, which in turn calls this function. + # TODO: add exception handling or a more principled way to choose the action in this exceptional case.
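The restricted epsilon-greedy rule introduced above can be read as the following standalone sketch. The function name restricted_eps_greedy and the plain NumPy inputs are illustrative only, and np.random.choice is used here in place of the deprecated np.random.random_integers call that appears in the patch:

import numpy as np

def restricted_eps_greedy(q_values, eps=0.3):
    """Epsilon-greedy selection restricted to actions whose Q-value is not -inf."""
    q_values = np.asarray(q_values, dtype=float)
    valid = np.flatnonzero(q_values != -np.inf)
    if valid.size == 0:
        # every Q-value is -inf: fall back to action 0, as the patch does
        return 0
    if np.random.uniform() <= eps:
        # explore uniformly over the valid actions only
        return int(np.random.choice(valid))
    # exploit: with at least one finite Q-value, the argmax is a valid action
    return int(np.argmax(q_values))

print(restricted_eps_greedy([-np.inf, 1.2, 0.7, -np.inf]))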
+ if len(index) < 1: + # every q_value is -np.inf, we choose action = 0 + action = 0 + print("Warning: no action satisfies initiation condition, action = 0 is chosen by default.") + + elif np.random.uniform() <= self.eps: + action = index[np.random.random_integers(0, len(index) - 1)] + + else: + action = np.argmax(q_values) + + return action + + +class RestrictedGreedyQPolicy(GreedyQPolicy): + """Implement the epsilon greedy policy + + Restricted Greedy policy. + This policy ensures that it never chooses the action whose value is -inf + + """ + + def select_action(self, q_values): + """Return the selected action + + # Arguments + q_values (np.ndarray): List of the estimations of Q for each action + + # Returns + Selection action + """ + assert q_values.ndim == 1 + + # TODO: exception process or some more process to choose action in this exceptional case. + if np.max(q_values) == - np.inf: + # every q_value is -np.inf, we choose action = 0 + action = 0 + print("Warning: no action satisfies initiation condition, action = 0 is chosen by default.") + + else: + action = np.argmax(q_values) + + return action + + class DQNAgentOverOptions(DQNAgent): def __init__(self, model, @@ -433,8 +521,10 @@ class DQNAgentOverOptions(DQNAgent): def __get_invalid_node_indices(self): """Returns a list of option indices that are invalid according to initiation conditions.""" + invalid_node_indices = list() for index, option_alias in enumerate(self.low_level_policy_aliases): + # TODO: Locate reset_maneuver to another place as this is a "get" function. self.low_level_policies[option_alias].reset_maneuver() if not self.low_level_policies[option_alias].initiation_condition: invalid_node_indices.append(index) diff --git a/backends/manual_policy.py b/backends/manual_policy.py index 6e59db13b580d3762507f6fe286431cbb7b02ceb..3876b1d97dd2754309689d9b229d6fd6cb7ab7a6 100644 --- a/backends/manual_policy.py +++ b/backends/manual_policy.py @@ -30,6 +30,7 @@ class ManualPolicy(ControllerBase): new_node = None if self.low_level_policies[self.current_node].termination_condition: for next_node in self.adj[self.current_node]: + self.low_level_policies[next_node].reset_maneuver() if self.low_level_policies[next_node].initiation_condition: new_node = next_node break # change current_node to the highest priority next node diff --git a/backends/trained_policies/0.1mil_weights/changelane_weights_actor.h5f b/backends/trained_policies/0.1mil_weights/changelane_weights_actor.h5f deleted file mode 100644 index 739298c51cfa43433ff21b4962cbba17170ee7a9..0000000000000000000000000000000000000000 Binary files a/backends/trained_policies/0.1mil_weights/changelane_weights_actor.h5f and /dev/null differ diff --git a/backends/trained_policies/0.1mil_weights/changelane_weights_critic.h5f b/backends/trained_policies/0.1mil_weights/changelane_weights_critic.h5f deleted file mode 100644 index 681b967fa6dbd6a2aaf46413891ca7f9229efeeb..0000000000000000000000000000000000000000 Binary files a/backends/trained_policies/0.1mil_weights/changelane_weights_critic.h5f and /dev/null differ diff --git a/backends/trained_policies/0.1mil_weights/follow_weights_actor.h5f b/backends/trained_policies/0.1mil_weights/follow_weights_actor.h5f deleted file mode 100644 index fa417a1dc40b3f5e953e687315f960f2b1ad3e92..0000000000000000000000000000000000000000 Binary files a/backends/trained_policies/0.1mil_weights/follow_weights_actor.h5f and /dev/null differ diff --git a/backends/trained_policies/0.1mil_weights/follow_weights_critic.h5f 
b/backends/trained_policies/0.1mil_weights/follow_weights_critic.h5f deleted file mode 100644 index 0105de348166a4cbaa29998174f5e89e82b3b86e..0000000000000000000000000000000000000000 Binary files a/backends/trained_policies/0.1mil_weights/follow_weights_critic.h5f and /dev/null differ diff --git a/backends/trained_policies/0.1mil_weights/keeplane_weights_actor.h5f b/backends/trained_policies/0.1mil_weights/keeplane_weights_actor.h5f deleted file mode 100644 index 639fdd5b1889a604f6a61b4ad11b18ee9f703ea1..0000000000000000000000000000000000000000 Binary files a/backends/trained_policies/0.1mil_weights/keeplane_weights_actor.h5f and /dev/null differ diff --git a/backends/trained_policies/0.1mil_weights/keeplane_weights_critic.h5f b/backends/trained_policies/0.1mil_weights/keeplane_weights_critic.h5f deleted file mode 100644 index bc791095159a0f5656381a1c2458bb5cab227c4c..0000000000000000000000000000000000000000 Binary files a/backends/trained_policies/0.1mil_weights/keeplane_weights_critic.h5f and /dev/null differ diff --git a/backends/trained_policies/0.1mil_weights/stop_weights_actor.h5f b/backends/trained_policies/0.1mil_weights/stop_weights_actor.h5f deleted file mode 100644 index ee1d2af63e8812fc52a977d1db63342ffb5b0148..0000000000000000000000000000000000000000 Binary files a/backends/trained_policies/0.1mil_weights/stop_weights_actor.h5f and /dev/null differ diff --git a/backends/trained_policies/0.1mil_weights/stop_weights_critic.h5f b/backends/trained_policies/0.1mil_weights/stop_weights_critic.h5f deleted file mode 100644 index ad2e374269254b3fe6151f484ba04b5b9c29af91..0000000000000000000000000000000000000000 Binary files a/backends/trained_policies/0.1mil_weights/stop_weights_critic.h5f and /dev/null differ diff --git a/backends/trained_policies/0.1mil_weights/wait_weights_actor.h5f b/backends/trained_policies/0.1mil_weights/wait_weights_actor.h5f deleted file mode 100644 index a861c76ca25171a7644cb2fe3f75772fe7c5d6ae..0000000000000000000000000000000000000000 Binary files a/backends/trained_policies/0.1mil_weights/wait_weights_actor.h5f and /dev/null differ diff --git a/backends/trained_policies/0.1mil_weights/wait_weights_critic.h5f b/backends/trained_policies/0.1mil_weights/wait_weights_critic.h5f deleted file mode 100644 index b20d0e3209408f46f31f2c46e6b6b51ce23b2b65..0000000000000000000000000000000000000000 Binary files a/backends/trained_policies/0.1mil_weights/wait_weights_critic.h5f and /dev/null differ diff --git a/backends/trained_policies/changelane/changelane_weights_actor.h5f b/backends/trained_policies/changelane/changelane_weights_actor.h5f index 0a7ae9e96102f399e64dc83c4cc77e9e3b7a44cd..3f6e76aa921b07e851ab4eac8045e74c0c28e848 100644 Binary files a/backends/trained_policies/changelane/changelane_weights_actor.h5f and b/backends/trained_policies/changelane/changelane_weights_actor.h5f differ diff --git a/backends/trained_policies/changelane/changelane_weights_critic.h5f b/backends/trained_policies/changelane/changelane_weights_critic.h5f index 40ca3a83088d8984002bc0a4546020174e9cd431..b53dfd8f69aa96544b986a2de8c506b20162358f 100644 Binary files a/backends/trained_policies/changelane/changelane_weights_critic.h5f and b/backends/trained_policies/changelane/changelane_weights_critic.h5f differ diff --git a/backends/trained_policies/follow/follow_weights_actor.h5f b/backends/trained_policies/follow/follow_weights_actor.h5f index bcb0af7c5012216d05eb966550600c28a3bc322c..0f4bf96684f16bf8fc8db62dd56a301f5a96b62f 100644 Binary files 
a/backends/trained_policies/follow/follow_weights_actor.h5f and b/backends/trained_policies/follow/follow_weights_actor.h5f differ diff --git a/backends/trained_policies/follow/follow_weights_critic.h5f b/backends/trained_policies/follow/follow_weights_critic.h5f index f6258b53c8589db6a04382245c31f20d7f0915dc..2f51be39f37e0956887e930d969caf9ea421e0c2 100644 Binary files a/backends/trained_policies/follow/follow_weights_critic.h5f and b/backends/trained_policies/follow/follow_weights_critic.h5f differ diff --git a/backends/trained_policies/halt/immediatestop_weights_actor.h5f b/backends/trained_policies/halt/immediatestop_weights_actor.h5f new file mode 100644 index 0000000000000000000000000000000000000000..3ade0ae38ec13eb1040c1a4dc8a91390d9ed6752 Binary files /dev/null and b/backends/trained_policies/halt/immediatestop_weights_actor.h5f differ diff --git a/backends/trained_policies/halt/immediatestop_weights_critic.h5f b/backends/trained_policies/halt/immediatestop_weights_critic.h5f new file mode 100644 index 0000000000000000000000000000000000000000..2b97e53475d094a51c29cc8ed82f5bdc47f9ea3f Binary files /dev/null and b/backends/trained_policies/halt/immediatestop_weights_critic.h5f differ diff --git a/backends/trained_policies/highlevel/highlevel_weights.h5f b/backends/trained_policies/highlevel/highlevel_weights.h5f index 6dcc7ba4718d82e329f833ff23a8c68595309c61..b3c5347197f637d893b80c7258a6504d4368c59b 100644 Binary files a/backends/trained_policies/highlevel/highlevel_weights.h5f and b/backends/trained_policies/highlevel/highlevel_weights.h5f differ diff --git a/backends/trained_policies/highlevel/highlevel_weights_772.h5f b/backends/trained_policies/highlevel/highlevel_weights_772.h5f deleted file mode 100644 index 7b986c74005a62b1dc11cbdd3022105ec5317d37..0000000000000000000000000000000000000000 Binary files a/backends/trained_policies/highlevel/highlevel_weights_772.h5f and /dev/null differ diff --git a/backends/trained_policies/keeplane/keeplane_weights_actor.h5f b/backends/trained_policies/keeplane/keeplane_weights_actor.h5f index d9224714ec7717274033cb3f778800e6cacb690f..8efaf8b74e3fc805bbb373c3e24d3584df7ccf0c 100644 Binary files a/backends/trained_policies/keeplane/keeplane_weights_actor.h5f and b/backends/trained_policies/keeplane/keeplane_weights_actor.h5f differ diff --git a/backends/trained_policies/keeplane/keeplane_weights_critic.h5f b/backends/trained_policies/keeplane/keeplane_weights_critic.h5f index e5b4bf77af956cdae29a45518d768738fe647b6c..d7a70181897d2632e8c43446ee15c6dde69f294c 100644 Binary files a/backends/trained_policies/keeplane/keeplane_weights_critic.h5f and b/backends/trained_policies/keeplane/keeplane_weights_critic.h5f differ diff --git a/backends/trained_policies/left/left_weights_actor.h5f b/backends/trained_policies/left/left_weights_actor.h5f new file mode 100644 index 0000000000000000000000000000000000000000..3f6e76aa921b07e851ab4eac8045e74c0c28e848 Binary files /dev/null and b/backends/trained_policies/left/left_weights_actor.h5f differ diff --git a/backends/trained_policies/left/left_weights_critic.h5f b/backends/trained_policies/left/left_weights_critic.h5f new file mode 100644 index 0000000000000000000000000000000000000000..b53dfd8f69aa96544b986a2de8c506b20162358f Binary files /dev/null and b/backends/trained_policies/left/left_weights_critic.h5f differ diff --git a/backends/trained_policies/right/right_weights_actor.h5f b/backends/trained_policies/right/right_weights_actor.h5f new file mode 100644 index 
0000000000000000000000000000000000000000..3f6e76aa921b07e851ab4eac8045e74c0c28e848 Binary files /dev/null and b/backends/trained_policies/right/right_weights_actor.h5f differ diff --git a/backends/trained_policies/right/right_weights_critic.h5f b/backends/trained_policies/right/right_weights_critic.h5f new file mode 100644 index 0000000000000000000000000000000000000000..b53dfd8f69aa96544b986a2de8c506b20162358f Binary files /dev/null and b/backends/trained_policies/right/right_weights_critic.h5f differ diff --git a/backends/trained_policies/stop/stop_weights_actor.h5f b/backends/trained_policies/stop/stop_weights_actor.h5f index 7ef9b12a24464c9c9c4d0ca0bd625b6d1875efb9..1f89ca2104f5801eb243b0898c3f050f91761f77 100644 Binary files a/backends/trained_policies/stop/stop_weights_actor.h5f and b/backends/trained_policies/stop/stop_weights_actor.h5f differ diff --git a/backends/trained_policies/stop/stop_weights_critic.h5f b/backends/trained_policies/stop/stop_weights_critic.h5f index 41a357a5e68aac591dea24edb47f5339960c0c90..12d7d202761bfafba4f6e9d564c154926bbc72a8 100644 Binary files a/backends/trained_policies/stop/stop_weights_critic.h5f and b/backends/trained_policies/stop/stop_weights_critic.h5f differ diff --git a/backends/trained_policies/wait/wait_weights_actor.h5f b/backends/trained_policies/wait/wait_weights_actor.h5f deleted file mode 100644 index 00703d9cc1931bc31083f9a77575598a030f92c3..0000000000000000000000000000000000000000 Binary files a/backends/trained_policies/wait/wait_weights_actor.h5f and /dev/null differ diff --git a/backends/trained_policies/wait/wait_weights_critic.h5f b/backends/trained_policies/wait/wait_weights_critic.h5f deleted file mode 100644 index 330e90c08e129f18a05e2a54882f8bb9fffc1957..0000000000000000000000000000000000000000 Binary files a/backends/trained_policies/wait/wait_weights_critic.h5f and /dev/null differ diff --git a/env/env_base.py b/env/env_base.py index 4a8b9a50d65d9113ec2d1c5f555ffd88d2e44050..038d59a6815566325a0c7716923d9425090897af 100644 --- a/env/env_base.py +++ b/env/env_base.py @@ -25,7 +25,8 @@ class EpisodicEnvBase(GymCompliantEnvBase): # three types possible ('min', 'max', or 'sum'); # See _reward_superposition below. - terminal_reward_type = 'max' + # TODO: consider the case, where every terminal reward is None. Make this class have a default terminal value (not None) and use it in this case. + terminal_reward_type = 'min' #: If true, the maneuver terminates when the goal has been achieved. 
_terminate_in_goal = False @@ -140,13 +141,11 @@ class EpisodicEnvBase(GymCompliantEnvBase): def _reset_model_checker(self, AP): - self.__mc_AP = int(AP) - if self._LTL_preconditions_enable: for LTL_precondition in self._LTL_preconditions: LTL_precondition.reset_property() - if LTL_precondition.enabled: - LTL_precondition.check_incremental(self.__mc_AP) + + self._incremental_model_checking(AP) def _set_mc_AP(self, AP): self.__mc_AP = int(AP) @@ -158,7 +157,7 @@ class EpisodicEnvBase(GymCompliantEnvBase): if self._terminate_in_goal and self.goal_achieved: return True - return self.violation_happened and self._LTL_preconditions_enable + return self.violation_happened @property def goal_achieved(self): @@ -176,8 +175,8 @@ class EpisodicEnvBase(GymCompliantEnvBase): if not self._LTL_preconditions_enable: return False - for LTL_precondition in self._LTL_preconditions: - if LTL_precondition.result == Parser.FALSE: + for LTL in self._LTL_preconditions: + if LTL.enabled and (LTL.result == Parser.FALSE): return True return False diff --git a/env/simple_intersection/features.py b/env/simple_intersection/features.py index 2b70e51fa7357378e061ea599566ae0fea1dc5b2..2c422c6a2ffa97d16d47dd118306e8d289db2789 100644 --- a/env/simple_intersection/features.py +++ b/env/simple_intersection/features.py @@ -174,6 +174,6 @@ class Features(object): # Add buffer features to make a fixed length feature vector for i in range(MAX_NUM_VEHICLES - len(self.other_vehs)): - feature += (0.0, 0.0, 0.0, 0.0, -1) + feature += (0.0, 0.0, 0.0, 0.0, -1.0) return feature diff --git a/env/simple_intersection/road_geokinemetry.py b/env/simple_intersection/road_geokinemetry.py index 63190be6e9982cc9d14bc532bdbda0a7a666e3d0..602b3e5f9c800f211ad4ed20ec1ab9b952e8ff6f 100644 --- a/env/simple_intersection/road_geokinemetry.py +++ b/env/simple_intersection/road_geokinemetry.py @@ -93,7 +93,7 @@ vlanes = Route( [-vwidth - 5.0 - intersection_voffset, -vwidth - intersection_voffset], 35, [-4.0, 4.0]) -intersection_width = vlanes.n_lanes * vlanes.width +intersection_width = vlanes.n_lanes * vlanes.width intersection_height = hlanes.n_lanes * hlanes.width intersection_width_w_offset = intersection_width + 2 * intersection_hoffset diff --git a/env/simple_intersection/simple_intersection_env.py b/env/simple_intersection/simple_intersection_env.py index 1d61b91ad2a9300a4cbf657bbaf1a8c80dd9347e..bd344831411192ebbc4a58fe9e6c71a5360a17ca 100644 --- a/env/simple_intersection/simple_intersection_env.py +++ b/env/simple_intersection/simple_intersection_env.py @@ -51,7 +51,10 @@ class SimpleIntersectionEnv(RoadEnv, EpisodicEnvBase): #: The weight vector to calculate the cost. In the maneuver, cost_weights # can be set to a specific value which may be different than the default. - cost_weights = (1.0, 0.25, 0.1, 1.0, 100.0, 0.1, 0.25, 0.1) + # TODO: check _cost_weights in both here and ManeuverBase. The _cost_weights has to be substituted to here, but it doesn't sometimes. + # TODO: set a functionality of setting _cost_weights for low and high level training separately. 
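The patch replaces the old cost_weights with a rescaled _cost_weights vector just below (the same eight values also appear in ManeuverBase further down). As the ManeuverBase comments note, a cost is a negative reward that is subtracted from the reward; a minimal illustrative sketch, assuming the eight per-term costs arrive as an array, with the helper name reward_minus_cost not taken from the repo:

import numpy as np

# the rescaled weight vector introduced by the patch (shared with ManeuverBase)
COST_WEIGHTS = (10.0e-3, 10.0e-3, 0.25e-3, 1.0e-3, 100.0e-3, 0.1e-3, 0.05e-3, 0.1e-3)

def reward_minus_cost(base_reward, cost_terms, weights=COST_WEIGHTS):
    # a cost is a negative reward, so the weighted cost is subtracted from the reward
    return base_reward - float(np.dot(weights, cost_terms))

print(reward_minus_cost(1.0, np.ones(8)))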
+ _cost_weights = (10.0 * 1e-3, 10.0 * 1e-3, 0.25 * 1e-3, 1.0 * 1e-3, + 100.0 * 1e-3, 0.1 * 1e-3, 0.05 * 1e-3, 0.1 * 1e-3) #TODO: Move this to constants # The empirical min and max of each term in the cost vector, which is used to normalize the values @@ -271,7 +274,7 @@ class SimpleIntersectionEnv(RoadEnv, EpisodicEnvBase): # stopped_car_scenario = bool(np.random.randint(0, 1)) TODO: this scenario may not work n_others_stopped_in_stop_region = np.random.randint( 0, min(3, n_others - stopped_car_scenario)) - veh_ahead_scenario = bool(np.random.randint(0, 1)) + veh_ahead_scenario = bool(np.random.randint(0, 2)) or veh_ahead_scenario if n_others_stopped_in_stop_region > min( n_others - stopped_car_scenario, 3): @@ -1156,12 +1159,13 @@ class SimpleIntersectionEnv(RoadEnv, EpisodicEnvBase): Returns True if the environment has terminated """ - model_checks_violated = (self._LTL_preconditions_enable and \ - self.current_model_checking_result()) + model_checks_violated = self._LTL_preconditions_enable and \ + self.current_model_checking_result() reached_goal = self._terminate_in_goal and self.goal_achieved self._check_collisions() self._check_ego_theta_out_of_range() terminated = self.termination_condition + return model_checks_violated or reached_goal or terminated @property @@ -1181,7 +1185,7 @@ class SimpleIntersectionEnv(RoadEnv, EpisodicEnvBase): return (self.ego.x >= rd.hlanes.end_pos) and \ not self.collision_happened and \ - not self.ego.APs['over_speed_limit'] + (self.ego.v <= 1.1*rd.speed_limit) def reset(self): """Gym compliant reset function. @@ -1229,7 +1233,6 @@ class SimpleIntersectionEnv(RoadEnv, EpisodicEnvBase): self.window.dispatch_events() # Text information about ego vehicle's states - # Right now, we are only training one option (Stop) info = "Ego Attributes:" + get_APs( self, EGO_INDEX, 'in_stop_region', 'has_entered_stop_region', 'has_stopped_in_stop_region', diff --git a/high_level_policy_main.py b/high_level_policy_main.py index 3c5a0f3c787d7937618af5a76816372e5688a2d2..8204c7e2a3d641780a17d71723ee4bde7f6dec57 100644 --- a/high_level_policy_main.py +++ b/high_level_policy_main.py @@ -11,7 +11,7 @@ def high_level_policy_training(nb_steps=25000, load_weights=False, training=True, testing=True, - nb_episodes_for_test=10, + nb_episodes_for_test=20, max_nb_steps=100, visualize=False, tensorboard=False, @@ -63,8 +63,7 @@ def high_level_policy_training(nb_steps=25000, agent.save_model(save_path) if testing: - options.set_controller_policy(agent.predict) - agent.test_model(options, nb_episodes=nb_episodes_for_test) + high_level_policy_testing(nb_episodes_for_test=nb_episodes_for_test) return agent @@ -228,7 +227,6 @@ if __name__ == "__main__": load_weights=args.load_weights, save_path=args.save_file, tensorboard=args.tensorboard, - nb_episodes_for_test=20, visualize=args.visualize) if args.test: diff --git a/options/options_loader.py b/options/options_loader.py index f58554f80af5c0d85909776ab4251ddfe75b4cfb..f4f73869c67699775007b67c4175b724f57b3cbc 100644 --- a/options/options_loader.py +++ b/options/options_loader.py @@ -1,4 +1,5 @@ import json +import os # for the use of os.path.isfile from .simple_intersection.maneuvers import * from .simple_intersection.mcts_maneuvers import * from backends import RLController, DDPGLearner, MCTSLearner, OnlineMCTSController, ManualPolicy @@ -155,19 +156,29 @@ class OptionsGraph: # TODO: error handling def load_trained_low_level_policies(self): for key, maneuver in self.maneuvers.items(): - agent = DDPGLearner( - 
input_shape=(maneuver.get_reduced_feature_length(), ), - nb_actions=2, - gamma=0.99, - nb_steps_warmup_critic=200, - nb_steps_warmup_actor=200, - lr=1e-3) - agent.load_model("backends/trained_policies/" + key + "/" + key + - "_weights.h5f") - maneuver.set_low_level_trained_policy(agent.predict) - maneuver._cost_weights = (20.0 * 1e-3, 1.0 * 1e-3, 0.25 * 1e-3, - 1.0 * 1e-3, 100.0 * 1e-3, 0.1 * 1e-3, - 0.25 * 1e-3, 0.1 * 1e-3) + trained_policy_path = "backends/trained_policies/" + key + "/" + critic_file_exists = os.path.isfile(trained_policy_path + key + "_weights_critic.h5f") + actor_file_exists = os.path.isfile(trained_policy_path + key + "_weights_actor.h5f") + + if actor_file_exists and critic_file_exists: + agent = DDPGLearner( + input_shape=(maneuver.get_reduced_feature_length(),), + nb_actions=2, + gamma=0.99, + nb_steps_warmup_critic=200, + nb_steps_warmup_actor=200, + lr=1e-3) + agent.load_model(trained_policy_path + key + "_weights.h5f") + maneuver.set_low_level_trained_policy(agent.predict) + + elif not critic_file_exists and actor_file_exists: + print("\n Warning: unable to load the low-level policy of \"" + key + + "\". The critic weights file has to be located in the same " + + "directory as the actor weights file; the manual policy will be used instead.\n") + + else: + print("\n Warning: the trained low-level policy of \"" + key + + "\" does not exist; the manual policy will be used.\n") if self.config["method"] == "mcts": maneuver.timeout = np.inf diff --git a/options/simple_intersection/maneuver_base.py b/options/simple_intersection/maneuver_base.py index 78f7a63a67dd19ce93ea0f08e5ae0c9749616f3f..01e210db99ad439efea33561ba53cef298bf8193 100644 --- a/options/simple_intersection/maneuver_base.py +++ b/options/simple_intersection/maneuver_base.py @@ -20,7 +20,7 @@ class ManeuverBase(EpisodicEnvBase): learning_mode = 'training' #: timeout (i.e., time horizon for termination) - # By default, the time-out horizon is 1 as in Paxton et. al (2017). + # By default, the time-out horizon is 1. timeout = 1 #: the option specific weight vector for cost of driving, which is @@ -29,8 +29,9 @@ class ManeuverBase(EpisodicEnvBase): # _extra_action_weights_flag = True); note that a cost is defined # as a negative reward, so a cost will be summed up to the reward # with subtraction. - _cost_weights = (1.0 * 1e-3, 1.0 * 1e-3, 0.25 * 1e-3, 1.0 * 1e-3, - 100.0 * 1e-3, 0.1 * 1e-3, 0.25 * 1e-3, 0.1 * 1e-3) + # TODO: either remove _cost_weights from ManeuverBase or provide additional functionality for keeping it here (see other TODOs in simple_intersection_env regarding _cost_weights). + _cost_weights = (10.0 * 1e-3, 10.0 * 1e-3, 0.25 * 1e-3, 1.0 * 1e-3, + 100.0 * 1e-3, 0.1 * 1e-3, 0.05 * 1e-3, 0.1 * 1e-3) _extra_r_terminal = None _extra_r_on_timeout = None @@ -38,7 +39,7 @@ class ManeuverBase(EpisodicEnvBase): #: the flag being False when _cost_weights is used without # modification; If True, then the action parts of _cost_weights # are increased for some edge cases (see the step method). - _extra_action_weights_flag = True + _extra_action_weights_flag = False #: the extra weights on the actions added to _cost_weights # for some edge cases when _extra_action_weights_flag = True. @@ -153,8 +154,7 @@ class ManeuverBase(EpisodicEnvBase): # in this case, no additional reward by Default # (i.e., self._extra_r_terminal = None by default).
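The terminal reward below is merged through the superposition described earlier in env_base.py, where one of three rules ('min', 'max', or 'sum') applies and the patch switches terminal_reward_type to 'min'. A minimal sketch of such a rule, assuming None stands for "no terminal reward assigned"; the actual EpisodicEnvBase implementation may handle more cases:

def superpose_terminal_reward(current, new, mode='min'):
    """Combine terminal rewards; None means no terminal reward has been assigned."""
    if new is None:
        return current
    if current is None:
        return new
    if mode == 'min':
        return min(current, new)
    if mode == 'max':
        return max(current, new)
    if mode == 'sum':
        return current + new
    raise ValueError("terminal_reward_type must be 'min', 'max', or 'sum'")

print(superpose_terminal_reward(200, -100, mode='min'))   # -100
print(superpose_terminal_reward(None, -100, mode='min'))  # -100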
self._terminal_reward_superposition(self._extra_r_terminal) - info[ - 'maneuver_termination_reason'] = 'extra_termination_condition' + info['maneuver_termination_reason'] = 'extra_termination_condition' if self.timeout_happened: if self._give_reward_on_timeout: # in this case, no additional reward by Default @@ -292,9 +292,8 @@ class ManeuverBase(EpisodicEnvBase): raise NotImplemented(self.__class__.__name__ + ".generate_learning_scenario is not implemented.") - def generate_validation_scenario( - self - ): # Override this method in the subclass if some customization is needed. + # Override this method in the subclass if some customization is needed. + def generate_validation_scenario(self): self.generate_learning_scenario() self._enable_low_level_training_properties = False @@ -334,8 +333,7 @@ class ManeuverBase(EpisodicEnvBase): Returns True if the condition is satisfied, and False otherwise. """ - return not (self.env.termination_condition or self.violation_happened) and \ - self.extra_initiation_condition + return not self.termination_condition and self.extra_initiation_condition @property def extra_initiation_condition(self): diff --git a/options/simple_intersection/maneuvers.py b/options/simple_intersection/maneuvers.py index 7b6ccf4ac243ff0be013a8164a3263a45ae895d1..67c8318d6c57801566d7693a60ceb2212dcbe64e 100644 --- a/options/simple_intersection/maneuvers.py +++ b/options/simple_intersection/maneuvers.py @@ -15,31 +15,125 @@ class KeepLane(ManeuverBase): self._target_lane = self.env.ego.APs['lane'] def _init_LTL_preconditions(self): - self._LTL_preconditions.append(LTLProperty("G ( not veh_ahead )", 0)) + self._LTL_preconditions.append( - LTLProperty("G ( not stopped_now )", 200, + LTLProperty("G ( not stopped_now )", 100, self._enable_low_level_training_properties)) + self._LTL_preconditions.append( LTLProperty( "G ( (lane and target_lane) or (not lane and not target_lane) )", - 200, self._enable_low_level_training_properties)) + 100, self._enable_low_level_training_properties)) def generate_learning_scenario(self): self.generate_scenario( enable_LTL_preconditions=False, - ego_pos_range=(rd.hlanes.start_pos, rd.hlanes.end_pos), + ego_pos_range=(rd.intersection_width_w_offset, rd.hlanes.end_pos), ego_perturb_lim=(rd.hlanes.width / 4, np.pi / 6), + v_max_multiplier=0.75, ego_heading_towards_lane_centre=True) # the goal reward and termination is led by the SimpleIntersectionEnv - self.env._terminate_in_goal = True - self.env._reward_in_goal = 200 + self.env._terminate_in_goal = False + self.env._reward_in_goal = None self._enable_low_level_training_properties = True + self._extra_action_weights_flag = True + + def generate_validation_scenario(self): + self.generate_scenario( + enable_LTL_preconditions=False, + ego_pos_range=(rd.hlanes.start_pos, 0), + ego_perturb_lim=(rd.hlanes.width / 4, np.pi / 6), + ego_heading_towards_lane_centre=True) + # the goal reward and termination is led by the SimpleIntersectionEnv + self.env._terminate_in_goal = False + self.env._reward_in_goal = None @staticmethod def _features_dim_reduction(features_tuple): - return extract_ego_features(features_tuple, 'pos_near_stop_region', - 'v', 'v_ref', 'e_y', 'psi', 'theta', 'acc', - 'psi_dot') + return extract_ego_features(features_tuple, 'v', 'v_ref', 'e_y', 'psi', 'v tan(psi/L)', 'theta', 'lane', 'acc', 'psi_dot') + + @property + def extra_termination_condition(self): + if self._enable_low_level_training_properties: # activated only for the low-level training. 
+ if (self.env.ego.v < self._v_ref / 5) and self.env.ego.acc < 0: + self._extra_r_terminal = -100 + return True + else: + self._extra_r_terminal = None + return False + + return False + + +class Halt(ManeuverBase): + + _terminate_in_goal = True + _reward_in_goal = None + + _penalty_in_violation = None + _ego_pos_range = (rd.intersection_width_w_offset, rd.hlanes.end_pos) + + def _init_param(self): + self._v_ref = 0 if self._enable_low_level_training_properties else rd.speed_limit + self._target_lane = self.env.ego.APs['lane'] + + def _init_LTL_preconditions(self): + + self._LTL_preconditions.append( + LTLProperty( + "G ( (veh_ahead and before_but_close_to_stop_region) U highest_priority )", + None, not self._enable_low_level_training_properties)) + + self._LTL_preconditions.append( + LTLProperty("G ( not stopped_now )", self._penalty(self._reward_in_goal), + not self._enable_low_level_training_properties)) + + self._LTL_preconditions.append( + LTLProperty( + "G ( (lane and target_lane) or (not lane and not target_lane) )", + 100, self._enable_low_level_training_properties)) + + def generate_learning_scenario(self): + self.generate_scenario( + ego_pos_range=self._ego_pos_range, + ego_perturb_lim=(rd.hlanes.width / 4, np.pi / 6), + ego_heading_towards_lane_centre=True) + self.env._terminate_in_goal = False + self.env._reward_in_goal = None + self._reward_in_goal = 200 + self._enable_low_level_training_properties = True + self._extra_action_weights_flag = True + + def generate_validation_scenario(self): + self._ego_pos_range = (rd.hlanes.start_pos, rd.hlanes.end_pos) + self.generate_learning_scenario() + + def _low_level_manual_policy(self): + return self.env.aggressive_driving_policy(EGO_INDEX) + + @staticmethod + def _features_dim_reduction(features_tuple): + return extract_ego_features(features_tuple, 'v', 'v_ref', 'e_y', 'psi', + 'v tan(psi/L)', 'theta', 'lane', 'acc', 'psi_dot') + + @property + def extra_termination_condition(self): + if self._enable_low_level_training_properties: # activated only for the low-level training. + if self.env.ego.APs['stopped_now']: + if self._reward_in_goal is not None: + self._extra_r_terminal = self._reward_in_goal + self._extra_r_terminal *= np.exp(- pow(self.env.ego.theta, 2) + - pow(self.env.ego.y - rd.hlanes.centres[self._target_lane], 2) + - 0.25 * pow(self.env.ego.psi, 2)) + else: + self._extra_r_terminal = None + return True + + else: + self._extra_r_terminal = None + return False + + return False class Stop(ManeuverBase): @@ -47,6 +141,8 @@ class Stop(ManeuverBase): _terminate_in_goal = True _reward_in_goal = None + _penalty_in_violation = None + def _init_param(self): self._set_v_ref() self._target_lane = self.env.ego.APs['lane'] @@ -54,34 +150,33 @@ class Stop(ManeuverBase): def _init_LTL_preconditions(self): self._LTL_preconditions.append( LTLProperty("G ( not has_stopped_in_stop_region )", - self._penalty(self._reward_in_goal))) + self._penalty(self._reward_in_goal), not self._enable_low_level_training_properties)) + # before_intersection rather than "before_but_close_to_stop_region or in_stop_region"? 
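Halt (above) and Stop (below) scale their goal reward by an exponential factor of the ego's heading, lateral offset from the target-lane centre, and steering angle. The same shaping written as a standalone function, with the state passed in as plain numbers and parameter names chosen for illustration:

import numpy as np

def shaped_goal_reward(reward_in_goal, theta, y, lane_centre, psi):
    # full reward only when the ego is aligned (theta, psi near 0) and centred on the target lane
    return reward_in_goal * np.exp(-theta ** 2 - (y - lane_centre) ** 2 - 0.25 * psi ** 2)

print(shaped_goal_reward(200.0, theta=0.0, y=0.0, lane_centre=0.0, psi=0.0))  # 200.0
print(shaped_goal_reward(200.0, theta=0.2, y=0.5, lane_centre=0.0, psi=0.1))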
self._LTL_preconditions.append( LTLProperty( "G ( (before_but_close_to_stop_region or in_stop_region) U has_stopped_in_stop_region )", - 0)) + self._penalty_in_violation)) self._LTL_preconditions.append( - LTLProperty("G ( not stopped_now U in_stop_region )", 200, + LTLProperty("G ( not stopped_now U in_stop_region )", 100, self._enable_low_level_training_properties)) self._LTL_preconditions.append( LTLProperty( "G ( (lane and target_lane) or (not lane and not target_lane) )", - 200, self._enable_low_level_training_properties)) + 100, self._enable_low_level_training_properties)) def _update_param(self): self._set_v_ref() def _set_v_ref(self): self._v_ref = rd.speed_limit + #if self._enable_low_level_training_properties: x = self.env.ego.x - if x <= rd.hlanes.near_stop_region: - self._v_ref = rd.speed_limit - elif x <= rd.hlanes.stop_region_centre: - self._v_ref = -(rd.speed_limit / abs(rd.hlanes.near_stop_region) - ) * (x - rd.hlanes.stop_region_centre) - else: + if rd.hlanes.near_stop_region < x <= rd.hlanes.stop_region_centre: + self._v_ref = -(rd.speed_limit / abs(rd.hlanes.near_stop_region)) * (x - rd.hlanes.stop_region_centre) + elif x > rd.hlanes.stop_region_centre: self._v_ref = 0 def generate_learning_scenario(self): @@ -89,9 +184,12 @@ class Stop(ManeuverBase): ego_pos_range=(rd.hlanes.near_stop_region, -rd.intersection_width_w_offset / 2), ego_perturb_lim=(rd.hlanes.width / 4, np.pi / 6), + v_max_multiplier=0.75, ego_heading_towards_lane_centre=True) self._reward_in_goal = 200 + self._penalty_in_violation = 150 self._enable_low_level_training_properties = True + self._extra_action_weights_flag = True def _low_level_manual_policy(self): return self.env.aggressive_driving_policy(EGO_INDEX) @@ -99,79 +197,115 @@ class Stop(ManeuverBase): @staticmethod def _features_dim_reduction(features_tuple): return extract_ego_features(features_tuple, 'pos_near_stop_region', - 'v', 'v_ref', 'e_y', 'psi', 'theta', 'acc', - 'psi_dot', 'not_in_stop_region') + 'v', 'v_ref', 'e_y', 'psi', 'v tan(psi/L)', 'theta', 'lane', 'acc', + 'psi_dot', 'pos_stop_region', 'not_in_stop_region') + + @property + def extra_termination_condition(self): + if self._enable_low_level_training_properties: # activated only for the low-level training. + if self.env.ego.APs['has_stopped_in_stop_region']: + if self._reward_in_goal is not None: + self._extra_r_terminal = self._reward_in_goal + self._extra_r_terminal *= np.exp(- pow(self.env.ego.theta, 2) + - pow(self.env.ego.y - rd.hlanes.centres[self._target_lane], 2) + - 0.25 * pow(self.env.ego.psi, 2)) + else: + self._extra_r_terminal = None + return True + + elif (rd.speed_limit / 5 < self._v_ref) and \ + (self.env.ego.v < self._v_ref / 2) and self.env.ego.acc < 0: + self._extra_r_terminal = -100 + return True + + else: + self._extra_r_terminal = None + return False + + return False class Wait(ManeuverBase): - _terminate_in_goal = True _reward_in_goal = None + _terminate_in_goal = True def _init_LTL_preconditions(self): + self._LTL_preconditions.append( - LTLProperty( - "G ( (in_stop_region and stopped_now) U highest_priority )", - 0)) + LTLProperty("G ( (in_stop_region and stopped_now) and not (highest_priority and intersection_is_clear))", + None, not self._enable_low_level_training_properties)) # not available in low-level training... 
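The patched Stop._set_v_ref above keeps the speed limit before the stop region, then ramps the reference velocity linearly down to zero at the stop-region centre. Restated as a pure function, with the road constants (rd.speed_limit, rd.hlanes.near_stop_region, rd.hlanes.stop_region_centre) passed as arguments; the sample values below are made up:

def stop_v_ref(x, speed_limit, near_stop_region, stop_region_centre):
    """Piecewise reference velocity used while approaching the stop region."""
    if x <= near_stop_region:
        # far from the stop region: track the speed limit
        return speed_limit
    if x <= stop_region_centre:
        # linear ramp down to zero at the stop-region centre (mirrors the patched formula)
        return -(speed_limit / abs(near_stop_region)) * (x - stop_region_centre)
    # past the centre: reference velocity is zero
    return 0.0

print(stop_v_ref(-18.0, speed_limit=11.0, near_stop_region=-20.0, stop_region_centre=-15.0))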
self._LTL_preconditions.append( LTLProperty("G ( not (in_intersection and highest_priority) )", self._penalty(self._reward_in_goal))) + self._LTL_preconditions.append( + LTLProperty( + "G ( in_stop_region U (highest_priority and intersection_is_clear) )", 150, self._enable_low_level_training_properties)) + + self._LTL_preconditions.append( + LTLProperty( + "G ( (lane and target_lane) or (not lane and not target_lane) )", + 100, self._enable_low_level_training_properties)) + def _init_param(self): - ego = self.env.ego - self._v_ref = rd.speed_limit if self.env.ego.APs[ - 'highest_priority'] else 0 - self._target_lane = ego.APs['lane'] - self._ego_stop_count = 0 + self._v_ref = 0 #if self._enable_low_level_training_properties else rd.speed_limit + self._target_lane = self.env.ego.APs['lane'] - def _update_param(self): - if self.env.ego.APs['highest_priority']: - self._v_ref = rd.speed_limit - if self._enable_low_level_training_properties: - if self.env.n_others_with_higher_priority == 0: - self._ego_stop_count += 1 + def _low_level_manual_policy(self): + return (0, 0) # Do nothing during "Wait" but just wait until the highest priority is given. - def generate_learning_scenario(self): - n_others = np.random.randint(0, 3) - self.generate_scenario( - enable_LTL_preconditions=True, - timeout=62, - n_others_range=(n_others, n_others), - ego_pos_range=rd.hlanes.stop_region, - n_others_stopped_in_stop_region=n_others, - ego_v_upper_lim=0, - ego_perturb_lim=(rd.hlanes.width / 4, np.pi / 6), - ego_heading_towards_lane_centre=True) +# @staticmethod +# def _features_dim_reduction(features_tuple): +# return extract_ego_features( +# features_tuple, 'v', 'v_ref', 'e_y', 'psi', 'v tan(psi/L)', 'theta', 'lane', 'acc', 'psi_dot', +# 'pos_stop_region', 'intersection_is_clear', 'highest_priority') - max_waited_count = 0 - for veh in self.env.vehs[1:]: - max_waited_count = max(max_waited_count, veh.waited_count) - self._extra_action_weights_flag = False - self.env.ego.waited_count = np.random.randint(0, max_waited_count + 21) - self.env.init_APs(False) +class Left(ManeuverBase): - self._reward_in_goal = 200 - self._extra_r_on_timeout = -200 - self._enable_low_level_training_properties = True - self._ego_stop_count = 0 + min_y_distance = rd.hlanes.width / 4 + + _terminate_in_goal = True + _reward_in_goal = None + + def _init_param(self): + self._v_ref = rd.speed_limit + self._target_lane = False + self._terminate_in_goal = True @property - def extra_termination_condition(self): - if self._enable_low_level_training_properties: # activated only for the low-level training. 
- if self._ego_stop_count >= 50: - self._extra_r_terminal = -200 - return True - else: - self._extra_r_terminal = None - return False + def goal_achieved(self): + ego = self.env.ego + APs = self.env.ego.APs + on_other_lane = APs['lane'] == self._target_lane + achieved_y_displacement = np.sign(ego.y) * \ + (ego.y - rd.hlanes.centres[APs['target_lane']]) >= - self.min_y_distance + return on_other_lane and APs['on_route'] and \ + achieved_y_displacement and APs['parallel_to_lane'] + + @property + def extra_initiation_condition(self): + return self.env.ego.APs['lane'] @staticmethod def _features_dim_reduction(features_tuple): - return extract_ego_features( - features_tuple, 'v', 'v_ref', 'psi', 'theta', 'acc', 'psi_dot', - 'pos_stop_region', 'intersection_is_clear', 'highest_priority') + return extract_ego_features(features_tuple, 'v', 'v_ref', 'e_y', 'psi', + 'v tan(psi/L)', 'theta', 'lane', 'acc', + 'psi_dot') + + +class Right(Left): + + def _init_param(self): + self._v_ref = rd.speed_limit + self._target_lane = True + self._terminate_in_goal = True + + @property + def extra_initiation_condition(self): + return not self.env.ego.APs['lane'] class ChangeLane(ManeuverBase): @@ -183,22 +317,25 @@ class ChangeLane(ManeuverBase): _violation_penalty_in_low_level_training = None - high_level_extra_reward = -20 + high_level_extra_reward = -50 def _init_param(self): self._v_ref = rd.speed_limit self._target_lane = not self.env.ego.APs['lane'] - self._terminate_in_goal = True def _init_LTL_preconditions(self): self._LTL_preconditions.append( - LTLProperty("G ( on_route and not over_speed_limit )", - self._violation_penalty_in_low_level_training, - self._enable_low_level_training_properties)) + LTLProperty("G ( on_route and not over_speed_limit )", + self._violation_penalty_in_low_level_training, + self._enable_low_level_training_properties)) + self._LTL_preconditions.append( - LTLProperty("G ( not stopped_now )", - self._violation_penalty_in_low_level_training, - self._enable_low_level_training_properties)) + LTLProperty("G ( not stopped_now )", + 100, self._enable_low_level_training_properties)) + + self._LTL_preconditions.append( + LTLProperty("G ( not in_intersection and not in_stop_region )", + None, not self._enable_low_level_training_properties)) # activated only for the high-level case. 
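Left, Right, and ChangeLane share the same goal test: the ego is on the target lane, on route, parallel to the lane, and its lateral displacement is within min_y_distance of the target-lane centre. A sketch of that predicate with plain arguments, where lane_centres stands in for rd.hlanes.centres and the sample values are illustrative:

import numpy as np

def lane_change_goal_achieved(ego_y, ego_lane, on_route, parallel_to_lane,
                              target_lane, lane_centres, min_y_distance):
    on_target_lane = (ego_lane == target_lane)
    # the displacement towards the target-lane centre must be within min_y_distance
    achieved_y_displacement = np.sign(ego_y) * (ego_y - lane_centres[int(target_lane)]) >= -min_y_distance
    return bool(on_target_lane and on_route and parallel_to_lane and achieved_y_displacement)

print(lane_change_goal_achieved(-1.0, True, True, True, True, [4.0, -1.05], 0.5))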
@property def goal_achieved(self): @@ -206,9 +343,9 @@ class ChangeLane(ManeuverBase): APs = self.env.ego.APs on_other_lane = APs['lane'] == self._target_lane achieved_y_displacement = np.sign(ego.y) * \ - (ego.y - rd.hlanes.centres[APs['target_lane']]) >= - self.min_y_distance + (ego.y - rd.hlanes.centres[APs['target_lane']]) >= - self.min_y_distance return on_other_lane and APs['on_route'] and \ - achieved_y_displacement and APs['parallel_to_lane'] + achieved_y_displacement and APs['parallel_to_lane'] def _low_level_manual_policy(self): return self.env.aggressive_driving_policy(EGO_INDEX) @@ -217,16 +354,28 @@ class ChangeLane(ManeuverBase): self.generate_scenario( enable_LTL_preconditions=False, timeout=15, - ego_pos_range=(rd.hlanes.start_pos, rd.hlanes.end_pos), + ego_pos_range=(rd.intersection_width_w_offset, rd.hlanes.end_pos), ego_lane=np.random.choice([0, 1]), - ego_perturb_lim=(rd.hlanes.width / 5, np.pi / 6)) + ego_perturb_lim=(rd.hlanes.width / 4, np.pi / 6), + v_max_multiplier=0.75) + + # print('our range was %s, %s, ego at %s' % (before_intersection, after_intersection, self.env.ego.x)) + self._reward_in_goal = 200 + self._violation_penalty_in_low_level_training = 150 + self._enable_low_level_training_properties = True + self._extra_action_weights_flag = True + + def generate_validation_scenario(self): + self.generate_scenario( + enable_LTL_preconditions=False, + ego_pos_range=(rd.hlanes.start_pos, rd.hlanes.end_pos), + ego_perturb_lim=(rd.hlanes.width / 4, np.pi / 6)) + # print('our range was %s, %s, ego at %s' % (before_intersection, after_intersection, self.env.ego.x)) self._reward_in_goal = 200 - self._violation_penalty_in_low_level_training = 200 + self._violation_penalty_in_low_level_training = 150 self._enable_low_level_training_properties = True - # TODO: It is not a good idea to specify features by numbers, as the list - # of features is ever changing. We should specify them by strings. 
@staticmethod def _features_dim_reduction(features_tuple): return extract_ego_features(features_tuple, 'v', 'v_ref', 'e_y', 'psi', @@ -237,49 +386,84 @@ class ChangeLane(ManeuverBase): class Follow(ManeuverBase): _target_veh_i = None - _penalty_for_out_of_follow_range = None + _penalty_for_out_of_range = None + _penalty_for_change_lane = None def _init_LTL_preconditions(self): self._LTL_preconditions.append( - LTLProperty("G ( veh_ahead )", - self._penalty_for_out_of_follow_range)) + LTLProperty("G ( veh_ahead )", self._penalty_for_out_of_range)) self._LTL_preconditions.append( - LTLProperty( - "G ( (lane and target_lane) or (not lane and not target_lane) )", - self._penalty_for_out_of_follow_range)) + LTLProperty( + "G ( (lane and target_lane) or (not lane and not target_lane) )", + self._penalty_for_change_lane)) - self._LTL_preconditions.append( - LTLProperty("G ( not stopped_now U veh_ahead_stopped_now)", 200, - self._enable_low_level_training_properties)) + # self._LTL_preconditions.append( + # LTLProperty("G ( not stopped_now U veh_ahead_stopped_now)", 200, + # self._enable_low_level_training_properties)) self._LTL_preconditions.append( - LTLProperty("G ( not veh_ahead_too_close )", 200, - self._enable_low_level_training_properties)) + LTLProperty("G ( not veh_ahead_too_close )", self._penalty_for_out_of_range, + self._enable_low_level_training_properties)) def generate_learning_scenario(self): self.generate_scenario( enable_LTL_preconditions=False, n_others_range=(1, 1), - ego_perturb_lim=(rd.hlanes.width / 2, np.pi / 4), + v_max_multiplier=0.75, + ego_perturb_lim=(0, 0), veh_ahead_scenario=True) self.env._terminate_in_goal = False - self._penalty_for_out_of_follow_range = 200 + self._penalty_for_out_of_range = 200 + self._penalty_for_change_lane = 170 self._enable_low_level_training_properties = True + self._extra_action_weights_flag = True + + def generate_validation_scenario(self): + self.generate_learning_scenario() + + def _init_param(self): + self._set_v_ref() def _update_param(self): + self._set_v_ref() + + def _set_v_ref(self): + #if self._enable_low_level_training_properties: self._target_veh_i, _ = self.env.get_V2V_distance() + if self._target_veh_i is not None: + self._v_ref = self.env.vehs[self._target_veh_i].v + else: + self._v_ref = 0 + #else: + # self._v_ref = rd.speed_limit + def _low_level_manual_policy(self): return self.env.aggressive_driving_policy(EGO_INDEX) + @property + def extra_termination_condition(self): + # APs = self.env.ego.APs + + if self._target_veh_i is None: + return False + + #elif not self._enable_low_level_training_properties: # activated only for the high-level training. + # if (APs['in_stop_region'] or APs['before_but_close_to_stop_region']) \ + # and (self.env.vehs[self._target_veh_i].APs['in_intersection'] or + # self.env.vehs[self._target_veh_i].x > 0): + # return True + # else: + return False + def _features_dim_reduction(self, features_tuple): ego_features = extract_ego_features( - features_tuple, 'v', 'v_ref', 'e_y', 'psi', 'v tan(psi/L)', - 'theta', 'lane', 'e_y,lane', 'acc', 'psi_dot') + features_tuple, 'pos_near_stop_region', 'v', 'v_ref', 'e_y', 'psi', 'v tan(psi/L)', + 'theta', 'lane', 'acc', 'psi_dot') + if self._target_veh_i is not None: return ego_features + extract_other_veh_features( - features_tuple, self._target_veh_i, 'rel_x', 'rel_y', 'v', - 'acc') + features_tuple, self._target_veh_i, 'rel_x', 'rel_y', 'v', 'acc') else: return ego_features + (0.0, 0.0, 0.0, 0.0)
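Follow._features_dim_reduction keeps the reduced feature vector at a fixed length by appending four zeros when there is no vehicle ahead. A sketch of that padding rule, assuming the lead vehicle contributes the four features rel_x, rel_y, v, and acc as in the patch (the function name is not from the repo):

def follow_reduced_features(ego_features, lead_features=None, lead_dim=4):
    # keep the reduced feature vector a fixed length: pad with zeros when no vehicle is ahead
    if lead_features is None:
        return tuple(ego_features) + (0.0,) * lead_dim
    return tuple(ego_features) + tuple(lead_features)

print(follow_reduced_features((0.0, 9.9, 9.5)))
print(follow_reduced_features((0.0, 9.9, 9.5), (12.0, 0.1, 8.0, -0.2)))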