diff --git a/backends/kerasrl_learner.py b/backends/kerasrl_learner.py
index 405a9bac8a1e6430e35b7b6330bf909ed9c2a5ad..83755da3ee50d5bfd476c10887a600710d11c475 100644
--- a/backends/kerasrl_learner.py
+++ b/backends/kerasrl_learner.py
@@ -237,7 +237,7 @@ class DQNLearner(LearnerBase):
             input_shape: Shape of observation space, e.g. (10,);
             nb_actions: number of values in action space;
             model: Keras Model of actor which takes observation as input and outputs actions. Uses default if not given
-            policy: KerasRL Policy. Uses default MaxBoltzmannQPolicy if not given
+            policy: KerasRL Policy. Uses default RestrictedEpsGreedyQPolicy if not given
             memory: KerasRL Memory. Uses default SequentialMemory if not given
             **kwargs: other optional key-value arguments with defaults defined in property_defaults
         """
@@ -452,9 +452,11 @@ class RestrictedEpsGreedyQPolicy(EpsGreedyQPolicy):
 
 # every q_value is -np.inf. (This sometimes inevitably happens within the fit and test functions
 # of keras-rl at the terminal stage, since they force a call to forward in the Keras-RL learner, which in turn calls this function.)
-        # In this case, we choose a policy randomly.
+        # TODO: add proper exception handling, or a better way to choose an action, for this exceptional case.
         if len(index) < 1:
-            action = np.random.random_integers(0, nb_actions - 1)
+            # every q_value is -np.inf, we choose action = 0
+            action = 0
+            print("Warning: no action satisfies the initiation condition; action = 0 is chosen by default.")
 
         elif np.random.uniform() <= self.eps:
             action = index[np.random.random_integers(0, len(index) - 1)]
@@ -484,9 +486,11 @@ class RestrictedGreedyQPolicy(GreedyQPolicy):
         """
         assert q_values.ndim == 1
 
+        # TODO: add proper exception handling, or a better way to choose an action, for this exceptional case.
         if np.max(q_values) == - np.inf:
             # every q_value is -np.inf, we choose action = 0
             action = 0
+            print("Warning: no action satisfies the initiation condition; action = 0 is chosen by default.")
 
         else:
             action = np.argmax(q_values)
@@ -551,4 +555,4 @@ class DQNAgentOverOptions(DQNAgent):
             for node_index in invalid_node_indices:
                 q_values[node_index] = -np.inf
 
-        return q_values
+        return q_values
\ No newline at end of file
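
Taken together, the changes in this file mask the q-values of options whose initiation condition does not hold to -np.inf (in DQNAgentOverOptions) and then select among the remaining options (RestrictedEpsGreedyQPolicy / RestrictedGreedyQPolicy), falling back to action 0 when everything is masked. Below is a minimal, self-contained sketch of that selection logic; it uses numpy only, and the function names are illustrative, not the Keras-RL API.

import numpy as np

def restricted_greedy_action(q_values):
    # q_values of non-initiable options are assumed to have been masked
    # to -np.inf already, as DQNAgentOverOptions does in the hunk above.
    if np.max(q_values) == -np.inf:
        # Exceptional case: no option satisfies its initiation condition.
        print("Warning: no action satisfies the initiation condition; "
              "action = 0 is chosen by default.")
        return 0
    return int(np.argmax(q_values))

def restricted_eps_greedy_action(q_values, eps=0.1):
    # Indices of options whose initiation condition holds.
    index = np.where(q_values != -np.inf)[0]
    if len(index) < 1:
        print("Warning: no action satisfies the initiation condition; "
              "action = 0 is chosen by default.")
        return 0
    if np.random.uniform() <= eps:
        # Explore only among the valid options.
        return int(np.random.choice(index))
    return int(np.argmax(q_values))

# Example: options 0 and 2 are not initiable in the current state.
q = np.array([-np.inf, 1.3, -np.inf, 0.7])
assert restricted_greedy_action(q) == 1
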
diff --git a/backends/trained_policies/immediatestop/immediatestop_weights_actor.h5f b/backends/trained_policies/halt/immediatestop_weights_actor.h5f
similarity index 100%
rename from backends/trained_policies/immediatestop/immediatestop_weights_actor.h5f
rename to backends/trained_policies/halt/immediatestop_weights_actor.h5f
diff --git a/backends/trained_policies/immediatestop/immediatestop_weights_critic.h5f b/backends/trained_policies/halt/immediatestop_weights_critic.h5f
similarity index 100%
rename from backends/trained_policies/immediatestop/immediatestop_weights_critic.h5f
rename to backends/trained_policies/halt/immediatestop_weights_critic.h5f
diff --git a/env/simple_intersection/simple_intersection_env.py b/env/simple_intersection/simple_intersection_env.py
index 62a85ce711b980f3e968ecb6fc75514c5c7cde04..58ef9a3a5b9d7a36e9ca75f12d81f9ce7b238fa2 100644
--- a/env/simple_intersection/simple_intersection_env.py
+++ b/env/simple_intersection/simple_intersection_env.py
@@ -52,6 +52,7 @@ class SimpleIntersectionEnv(RoadEnv, EpisodicEnvBase):
     #: The weight vector to calculate the cost. In the maneuver, cost_weights
     # can be set to a specific value which may be different than the default.
     # TODO: check _cost_weights in both here and ManeuverBase. The _cost_weights has to be substituted to here, but it doesn't sometimes.
+    # TODO: add the ability to set _cost_weights separately for low-level and high-level training.
     _cost_weights = (10.0 * 1e-3, 10.0 * 1e-3, 0.25 * 1e-3, 1.0 * 1e-3,
                      100.0 * 1e-3, 0.1 * 1e-3, 0.05 * 1e-3, 0.1 * 1e-3)
 
@@ -1292,4 +1293,4 @@ class SimpleIntersectionEnv(RoadEnv, EpisodicEnvBase):
             if veh.waited_count > self.ego.waited_count:
                 n_others_with_higher_priority += 1
 
-        return n_others_with_higher_priority
+        return n_others_with_higher_priority
\ No newline at end of file
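
For context on the TODO added above: _cost_weights is a weight vector over per-step cost terms, and (per the comment in ManeuverBase below) a cost is a negative reward, so the weighted sum of cost terms is subtracted from the reward. A minimal sketch of that combination follows; the cost-term vector and the function name are illustrative assumptions, not code from this repo.

import numpy as np

# The eight weights from SimpleIntersectionEnv._cost_weights above.
COST_WEIGHTS = np.array([10.0e-3, 10.0e-3, 0.25e-3, 1.0e-3,
                         100.0e-3, 0.1e-3, 0.05e-3, 0.1e-3])

def reward_with_costs(base_reward, cost_terms, weights=COST_WEIGHTS):
    # cost_terms: hypothetical per-step cost features, one per weight.
    # A cost is a negative reward, so the weighted sum is subtracted.
    return base_reward - float(np.dot(weights, cost_terms))

# Example: zero cost terms leave the reward unchanged.
assert reward_with_costs(1.0, np.zeros(8)) == 1.0

Supporting the TODO would then amount to keeping two such weight vectors and selecting one depending on whether a low-level or a high-level policy is being trained.
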
diff --git a/options/simple_intersection/maneuver_base.py b/options/simple_intersection/maneuver_base.py
index 82bae5e1023ed8deb1d4c1bd5a0e5c7dc7c0ecb2..01e210db99ad439efea33561ba53cef298bf8193 100644
--- a/options/simple_intersection/maneuver_base.py
+++ b/options/simple_intersection/maneuver_base.py
@@ -29,6 +29,7 @@ class ManeuverBase(EpisodicEnvBase):
     # _extra_action_weights_flag = True); note that a cost is defined
     # as a negative reward, so a cost will be summed up to the reward
     # with subtraction.
+    # TODO: either remove _cost_weights from ManeuverBase, or keep it here to provide additional functionality (see the other TODOs about _cost_weights in simple_intersection_env).
     _cost_weights = (10.0 * 1e-3, 10.0 * 1e-3, 0.25 * 1e-3, 1.0 * 1e-3,
                      100.0 * 1e-3, 0.1 * 1e-3, 0.05 * 1e-3, 0.1 * 1e-3)
 
diff --git a/options/simple_intersection/maneuvers.py b/options/simple_intersection/maneuvers.py
index b6625b21ff53d3e13456b30ca2421a98cec458f2..cda94f8113ea94b49c5c1f549172161f645071e8 100644
--- a/options/simple_intersection/maneuvers.py
+++ b/options/simple_intersection/maneuvers.py
@@ -152,6 +152,7 @@ class Stop(ManeuverBase):
             LTLProperty("G ( not has_stopped_in_stop_region )",
                         self._penalty(self._reward_in_goal), not self._enable_low_level_training_properties))
 
+        # TODO: should this use before_intersection rather than "before_but_close_to_stop_region or in_stop_region"?
         self._LTL_preconditions.append(
             LTLProperty(
                 "G ( (before_but_close_to_stop_region or in_stop_region) U has_stopped_in_stop_region )",
@@ -492,4 +493,4 @@ class Follow(ManeuverBase):
             return ego_features + extract_other_veh_features(
                 features_tuple, self._target_veh_i, 'rel_x', 'rel_y', 'v', 'acc')
         else:
-            return ego_features + (0.0, 0.0, 0.0, 0.0)
+            return ego_features + (0.0, 0.0, 0.0, 0.0)
\ No newline at end of file
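
On the question raised in the Stop precondition above: the formula has the shape G ( p U q ), with p being "before_but_close_to_stop_region or in_stop_region" (or before_intersection, if that change is made) and q being has_stopped_in_stop_region. Below is a minimal finite-trace sketch of that pattern, assuming strong-until semantics; the repo's actual LTLProperty checker may evaluate it differently.

def until_holds(p, q, i):
    # p U q at position i: q eventually holds, and p holds at every
    # position before that point.
    for j in range(i, len(q)):
        if q[j]:
            return True
        if not p[j]:
            return False
    return False

def globally_until(p, q):
    # G ( p U q ): the until property must hold at every trace position.
    return all(until_holds(p, q, i) for i in range(len(p)))

# Example trace: the ego stays "before_but_close_to_stop_region or
# in_stop_region" (p) until "has_stopped_in_stop_region" (q) becomes true.
p = [True, True, True, True, False]
q = [False, False, False, True, True]
assert globally_until(p, q)
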