From f443e3820c74aa4661f8a1f83db5c24efc216452 Mon Sep 17 00:00:00 2001
From: Jaeyoung Lee <jaeyoung.lee@uwaterloo.ca>
Date: Wed, 6 Feb 2019 13:46:44 -0500
Subject: [PATCH] Improve the Wait maneuver (and slightly adjust the others)

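Stop:
- Raise the penalty for violating "G ( not stopped_now U in_stop_region )"
  from 100 to 200, and the terminal penalty in the extra termination
  condition from -100 to -200.

Wait:
- Do not terminate in the goal: _terminate_in_goal = False on the
  maneuver, and env._terminate_in_goal = False in the learning scenario.
- Use has_stopped_in_stop_region instead of stopped_now in the
  high-level precondition.
- Raise the penalty for leaving the stop region before the ego has the
  highest priority and the intersection is clear from 150 to 200.
- Disable the lane-consistency precondition for now (commented out).
- Toggle _extra_action_weights_flag in _update_param, so the extra
  action weights apply only while the ego has the highest priority and
  the intersection is clear.
- In the extra termination condition, raise the sampling probability
  from 0.1 to 0.25 and the terminal penalty from -100 to -200.

ChangeLane:
- Set env._terminate_in_goal = False in both the learning and the
  validation scenarios.

Follow:
- Remove generate_validation_scenario, which merely delegated to
  generate_learning_scenario.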
---
 options/simple_intersection/maneuvers.py | 43 +++++++++++++-----------
 1 file changed, 24 insertions(+), 19 deletions(-)

diff --git a/options/simple_intersection/maneuvers.py b/options/simple_intersection/maneuvers.py
index e995a19..2cf2e7f 100644
--- a/options/simple_intersection/maneuvers.py
+++ b/options/simple_intersection/maneuvers.py
@@ -172,7 +172,7 @@ class Stop(ManeuverBase):
                 self._penalty_in_violation))
 
         self._LTL_preconditions.append(
-            LTLProperty("G ( not stopped_now U in_stop_region )", 100,
+            LTLProperty("G ( not stopped_now U in_stop_region )", 200,
                         self._enable_low_level_training_properties))
 
         self._LTL_preconditions.append(
@@ -228,7 +228,7 @@ class Stop(ManeuverBase):
 
             elif (rd.speed_limit / 5 < self._v_ref) and \
                     (self.env.ego.v < self._v_ref / 2) and self.env.ego.acc < 0:
-                self._extra_r_terminal = -100
+                self._extra_r_terminal = -200
                 return True
 
             else:
@@ -241,13 +241,13 @@ class Stop(ManeuverBase):
 class Wait(ManeuverBase):
 
     _reward_in_goal = None
-    _terminate_in_goal = True
+    _terminate_in_goal = False
 
     def _init_LTL_preconditions(self):
-
         self._LTL_preconditions.append(
-            LTLProperty("G ( (in_stop_region and stopped_now) U (highest_priority and intersection_is_clear))",
-                        None, not self._enable_low_level_training_properties))  # not available in low-level training...
+            LTLProperty(
+                "G ( (in_stop_region and has_stopped_in_stop_region) U (highest_priority and intersection_is_clear))",
+                None, not self._enable_low_level_training_properties))  # not available in low-level training...
 
         self._LTL_preconditions.append(
             LTLProperty("G ( not (in_intersection and highest_priority and intersection_is_clear) )",
@@ -255,13 +255,13 @@ class Wait(ManeuverBase):
 
         self._LTL_preconditions.append(
            LTLProperty(
-               "G ( in_stop_region U (highest_priority and intersection_is_clear) )", 150,
+               "G ( in_stop_region U (highest_priority and intersection_is_clear) )", 200,
                self._enable_low_level_training_properties))
 
-        self._LTL_preconditions.append(
-           LTLProperty(
-               "G ( (lane and target_lane) or (not lane and not target_lane) )",
-               150, self._enable_low_level_training_properties))
+        #self._LTL_preconditions.append(
+        #    LTLProperty(
+        #        "G ( (lane and target_lane) or (not lane and not target_lane) )",
+        #        150, self._enable_low_level_training_properties))
 
     def _init_param(self):
         self._update_param()
@@ -270,8 +270,10 @@ class Wait(ManeuverBase):
     def _update_param(self):
         if self.env.ego.APs['highest_priority'] and self.env.ego.APs['intersection_is_clear']:
             self._v_ref = rd.speed_limit
+            self._extra_action_weights_flag = True
         else:
             self._v_ref = 0
+            self._extra_action_weights_flag = False
 
     def generate_learning_scenario(self):
         n_others = 0 if np.random.rand() <= 0 else np.random.randint(1, 4)
@@ -298,17 +300,20 @@ class Wait(ManeuverBase):
             self.env.ego.waited_count = np.random.randint(min_waited_count, max_waited_count + 21)
 
         self.env.init_APs(False)
+        self.env._terminate_in_goal = False
         self._reward_in_goal = 200
         self._enable_low_level_training_properties = True
-        self._extra_action_weights_flag = True
+        self._extra_action_weights_flag = False
 
     @property
     def extra_termination_condition(self):
         if self._enable_low_level_training_properties:  # activated only for the low-level training.
-            if self.env.ego.APs['highest_priority'] and self.env.ego.APs['intersection_is_clear'] \
-                    and np.random.rand() <= 0.1 and self.env.ego.v <= self._v_ref / 10 \
-                    and self.env.ego.acc < 0:
-                self._extra_r_terminal = - 100
+            if self.env.ego.APs['highest_priority'] and \
+                    self.env.ego.APs['intersection_is_clear'] and \
+                    np.random.rand() <= 0.25 and \
+                    self.env.ego.v <= self._v_ref / 10 and \
+                    self.env.ego.acc < 0:
+                self._extra_r_terminal = -200
                 return True
             else:
                 self._extra_r_terminal = None
@@ -449,6 +454,7 @@ class ChangeLane(ManeuverBase):
         self._violation_penalty_in_low_level_training = 150
         self._enable_low_level_training_properties = True
         self._extra_action_weights_flag = True
+        self.env._terminate_in_goal = False
 
     def generate_validation_scenario(self):
         self.generate_scenario(
@@ -460,6 +466,8 @@ class ChangeLane(ManeuverBase):
         self._reward_in_goal = 200
         self._violation_penalty_in_low_level_training = 150
         self._enable_low_level_training_properties = True
+        self.env._terminate_in_goal = False
+
 
     @staticmethod
     def _features_dim_reduction(features_tuple):
@@ -511,9 +519,6 @@ class Follow(ManeuverBase):
         self._enable_low_level_training_properties = True
         self._extra_action_weights_flag = True
 
-    def generate_validation_scenario(self):
-        self.generate_learning_scenario()
-
     def _init_param(self):
         self._set_v_ref()
 
-- 
GitLab