
895596a8 by Ashish Gaurav

Merge branch 'Improve_Follow' into 'master'

Improve follow

See merge request !2
2 parents ea8874a8 c098c0f4
@@ -51,7 +51,7 @@ class ControllerBase(PolicyBase):
Returns state at end of node execution, total reward, episode_termination_flag, info
'''
# TODO: this is never called when you test high-level policy rather than train...
# TODO: this is never called when you TEST high-level policy (w/o MCTS) rather than train...
def step_current_node(self, visualize_low_level_steps=False):
total_reward = 0
self.node_terminal_state_reached = False
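The hunk above only rewords a TODO comment, but the surrounding step_current_node is the loop that executes one high-level node (option) to completion. A minimal sketch of that pattern, using a hypothetical node/env interface rather than the repository's actual API:

# Minimal sketch, not the repository's API: run one option's low-level policy
# until the option reports a terminal state, accumulating reward along the way.
def step_current_node(node, env, state, visualize_low_level_steps=False):
    total_reward = 0.0
    info = {}
    episode_done = False
    while not node.is_terminal(state) and not episode_done:
        action = node.low_level_policy(state)             # hypothetical call
        state, reward, episode_done, info = env.step(action)
        total_reward += reward
        if visualize_low_level_steps:
            env.render()
    # mirrors the docstring: state at end of node execution, total reward,
    # episode_termination_flag, info
    return state, total_reward, episode_done, info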
@@ -555,4 +555,4 @@ class DQNAgentOverOptions(DQNAgent):
for node_index in invalid_node_indices:
q_values[node_index] = -np.inf
return q_values
return q_values
\ No newline at end of file
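The hunk above only touches the final return line, but the surrounding method shows the key idea: options that are currently invalid get a Q-value of -inf, so a greedy selection can never pick them. A small self-contained illustration of the same masking idea (the helper name and example values are mine, not the repository's):

import numpy as np

# Force invalid options to -inf before the greedy argmax over option Q-values.
def masked_greedy_option(q_values, invalid_node_indices):
    q = np.array(q_values, dtype=float)
    q[list(invalid_node_indices)] = -np.inf
    return int(np.argmax(q))

# Example: with options 1 and 3 masked out, the greedy choice falls on option 2.
print(masked_greedy_option([0.3, 0.9, 0.7, 1.2], [1, 3]))  # -> 2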
@@ -275,6 +275,8 @@ class SimpleIntersectionEnv(RoadEnv, EpisodicEnvBase):
n_others_stopped_in_stop_region = np.random.randint(
0, min(3, n_others - stopped_car_scenario))
veh_ahead_scenario = bool(np.random.randint(0, 2)) or veh_ahead_scenario
# Uncomment the line below to generate veh_ahead_scenario more often
# veh_ahead_scenario = np.random.rand() <= 0.7 or veh_ahead_scenario
if n_others_stopped_in_stop_region > min(
n_others - stopped_car_scenario, 3):
@@ -1291,4 +1293,4 @@ class SimpleIntersectionEnv(RoadEnv, EpisodicEnvBase):
if veh.waited_count > self.ego.waited_count:
n_others_with_higher_priority += 1
return n_others_with_higher_priority
return n_others_with_higher_priority
\ No newline at end of file
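The comment added in the first hunk points at a tunable probability for generating vehicle-ahead scenarios. A hedged sketch of that randomization with the probability exposed as a parameter; the function itself is illustrative, the OR with a pre-existing flag is omitted, and it assumes n_others - stopped_car_scenario >= 1:

import numpy as np

# Illustrative scenario randomization. With the default veh_ahead_prob=0.5 this
# matches bool(np.random.randint(0, 2)); raising it to 0.7 reproduces the
# commented-out alternative that generates more veh_ahead scenarios.
def sample_scenario(n_others, stopped_car_scenario, veh_ahead_prob=0.5):
    n_others_stopped_in_stop_region = np.random.randint(
        0, min(3, n_others - stopped_car_scenario))
    veh_ahead_scenario = np.random.rand() <= veh_ahead_prob
    return n_others_stopped_in_stop_region, veh_ahead_scenario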
@@ -385,22 +385,29 @@ class ChangeLane(ManeuverBase):
class Follow(ManeuverBase):
_reward_in_goal = None
_target_veh_i = None
_penalty_for_out_of_range = None
_penalty_for_change_lane = None
def _init_LTL_preconditions(self):
self._LTL_preconditions.append(
LTLProperty("G ( veh_ahead )", self._penalty_for_out_of_range))
LTLProperty("G ( veh_ahead U (in_stop_region or before_but_close_to_stop_region ) )", self._penalty_for_out_of_range))
self._LTL_preconditions.append(
LTLProperty(
"G ( (lane and target_lane) or (not lane and not target_lane) )",
self._penalty_for_change_lane))
# self._LTL_preconditions.append(
# LTLProperty("G ( not stopped_now U veh_ahead_stopped_now)", 200,
# self._enable_low_level_training_properties))
self._LTL_preconditions.append(
LTLProperty(
"G ( not (has_stopped_in_stop_region and in_stop_region ) )",
None))
#self._LTL_preconditions.append(
# LTLProperty("G ( not stopped_now U (veh_ahead_stopped_now or (in_stop_region and stopped_now )) )", 200,
# self._enable_low_level_training_properties))
self._LTL_preconditions.append(
LTLProperty("G ( not veh_ahead_too_close )", self._penalty_for_out_of_range,
@@ -408,14 +415,14 @@ class Follow(ManeuverBase):
def generate_learning_scenario(self):
self.generate_scenario(
enable_LTL_preconditions=False,
n_others_range=(1, 1),
n_others_range=(1, 6),
v_max_multiplier=0.75,
ego_perturb_lim=(0, 0),
veh_ahead_scenario=True)
self.env._terminate_in_goal = False
self._penalty_for_out_of_range = 200
self._penalty_for_change_lane = 170
self._penalty_for_change_lane = 200
self._reward_in_goal = 200
self._enable_low_level_training_properties = True
self._extra_action_weights_flag = True
@@ -433,11 +440,20 @@ class Follow(ManeuverBase):
self._target_veh_i, _ = self.env.get_V2V_distance()
if self._target_veh_i is not None:
self._v_ref = self.env.vehs[self._target_veh_i].v
if self.env.ego.APs['before_intersection'] and \
not self.env.vehs[self._target_veh_i].APs['before_intersection']:
x = self.env.ego.x
if rd.hlanes.near_stop_region >= x:
self._v_ref = rd.speed_limit
elif rd.hlanes.near_stop_region < x <= rd.hlanes.stop_region_centre:
self._v_ref = -(rd.speed_limit / abs(rd.hlanes.near_stop_region)) * (
x - rd.hlanes.stop_region_centre)
elif x > rd.hlanes.stop_region_centre:
self._v_ref = 0
else:
self._v_ref = self.env.vehs[self._target_veh_i].v
else:
self._v_ref = 0
#else:
# self._v_ref = rd.speed_limit
def _low_level_manual_policy(self):
return self.env.aggressive_driving_policy(EGO_INDEX)
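The new branch in this hunk sets the ego's reference velocity from its own position once the ego is still before the intersection while the followed vehicle has already passed it: hold the speed limit until the stop region is near, ramp down linearly (slope speed_limit / |near_stop_region|) to zero at the stop-region centre, and command zero beyond it. A standalone sketch of that piecewise function; the numeric defaults stand in for rd.speed_limit, rd.hlanes.near_stop_region, and rd.hlanes.stop_region_centre and are not the repository's values:

# Illustrative piecewise reference velocity mirroring the branch above.
# Default constants are placeholders, not the values used in the repository.
def reference_velocity(x, speed_limit=11.0,
                       near_stop_region=-40.0, stop_region_centre=-25.0):
    if x <= near_stop_region:
        # far from the stop region: track the speed limit
        return speed_limit
    elif x <= stop_region_centre:
        # linear ramp that reaches 0 exactly at the stop-region centre
        return -(speed_limit / abs(near_stop_region)) * (x - stop_region_centre)
    else:
        # past the centre of the stop region: command a stop
        return 0.0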
@@ -449,21 +465,32 @@ class Follow(ManeuverBase):
if self._target_veh_i is None:
return False
#elif not self._enable_low_level_training_properties: # activated only for the high-level training.
# if (APs['in_stop_region'] or APs['before_but_close_to_stop_region']) \
# and (self.env.vehs[self._target_veh_i].APs['in_intersection'] or
# self.env.vehs[self._target_veh_i].x > 0):
# return True
# else:
if self._enable_low_level_training_properties: # activated only for low-level training.
if self.env.ego.APs['has_stopped_in_stop_region'] and \
self.env.ego.APs['in_stop_region']:
self._extra_r_terminal = None
return True
elif (rd.speed_limit / 5 < self._v_ref) and \
(self.env.ego.v < self._v_ref / 2) and \
self.env.ego.acc < 0 and \
not self.env.ego.APs['veh_ahead_stopped_now']:
self._extra_r_terminal = -100
return True
else:
self._extra_r_terminal = None
return False
return False
def _features_dim_reduction(self, features_tuple):
ego_features = extract_ego_features(
features_tuple, 'pos_near_stop_region', 'v', 'v_ref', 'e_y', 'psi', 'v tan(psi/L)',
'theta', 'lane', 'acc', 'psi_dot')
'theta', 'lane', 'acc', 'psi_dot', 'pos_stop_region', 'not_in_stop_region')
if self._target_veh_i is not None:
return ego_features + extract_other_veh_features(
features_tuple, self._target_veh_i, 'rel_x', 'rel_y', 'v', 'acc')
else:
return ego_features + (0.0, 0.0, 0.0, 0.0)
return ego_features + (0.0, 0.0, 0.0, 0.0)
\ No newline at end of file
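For low-level training, the termination test in this hunk encodes two cases: a clean terminal state once the ego has stopped in the stop region, and a penalized early termination when the ego is braking far below a still-significant reference speed with no stopped vehicle ahead to justify it. Restated compactly (attribute access is simplified; the -100 penalty and the v_ref/2 and speed_limit/5 thresholds come from the diff):

# Compact restatement of the low-level termination rule in the diff above.
# Returns (terminate, extra_terminal_reward); None means no extra reward.
def follow_termination(ego_APs, ego_v, ego_acc, v_ref, speed_limit):
    if ego_APs['has_stopped_in_stop_region'] and ego_APs['in_stop_region']:
        return True, None
    if (v_ref > speed_limit / 5 and ego_v < v_ref / 2
            and ego_acc < 0 and not ego_APs['veh_ahead_stopped_now']):
        return True, -100
    return False, None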
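Finally, the feature reduction keeps the observation width constant: two ego features are added ('pos_stop_region', 'not_in_stop_region'), and when no target vehicle exists the four target-vehicle slots ('rel_x', 'rel_y', 'v', 'acc') are zero-padded. A toy illustration of the padding idea, with feature extraction itself stubbed out:

# Toy illustration of the fixed-width observation built above: target-vehicle
# features are appended when a target exists and zero-padded otherwise, so the
# policy network always sees the same input dimension.
def build_observation(ego_features, target_features=None):
    if target_features is not None:
        return tuple(ego_features) + tuple(target_features)
    return tuple(ego_features) + (0.0, 0.0, 0.0, 0.0)

# Example: both calls yield tuples of the same length.
obs_with_target = build_observation((0.1, 0.2, 0.3), (5.0, 0.0, 8.2, -0.5))
obs_without_target = build_observation((0.1, 0.2, 0.3))
assert len(obs_with_target) == len(obs_without_target)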