Commit 70ad9bf5 authored by Jae Young Lee's avatar Jae Young Lee

Successfully trained all low-level policies except for the Wait maneuver.

Each low-level policy was retrained with improved LTL conditions and rewards, some of which are designed to encourage exploration (to prevent the vehicle from remaining stopped all the time).
parent f2171d2c
......@@ -56,8 +56,7 @@ class ControllerBase(PolicyBase):
total_reward = 0
self.node_terminal_state_reached = False
while not self.node_terminal_state_reached:
observation, reward, terminal, info = self.low_level_step_current_node(
)
observation, reward, terminal, info = self.low_level_step_current_node()
if visualize_low_level_steps:
self.env.render()
total_reward += reward
......
......@@ -29,7 +29,7 @@ class ManeuverBase(EpisodicEnvBase):
# _extra_action_weights_flag = True); note that a cost is defined
# as a negative reward, so a cost will be summed up to the reward
# with subtraction.
_cost_weights = (1.0 * 1e-3, 1.0 * 1e-3, 0.25 * 1e-3, 1.0 * 1e-3,
_cost_weights = (10.0 * 1e-3, 10.0 * 1e-3, 0.25 * 1e-3, 1.0 * 1e-3,
100.0 * 1e-3, 0.1 * 1e-3, 0.25 * 1e-3, 0.1 * 1e-3)
_extra_r_terminal = None
......@@ -292,9 +292,8 @@ class ManeuverBase(EpisodicEnvBase):
raise NotImplemented(self.__class__.__name__ +
".generate_learning_scenario is not implemented.")
def generate_validation_scenario(
self
): # Override this method in the subclass if some customization is needed.
# Override this method in the subclass if some customization is needed.
def generate_validation_scenario(self):
# Default validation setup: reuse the learning scenario, but switch off the
# training-only LTL properties / reward shaping afterwards.
self.generate_learning_scenario()
self._enable_low_level_training_properties = False
......
......@@ -16,30 +16,51 @@ class KeepLane(ManeuverBase):
def _init_LTL_preconditions(self):
self._LTL_preconditions.append(LTLProperty("G ( not veh_ahead )", 0))
self._LTL_preconditions.append(
LTLProperty("G ( not stopped_now )", 200,
self._enable_low_level_training_properties))
#self._LTL_preconditions.append(
# LTLProperty("G ( not stopped_now )", 100,
# self._enable_low_level_training_properties))
self._LTL_preconditions.append(
LTLProperty(
"G ( (lane and target_lane) or (not lane and not target_lane) )",
200, self._enable_low_level_training_properties))
100, self._enable_low_level_training_properties))
def generate_learning_scenario(self):
self.generate_scenario(
enable_LTL_preconditions=False,
ego_pos_range=(rd.hlanes.start_pos, rd.hlanes.end_pos),
ego_pos_range=(rd.intersection_width_w_offset, rd.hlanes.end_pos),
ego_perturb_lim=(rd.hlanes.width / 4, np.pi / 6),
v_max_multiplier=0.75,
ego_heading_towards_lane_centre=True)
# the goal reward and termination is led by the SimpleIntersectionEnv
self.env._terminate_in_goal = True
self.env._reward_in_goal = 200
self.env._terminate_in_goal = False
self.env._reward_in_goal = None
self._enable_low_level_training_properties = True
def generate_validation_scenario(self):
# Validation for KeepLane: spawn the ego anywhere between the horizontal-lane
# start and the intersection (x in [start_pos, 0]) with bounded heading /
# lateral perturbations; LTL precondition checks stay disabled here.
self.generate_scenario(
enable_LTL_preconditions=False,
ego_pos_range=(rd.hlanes.start_pos, 0),
ego_perturb_lim=(rd.hlanes.width / 4, np.pi / 6),
ego_heading_towards_lane_centre=True)
# the goal reward and termination is led by the SimpleIntersectionEnv
self.env._terminate_in_goal = False
self.env._reward_in_goal = None
@staticmethod
def _features_dim_reduction(features_tuple):
return extract_ego_features(features_tuple, 'pos_near_stop_region',
'v', 'v_ref', 'e_y', 'psi', 'theta', 'acc',
'psi_dot')
return extract_ego_features(features_tuple, 'v', 'v_ref', 'e_y', 'psi', 'v tan(psi/L)', 'theta', 'lane', 'acc', 'psi_dot')
@property
def extra_termination_condition(self):
# Extra termination used only during low-level training: terminate with a
# -100 penalty when the ego has slowed to below v_ref / 5 while still
# braking (acc < 0) — discourages the "stay stopped" local optimum.
if self._enable_low_level_training_properties: # activated only for the low-level training.
if (self.env.ego.v < self._v_ref / 5) and self.env.ego.acc < 0:
self._extra_r_terminal = -100
return True
else:
self._extra_r_terminal = None
return False
return False
class Stop(ManeuverBase):
......@@ -47,28 +68,30 @@ class Stop(ManeuverBase):
_terminate_in_goal = True
_reward_in_goal = None
_penalty_in_violation = None
def _init_param(self):
# Initialize maneuver parameters: set the reference speed and lock the
# target lane to whichever lane the ego occupies at maneuver start.
self._set_v_ref()
self._target_lane = self.env.ego.APs['lane']
def _init_LTL_preconditions(self):
self._LTL_preconditions.append(
LTLProperty("G ( not has_stopped_in_stop_region )",
self._penalty(self._reward_in_goal)))
#self._LTL_preconditions.append(
# LTLProperty("G ( not has_stopped_in_stop_region )",
# self._penalty(self._reward_in_goal)))
self._LTL_preconditions.append(
LTLProperty(
"G ( (before_but_close_to_stop_region or in_stop_region) U has_stopped_in_stop_region )",
0))
self._penalty_in_violation))
self._LTL_preconditions.append(
LTLProperty("G ( not stopped_now U in_stop_region )", 200,
LTLProperty("G ( not stopped_now U in_stop_region )", 100,
self._enable_low_level_training_properties))
self._LTL_preconditions.append(
LTLProperty(
"G ( (lane and target_lane) or (not lane and not target_lane) )",
200, self._enable_low_level_training_properties))
100, self._enable_low_level_training_properties))
def _update_param(self):
self._set_v_ref()
......@@ -89,8 +112,10 @@ class Stop(ManeuverBase):
ego_pos_range=(rd.hlanes.near_stop_region,
-rd.intersection_width_w_offset / 2),
ego_perturb_lim=(rd.hlanes.width / 4, np.pi / 6),
v_max_multiplier=0.75,
ego_heading_towards_lane_centre=True)
self._reward_in_goal = 200
self._penalty_in_violation = 150
self._enable_low_level_training_properties = True
def _low_level_manual_policy(self):
......@@ -99,8 +124,32 @@ class Stop(ManeuverBase):
@staticmethod
def _features_dim_reduction(features_tuple):
return extract_ego_features(features_tuple, 'pos_near_stop_region',
'v', 'v_ref', 'e_y', 'psi', 'theta', 'acc',
'psi_dot', 'not_in_stop_region')
'v', 'v_ref', 'e_y', 'psi', 'v tan(psi/L)', 'theta', 'lane', 'acc',
'psi_dot', 'pos_stop_region', 'not_in_stop_region')
@property
def extra_termination_condition(self):
# Goal case: the ego has come to a stop inside the stop region.
if self.env.ego.APs['has_stopped_in_stop_region']:
if self._reward_in_goal is not None:
self._extra_r_terminal = self._reward_in_goal
# Shape the goal reward by alignment quality: it decays exponentially
# with heading error (theta), lateral offset from the target-lane
# centre, and (with weight 0.25) the angle psi.
self._extra_r_terminal *= np.exp(- pow(self.env.ego.theta, 2)
- pow(self.env.ego.y - rd.hlanes.centres[self._target_lane], 2)
- 0.25*pow(self.env.ego.psi, 2))
else:
self._extra_r_terminal = None
return True
# Training-only case: penalize braking to well below the reference speed
# while v_ref is still above the "creep" threshold (speed_limit / 5).
elif self._enable_low_level_training_properties: # activated only for the low-level training.
if (rd.speed_limit / 5 < self._v_ref) and \
(self.env.ego.v < self._v_ref / 2) and self.env.ego.acc < 0:
self._extra_r_terminal = -100
return True
else:
self._extra_r_terminal = None
return False
return False
class Wait(ManeuverBase):
......@@ -111,32 +160,38 @@ class Wait(ManeuverBase):
def _init_LTL_preconditions(self):
self._LTL_preconditions.append(
LTLProperty(
"G ( (in_stop_region and stopped_now) U highest_priority )",
0))
"G ( (in_stop_region and stopped_now) U (highest_priority and intersection_is_clear))", 0,
not self._enable_low_level_training_properties)) # not available in low-level training...
self._LTL_preconditions.append(
LTLProperty("G ( not (in_intersection and highest_priority) )",
self._penalty(self._reward_in_goal)))
self._LTL_preconditions.append(
LTLProperty(
"G ( in_stop_region U (highest_priority and intersection_is_clear) )", 150, self._enable_low_level_training_properties))
self._LTL_preconditions.append(
LTLProperty(
"G ( (lane and target_lane) or (not lane and not target_lane) )",
100, self._enable_low_level_training_properties))
def _init_param(self):
ego = self.env.ego
self._v_ref = rd.speed_limit if self.env.ego.APs[
'highest_priority'] else 0
self._target_lane = ego.APs['lane']
self._ego_stop_count = 0
self._update_param()
self._target_lane = self.env.ego.APs['lane']
def _update_param(self):
if self.env.ego.APs['highest_priority']:
self._v_ref = rd.speed_limit
if self._enable_low_level_training_properties:
if self.env.n_others_with_higher_priority == 0:
self._ego_stop_count += 1
if self.env.ego.APs['highest_priority'] and self.env.ego.APs['intersection_is_clear']:
self._v_ref = rd.speed_limit / 5
else:
self._v_ref = 0
def generate_learning_scenario(self):
n_others = np.random.randint(0, 3)
n_others = 0 if np.random.rand() <= 0 else np.random.randint(1, 4)
self.generate_scenario(
enable_LTL_preconditions=True,
timeout=62,
n_others_range=(n_others, n_others),
ego_pos_range=rd.hlanes.stop_region,
n_others_stopped_in_stop_region=n_others,
......@@ -145,32 +200,46 @@ class Wait(ManeuverBase):
ego_heading_towards_lane_centre=True)
max_waited_count = 0
min_waited_count = 1
for veh in self.env.vehs[1:]:
max_waited_count = max(max_waited_count, veh.waited_count)
min_waited_count = min(min_waited_count, veh.waited_count)
min_waited_count = min(min_waited_count, max_waited_count)
self._extra_action_weights_flag = False
self.env.ego.waited_count = np.random.randint(0, max_waited_count + 21)
if np.random.rand() <= 0.2:
self.env.ego.waited_count = np.random.randint(0, min_waited_count+1)
else:
self.env.ego.waited_count = np.random.randint(min_waited_count, max_waited_count + 21)
self.env.init_APs(False)
self._reward_in_goal = 200
self._extra_r_on_timeout = -200
self._enable_low_level_training_properties = True
self._ego_stop_count = 0
def generate_validation_scenario(self):
# Use the base-class validation setup unchanged; the commented-out line
# below would re-enable the training-only properties during validation.
super().generate_validation_scenario()
#self._enable_low_level_training_properties = True
@property
def extra_termination_condition(self):
# Training-only stochastic termination: once the ego has the highest
# priority and the intersection is clear it should proceed; if it is still
# (near-)stopped (v <= v_ref / 10), terminate with a -100 penalty with
# probability 0.05 per step (randomized so the episode does not always
# end at the first such step).
if self._enable_low_level_training_properties: # activated only for the low-level training.
if self.env.ego.APs['highest_priority'] and self.env.ego.APs['intersection_is_clear'] \
and np.random.rand() <= 0.05 and self.env.ego.v <= self._v_ref / 10:
self._extra_r_terminal = - 100
return True
else:
self._extra_r_terminal = None
return False
return False
@staticmethod
def _features_dim_reduction(features_tuple):
return extract_ego_features(
features_tuple, 'v', 'v_ref', 'psi', 'theta', 'acc', 'psi_dot',
features_tuple, 'v', 'v_ref', 'e_y', 'psi', 'v tan(psi/L)', 'theta', 'lane', 'acc', 'psi_dot',
'pos_stop_region', 'intersection_is_clear', 'highest_priority')
......@@ -195,10 +264,10 @@ class ChangeLane(ManeuverBase):
LTLProperty("G ( on_route and not over_speed_limit )",
self._violation_penalty_in_low_level_training,
self._enable_low_level_training_properties))
self._LTL_preconditions.append(
LTLProperty("G ( not stopped_now )",
self._violation_penalty_in_low_level_training,
self._enable_low_level_training_properties))
100, self._enable_low_level_training_properties))
@property
def goal_achieved(self):
......@@ -217,12 +286,25 @@ class ChangeLane(ManeuverBase):
self.generate_scenario(
enable_LTL_preconditions=False,
timeout=15,
ego_pos_range=(rd.hlanes.start_pos, rd.hlanes.end_pos),
ego_pos_range=(rd.intersection_width_w_offset, rd.hlanes.end_pos),
ego_lane=np.random.choice([0, 1]),
ego_perturb_lim=(rd.hlanes.width / 5, np.pi / 6))
ego_perturb_lim=(rd.hlanes.width / 4, np.pi / 6),
v_max_multiplier=0.75)
# print('our range was %s, %s, ego at %s' % (before_intersection, after_intersection, self.env.ego.x))
self._reward_in_goal = 200
self._violation_penalty_in_low_level_training = 200
self._violation_penalty_in_low_level_training = 150
self._enable_low_level_training_properties = True
def generate_validation_scenario(self):
# Validation for ChangeLane: sample the ego anywhere along the horizontal
# lanes; the goal reward, violation penalty, and training-only properties
# are configured the same way as in the learning scenario.
self.generate_scenario(
enable_LTL_preconditions=False,
ego_pos_range=(rd.hlanes.start_pos, rd.hlanes.end_pos),
ego_perturb_lim=(rd.hlanes.width / 4, np.pi / 6))
# print('our range was %s, %s, ego at %s' % (before_intersection, after_intersection, self.env.ego.x))
self._reward_in_goal = 200
self._violation_penalty_in_low_level_training = 150
self._enable_low_level_training_properties = True
# TODO: It is not a good idea to specify features by numbers, as the list
......@@ -237,49 +319,77 @@ class ChangeLane(ManeuverBase):
class Follow(ManeuverBase):
_target_veh_i = None
_penalty_for_out_of_follow_range = None
_penalty_for_out_of_range = None
_penalty_for_change_lane = None
def _init_LTL_preconditions(self):
self._LTL_preconditions.append(
LTLProperty("G ( veh_ahead )",
self._penalty_for_out_of_follow_range))
LTLProperty("G ( veh_ahead )", self._penalty_for_out_of_range))
self._LTL_preconditions.append(
LTLProperty(
"G ( (lane and target_lane) or (not lane and not target_lane) )",
self._penalty_for_out_of_follow_range))
self._penalty_for_change_lane))
#self._LTL_preconditions.append(
# LTLProperty("G ( not stopped_now U veh_ahead_stopped_now)", 200,
# self._enable_low_level_training_properties))
self._LTL_preconditions.append(
LTLProperty("G ( not stopped_now U veh_ahead_stopped_now)", 200,
LTLProperty("G ( not veh_ahead_too_close )", self._penalty_for_out_of_range,
self._enable_low_level_training_properties))
self._LTL_preconditions.append(
LTLProperty("G ( not veh_ahead_too_close )", 200,
self._enable_low_level_training_properties))
LTLProperty("G ( not in_stop_region)", 0, not self._enable_low_level_training_properties))
def generate_learning_scenario(self):
self.generate_scenario(
enable_LTL_preconditions=False,
n_others_range=(1, 1),
ego_perturb_lim=(rd.hlanes.width / 2, np.pi / 4),
v_max_multiplier=0.75,
ego_perturb_lim=(0, 0),
veh_ahead_scenario=True)
self.env._terminate_in_goal = False
self._penalty_for_out_of_follow_range = 200
self._penalty_for_out_of_range = 200
self._penalty_for_change_lane = 170
self._enable_low_level_training_properties = True
def generate_validation_scenario(self):
# For Follow, validation uses exactly the same scenario setup as learning.
self.generate_learning_scenario()
def _update_param(self):
# Refresh the index of the vehicle being followed; when one exists, match
# its speed by using it as the reference velocity.
self._target_veh_i, _ = self.env.get_V2V_distance()
if self._target_veh_i is not None:
self._v_ref = self.env.vehs[self._target_veh_i].v
def _low_level_manual_policy(self):
# Manual fallback: delegate to the environment's scripted aggressive
# driving policy, applied to the ego vehicle.
return self.env.aggressive_driving_policy(EGO_INDEX)
@property
def extra_termination_condition(self):
APs = self.env.ego.APs
# Nothing to follow -> no extra termination.
if self._target_veh_i is None:
return False
# High-level use only (training properties disabled): end the Follow
# maneuver when the ego is in, or close before, the stop region while the
# followed vehicle has already entered or passed the intersection (x > 0).
elif not self._enable_low_level_training_properties: # activated only for the high-level training.
if (APs['in_stop_region'] or APs['before_but_close_to_stop_region']) \
and (self.env.vehs[self._target_veh_i].APs['in_intersection'] or
self.env.vehs[self._target_veh_i].x > 0):
return True
else:
return False
return False
def _features_dim_reduction(self, features_tuple):
ego_features = extract_ego_features(
features_tuple, 'v', 'v_ref', 'e_y', 'psi', 'v tan(psi/L)',
'theta', 'lane', 'e_y,lane', 'acc', 'psi_dot')
features_tuple, 'pos_near_stop_region', 'v', 'v_ref', 'e_y', 'psi', 'v tan(psi/L)',
'theta', 'lane', 'acc', 'psi_dot')
if self._target_veh_i is not None:
return ego_features + extract_other_veh_features(
features_tuple, self._target_veh_i, 'rel_x', 'rel_y', 'v',
'acc')
features_tuple, self._target_veh_i, 'rel_x', 'rel_y', 'v', 'acc')
else:
return ego_features + (0.0, 0.0, 0.0, 0.0)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment