Commit 70ad9bf5 authored by Jae Young Lee
Browse files

Successfully trained the low-level policies, except for the Wait maneuver.

Each low-level policy was retrained with improved LTL conditions and rewards, parts of which are also designed to encourage exploration (to prevent the vehicle from remaining stopped all the time).
parent f2171d2c
...@@ -56,8 +56,7 @@ class ControllerBase(PolicyBase): ...@@ -56,8 +56,7 @@ class ControllerBase(PolicyBase):
total_reward = 0 total_reward = 0
self.node_terminal_state_reached = False self.node_terminal_state_reached = False
while not self.node_terminal_state_reached: while not self.node_terminal_state_reached:
observation, reward, terminal, info = self.low_level_step_current_node( observation, reward, terminal, info = self.low_level_step_current_node()
)
if visualize_low_level_steps: if visualize_low_level_steps:
self.env.render() self.env.render()
total_reward += reward total_reward += reward
......
...@@ -29,7 +29,7 @@ class ManeuverBase(EpisodicEnvBase): ...@@ -29,7 +29,7 @@ class ManeuverBase(EpisodicEnvBase):
# _extra_action_weights_flag = True); note that a cost is defined # _extra_action_weights_flag = True); note that a cost is defined
# as a negative reward, so a cost will be summed up to the reward # as a negative reward, so a cost will be summed up to the reward
# with subtraction. # with subtraction.
_cost_weights = (1.0 * 1e-3, 1.0 * 1e-3, 0.25 * 1e-3, 1.0 * 1e-3, _cost_weights = (10.0 * 1e-3, 10.0 * 1e-3, 0.25 * 1e-3, 1.0 * 1e-3,
100.0 * 1e-3, 0.1 * 1e-3, 0.25 * 1e-3, 0.1 * 1e-3) 100.0 * 1e-3, 0.1 * 1e-3, 0.25 * 1e-3, 0.1 * 1e-3)
_extra_r_terminal = None _extra_r_terminal = None
...@@ -292,9 +292,8 @@ class ManeuverBase(EpisodicEnvBase): ...@@ -292,9 +292,8 @@ class ManeuverBase(EpisodicEnvBase):
raise NotImplemented(self.__class__.__name__ + raise NotImplemented(self.__class__.__name__ +
".generate_learning_scenario is not implemented.") ".generate_learning_scenario is not implemented.")
def generate_validation_scenario( # Override this method in the subclass if some customization is needed.
self def generate_validation_scenario(self):
): # Override this method in the subclass if some customization is needed.
self.generate_learning_scenario() self.generate_learning_scenario()
self._enable_low_level_training_properties = False self._enable_low_level_training_properties = False
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment