Commit 70ad9bf5 authored by Jae Young Lee

Successfully trained the low-level policies, except for the Wait maneuver.

Each low-level policy was retrained with better LTL conditions and rewards, some of which are also designed to encourage exploration (to prevent the vehicle from staying stopped all the time).
parent f2171d2c
......@@ -56,8 +56,7 @@ class ControllerBase(PolicyBase):
total_reward = 0
self.node_terminal_state_reached = False
while not self.node_terminal_state_reached:
observation, reward, terminal, info = self.low_level_step_current_node(
)
observation, reward, terminal, info = self.low_level_step_current_node()
if visualize_low_level_steps:
self.env.render()
total_reward += reward
......
......@@ -29,7 +29,7 @@ class ManeuverBase(EpisodicEnvBase):
# _extra_action_weights_flag = True); note that a cost is defined
# as a negative reward, so a cost will be summed up to the reward
# with subtraction.
_cost_weights = (1.0 * 1e-3, 1.0 * 1e-3, 0.25 * 1e-3, 1.0 * 1e-3,
_cost_weights = (10.0 * 1e-3, 10.0 * 1e-3, 0.25 * 1e-3, 1.0 * 1e-3,
100.0 * 1e-3, 0.1 * 1e-3, 0.25 * 1e-3, 0.1 * 1e-3)
_extra_r_terminal = None
......@@ -292,9 +292,8 @@ class ManeuverBase(EpisodicEnvBase):
raise NotImplemented(self.__class__.__name__ +
".generate_learning_scenario is not implemented.")
def generate_validation_scenario(
self
): # Override this method in the subclass if some customization is needed.
# Override this method in the subclass if some customization is needed.
def generate_validation_scenario(self):
self.generate_learning_scenario()
self._enable_low_level_training_properties = False
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment