Skip to content
Snippets Groups Projects
Commit 70ad9bf5 authored by Jae Young Lee's avatar Jae Young Lee
Browse files

Successfully trained the low-level policies, except for the Wait maneuver.

Each low-level policy was retrained with better LTL conditions and rewards, some parts of which are also designed to encourage exploration (to prevent the vehicle from remaining stopped all the time).
parent f2171d2c
No related branches found
No related tags found
No related merge requests found
Showing
with 4 additions and 6 deletions
...@@ -56,8 +56,7 @@ class ControllerBase(PolicyBase): ...@@ -56,8 +56,7 @@ class ControllerBase(PolicyBase):
total_reward = 0 total_reward = 0
self.node_terminal_state_reached = False self.node_terminal_state_reached = False
while not self.node_terminal_state_reached: while not self.node_terminal_state_reached:
observation, reward, terminal, info = self.low_level_step_current_node( observation, reward, terminal, info = self.low_level_step_current_node()
)
if visualize_low_level_steps: if visualize_low_level_steps:
self.env.render() self.env.render()
total_reward += reward total_reward += reward
......
File deleted
File deleted
File deleted
File deleted
File deleted
File deleted
File deleted
File deleted
File deleted
File deleted
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
...@@ -29,7 +29,7 @@ class ManeuverBase(EpisodicEnvBase): ...@@ -29,7 +29,7 @@ class ManeuverBase(EpisodicEnvBase):
# _extra_action_weights_flag = True); note that a cost is defined # _extra_action_weights_flag = True); note that a cost is defined
# as a negative reward, so a cost will be summed up to the reward # as a negative reward, so a cost will be summed up to the reward
# with subtraction. # with subtraction.
_cost_weights = (1.0 * 1e-3, 1.0 * 1e-3, 0.25 * 1e-3, 1.0 * 1e-3, _cost_weights = (10.0 * 1e-3, 10.0 * 1e-3, 0.25 * 1e-3, 1.0 * 1e-3,
100.0 * 1e-3, 0.1 * 1e-3, 0.25 * 1e-3, 0.1 * 1e-3) 100.0 * 1e-3, 0.1 * 1e-3, 0.25 * 1e-3, 0.1 * 1e-3)
_extra_r_terminal = None _extra_r_terminal = None
...@@ -292,9 +292,8 @@ class ManeuverBase(EpisodicEnvBase): ...@@ -292,9 +292,8 @@ class ManeuverBase(EpisodicEnvBase):
raise NotImplemented(self.__class__.__name__ + raise NotImplemented(self.__class__.__name__ +
".generate_learning_scenario is not implemented.") ".generate_learning_scenario is not implemented.")
def generate_validation_scenario( # Override this method in the subclass if some customization is needed.
self def generate_validation_scenario(self):
): # Override this method in the subclass if some customization is needed.
self.generate_learning_scenario() self.generate_learning_scenario()
self._enable_low_level_training_properties = False self._enable_low_level_training_properties = False
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment