Bug fixes and improvements to MCTS and others
6 unresolved threads
- Fixed an error in the mean and std computation.
- Improved MCTS.
- Fixed a bug of starting with start_node.
- Fixed the miscalculation of mean and std.
- Added a timeout to prevent infinite loops (see the sketch after this list).
- Changed the output format in mcts.py.
- Updated and tested mcts.py.
- Refactored env -> worlds.
- Changed AP ('before_but_close_to_stop_region' --> 'close_to_stop_region').
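For readers skimming the diff threads below, here is a minimal, self-contained sketch of what the reworked evaluation loop looks like after these changes. `DummyController`, `DT`, and the reward values are illustrative stand-ins, not the project's real classes; `timeout = 40` and the loop structure (transition before stepping, and the timeout check) mirror the diffs shown in the threads.

```python
# Minimal sketch of the reworked episode loop; DummyController and DT are
# assumptions for illustration, not code from the repository.
DT = 0.1       # assumed step length in seconds
timeout = 40   # 40 sec. timeout for each episode, as in the diff below

class DummyController:
    """Stand-in exposing the two calls the evaluation loop uses."""
    def __init__(self):
        self.steps = 0

    def do_transition(self):
        # In the real code this selects the next option/maneuver.
        pass

    def step_current_node(self, visualize_low_level_steps=False):
        # Returns (features, reward, terminal, info), like the real controller.
        self.steps += 1
        return None, 1.0, self.steps >= 5, {}

controller = DummyController()
episode_reward, t = 0.0, 0.0
while True:
    # Transition *before* stepping, so the start node (start option) itself
    # is never executed -- the start_node fix listed above.
    controller.do_transition()
    _, R, terminal, info = controller.step_current_node()
    episode_reward += R
    t += DT
    # The timeout guards against episodes that never reach a terminal state.
    if terminal or t > timeout:
        break

print("episode reward:", episode_reward)
```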
Activity
129 129 print("Termination reason(s):") 130 130 for reason, count_list in termination_reason_list.items(): 131 131 count_list = np.array(count_list) 132 while count_list.size != nb_trials: 133 count_list = np.append(count_list,0) 134 Fixed the mean and std issue.
Edited by Jae Young Lee
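To make the thread above concrete, here is a toy example (with made-up counts) of why the zero-padding matters: a termination reason that shows up in only some trials would otherwise be averaged over the trials where it occurred, not over all `nb_trials`.

```python
import numpy as np

nb_trials = 4
# Made-up counts: the reason occurred in only 2 of the 4 trials.
count_list = np.array([3, 1])

# Without padding, the mean/std are taken over 2 entries only.
print(np.mean(count_list), np.std(count_list))   # 2.0 1.0

# Padding with zeros up to nb_trials gives the true per-trial average.
while count_list.size != nb_trials:
    count_list = np.append(count_list, 0)
print(np.mean(count_list), np.std(count_list))   # 1.0 ~1.22
```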
             debug=debug)

 # Evaluate
-print("\nConducting {} trials of {} episodes each".format(
-    nb_trials, nb_episodes))
+print("\nConducting {} trials of {} episodes each".format(nb_trials, nb_episodes))
+timeout = 40  # 40 sec. timeout for each episode
...
-                step_current_node(visualize_low_level_steps=visualize)
-        episode_reward += R
-        # print('Intermediate Reward: %f (ego x = %f)' %
-        #       (R, options.env.vehs[0].x))
-        # print('')
-        if terminal:
-            if 'episode_termination_reason' in info:
-                termination_reason = info['episode_termination_reason']
-                if termination_reason in trial_termination_reason_counter:
-                    trial_termination_reason_counter[termination_reason] += 1
-                else:
-                    trial_termination_reason_counter[termination_reason] = 1
-        if options.controller.can_transition():
-            options.controller.do_transition()
+        t = 0
+        while True:

Changed due to the issue of starting with the start node (start option).
Edited by Jae Young Lee
-                else:
-                    trial_termination_reason_counter[termination_reason] = 1
-        if options.controller.can_transition():
-            options.controller.do_transition()
+        t = 0
+        while True:
+            options.controller.do_transition()
+
+            features, R, terminal, info = options.controller.step_current_node(visualize_low_level_steps=visualize)
+            episode_reward += R
+            t += DT
+            # print('Intermediate Reward: %f (ego x = %f)' %
+            #       (R, options.env.vehs[0].x))
+            # print('')
+
+            if terminal or t > timeout:
...
     print("Trial {} Termination reason(s):".format(num_tr))
-    for reason, count_list in trial_termination_reason_counter.items():
-        count_list = np.array(count_list)
-        print("{}: Avg: {}, Std: {}".format(reason, np.mean(count_list),
-                                            np.std(count_list)))
+    for reason, count in trial_termination_reason_counter.items():
+        print("{}: {}".format(reason, count))
     print("\n")

     overall_reward_list += reward_list
-    overall_success_accuracy += [num_successes * 1.0 / nb_episodes]
+    overall_success_percent_list += [num_successes * 100.0 / nb_episodes]

 print("===========================")
-print('Overall: Reward = (Avg: {}, Std: {}), Success = (Avg: {}, Std: {})\n'.\
+print('Overall: Reward = (Avg: {:.2f}, Std: {:.2f}), Success = (Avg: {:.2f}, Std: {:.2f})\n'.\
       format(np.mean(overall_reward_list), np.std(overall_reward_list),
-             np.mean(overall_success_accuracy), np.std(overall_success_accuracy)))
+             np.mean(overall_success_percent_list), np.std(overall_success_percent_list)))

 print("Termination reason(s):")
 for reason, count_list in overall_termination_reason_list.items():
     count_list = np.array(count_list)
-    print("{}: Avg: {}, Std: {}".format(reason, np.mean(count_list),
-                                        np.std(count_list)))
+    while count_list.size != nb_trials:
+        count_list = np.append(count_list, 0)
+
+    print("{}: Avg: {:.2f}, Std: {:.2f}".format(reason, np.mean(count_list), np.std(count_list)))

mentioned in commit 6b50d1d1
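As a usage note on the reformatted summary line: with fabricated per-trial numbers (for illustration only, not results from the repository), the new `{:.2f}` format and the percent-based success list produce output like the following.

```python
import numpy as np

# Illustrative values only -- not results from the repository.
overall_reward_list = [10.4, 12.9, 11.1]
overall_success_percent_list = [80.0, 90.0, 100.0]  # success is now reported in percent

print('Overall: Reward = (Avg: {:.2f}, Std: {:.2f}), Success = (Avg: {:.2f}, Std: {:.2f})'
      .format(np.mean(overall_reward_list), np.std(overall_reward_list),
              np.mean(overall_success_percent_list), np.std(overall_success_percent_list)))
# Overall: Reward = (Avg: 11.47, Std: 1.05), Success = (Avg: 90.00, Std: 8.16)
```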