from .maneuver_base import ManeuverBase
from env.simple_intersection.constants import *
import env.simple_intersection.road_geokinemetry as rd
from env.simple_intersection.features import extract_ego_features, extract_other_veh_features
from model_checker.simple_intersection import LTLProperty

import numpy as np

# TODO: separate out into different files?? is it really needed?


class KeepLane(ManeuverBase):
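    """Default maneuver that keeps the ego vehicle in its current lane while
    tracking the road speed limit as the reference speed."""
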
    def _init_param(self):
        self._v_ref = rd.speed_limit
        self._target_lane = self.env.ego.APs['lane']

    def _init_LTL_preconditions(self):
        self._LTL_preconditions.append(
            LTLProperty("G ( not stopped_now )", 100,
                        self._enable_low_level_training_properties))

        self._LTL_preconditions.append(
            LTLProperty(
                "G ( (lane and target_lane) or (not lane and not target_lane) )",
                100, self._enable_low_level_training_properties))

    def generate_learning_scenario(self):
        self.generate_scenario(
            enable_LTL_preconditions=False,
            ego_pos_range=(rd.intersection_width_w_offset, rd.hlanes.end_pos),
            ego_perturb_lim=(rd.hlanes.width / 4, np.pi / 6),
            v_max_multiplier=0.75,
            ego_heading_towards_lane_centre=True)
        # the goal reward and termination are handled by the SimpleIntersectionEnv
        self.env._terminate_in_goal = False
        self.env._reward_in_goal = None
        self._enable_low_level_training_properties = True
        self._extra_action_weights_flag = True

    def generate_validation_scenario(self):
        self.generate_scenario(
            enable_LTL_preconditions=False,
            ego_pos_range=(rd.hlanes.start_pos, 0),
            ego_perturb_lim=(rd.hlanes.width / 4, np.pi / 6),
            ego_heading_towards_lane_centre=True)
        # the goal reward and termination are handled by the SimpleIntersectionEnv
        self.env._terminate_in_goal = False
        self.env._reward_in_goal = None

    @staticmethod
    def _features_dim_reduction(features_tuple):
        return extract_ego_features(features_tuple, 'v', 'v_ref', 'e_y',
                                    'psi', 'v tan(psi/L)', 'theta', 'lane',
                                    'acc', 'psi_dot')

    @property
    def initiation_condition(self):
        """A virtual function (property) from ManeuverBase.

        As KeepLane is the default maneuver, it has to be available to be chosen
        at any time, in any state, and under any condition (refer to
        initiation_condition of ManeuverBase for the usual case).

        :returns True.
        """

        return True

    @property
    def extra_termination_condition(self):
        if self._enable_low_level_training_properties:  # activated only for the low-level training.
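            # terminate with a penalty if the ego has nearly stopped (v < v_ref / 5) while still decelerating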
            if (self.env.ego.v < self._v_ref / 5) and self.env.ego.acc < 0:
                self._extra_r_terminal = -100
                return True
            else:
                self._extra_r_terminal = None
                return False

        return False


class Halt(ManeuverBase):
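    """Maneuver that brings the ego vehicle to a stop in its current lane
    (the reference speed is 0 during low-level training)."""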

    _terminate_in_goal = True
    _reward_in_goal = None

    _penalty_in_violation = None
    _ego_pos_range = (rd.intersection_width_w_offset, rd.hlanes.end_pos)

    def _init_param(self):
        self._v_ref = 0 if self._enable_low_level_training_properties else rd.speed_limit
        self._target_lane = self.env.ego.APs['lane']

    def _init_LTL_preconditions(self):

        self._LTL_preconditions.append(
            LTLProperty(
                "G ( (veh_ahead and before_but_close_to_stop_region) U highest_priority )",
                None, not self._enable_low_level_training_properties))

        self._LTL_preconditions.append(
            LTLProperty("G ( not stopped_now )", self._penalty(self._reward_in_goal),
                        not self._enable_low_level_training_properties))

        self._LTL_preconditions.append(
            LTLProperty(
                "G ( (lane and target_lane) or (not lane and not target_lane) )",
                100, self._enable_low_level_training_properties))

    def generate_learning_scenario(self):
        self.generate_scenario(
            ego_pos_range=self._ego_pos_range,
            ego_perturb_lim=(rd.hlanes.width / 4, np.pi / 6),
            ego_heading_towards_lane_centre=True)
        self.env._terminate_in_goal = False
        self.env._reward_in_goal = None
        self._reward_in_goal = 200
        self._enable_low_level_training_properties = True
        self._extra_action_weights_flag = True

    def generate_validation_scenario(self):
        self._ego_pos_range = (rd.hlanes.start_pos, rd.hlanes.end_pos)
        self.generate_learning_scenario()

    def _low_level_manual_policy(self):
        return self.env.aggressive_driving_policy(EGO_INDEX)

    @staticmethod
    def _features_dim_reduction(features_tuple):
        return extract_ego_features(features_tuple, 'v', 'v_ref', 'e_y', 'psi',
                                    'v tan(psi/L)', 'theta', 'lane', 'acc', 'psi_dot')

    @property
    def extra_termination_condition(self):
        if self._enable_low_level_training_properties:  # activated only for the low-level training.
            if self.env.ego.APs['stopped_now']:
                if self._reward_in_goal is not None:
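                    # shape the goal reward: it is largest when theta, the lateral offset
                    # from the target lane centre, and psi are all close to zero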
                    self._extra_r_terminal = self._reward_in_goal
                    self._extra_r_terminal *= np.exp(- pow(self.env.ego.theta, 2)
                                                     - pow(self.env.ego.y - rd.hlanes.centres[self._target_lane], 2)
                                                     - 0.25 * pow(self.env.ego.psi, 2))
                else:
                    self._extra_r_terminal = None
                return True

            else:
                self._extra_r_terminal = None
                return False

        return False


class Stop(ManeuverBase):
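    """Maneuver that decelerates the ego vehicle and brings it to a stop inside
    the stop region before the intersection."""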

    _terminate_in_goal = True
    _reward_in_goal = None

    _penalty_in_violation = None

    def _init_param(self):
        self._set_v_ref()
        self._target_lane = self.env.ego.APs['lane']

    def _init_LTL_preconditions(self):
        self._LTL_preconditions.append(
            LTLProperty("G ( not has_stopped_in_stop_region )",
                        self._penalty(self._reward_in_goal), not self._enable_low_level_training_properties))

        # before_intersection rather than "before_but_close_to_stop_region or in_stop_region"?
        self._LTL_preconditions.append(
            LTLProperty(
                "G ( (before_but_close_to_stop_region or in_stop_region) U has_stopped_in_stop_region )",
                self._penalty_in_violation))

        self._LTL_preconditions.append(
            LTLProperty("G ( not stopped_now U in_stop_region )", 100,
                        self._enable_low_level_training_properties))

        self._LTL_preconditions.append(
            LTLProperty(
                "G ( (lane and target_lane) or (not lane and not target_lane) )",
                100, self._enable_low_level_training_properties))

    def _update_param(self):
        self._set_v_ref()

    def _set_v_ref(self):
        self._v_ref = rd.speed_limit
        #if self._enable_low_level_training_properties:
        x = self.env.ego.x
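        # between near_stop_region and the stop-region centre, v_ref decreases linearly,
        # reaching 0 at the centre; past the centre it stays 0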
        if rd.hlanes.near_stop_region < x <= rd.hlanes.stop_region_centre:
            self._v_ref = -(rd.speed_limit / abs(rd.hlanes.near_stop_region)) * (x - rd.hlanes.stop_region_centre)
        elif x > rd.hlanes.stop_region_centre:
            self._v_ref = 0

    def generate_learning_scenario(self):
        self.generate_scenario(
            ego_pos_range=(rd.hlanes.near_stop_region,
                           -rd.intersection_width_w_offset / 2),
            ego_perturb_lim=(rd.hlanes.width / 4, np.pi / 6),
            v_max_multiplier=0.75,
            ego_heading_towards_lane_centre=True)
        self._reward_in_goal = 200
        self._penalty_in_violation = 150
        self._enable_low_level_training_properties = True
        self._extra_action_weights_flag = True

    def _low_level_manual_policy(self):
        return self.env.aggressive_driving_policy(EGO_INDEX)

    @staticmethod
    def _features_dim_reduction(features_tuple):
        return extract_ego_features(features_tuple, 'pos_near_stop_region',
                                    'v', 'v_ref', 'e_y', 'psi', 'v tan(psi/L)', 'theta', 'lane', 'acc',
                                    'psi_dot', 'pos_stop_region', 'not_in_stop_region')

    @property
    def extra_termination_condition(self):
        if self._enable_low_level_training_properties:  # activated only for the low-level training.
            if self.env.ego.APs['has_stopped_in_stop_region']:
                if self._reward_in_goal is not None:
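                    # scale the goal reward so that it peaks when theta, the lateral offset
                    # from the target lane centre, and psi are all close to zero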
                    self._extra_r_terminal = self._reward_in_goal
                    self._extra_r_terminal *= np.exp(- pow(self.env.ego.theta, 2)
                                                     - pow(self.env.ego.y - rd.hlanes.centres[self._target_lane], 2)
                                                     - 0.25 * pow(self.env.ego.psi, 2))
                else:
                    self._extra_r_terminal = None
                return True

            elif (rd.speed_limit / 5 < self._v_ref) and \
                    (self.env.ego.v < self._v_ref / 2) and self.env.ego.acc < 0:
                self._extra_r_terminal = -100
                return True

            else:
                self._extra_r_terminal = None
                return False

        return False


class Wait(ManeuverBase):
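    """Maneuver that keeps the ego vehicle stopped in the stop region until it has
    the highest priority and the intersection is clear."""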

    _reward_in_goal = None
    _terminate_in_goal = True

    def _init_LTL_preconditions(self):

        self._LTL_preconditions.append(
            LTLProperty("G ( (in_stop_region and stopped_now) U (highest_priority and intersection_is_clear))",
                        None, not self._enable_low_level_training_properties))  # not available in low-level training...

        self._LTL_preconditions.append(
            LTLProperty("G ( not (in_intersection and highest_priority and intersection_is_clear) )",
                        self._penalty(self._reward_in_goal)))

        self._LTL_preconditions.append(
           LTLProperty(
               "G ( in_stop_region U (highest_priority and intersection_is_clear) )", 150,
               self._enable_low_level_training_properties))

        self._LTL_preconditions.append(
           LTLProperty(
               "G ( (lane and target_lane) or (not lane and not target_lane) )",
               150, self._enable_low_level_training_properties))

    def _init_param(self):
        self._update_param()
        self._target_lane = self.env.ego.APs['lane']

    def _update_param(self):
        if self.env.ego.APs['highest_priority'] and self.env.ego.APs['intersection_is_clear']:
            self._v_ref = rd.speed_limit
        else:
            self._v_ref = 0

    def generate_learning_scenario(self):
        n_others = 0 if np.random.rand() <= 0 else np.random.randint(1, 4)  # effectively always 1-3 others
        self.generate_scenario(
            n_others_range=(n_others, n_others),
            ego_pos_range=rd.hlanes.stop_region,
            n_others_stopped_in_stop_region=n_others,
            ego_v_upper_lim=0,
            ego_perturb_lim=(rd.hlanes.width / 4, np.pi / 6),
            ego_heading_towards_lane_centre=True)

        max_waited_count = 0
        min_waited_count = 1
        for veh in self.env.vehs[1:]:
            max_waited_count = max(max_waited_count, veh.waited_count)
            min_waited_count = min(min_waited_count, veh.waited_count)

        min_waited_count = min(min_waited_count, max_waited_count)
        self._extra_action_weights_flag = False

        if np.random.rand() <= 0.5:
            self.env.ego.waited_count = np.random.randint(0, min_waited_count+1)
        else:
            self.env.ego.waited_count = np.random.randint(min_waited_count, max_waited_count + 21)

        self.env.init_APs(False)
        self._reward_in_goal = 200
        self._enable_low_level_training_properties = True
        self._extra_action_weights_flag = True

    @property
    def extra_termination_condition(self):
        if self._enable_low_level_training_properties:  # activated only for the low-level training.
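            # occasionally (10% of the time) penalize the ego for staying (almost) stopped
            # and decelerating even though it has the highest priority and the intersection is clear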
            if self.env.ego.APs['highest_priority'] and self.env.ego.APs['intersection_is_clear'] \
                    and np.random.rand() <= 0.1 and self.env.ego.v <= self._v_ref / 10 \
                    and self.env.ego.acc < 0:
                self._extra_r_terminal = - 100
                return True
            else:
                self._extra_r_terminal = None
                return False

        return False

    @staticmethod
    def _features_dim_reduction(features_tuple):
        return extract_ego_features(
            features_tuple, 'v', 'v_ref', 'e_y', 'psi', 'v tan(psi/L)', 'theta', 'lane', 'acc', 'psi_dot',
            'pos_stop_region', 'intersection_is_clear', 'highest_priority')


class ManualWait(ManeuverBase):
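    """Variant of Wait with a hard-coded low-level policy that keeps the ego
    vehicle stationary until it is given the highest priority."""
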
    _reward_in_goal = None
    _terminate_in_goal = True

    def _init_LTL_preconditions(self):
        self._LTL_preconditions.append(
            LTLProperty("G ( (in_stop_region and stopped_now) and not (highest_priority and intersection_is_clear))",
                    None, not self._enable_low_level_training_properties))  # not available in low-level training...

        self._LTL_preconditions.append(
            LTLProperty("G ( in_stop_region U (highest_priority and intersection_is_clear) )",
                        150, self._enable_low_level_training_properties))

        self._LTL_preconditions.append(
            LTLProperty("G ( (lane and target_lane) or (not lane and not target_lane) )",
                        100, self._enable_low_level_training_properties))

    def _init_param(self):
        self._v_ref = 0
        self._target_lane = self.env.ego.APs['lane']

    def _low_level_manual_policy(self):
        return (0, 0)  # Do nothing during "ManualWait"; just wait until the ego is given the highest priority.


class Left(ManeuverBase):
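    """Lane-change maneuver whose target is the lane encoded as APs['lane'] == False;
    it can only initiate while the ego vehicle is in the other lane."""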

    min_y_distance = rd.hlanes.width / 4

    _terminate_in_goal = True
    _reward_in_goal = None

    def _init_param(self):
        self._v_ref = rd.speed_limit
        self._target_lane = False
        self._terminate_in_goal = True

    @property
    def goal_achieved(self):
        ego = self.env.ego
        APs = self.env.ego.APs
        on_other_lane = APs['lane'] == self._target_lane
        achieved_y_displacement = np.sign(ego.y) * \
                                  (ego.y - rd.hlanes.centres[APs['target_lane']]) >= - self.min_y_distance
        return on_other_lane and APs['on_route'] and \
               achieved_y_displacement and APs['parallel_to_lane']

    @property
    def extra_initiation_condition(self):
        return self.env.ego.APs['lane']

    @staticmethod
    def _features_dim_reduction(features_tuple):
        return extract_ego_features(features_tuple, 'v', 'v_ref', 'e_y', 'psi',
                                    'v tan(psi/L)', 'theta', 'lane', 'acc',
                                    'psi_dot')


class Right(Left):
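    """Counterpart of Left: the target is the lane encoded as APs['lane'] == True,
    and initiation requires the ego vehicle to be in the other lane."""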

    def _init_param(self):
        self._v_ref = rd.speed_limit
        self._target_lane = True
        self._terminate_in_goal = True

    @property
    def extra_initiation_condition(self):
        return not self.env.ego.APs['lane']


class ChangeLane(ManeuverBase):
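    """Lane-change maneuver whose target is the opposite of the ego vehicle's
    current lane (_target_lane = not APs['lane'])."""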

    min_y_distance = rd.hlanes.width / 4

    _terminate_in_goal = True
    _reward_in_goal = None

    _violation_penalty_in_low_level_training = None

    high_level_extra_reward = -50

    def _init_param(self):
        self._v_ref = rd.speed_limit
        self._target_lane = not self.env.ego.APs['lane']

    def _init_LTL_preconditions(self):
        self._LTL_preconditions.append(
           LTLProperty("G ( on_route and not over_speed_limit )",
                       self._violation_penalty_in_low_level_training,
                       self._enable_low_level_training_properties))

        self._LTL_preconditions.append(
           LTLProperty("G ( not stopped_now )",
                       100, self._enable_low_level_training_properties))

        self._LTL_preconditions.append(
           LTLProperty("G ( not in_intersection and not in_stop_region )",
                       None, not self._enable_low_level_training_properties))  # activated only for the high-level case.

    @property
    def goal_achieved(self):
        ego = self.env.ego
        APs = self.env.ego.APs
        on_other_lane = APs['lane'] == self._target_lane
        achieved_y_displacement = np.sign(ego.y) * \
                                   (ego.y - rd.hlanes.centres[APs['target_lane']]) >= - self.min_y_distance
        return on_other_lane and APs['on_route'] and \
            achieved_y_displacement and APs['parallel_to_lane']

    def _low_level_manual_policy(self):
        return self.env.aggressive_driving_policy(EGO_INDEX)

    def generate_learning_scenario(self):
        self.generate_scenario(
            enable_LTL_preconditions=False,
            timeout=15,
            ego_pos_range=(rd.intersection_width_w_offset, rd.hlanes.end_pos),
            ego_lane=np.random.choice([0, 1]),
            ego_perturb_lim=(rd.hlanes.width / 4, np.pi / 6),
            v_max_multiplier=0.75)

        # print('our range was %s, %s, ego at %s' % (before_intersection, after_intersection, self.env.ego.x))
        self._reward_in_goal = 200
        self._violation_penalty_in_low_level_training = 150
        self._enable_low_level_training_properties = True
        self._extra_action_weights_flag = True

    def generate_validation_scenario(self):
        self.generate_scenario(
            enable_LTL_preconditions=False,
            ego_pos_range=(rd.hlanes.start_pos, rd.hlanes.end_pos),
            ego_perturb_lim=(rd.hlanes.width / 4, np.pi / 6))

        # print('our range was %s, %s, ego at %s' % (before_intersection, after_intersection, self.env.ego.x))
        self._reward_in_goal = 200
        self._violation_penalty_in_low_level_training = 150
        self._enable_low_level_training_properties = True

    @staticmethod
    def _features_dim_reduction(features_tuple):
        return extract_ego_features(features_tuple, 'v', 'v_ref', 'e_y', 'psi',
                                    'v tan(psi/L)', 'theta', 'lane', 'acc',
                                    'psi_dot')


# TODO: In the low-level training of Follow, sometimes the initial state doesn't satisfy its initiation condition.
class Follow(ManeuverBase):
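    """Maneuver for following the vehicle ahead: the reference speed tracks the
    target vehicle's speed, or ramps down towards the stop region when the target
    vehicle has already passed the intersection but the ego has not."""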

    _reward_in_goal = None

    _target_veh_i = None
    _penalty_for_out_of_range = None
    _penalty_for_change_lane = None

    def _init_LTL_preconditions(self):
        self._LTL_preconditions.append(
            LTLProperty("G ( veh_ahead U (in_stop_region or before_but_close_to_stop_region ) )", self._penalty_for_out_of_range))

        self._LTL_preconditions.append(
             LTLProperty(
                 "G ( (lane and target_lane) or (not lane and not target_lane) )",
                 self._penalty_for_change_lane))

        self._LTL_preconditions.append(
             LTLProperty(
                 "G ( not (has_stopped_in_stop_region and in_stop_region ) )",
                 None))

        #self._LTL_preconditions.append(
        #   LTLProperty("G ( not stopped_now U (veh_ahead_stopped_now or (in_stop_region and stopped_now )) )", 200,
        #               self._enable_low_level_training_properties))

        self._LTL_preconditions.append(
           LTLProperty("G ( not veh_ahead_too_close )", self._penalty_for_out_of_range,
                       self._enable_low_level_training_properties))

    def generate_learning_scenario(self):
        self.generate_scenario(
            n_others_range=(1, 6),
            v_max_multiplier=0.75,
            ego_perturb_lim=(0, 0),
            veh_ahead_scenario=True)
        self.env._terminate_in_goal = False
        self._penalty_for_out_of_range = 200
        self._penalty_for_change_lane = 200
        self._enable_low_level_training_properties = True
        self._extra_action_weights_flag = True

    def generate_validation_scenario(self):
        self.generate_learning_scenario()

    def _init_param(self):
        self._set_v_ref()

    def _update_param(self):
        self._set_v_ref()

    def _set_v_ref(self):
        self._target_veh_i, _ = self.env.get_V2V_distance()

        if self._target_veh_i is not None:
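            # if the ego is still before the intersection but the target vehicle has passed it,
            # ramp v_ref down to 0 at the stop-region centre; otherwise match the target vehicle's speed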
            if self.env.ego.APs['before_intersection'] and \
                    not self.env.vehs[self._target_veh_i].APs['before_intersection']:
                x = self.env.ego.x
                if rd.hlanes.near_stop_region >= x:
                    self._v_ref = rd.speed_limit
                elif rd.hlanes.near_stop_region < x <= rd.hlanes.stop_region_centre:
                    self._v_ref = -(rd.speed_limit / abs(rd.hlanes.near_stop_region)) * (
                                x - rd.hlanes.stop_region_centre)
                elif x > rd.hlanes.stop_region_centre:
                    self._v_ref = 0
            else:
                self._v_ref = self.env.vehs[self._target_veh_i].v
        else:
            self._v_ref = 0

    def _low_level_manual_policy(self):
        return self.env.aggressive_driving_policy(EGO_INDEX)

    @property
    def extra_termination_condition(self):
        if self._target_veh_i is None:
            return False

        if self._enable_low_level_training_properties:  # activated only for the low-level training.
            APs = self.env.ego.APs
            if (rd.speed_limit / 5 < self._v_ref) and \
                    (self.env.ego.v < self._v_ref / 2) and \
                    self.env.ego.acc < 0 and \
                    not APs['veh_ahead_stopped_now'] and \
                    not APs['in_stop_region']:
                self._extra_r_terminal = -100
                return True

            else:
                self._extra_r_terminal = None
                return False

        return False

    def _features_dim_reduction(self, features_tuple):
        ego_features = extract_ego_features(
            features_tuple, 'pos_near_stop_region', 'v', 'v_ref', 'e_y', 'psi', 'v tan(psi/L)',
            'theta', 'lane', 'acc', 'psi_dot', 'pos_stop_region', 'not_in_stop_region')

        if self._target_veh_i is not None:
            return ego_features + extract_other_veh_features(
                features_tuple, self._target_veh_i, 'rel_x', 'rel_y', 'v', 'acc')
        else:
            return ego_features + (0.0, 0.0, 0.0, 0.0)