| 16 | 16 |     "max_decel": 1, | 
| 17 | 17 |     # whether we use an obs space that contains adjacent lane info or just the lead obs | 
| 18 | 18 |     "lead_obs": True, | 
|  | 19 | +    # whether the reward should come from local vehicles instead of global rewards (usage sketch below) | 
|  | 20 | +    "local_reward": True | 
| 19 | 21 | } | 
| 20 | 22 | 
 | 
| 21 | 23 | 
 | 
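For context, a minimal sketch of how the new `local_reward` flag might be switched on when building the environment, assuming Flow's usual `EnvParams` pattern (the experiment wiring here is an assumption, not part of this PR):

```python
from flow.core.params import EnvParams

# ADDITIONAL_ENV_PARAMS is the module-level dict patched above; copying it
# keeps the defaults intact while overriding the flag for one experiment.
env_params = EnvParams(additional_params={
    **ADDITIONAL_ENV_PARAMS,
    "local_reward": True,  # per-vehicle rewards instead of a shared global one
})
```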
| @@ -137,35 +139,47 @@ def compute_reward(self, rl_actions, **kwargs): | 
| 137 | 139 |             return {} | 
| 138 | 140 | 
 | 
| 139 | 141 |         rewards = {} | 
| 140 |  | -        for rl_id in self.k.vehicle.get_rl_ids(): | 
| 141 |  | -            if self.env_params.evaluate: | 
| 142 |  | -                # reward is speed of vehicle if we are in evaluation mode | 
| 143 |  | -                reward = self.k.vehicle.get_speed(rl_id) | 
| 144 |  | -            elif kwargs['fail']: | 
| 145 |  | -                # reward is 0 if a collision occurred | 
| 146 |  | -                reward = 0 | 
| 147 |  | -            else: | 
| 148 |  | -                # reward high system-level velocities | 
| 149 |  | -                cost1 = average_velocity(self, fail=kwargs['fail']) | 
| 150 |  | - | 
| 151 |  | -                # penalize small time headways | 
| 152 |  | -                cost2 = 0 | 
| 153 |  | -                t_min = 1  # smallest acceptable time headway | 
| 154 |  | - | 
| 155 |  | -                lead_id = self.k.vehicle.get_leader(rl_id) | 
| 156 |  | -                if lead_id not in ["", None] \ | 
| 157 |  | -                        and self.k.vehicle.get_speed(rl_id) > 0: | 
| 158 |  | -                    t_headway = max( | 
| 159 |  | -                        self.k.vehicle.get_headway(rl_id) / | 
| 160 |  | -                        self.k.vehicle.get_speed(rl_id), 0) | 
| 161 |  | -                    cost2 += min((t_headway - t_min) / t_min, 0) | 
| 162 |  | - | 
| 163 |  | -                # weights for cost1, cost2, and cost3, respectively | 
| 164 |  | -                eta1, eta2 = 1.00, 0.10 | 
| 165 |  | - | 
| 166 |  | -                reward = max(eta1 * cost1 + eta2 * cost2, 0) | 
| 167 |  | - | 
| 168 |  | -            rewards[rl_id] = reward | 
|  | 142 | +        if self.env_params.additional_params["local_reward"]: | 
|  | 143 | +            for rl_id in self.k.vehicle.get_rl_ids(): | 
|  | 144 | +                rewards[rl_id] = 0 | 
|  | 145 | +                speeds = [] | 
|  | 146 | +                # get_speed returns a scalar for a single id, negative if the vehicle is missing | 
|  | 147 | +                follow_speed = self.k.vehicle.get_speed(self.k.vehicle.get_follower(rl_id)) | 
|  | 148 | +                ego_speed = self.k.vehicle.get_speed(rl_id) | 
|  | 149 | +                speeds.extend(s for s in (follow_speed, ego_speed) if s >= 0) | 
|  | 150 | +                if len(speeds) > 0: | 
|  | 151 | +                    # rescale so the Q-function can estimate it quickly | 
|  | 152 | +                    rewards[rl_id] = np.mean(speeds) / 500.0 | 
|  | 153 | +        else: | 
|  | 154 | +            for rl_id in self.k.vehicle.get_rl_ids(): | 
|  | 155 | +                if self.env_params.evaluate: | 
|  | 156 | +                    # reward is speed of vehicle if we are in evaluation mode | 
|  | 157 | +                    reward = self.k.vehicle.get_speed(rl_id) | 
|  | 158 | +                elif kwargs['fail']: | 
|  | 159 | +                    # reward is 0 if a collision occurred | 
|  | 160 | +                    reward = 0 | 
|  | 161 | +                else: | 
|  | 162 | +                    # reward high system-level velocities | 
|  | 163 | +                    cost1 = average_velocity(self, fail=kwargs['fail']) | 
|  | 164 | + | 
|  | 165 | +                    # penalize small time headways | 
|  | 166 | +                    cost2 = 0 | 
|  | 167 | +                    t_min = 1  # smallest acceptable time headway | 
|  | 168 | + | 
|  | 169 | +                    lead_id = self.k.vehicle.get_leader(rl_id) | 
|  | 170 | +                    if lead_id not in ["", None] \ | 
|  | 171 | +                            and self.k.vehicle.get_speed(rl_id) > 0: | 
|  | 172 | +                        t_headway = max( | 
|  | 173 | +                            self.k.vehicle.get_headway(rl_id) / | 
|  | 174 | +                            self.k.vehicle.get_speed(rl_id), 0) | 
|  | 175 | +                        cost2 += min((t_headway - t_min) / t_min, 0) | 
|  | 176 | + | 
|  | 177 | +                    # weights for cost1 and cost2, respectively | 
|  | 178 | +                    eta1, eta2 = 1.00, 0.10 | 
|  | 179 | + | 
|  | 180 | +                    reward = max(eta1 * cost1 + eta2 * cost2, 0) | 
|  | 181 | + | 
|  | 182 | +                rewards[rl_id] = reward | 
| 169 | 183 |         return rewards | 
| 170 | 184 | 
 | 
| 171 | 185 |     def additional_command(self): | 
|  | 
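The local branch reduces to a simple rule: each RL vehicle is rewarded with the mean of its own speed and its follower's speed, skipping invalid (negative) readings, rescaled by 1/500 so the Q-function sees small targets. A self-contained sketch of that arithmetic, with plain floats standing in for the kernel calls (the `-1001` sentinel for a missing vehicle is an assumption about Flow's error code):

```python
import numpy as np

def local_reward(ego_speed, follower_speed, scale=500.0):
    """Mean of the valid (non-negative) speeds, rescaled by 1/scale."""
    speeds = [s for s in (ego_speed, follower_speed) if s >= 0]
    return float(np.mean(speeds)) / scale if speeds else 0.0

# ego at 20 m/s with its follower missing -> reward of 20 / 500 = 0.04
assert abs(local_reward(20.0, -1001.0) - 0.04) < 1e-9
```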
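In the global branch, the headway term only ever subtracts: a time headway at or above `t_min` contributes nothing, while shorter headways contribute a negative cost that `eta2` scales down. A worked example of that formula (standalone; the function name is ours, not the repo's):

```python
def headway_penalty(headway_m, speed_mps, t_min=1.0):
    """min((t_headway - t_min) / t_min, 0): zero at or above t_min seconds."""
    if speed_mps <= 0:
        return 0.0
    t_headway = max(headway_m / speed_mps, 0.0)
    return min((t_headway - t_min) / t_min, 0.0)

# a 10 m gap at 20 m/s is a 0.5 s time headway -> penalty of -0.5,
# which eta2 = 0.10 turns into -0.05 on the final reward
assert headway_penalty(10.0, 20.0) == -0.5
```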