71 changes: 48 additions & 23 deletions gym-unity/gym_unity/envs/__init__.py
@@ -1,7 +1,7 @@
 import logging
 import itertools
 import numpy as np
-from typing import Any, Dict, List, Optional, Tuple, Union, Set
+from typing import Any, Dict, List, Optional, Tuple, Union
 
 import gym
 from gym import error, spaces
@@ -74,7 +74,10 @@ def __init__(
 
         self.visual_obs = None
         self._n_agents = -1
-        self._done_agents: Set[int] = set()
+
+        self._gym_id_order: List[int] = []
+        self._done_agents_index_to_last_reward: Dict[int, float] = {}
 
         # Save the step result from the last time all Agents requested decisions.
         self._previous_step_result: BatchedStepResult = None
         self._multiagent = multiagent
@@ -121,6 +124,7 @@ def __init__(
         step_result = self._env.get_step_result(self.brain_name)
         self._check_agents(step_result.n_agents())
         self._previous_step_result = step_result
+        self._gym_id_order = list(self._previous_step_result.agent_id)
Contributor:
Is there a way we could make _gym_id_order a dict of agent_id to index instead of a List? That way we don't have to do O(N) index operations.
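For illustration, a minimal sketch of that suggestion — the field name _agent_id_to_gym_index is hypothetical and not part of the PR:

```python
from typing import Dict, List

class IdOrderSketch:
    def __init__(self, initial_ids: List[int]) -> None:
        # A dict keyed by agent_id replaces the List[int], so lookups are O(1)
        # instead of the O(N) scan that list.index() performs.
        self._agent_id_to_gym_index: Dict[int, int] = {
            agent_id: index for index, agent_id in enumerate(initial_ids)
        }

    def gym_index(self, agent_id: int) -> int:
        # Equivalent of self._gym_id_order.index(agent_id), but O(1).
        return self._agent_id_to_gym_index[agent_id]
```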

Contributor:
Probably don't want to do this, but it might work: https://stackoverflow.com/questions/1456373/two-way-reverse-map

Contributor (author):
Unfortunately, you would have to go both ways: id to index and index to id. Having a list is implicitly a dict from index to id.

Contributor:
I see, so you'd have to use the two-way dict to make it O(1) in both directions. If this code is only called when an agent is done, it might be OK. It could be horrific for thousands of agents, though.
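For reference, the two-way mapping under discussion could look like the sketch below (hypothetical class, not in the PR); it buys O(1) lookups in both directions by keeping two dicts in sync:

```python
from typing import Dict

class TwoWayAgentMap:
    """Bidirectional agent_id <-> gym index map; O(1) lookups both ways."""

    def __init__(self) -> None:
        self._id_to_index: Dict[int, int] = {}
        self._index_to_id: Dict[int, int] = {}

    def insert(self, agent_id: int, index: int) -> None:
        # Keep both dicts in sync so each direction stays O(1).
        self._id_to_index[agent_id] = index
        self._index_to_id[index] = agent_id

    def index_of(self, agent_id: int) -> int:
        return self._id_to_index[agent_id]

    def id_at(self, index: int) -> int:
        return self._index_to_id[index]

    def remove_id(self, agent_id: int) -> None:
        # Removal must also touch both dicts to keep them consistent.
        index = self._id_to_index.pop(agent_id)
        del self._index_to_id[index]
```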

Contributor:
I think it's worth trying to remove these index calls if possible. But getting tests in first is more important; then you can optimize.

Contributor (author):
I added 2 tests for _sanitize_info.
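The tests themselves are not shown in this view; a rough sketch of what such a _sanitize_info test could look like, using the BatchedStepResult constructor visible in the diff (the make_step_result helper and the gym_env fixture are hypothetical):

```python
import numpy as np
from mlagents_envs.base_env import BatchedStepResult

def make_step_result(agent_ids, done, rewards):
    # Hypothetical helper: builds a BatchedStepResult with dummy observations.
    n = len(agent_ids)
    return BatchedStepResult(
        obs=[np.zeros((n, 4), dtype=np.float32)],
        reward=np.array(rewards, dtype=np.float32),
        done=np.array(done, dtype=bool),
        max_step=np.zeros(n, dtype=bool),
        agent_id=np.array(agent_ids),
        action_mask=None,
    )

def test_sanitize_info_replaces_done_agent(gym_env):
    # gym_env is assumed to be a UnityEnv whose _gym_id_order is [1, 2, 3].
    # Agent 2 finishes with reward 0.5 and agent 5 spawns in the same step.
    step = make_step_result(
        agent_ids=[1, 2, 3, 5],
        done=[False, True, False, False],
        rewards=[0.0, 0.5, 0.0, 0.0],
    )
    sanitized = gym_env._sanitize_info(step)
    # The new agent takes over the finished agent's slot and inherits its
    # done flag and final reward, so the gym-side ordering never changes.
    assert list(sanitized.agent_id) == [1, 5, 3]
    assert sanitized.done[1]
    assert sanitized.reward[1] == 0.5
```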


         # Set observation and action spaces
         if self.group_spec.is_action_discrete():
@@ -368,52 +372,69 @@ def _sanitize_info(self, step_result: BatchedStepResult) -> BatchedStepResult:
                 "The number of agents in the scene does not match the expected number."
             )
 
-        # remove the done Agents
-        indices_to_keep: List[int] = []
-        for index, is_done in enumerate(step_result.done):
-            if not is_done:
-                indices_to_keep.append(index)
+        if step_result.n_agents() - sum(step_result.done) != self._n_agents:
+            raise UnityGymException(
+                "The number of agents in the scene does not match the expected number."
+            )
+
+        for index, agent_id in enumerate(step_result.agent_id):
+            if step_result.done[index]:
+                gym_index = self._gym_id_order.index(agent_id)
+                self._done_agents_index_to_last_reward[gym_index] = step_result.reward[
+                    index
+                ]
+                self._gym_id_order[gym_index] = -1  # no agent at that index
 
         # Set the new AgentDone flags to True
         # Note that the corresponding agent_id that gets marked done will be different
         # than the original agent that was done, but this is OK since the gym interface
         # only cares about the ordering.
         for index, agent_id in enumerate(step_result.agent_id):
             if not self._previous_step_result.contains_agent(agent_id):
+                # insert the id in the current id list:
+                original_index = self._gym_id_order.index(-1)
+                self._gym_id_order[original_index] = agent_id
                 # This is a new agent
                 step_result.done[index] = True
-            if agent_id in self._done_agents:
-                step_result.done[index] = True
-        self._done_agents = set()
+                # The index of the agent among not-done agents is
+                original_index = self._gym_id_order.index(agent_id)
+                step_result.reward[index] = self._done_agents_index_to_last_reward[
+                    original_index
+                ]
+        self._done_agents_index_to_last_reward = {}
 
         self._previous_step_result = step_result  # store the new original
 
+        new_id_order = []
+        agent_id_list = list(step_result.agent_id)
+        for agent_id in self._gym_id_order:
+            new_id_order.append(agent_id_list.index(agent_id))
+
         _mask: Optional[List[np.array]] = None
         if step_result.action_mask is not None:
             _mask = []
             for mask_index in range(len(step_result.action_mask)):
-                _mask.append(step_result.action_mask[mask_index][indices_to_keep])
+                _mask.append(step_result.action_mask[mask_index][new_id_order])
         new_obs: List[np.array] = []
         for obs_index in range(len(step_result.obs)):
-            new_obs.append(step_result.obs[obs_index][indices_to_keep])
+            new_obs.append(step_result.obs[obs_index][new_id_order])
         return BatchedStepResult(
             obs=new_obs,
-            reward=step_result.reward[indices_to_keep],
-            done=step_result.done[indices_to_keep],
-            max_step=step_result.max_step[indices_to_keep],
-            agent_id=step_result.agent_id[indices_to_keep],
+            reward=step_result.reward[new_id_order],
+            done=step_result.done[new_id_order],
+            max_step=step_result.max_step[new_id_order],
+            agent_id=step_result.agent_id[new_id_order],
             action_mask=_mask,
         )
 
     def _sanitize_action(self, action: np.array) -> np.array:
         if self._previous_step_result.n_agents() == self._n_agents:
             return action
         sanitized_action = np.zeros(
             (self._previous_step_result.n_agents(), self.group_spec.action_size)
         )
-        input_index = 0
-        for index in range(self._previous_step_result.n_agents()):
+        for index, agent_id in enumerate(self._previous_step_result.agent_id):
             if not self._previous_step_result.done[index]:
-                sanitized_action[index, :] = action[input_index, :]
-                input_index = input_index + 1
+                array_index = self._gym_id_order.index(agent_id)
+                sanitized_action[index, :] = action[array_index, :]
         return sanitized_action

     def _step(self, needs_reset: bool = False) -> BatchedStepResult:
@@ -432,7 +453,11 @@ def _step(self, needs_reset: bool = False) -> BatchedStepResult:
                 "The environment does not have the expected amount of agents."
                 + "Some agents did not request decisions at the same time."
             )
-            self._done_agents.update(list(info.agent_id))
+            for agent_id, reward in zip(info.agent_id, info.reward):
+                gym_index = self._gym_id_order.index(agent_id)
+                self._done_agents_index_to_last_reward[gym_index] = reward
+                self._gym_id_order[gym_index] = -1  # no agent at that index
+
         self._env.step()
         info = self._env.get_step_result(self.brain_name)
         return self._sanitize_info(info)
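Taken together, the diff implements a small slot-reuse scheme: a finished agent's gym slot is marked free with -1 and its final reward is parked under that slot index, then the next unseen agent id claims the freed slot and inherits the reward. A self-contained sketch of that pattern, with hypothetical names:

```python
from typing import Dict, List

class SlotBookkeeping:
    """Keeps a fixed-size gym ordering while Unity swaps agent ids in and out."""

    def __init__(self, initial_ids: List[int]) -> None:
        self.order: List[int] = list(initial_ids)
        self.parked_rewards: Dict[int, float] = {}  # slot index -> last reward

    def retire(self, agent_id: int, last_reward: float) -> None:
        # Free the slot and remember the terminal reward for that slot.
        slot = self.order.index(agent_id)
        self.parked_rewards[slot] = last_reward
        self.order[slot] = -1  # no agent at that index

    def admit(self, new_agent_id: int) -> float:
        # The new agent takes the first free slot and inherits the parked reward.
        slot = self.order.index(-1)
        self.order[slot] = new_agent_id
        return self.parked_rewards.pop(slot)

# Example: agent 2 finishes with reward 0.5, agent 5 takes over its slot.
book = SlotBookkeeping([1, 2, 3])
book.retire(2, last_reward=0.5)
assert book.order == [1, -1, 3]
assert book.admit(5) == 0.5
assert book.order == [1, 5, 3]
```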