Merged

48 commits
e19b038 ghost controller (andrewcoh, Feb 29, 2020)
3335cc8 Merge branch 'master' into self-play-mutex (andrewcoh, Mar 16, 2020)
49f5cf4 Merge branch 'master' into self-play-mutex (andrewcoh, Mar 18, 2020)
33ff2ff team id centric ghost trainer (andrewcoh, Mar 18, 2020)
3f69db7 ELO calculation done in ghost controller (andrewcoh, Mar 18, 2020)
e19f9e5 removed opponent elo from stat collection (andrewcoh, Mar 18, 2020)
4e1e139 passing all tests locally (andrewcoh, Mar 19, 2020)
1741c54 fixed controller behavior when first team discovered isnt 0 (andrewcoh, Mar 19, 2020)
cc17ea1 no negative team id in docs (andrewcoh, Mar 19, 2020)
43417e1 save step on trainer step count/swap on ghost (andrewcoh, Mar 19, 2020)
124f886 urllib parse (andrewcoh, Mar 19, 2020)
8778cec Update docs/Training-Self-Play.md (andrewcoh, Mar 19, 2020)
33c5ea9 remove whitespace (andrewcoh, Mar 19, 2020)
c2eea64 Merge branch 'master' into self-play-mutex (andrewcoh, Mar 19, 2020)
bd86108 docstrings/ghost_swap -> team_change (andrewcoh, Mar 20, 2020)
82bdfc4 replaced ghost_swap with team_change in tests (andrewcoh, Mar 20, 2020)
cb855db docstrings for all ghost trainer functions (andrewcoh, Mar 20, 2020)
fb5ccd0 SELF-PLAY NOW SUPPORTS MULTIAGENT TRAINERS (andrewcoh, Mar 21, 2020)
c3890f5 next learning team from get step (andrewcoh, Mar 21, 2020)
cad0a2d comment for self.ghost_step (andrewcoh, Mar 21, 2020)
f68f7aa fixed export so both teams have current model (andrewcoh, Mar 22, 2020)
4c9ba86 updated self-play doc for asymmetric games/changed current_self->curr… (andrewcoh, Mar 23, 2020)
ffe2cfd count trainer steps in controller by team id (andrewcoh, Mar 23, 2020)
c2ae207 added team_change as a yaml config (andrewcoh, Mar 23, 2020)
7e0ff7b removed team-change CLI (andrewcoh, Mar 23, 2020)
d2dd975 fixed tests that expected old hyperparam team-change (andrewcoh, Mar 23, 2020)
6aae133 doc update for team_change (andrewcoh, Mar 23, 2020)
d560b5f removed not max step reached as condition for ELO (andrewcoh, Mar 24, 2020)
2bf9271 Merge branch 'master' into self-play-mutex (andrewcoh, Mar 24, 2020)
29435bb warning for team change hyperparam (andrewcoh, Mar 25, 2020)
97f1b7d simple rl asymm ghost tests (andrewcoh, Mar 25, 2020)
d123fe7 Merge branch 'master' into self-play-mutex (andrewcoh, Mar 25, 2020)
2cb5a2d renamed controller methods/doc fixes (andrewcoh, Mar 25, 2020)
27e924e current_best_ratio -> latest_model_ratio (andrewcoh, Mar 25, 2020)
f3332c3 added Foerster paper title to doc (andrewcoh, Mar 26, 2020)
aca54be doc fix (andrewcoh, Mar 26, 2020)
0e52b20 Merge branch 'master' into self-play-mutex (andrewcoh, Mar 26, 2020)
95469d2 doc fix (andrewcoh, Mar 27, 2020)
10bd9dd Merge branch 'master' into self-play-mutex (andrewcoh, Mar 27, 2020)
01f9de3 Merge branch 'master' into self-play-mutex (andrewcoh, Mar 30, 2020)
61649ea using mlagents_env.logging instead of logging (andrewcoh, Mar 30, 2020)
972ed63 doc fix (andrewcoh, Mar 31, 2020)
7e0a3ba modified doc to not include strikers vs goalie (andrewcoh, Apr 1, 2020)
6c5342d removed "unpredictable behavior" (andrewcoh, Apr 1, 2020)
9149413 Merge branch 'master' into self-play-mutex (andrewcoh, Apr 1, 2020)
02455a4 added to mig doc/address comments (andrewcoh, Apr 1, 2020)
df8b87f raise warning when latest_model_ratio not btwn 0, 1 (andrewcoh, Apr 1, 2020)
1333fb9 removed Goalie from learning environment examples (andrewcoh, Apr 1, 2020)
2 changes: 1 addition & 1 deletion docs/Training-Self-Play.md
@@ -15,7 +15,7 @@ Self-play is triggered by including the self-play hyperparameter hierarchy in th
 
 ![Team ID](images/team_id.png)
 
-See the trainer configuration and agent prefabs for our Tennis environment for an example.
+***Team ID must be 0 or an integer greater than 0. Negative numbers will cause unpredictable behavior.*** See the trainer configuration and agent prefabs for our Tennis environment for an example.
 
 ## Best Practices Training with Self-Play
 
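The Team ID restriction is not arbitrary: the new `GhostController` (added below in `controller.py`) uses `-1` as an "unset" sentinel for its learning team, so a negative team id is indistinguishable from "no team registered yet". A minimal sketch of the failure mode, using hypothetical ids rather than the ml-agents API:

```python
# Sketch: why negative team ids confuse the controller's sentinel logic.
# GhostController starts with _learning_team = -1, meaning "no team yet".
_learning_team = -1

for team_id in (-1, 2):  # a disallowed negative id subscribes first
    if _learning_team < 0:  # same check as in subscribe_team_id below
        _learning_team = team_id

# Team -1 subscribed first but still "looked unset", so team 2 took the
# first learning slot; with ids >= 0 the first subscriber always keeps it.
print(_learning_team)  # 2
```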
22 changes: 8 additions & 14 deletions ml-agents/mlagents/trainers/behavior_id_utils.py
@@ -1,10 +1,10 @@
-from typing import Dict, NamedTuple
+from typing import NamedTuple
 
 
 class BehaviorIdentifiers(NamedTuple):
-    name_behavior_id: str
+    behavior_id: str
     brain_name: str
-    behavior_ids: Dict[str, int]
+    team_id: int
 
     @staticmethod
     def from_name_behavior_id(name_behavior_id: str) -> "BehaviorIdentifiers":
@@ -17,20 +17,14 @@ def from_name_behavior_id(name_behavior_id: str) -> "BehaviorIdentifiers":
         :returns: A BehaviorIdentifiers object.
         """
 
-        ids: Dict[str, int] = {}
+        team_id: int = 0
         if "?" in name_behavior_id:
-            name, identifiers = name_behavior_id.rsplit("?", 1)
-            if "&" in identifiers:
-                list_of_identifiers = identifiers.split("&")
-            else:
-                list_of_identifiers = [identifiers]
-
-            for identifier in list_of_identifiers:
-                key, value = identifier.split("=")
-                ids[key] = int(value)
+            name, team_and_id = name_behavior_id.rsplit("?", 1)
+            _, team_id_str = team_and_id.split("=")
+            team_id = int(team_id_str)
         else:
             name = name_behavior_id
 
         return BehaviorIdentifiers(
-            name_behavior_id=name_behavior_id, brain_name=name, behavior_ids=ids
+            behavior_id=name_behavior_id, brain_name=name, team_id=team_id
         )
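The simplified parser keeps only the team id from the trailing `?key=value` segment. A quick usage sketch, assuming behavior names of the form `"<brain name>?team=<team id>"` as emitted by ml-agents environments:

```python
from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers

ids = BehaviorIdentifiers.from_name_behavior_id("Tennis?team=1")
print(ids.brain_name)  # "Tennis"
print(ids.team_id)     # 1

# Names without a "?team=<id>" suffix fall back to team 0.
print(BehaviorIdentifiers.from_name_behavior_id("Tennis").team_id)  # 0
```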
48 changes: 48 additions & 0 deletions ml-agents/mlagents/trainers/ghost/controller.py
@@ -0,0 +1,48 @@
+from typing import Deque, Dict
+from collections import deque
+from mlagents.trainers.ghost.trainer import GhostTrainer
+
+
+class GhostController(object):
+    def __init__(self, swap_interval: int, maxlen: int = 10):
+        self._swap_interval = swap_interval
+        self._last_swap: int = 0
+        self._queue: Deque[int] = deque(maxlen=maxlen)
+        self._learning_team: int = -1
+        self._ghost_trainers: Dict[int, GhostTrainer] = {}
+
+    def subscribe_team_id(self, team_id: int, trainer: GhostTrainer) -> None:
+        if team_id not in self._ghost_trainers:
+            self._queue.append(team_id)
+            self._ghost_trainers[team_id] = trainer
+            if self._learning_team < 0:
+                self._learning_team = team_id
+
+    def get_learning_team(self, step: int) -> int:
+        if step >= self._swap_interval + self._last_swap:
+            self._last_swap = step
+            self._learning_team = self._queue.popleft()
+            self._queue.append(self._learning_team)
+        return self._learning_team
+
+    # Adapted from https://github.com/Unity-Technologies/ml-agents/pull/1975 and
+    # https://metinmediamath.wordpress.com/2013/11/27/how-to-calculate-the-elo-rating-including-example/
+    # ELO calculation
+
+    def compute_elo_rating_changes(self, rating: float, result: float) -> float:
+        opponent_rating: float = 0.0
+        for team_id, trainer in self._ghost_trainers.items():
+            if team_id != self._learning_team:
+                opponent_rating = trainer.get_opponent_elo()
+        r1 = pow(10, rating / 400)
+        r2 = pow(10, opponent_rating / 400)
+
+        summed = r1 + r2
+        e1 = r1 / summed
+
+        change = result - e1
+        for team_id, trainer in self._ghost_trainers.items():
+            if team_id != self._learning_team:
+                trainer.change_opponent_elo(change)
+
+        return change
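Two things worth noting in `GhostController`: `get_learning_team` rotates the learning team round-robin through the deque once `swap_interval` steps have elapsed since the last swap, and `compute_elo_rating_changes` returns the raw expected-score difference `result - e1` with no K-factor, so any scaling is left to the trainers via `get_opponent_elo`/`change_opponent_elo`. A self-contained sketch of the Elo math with worked numbers:

```python
def expected_score(rating: float, opponent_rating: float) -> float:
    # E1 = r1 / (r1 + r2) with r = 10 ** (rating / 400), as in the code above.
    r1 = pow(10, rating / 400)
    r2 = pow(10, opponent_rating / 400)
    return r1 / (r1 + r2)

# Evenly matched teams: a win (result = 1.0) gives a change of +0.5.
print(1.0 - expected_score(1200.0, 1200.0))  # 0.5

# A 400-point favorite gains little from a win, loses a lot from a loss.
print(1.0 - expected_score(1600.0, 1200.0))  # ~0.091
print(0.0 - expected_score(1600.0, 1200.0))  # ~-0.909
```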