Skip to content

Commit 329aae5

Browse files
mwhoffmancopybara-github
authored andcommitted
Add a Builder class which encapsulates a full agent.
This class allows for the constituent components to be broken apart so that the builder can be used for both distributed and non-distributed variants. For the time being this is only incorporated into the TF D4PG agent to allow for minimal disruption and experimentation, but should be rolled out for all agents soon. PiperOrigin-RevId: 356975846 Change-Id: I00ead33da40f4f98052ae3beb218c23788ada206
1 parent 9ef3ab5 commit 329aae5

File tree

6 files changed

+395
-126
lines changed

6 files changed

+395
-126
lines changed

acme/agents/builders.py

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
# python3
2+
# Copyright 2018 DeepMind Technologies Limited. All rights reserved.
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
"""RL agent Builder interface."""
17+
18+
import abc
19+
from typing import Iterator, List, Optional
20+
21+
from acme import adders
22+
from acme import core
23+
from acme import specs
24+
from acme.utils import counting
25+
from acme.utils import loggers
26+
import reverb
27+
28+
29+
class ActorLearnerBuilder(abc.ABC):
  """Interface describing how to construct the pieces of an RL agent.

  A concrete implementation fully specifies one RL agent. An instance of such
  an implementation can then be used to assemble that agent so that it
  interacts with the environment either locally or in a distributed setup.

  Every method below is abstract; subclasses must provide all of them.
  """

  @abc.abstractmethod
  def make_replay_tables(
      self,
      environment_spec: specs.EnvironmentSpec,
  ) -> List[reverb.Table]:
    """Builds the replay tables that data is inserted into.

    Args:
      environment_spec: the `specs.EnvironmentSpec` made available for
        constructing the tables.

    Returns:
      A list of Reverb tables.
    """

  @abc.abstractmethod
  def make_dataset_iterator(
      self,
      replay_client: reverb.Client,
  ) -> Iterator[reverb.ReplaySample]:
    """Builds the iterator of samples used for learning/updating the agent.

    Args:
      replay_client: Reverb client made available for constructing the
        iterator.

    Returns:
      An iterator yielding `reverb.ReplaySample` items.
    """

  @abc.abstractmethod
  def make_adder(
      self,
      replay_client: reverb.Client,
  ) -> Optional[adders.Adder]:
    """Builds the adder recording data produced by the actor/environment.

    Args:
      replay_client: Reverb client pointing at the replay server.

    Returns:
      An adder; the `Optional` return annotation also permits `None`.
    """

  @abc.abstractmethod
  def make_actor(
      self,
      policy_network,
      adder: Optional[adders.Adder] = None,
      variable_source: Optional[core.VariableSource] = None,
  ) -> core.Actor:
    """Builds an actor instance.

    Args:
      policy_network: policy network instance; expected to be a callable that
        maps observations to actions.
      adder: determines how data is recorded (e.g. added to replay).
      variable_source: source supplying the parameters the actor needs.

    Returns:
      The constructed actor.
    """

  @abc.abstractmethod
  def make_learner(
      self,
      networks,
      dataset: Iterator[reverb.ReplaySample],
      replay_client: Optional[reverb.Client] = None,
      counter: Optional[counting.Counter] = None,
      # TODO(mwhoffman): consider eliminating logger and log return values.
      # TODO(mwhoffman): eliminate checkpoint and move it outside.
      logger: Optional[loggers.Logger] = None,
      checkpoint: bool = False,
  ) -> core.Learner:
    """Builds a learner instance.

    Args:
      networks: structure describing the networks the learner requires; its
        contents may be specific to the particular learner.
      dataset: iterator over samples drawn from replay.
      replay_client: client for communicating with replay, e.g. so that
        priorities can be updated.
      counter: Counter used to record counts (learner steps, actor steps,
        etc.) distributed throughout the agent.
      logger: Logger object used for logging metadata.
      checkpoint: whether the learner checkpoints itself.

    Returns:
      The constructed learner.
    """

0 commit comments

Comments
 (0)