@@ -60,6 +60,52 @@ def __init__(
60
60
self ._counter = counter or counting .Counter ()
61
61
self ._logger = logger or loggers .make_default_logger (label )
62
62
63
def run_episode(self) -> loggers.LoggingData:
    """Run a single episode and report its statistics.

    The episode loop alternates between querying the actor for an action
    given the latest observation and stepping the environment with that
    action, repeating until the environment emits a terminal timestep.

    Returns:
      An instance of `loggers.LoggingData` holding the episode length,
      the episode return, the measured steps-per-second, and the latest
      counts from the counter.
    """
    episode_start = time.time()
    step_count = 0
    total_reward = 0

    # Start a fresh episode and give the actor its first observation.
    timestep = self._environment.reset()
    self._actor.observe_first(timestep)

    # Interact until the environment signals the end of the episode.
    while not timestep.last():
        # Ask the policy for an action, then advance the environment.
        action = self._actor.select_action(timestep.observation)
        timestep = self._environment.step(action)

        # Let the actor record the transition and update itself.
        self._actor.observe(action, next_timestep=timestep)
        self._actor.update()

        step_count += 1
        total_reward += timestep.reward

    # Fold this episode into the global counters.
    counts = self._counter.increment(episodes=1, steps=step_count)

    # Assemble the per-episode statistics and merge in the counts.
    elapsed = time.time() - episode_start
    result = {
        'episode_length': step_count,
        'episode_return': total_reward,
        'steps_per_second': step_count / elapsed,
    }
    result.update(counts)
    return result
63
109
def run (self ,
64
110
num_episodes : Optional [int ] = None ,
65
111
num_steps : Optional [int ] = None ):
@@ -69,12 +115,10 @@ def run(self,
69
115
least `num_steps` steps (the last episode is always run until completion,
70
116
so the total number of steps may be slightly more than `num_steps`).
71
117
At least one of these two arguments has to be None.
72
- Each episode is itself a loop which interacts first with the environment to
73
- get an observation and then give that observation to the agent in order to
74
- retrieve an action. Upon termination of an episode a new episode will be
75
- started. If the number of episodes and the number of steps are not given
76
- then this will interact with the environment infinitely.
77
- If both num_episodes and num_steps are `None` (default), runs without limit.
118
+
119
+ Upon termination of an episode a new episode will be started. If the number
120
+ of episodes and the number of steps are not given then this will interact
121
+ with the environment infinitely.
78
122
79
123
Args:
80
124
num_episodes: number of episodes to run the loop for.
@@ -93,43 +137,9 @@ def should_terminate(episode_count: int, step_count: int) -> bool:
93
137
94
138
episode_count , step_count = 0 , 0
95
139
while not should_terminate (episode_count , step_count ):
96
- # Reset any counts and start the environment.
97
- start_time = time .time ()
98
- episode_steps = 0
99
- episode_return = 0
100
- timestep = self ._environment .reset ()
101
-
102
- # Make the first observation.
103
- self ._actor .observe_first (timestep )
104
-
105
- # Run an episode.
106
- while not timestep .last ():
107
- # Generate an action from the agent's policy and step the environment.
108
- action = self ._actor .select_action (timestep .observation )
109
- timestep = self ._environment .step (action )
110
-
111
- # Have the agent observe the timestep and let the actor update itself.
112
- self ._actor .observe (action , next_timestep = timestep )
113
- self ._actor .update ()
114
-
115
- # Book-keeping.
116
- episode_steps += 1
117
- episode_return += timestep .reward
118
-
119
- # Record counts.
120
- counts = self ._counter .increment (episodes = 1 , steps = episode_steps )
121
-
122
- # Collect the results and combine with counts.
123
- steps_per_second = episode_steps / (time .time () - start_time )
124
- result = {
125
- 'episode_length' : episode_steps ,
126
- 'episode_return' : episode_return ,
127
- 'steps_per_second' : steps_per_second ,
128
- }
129
- result .update (counts )
140
+ result = self .run_episode ()
130
141
episode_count += 1
131
- step_count += episode_steps
132
-
142
+ step_count += result ['episode_length' ]
133
143
# Log the given results.
134
144
self ._logger .write (result )
135
145
0 commit comments