Commit 6d4c9a4

Author: Marwan Mattar

Merge pull request #520 from Unity-Technologies/feature-trainer-ppo-is-continuous
Feature trainer ppo is continuous

2 parents 2f80572 + 9b42a11

python/unitytrainers/ppo/trainer.py

Lines changed: 8 additions & 7 deletions
@@ -75,7 +75,8 @@ def __init__(self, sess, env, brain_name, trainer_parameters, training, seed):
         self.training_buffer = Buffer()
         self.cumulative_rewards = {}
         self.episode_steps = {}
-        self.is_continuous = (env.brains[brain_name].vector_action_space_type == "continuous")
+        self.is_continuous_action = (env.brains[brain_name].vector_action_space_type == "continuous")
+        self.is_continuous_observation = (env.brains[brain_name].vector_observation_space_type == "continuous")
         self.use_observations = (env.brains[brain_name].number_visual_observations > 0)
         self.use_states = (env.brains[brain_name].vector_observation_space_size > 0)
         self.summary_path = trainer_parameters['summary_path']
@@ -169,7 +170,7 @@ def take_action(self, all_brain_info: AllBrainInfo):
         feed_dict = {self.model.batch_size: len(curr_brain_info.vector_observations), self.model.sequence_length: 1}
         run_list = [self.model.output, self.model.all_probs, self.model.value, self.model.entropy,
                     self.model.learning_rate]
-        if self.is_continuous:
+        if self.is_continuous_action:
             run_list.append(self.model.epsilon)
         elif self.use_recurrent:
             feed_dict[self.model.prev_action] = np.reshape(curr_brain_info.previous_vector_actions, [-1])
@@ -183,7 +184,7 @@ def take_action(self, all_brain_info: AllBrainInfo):
                 curr_brain_info.memories = np.zeros((len(curr_brain_info.agents), self.m_size))
             feed_dict[self.model.memory_in] = curr_brain_info.memories
             run_list += [self.model.memory_out]
-        if (self.is_training and self.brain.vector_observation_space_type == "continuous" and
+        if (self.is_training and self.is_continuous_observation and
                 self.use_states and self.trainer_parameters['normalize']):
             new_mean, new_variance = self.running_average(
                 curr_brain_info.vector_observations, steps, self.model.running_mean, self.model.running_variance)
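This hunk also switches the observation-normalization gate to the cached flag: running mean and variance are only refreshed when training with continuous vector observations and normalize enabled. The trainer's running_average helper operates on the model's TensorFlow variables (self.model.running_mean, self.model.running_variance); the NumPy version below is only an illustrative assumption of the kind of incremental update involved, not the commit's code:

import numpy as np

def running_average(obs_batch, steps, mean, variance):
    # Welford-style incremental mean/variance over a batch of vector
    # observations. Illustrative only; the real helper updates
    # TensorFlow variables held by the model.
    m2 = variance * steps  # recover the sum of squared deviations
    n = steps
    for x in obs_batch:
        n += 1
        delta = x - mean
        mean = mean + delta / n
        m2 = m2 + delta * (x - mean)
    return mean, m2 / max(n, 1)

# Example: update the statistics with a fresh batch, then normalize it.
mean, var = np.zeros(3), np.zeros(3)
batch = np.random.randn(32, 3) * 5.0 + 2.0
mean, var = running_average(batch, steps=0, mean=mean, variance=var)
normalized = (batch - mean) / np.sqrt(var + 1e-8)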
@@ -240,7 +241,7 @@ def add_experiences(self, curr_all_info: AllBrainInfo, next_all_info: AllBrainIn
                         if stored_info.memories.shape[1] == 0:
                             stored_info.memories = np.zeros((len(stored_info.agents), self.m_size))
                         self.training_buffer[agent_id]['memory'].append(stored_info.memories[idx])
-                    if self.is_continuous:
+                    if self.is_continuous_action:
                         epsi = stored_take_action_outputs[self.model.epsilon]
                         self.training_buffer[agent_id]['epsilons'].append(epsi[idx])
                     actions = stored_take_action_outputs[self.model.output]
@@ -286,7 +287,7 @@ def process_experiences(self, all_info: AllBrainInfo):
                     if info.memories.shape[1] == 0:
                         info.memories = np.zeros((len(info.vector_observations), self.m_size))
                     feed_dict[self.model.memory_in] = info.memories
-                if not self.is_continuous and self.use_recurrent:
+                if not self.is_continuous_action and self.use_recurrent:
                     feed_dict[self.model.prev_action] = np.reshape(info.previous_vector_actions, [-1])
                 value_next = self.sess.run(self.model.value, feed_dict)[l]
                 agent_id = info.agents[l]
@@ -358,7 +359,7 @@ def update_model(self):
                     self.model.advantage: np.array(_buffer['advantages'][start:end]).reshape([-1, 1]),
                     self.model.all_old_probs: np.array(
                         _buffer['action_probs'][start:end]).reshape([-1, self.brain.vector_action_space_size])}
-                if self.is_continuous:
+                if self.is_continuous_action:
                     feed_dict[self.model.epsilon] = np.array(
                         _buffer['epsilons'][start:end]).reshape([-1, self.brain.vector_action_space_size])
                 else:
@@ -368,7 +369,7 @@ def update_model(self):
                         feed_dict[self.model.prev_action] = np.array(
                             _buffer['prev_action'][start:end]).reshape([-1])
                 if self.use_states:
-                    if self.brain.vector_observation_space_type == "continuous":
+                    if self.is_continuous_observation:
                         feed_dict[self.model.vector_in] = np.array(
                             _buffer['states'][start:end]).reshape(
                                 [-1, self.brain.vector_observation_space_size * self.brain.num_stacked_vector_observations])
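The remaining hunks are the mechanical half of the split: every branch that concerns actions now reads is_continuous_action (epsilon replay for continuous policies, prev_action feeds for discrete recurrent ones), and update_model reuses the cached is_continuous_observation instead of re-checking the brain's space type string. A condensed sketch of the resulting action-side branching, with the model's TensorFlow placeholders replaced by plain dictionary keys and all names (_buffer, start, end, action_size) as hypothetical stand-ins, not the trainer's literal code:

import numpy as np

def action_feeds(_buffer, start, end, action_size,
                 is_continuous_action, use_recurrent):
    # Condensed, hypothetical view of the update_model branching
    # after the rename.
    feed_dict = {}
    if is_continuous_action:
        # Continuous policy: replay the stored Gaussian noise samples
        # so the old actions can be reconstructed exactly.
        feed_dict['epsilon'] = np.array(
            _buffer['epsilons'][start:end]).reshape([-1, action_size])
    elif use_recurrent:
        # Discrete recurrent policy: feed the previous actions instead.
        feed_dict['prev_action'] = np.array(
            _buffer['prev_action'][start:end]).reshape([-1])
    return feed_dict

After this change, a brain with discrete actions and continuous observations correctly takes the epsilon-free action path while still receiving observation normalization, the combination the old single flag conflated.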
