2 changes: 1 addition & 1 deletion docs/Learning-Environment-Examples.md
@@ -109,7 +109,7 @@ If you would like to contribute environments, please see our
 * -0.0005 for every step.
 * +1.0 if the agent touches the goal.
 * -1.0 if the agent falls off the platform.
-* Brains: One brain with the following observation/action space.
+* Brains: Two brains, each with the following observation/action space.
 * Vector Observation space: (Continuous) 16 variables corresponding to position and velocities of agent, block, and goal, plus the height of the wall.
 * Vector Action space: (Discrete) Size of 74, corresponding to 14 raycasts each detecting 4 possible objects, plus the global position of the agent and whether or not the agent is grounded.
 * Visual Observations: None.
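The documentation change above reflects that the example scene now uses two brains instead of one. As a quick sanity check of what each brain reports, here is a minimal sketch against the unityagents Python API that these trainers build on; the executable name "WallJump" is only an assumed placeholder, not something this PR defines.

from unityagents import UnityEnvironment

# "WallJump" is a hypothetical build name used only for illustration.
env = UnityEnvironment(file_name="WallJump")
for name in env.brain_names:
    brain = env.brains[name]
    # Per the bullets above, each brain should report continuous observations
    # and a discrete action space.
    print(name, brain.vector_observation_space_type, brain.vector_action_space_type)
env.close()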
15 changes: 8 additions & 7 deletions python/unitytrainers/ppo/trainer.py
@@ -75,7 +75,8 @@ def __init__(self, sess, env, brain_name, trainer_parameters, training, seed):
         self.training_buffer = Buffer()
         self.cumulative_rewards = {}
         self.episode_steps = {}
-        self.is_continuous = (env.brains[brain_name].vector_action_space_type == "continuous")
+        self.is_continuous_action = (env.brains[brain_name].vector_action_space_type == "continuous")
+        self.is_continuous_observation = (env.brains[brain_name].vector_observation_space_type == "continuous")
         self.use_observations = (env.brains[brain_name].number_visual_observations > 0)
         self.use_states = (env.brains[brain_name].vector_observation_space_size > 0)
         self.summary_path = trainer_parameters['summary_path']
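For context on the rename above (a sketch, not part of the diff): a brain's action-space type and observation-space type are independent choices, so a single is_continuous flag conflates them. The snippet below illustrates the distinction with a made-up BrainStub standing in for the real brain parameters object.

from collections import namedtuple

# Hypothetical stand-in for the brain parameters the trainer receives.
BrainStub = namedtuple("BrainStub", ["vector_action_space_type", "vector_observation_space_type"])

# A brain like the ones in the docs above: continuous observations, discrete actions.
brain = BrainStub(vector_action_space_type="discrete", vector_observation_space_type="continuous")

is_continuous_action = (brain.vector_action_space_type == "continuous")            # False
is_continuous_observation = (brain.vector_observation_space_type == "continuous")  # True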
@@ -169,7 +170,7 @@ def take_action(self, all_brain_info: AllBrainInfo):
         feed_dict = {self.model.batch_size: len(curr_brain_info.vector_observations), self.model.sequence_length: 1}
         run_list = [self.model.output, self.model.all_probs, self.model.value, self.model.entropy,
                     self.model.learning_rate]
-        if self.is_continuous:
+        if self.is_continuous_action:
             run_list.append(self.model.epsilon)
         elif self.use_recurrent:
             feed_dict[self.model.prev_action] = np.reshape(curr_brain_info.previous_vector_actions, [-1])
@@ -183,7 +184,7 @@ def take_action(self, all_brain_info: AllBrainInfo):
                 curr_brain_info.memories = np.zeros((len(curr_brain_info.agents), self.m_size))
             feed_dict[self.model.memory_in] = curr_brain_info.memories
             run_list += [self.model.memory_out]
-        if (self.is_training and self.brain.vector_observation_space_type == "continuous" and
+        if (self.is_training and self.is_continuous_observation and
                 self.use_states and self.trainer_parameters['normalize']):
             new_mean, new_variance = self.running_average(
                 curr_brain_info.vector_observations, steps, self.model.running_mean, self.model.running_variance)
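The branch above now gates observation normalization on is_continuous_observation rather than on the action space. As a rough illustration of the kind of incremental mean/variance update that self.running_average feeds back into the model (a NumPy sketch under that assumption, not the code in this file):

import numpy as np

def update_running_stats(obs_batch, mean, variance, steps):
    # Fold the batch mean of newly collected continuous observations into the
    # running mean/variance used to normalize the model's vector input.
    batch_mean = obs_batch.mean(axis=0)
    new_mean = mean + (batch_mean - mean) / (steps + 1)
    new_variance = variance + (batch_mean - new_mean) * (batch_mean - mean)
    return new_mean, new_variance

# Example with illustrative sizes: 32 agents, 16-dimensional observations.
mean, var = update_running_stats(np.random.rand(32, 16), np.zeros(16), np.ones(16), steps=100)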
@@ -240,7 +241,7 @@ def add_experiences(self, curr_all_info: AllBrainInfo, next_all_info: AllBrainIn
                         if stored_info.memories.shape[1] == 0:
                             stored_info.memories = np.zeros((len(stored_info.agents), self.m_size))
                         self.training_buffer[agent_id]['memory'].append(stored_info.memories[idx])
-                    if self.is_continuous:
+                    if self.is_continuous_action:
                         epsi = stored_take_action_outputs[self.model.epsilon]
                         self.training_buffer[agent_id]['epsilons'].append(epsi[idx])
                     actions = stored_take_action_outputs[self.model.output]
@@ -286,7 +287,7 @@ def process_experiences(self, all_info: AllBrainInfo):
                         if info.memories.shape[1] == 0:
                             info.memories = np.zeros((len(info.vector_observations), self.m_size))
                         feed_dict[self.model.memory_in] = info.memories
-                    if not self.is_continuous and self.use_recurrent:
+                    if not self.is_continuous_action and self.use_recurrent:
                         feed_dict[self.model.prev_action] = np.reshape(info.previous_vector_actions, [-1])
                     value_next = self.sess.run(self.model.value, feed_dict)[l]
                 agent_id = info.agents[l]
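In the hunk above, value_next is queried from the critic so an episode cut off by the time horizon can be bootstrapped rather than treated as terminal. A toy sketch of that idea with made-up rewards and an assumed discount factor (illustration only, not this trainer's exact return computation):

import numpy as np

rewards = np.array([0.1, 0.0, 0.2])   # rewards collected before the cutoff (made up)
gamma = 0.99                          # assumed discount factor
value_next = 0.5                      # critic's value estimate at the cutoff

# Discounted returns that bootstrap from the critic instead of assuming the episode ended.
returns = np.zeros_like(rewards)
running = value_next
for t in reversed(range(len(rewards))):
    running = rewards[t] + gamma * running
    returns[t] = running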
@@ -358,7 +359,7 @@ def update_model(self):
                              self.model.advantage: np.array(_buffer['advantages'][start:end]).reshape([-1, 1]),
                              self.model.all_old_probs: np.array(
                                  _buffer['action_probs'][start:end]).reshape([-1, self.brain.vector_action_space_size])}
-                if self.is_continuous:
+                if self.is_continuous_action:
                     feed_dict[self.model.epsilon] = np.array(
                         _buffer['epsilons'][start:end]).reshape([-1, self.brain.vector_action_space_size])
                 else:
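The epsilons fed above exist because, for continuous actions, the policy in this codebase appears to sample by reparameterization, roughly action = mean + std * epsilon, so replaying the stored epsilons during the update reproduces the actions that were actually taken. A small sketch of that relationship (an assumption about the model, not code from this file):

import numpy as np

mu = np.array([0.1, -0.3])      # policy mean for one agent (made up)
sigma = np.array([0.5, 0.2])    # policy standard deviation (made up)
epsilon = np.random.randn(2)    # noise sampled, and stored, at collection time

action = mu + sigma * epsilon   # re-feeding the same epsilon reproduces this action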
@@ -368,7 +369,7 @@
                         feed_dict[self.model.prev_action] = np.array(
                             _buffer['prev_action'][start:end]).reshape([-1])
                 if self.use_states:
-                    if self.brain.vector_observation_space_type == "continuous":
+                    if self.is_continuous_observation:
                         feed_dict[self.model.vector_in] = np.array(
                             _buffer['states'][start:end]).reshape(
                             [-1, self.brain.vector_observation_space_size * self.brain.num_stacked_vector_observations])
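The reshape above assumes each buffered state row already holds num_stacked_vector_observations consecutive observations concatenated, so the flat width fed to vector_in is the per-step observation size times the stack count. With illustrative numbers:

import numpy as np

obs_size = 16       # per-step vector observation size (illustrative)
num_stacked = 3     # stacked observations per state (illustrative)
states = np.zeros((4, obs_size * num_stacked))   # shape vector_in would receive
print(states.shape)                              # (4, 48)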