6 changes: 6 additions & 0 deletions python/trainer_config.yaml
@@ -80,6 +80,12 @@ GoalieBrain:
 
 Ball3DBrain:
     normalize: true
+    batch_size: 1200
+    buffer_size: 12000
+    summary_freq: 1000
+    time_horizon: 1000
+    gamma: 0.995
+    beta: 0.001
 
 BouncerBrain:
     normalize: true
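Note: the new Ball3DBrain block overrides the PPO defaults for that brain only. As a sanity check on the numbers, here is a minimal, self-contained sketch of reading the same keys with PyYAML (the string below mirrors the diff; it is an illustration, not repo code):

import yaml  # PyYAML, assumed available

snippet = """
Ball3DBrain:
    normalize: true
    batch_size: 1200
    buffer_size: 12000
    summary_freq: 1000
    time_horizon: 1000
    gamma: 0.995
    beta: 0.001
"""
params = yaml.safe_load(snippet)["Ball3DBrain"]
# buffer_size is a clean multiple of batch_size: 12000 / 1200 = 10
# minibatches per update pass.
assert params["buffer_size"] % params["batch_size"] == 0
print(params["gamma"], params["beta"])  # 0.995 0.001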
9 changes: 5 additions & 4 deletions python/unitytrainers/bc/trainer.py
@@ -229,13 +229,14 @@ def add_experiences(self, curr_info: AllBrainInfo, next_info: AllBrainInfo, take
                 self.episode_steps[agent_id] = 0
             self.episode_steps[agent_id] += 1
 
-    def process_experiences(self, info: AllBrainInfo):
+    def process_experiences(self, current_info: AllBrainInfo, next_info: AllBrainInfo):
         """
         Checks agent histories for processing condition, and processes them as necessary.
         Processing involves calculating value and advantage targets for model updating step.
-        :param info: Current AllBrainInfo
+        :param current_info: Current AllBrainInfo
+        :param next_info: Next AllBrainInfo
         """
-        info_teacher = info[self.brain_to_imitate]
+        info_teacher = next_info[self.brain_to_imitate]
         for l in range(len(info_teacher.agents)):
             if ((info_teacher.local_done[l] or
                  len(self.training_buffer[info_teacher.agents[l]]['actions']) > self.trainer_parameters[
@@ -246,7 +247,7 @@ def process_experiences(self, info: AllBrainInfo):
                                                           training_length=self.sequence_length)
                 self.training_buffer[agent_id].reset_agent()
 
-        info_student = info[self.brain_name]
+        info_student = next_info[self.brain_name]
         for l in range(len(info_student.agents)):
             if info_student.local_done[l]:
                 agent_id = info_student.agents[l]
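Note: the BC trainer now reads teacher and student BrainInfo from next_info, because local_done flags describe the step that just completed, i.e. they live on the post-step info. A tiny self-contained sketch of that convention (hypothetical helper, not repo code):

def split_episodes(post_step_dones):
    """Group step indices into episodes using post-step done flags."""
    episodes, current = [], []
    for t, done in enumerate(post_step_dones):
        current.append(t)
        if done:  # this step ended an episode
            episodes.append(current)
            current = []
    if current:
        episodes.append(current)
    return episodes

print(split_episodes([False, False, True, False, True]))  # [[0, 1, 2], [3, 4]]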
29 changes: 17 additions & 12 deletions python/unitytrainers/ppo/trainer.py
@@ -260,34 +260,39 @@ def add_experiences(self, curr_all_info: AllBrainInfo, next_all_info: AllBrainIn
                 self.episode_steps[agent_id] = 0
             self.episode_steps[agent_id] += 1
 
-
-    def process_experiences(self, all_info: AllBrainInfo):
+    def process_experiences(self, current_info: AllBrainInfo, new_info: AllBrainInfo):
         """
         Checks agent histories for processing condition, and processes them as necessary.
         Processing involves calculating value and advantage targets for model updating step.
-        :param all_info: Dictionary of all current brains and corresponding BrainInfo.
+        :param current_info: Dictionary of all current brains and corresponding BrainInfo.
+        :param new_info: Dictionary of all next brains and corresponding BrainInfo.
         """
 
-        info = all_info[self.brain_name]
+        info = new_info[self.brain_name]
+        last_info = current_info[self.brain_name]
         for l in range(len(info.agents)):
             agent_actions = self.training_buffer[info.agents[l]]['actions']
             if ((info.local_done[l] or len(agent_actions) > self.trainer_parameters['time_horizon'])
                     and len(agent_actions) > 0):
                 if info.local_done[l] and not info.max_reached[l]:
                     value_next = 0.0
                 else:
-                    feed_dict = {self.model.batch_size: len(info.vector_observations), self.model.sequence_length: 1}
+                    if info.max_reached[l]:
+                        bootstrapping_info = last_info
+                    else:
+                        bootstrapping_info = info
+                    feed_dict = {self.model.batch_size: len(bootstrapping_info.vector_observations), self.model.sequence_length: 1}
                     if self.use_observations:
-                        for i in range(len(info.visual_observations)):
-                            feed_dict[self.model.visual_in[i]] = info.visual_observations[i]
+                        for i in range(len(bootstrapping_info.visual_observations)):
+                            feed_dict[self.model.visual_in[i]] = bootstrapping_info.visual_observations[i]
                     if self.use_states:
-                        feed_dict[self.model.vector_in] = info.vector_observations
+                        feed_dict[self.model.vector_in] = bootstrapping_info.vector_observations
                     if self.use_recurrent:
-                        if info.memories.shape[1] == 0:
-                            info.memories = np.zeros((len(info.vector_observations), self.m_size))
-                        feed_dict[self.model.memory_in] = info.memories
+                        if bootstrapping_info.memories.shape[1] == 0:
+                            bootstrapping_info.memories = np.zeros((len(bootstrapping_info.vector_observations), self.m_size))
+                        feed_dict[self.model.memory_in] = bootstrapping_info.memories
                     if not self.is_continuous and self.use_recurrent:
-                        feed_dict[self.model.prev_action] = np.reshape(info.previous_vector_actions, [-1])
+                        feed_dict[self.model.prev_action] = np.reshape(bootstrapping_info.previous_vector_actions, [-1])
                     value_next = self.sess.run(self.model.value, feed_dict)[l]
                 agent_id = info.agents[l]
 
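Note: this is the substantive fix. When an episode ends because max_step was reached, the environment has already reset, so the post-step info holds the first observation of the next episode; bootstrapping the value target from it would mix two episodes. The diff instead evaluates the value network on the last pre-reset observation (bootstrapping_info = last_info). A minimal numpy sketch of the resulting return targets (illustrative only; the trainer's actual advantage computation lives elsewhere):

import numpy as np

def discounted_returns(rewards, gamma, value_next):
    """Discounted return targets, bootstrapped from value_next.

    value_next is 0.0 for a true terminal state, and V(s_last) when the
    episode was cut off by time_horizon or max_step, so the unobserved
    tail of the return is estimated rather than dropped.
    """
    returns = np.zeros_like(rewards, dtype=np.float64)
    running = value_next
    for t in reversed(range(len(rewards))):
        running = rewards[t] + gamma * running
        returns[t] = running
    return returns

# True terminal: no bootstrap.
print(discounted_returns(np.array([0.1, 0.1, 1.0]), 0.995, value_next=0.0))
# Cut off at max_step: bootstrap from the value of the last observation.
print(discounted_returns(np.array([0.1, 0.1, 0.1]), 0.995, value_next=2.5))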
5 changes: 3 additions & 2 deletions python/unitytrainers/trainer.py
@@ -103,11 +103,12 @@ def add_experiences(self, curr_info: AllBrainInfo, next_info: AllBrainInfo, take
         """
         raise UnityTrainerException("The add_experiences method was not implemented.")
 
-    def process_experiences(self, info: AllBrainInfo):
+    def process_experiences(self, current_info: AllBrainInfo, next_info: AllBrainInfo):
         """
         Checks agent histories for processing condition, and processes them as necessary.
         Processing involves calculating value and advantage targets for model updating step.
-        :param info: Dictionary of all current brains and corresponding BrainInfo.
+        :param current_info: Dictionary of all current-step brains and corresponding BrainInfo.
+        :param next_info: Dictionary of all next-step brains and corresponding BrainInfo.
         """
         raise UnityTrainerException("The process_experiences method was not implemented.")
 
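Note: every Trainer subclass must adopt the two-argument signature. A standalone sketch of the contract (class names here are illustrative, not the repo's):

class TrainerInterface:
    """Standalone mirror of the abstract contract, for illustration only."""
    def process_experiences(self, current_info, next_info):
        raise NotImplementedError("The process_experiences method was not implemented.")

class PrintTrainer(TrainerInterface):
    """Hypothetical subclass showing the two-argument override."""
    def process_experiences(self, current_info, next_info):
        # current_info: pre-step AllBrainInfo; next_info: post-step AllBrainInfo.
        print("brains this step:", sorted(next_info))

PrintTrainer().process_experiences({"Ball3DBrain": None}, {"Ball3DBrain": None})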
8 changes: 3 additions & 5 deletions python/unitytrainers/trainer_controller.py
@@ -250,13 +250,11 @@ def start_learning(self):
 
                 for brain_name, trainer in self.trainers.items():
                     trainer.add_experiences(curr_info, new_info, take_action_outputs[brain_name])
-                curr_info = new_info
-                for brain_name, trainer in self.trainers.items():
-                    trainer.process_experiences(curr_info)
+                    trainer.process_experiences(curr_info, new_info)
                     if trainer.is_ready_update() and self.train_model and trainer.get_step <= trainer.get_max_steps:
                         # Perform gradient descent with experience buffer
                         trainer.update_model()
-                    # Write training statistics to tensorboard.
+                    # Write training statistics to Tensorboard.
                     trainer.write_summary(self.env.curriculum.lesson_number)
                     if self.train_model and trainer.get_step <= trainer.get_max_steps:
                         trainer.increment_step()
@@ -266,7 +264,7 @@
                 if global_step % self.save_freq == 0 and global_step != 0 and self.train_model:
                     # Save Tensorflow model
                     self._save_model(sess, steps=global_step, saver=saver)
-
+                curr_info = new_info
             # Final save Tensorflow model
             if global_step != 0 and self.train_model:
                 self._save_model(sess, steps=global_step, saver=saver)
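Note: moving curr_info = new_info to the end of the iteration is what makes the two-argument process_experiences work: the pre-step observations stay available until every trainer has consumed them. A self-contained sketch of the reordered loop (the stub classes are hypothetical, for illustration only):

class _StubEnv:
    """Hypothetical stand-in for UnityEnvironment."""
    def __init__(self):
        self.t = 0
    def reset(self):
        self.t = 0
        return {"Ball3DBrain": "obs%d" % self.t}
    def step(self):
        self.t += 1
        return {"Ball3DBrain": "obs%d" % self.t}

class _StubTrainer:
    """Hypothetical trainer that just prints what it receives."""
    def add_experiences(self, curr, new, outputs):
        print("add:", curr["Ball3DBrain"], "->", new["Ball3DBrain"])
    def process_experiences(self, curr, new):
        print("process:", curr["Ball3DBrain"], "and", new["Ball3DBrain"])

env = _StubEnv()
trainers = {"Ball3DBrain": _StubTrainer()}
curr_info = env.reset()
for _ in range(2):
    new_info = env.step()
    for name, trainer in trainers.items():
        trainer.add_experiences(curr_info, new_info, outputs=None)
        trainer.process_experiences(curr_info, new_info)
    curr_info = new_info  # advance only after all trainers have seen both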