@@ -75,7 +75,8 @@ def __init__(self, sess, env, brain_name, trainer_parameters, training, seed):
         self.training_buffer = Buffer()
         self.cumulative_rewards = {}
         self.episode_steps = {}
-        self.is_continuous = (env.brains[brain_name].vector_action_space_type == "continuous")
+        self.is_continuous_action = (env.brains[brain_name].vector_action_space_type == "continuous")
+        self.is_continuous_observation = (env.brains[brain_name].vector_observation_space_type == "continuous")
         self.use_observations = (env.brains[brain_name].number_visual_observations > 0)
         self.use_states = (env.brains[brain_name].vector_observation_space_size > 0)
         self.summary_path = trainer_parameters['summary_path']
@@ -169,7 +170,7 @@ def take_action(self, all_brain_info: AllBrainInfo):
         feed_dict = {self.model.batch_size: len(curr_brain_info.vector_observations), self.model.sequence_length: 1}
         run_list = [self.model.output, self.model.all_probs, self.model.value, self.model.entropy,
                     self.model.learning_rate]
-        if self.is_continuous:
+        if self.is_continuous_action:
             run_list.append(self.model.epsilon)
         elif self.use_recurrent:
             feed_dict[self.model.prev_action] = np.reshape(curr_brain_info.previous_vector_actions, [-1])
@@ -183,7 +184,7 @@ def take_action(self, all_brain_info: AllBrainInfo):
                 curr_brain_info.memories = np.zeros((len(curr_brain_info.agents), self.m_size))
             feed_dict[self.model.memory_in] = curr_brain_info.memories
             run_list += [self.model.memory_out]
-        if (self.is_training and self.brain.vector_observation_space_type == "continuous" and
+        if (self.is_training and self.is_continuous_observation and
                 self.use_states and self.trainer_parameters['normalize']):
             new_mean, new_variance = self.running_average(
                 curr_brain_info.vector_observations, steps, self.model.running_mean, self.model.running_variance)
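
The normalization branch above calls self.running_average, whose body is not part of this diff. As a rough sketch only, assuming it performs the usual incremental (Welford-style) mean/variance update over a batch of vector observations, it might look like the code below; the names and the NumPy-only framing are illustrative, since in the trainer the running statistics live in TensorFlow variables evaluated via sess.run.

import numpy as np

def running_average(data, steps, running_mean, running_variance):
    # Hypothetical stand-in: `running_mean`/`running_variance` are the current
    # per-feature estimates and `steps` is the number of samples seen so far.
    mean = np.asarray(running_mean, dtype=np.float64)
    var = np.asarray(running_variance, dtype=np.float64)
    current_x = np.mean(data, axis=0)                     # batch mean per feature
    new_mean = mean + (current_x - mean) / (steps + 1)    # incremental mean update
    new_variance = var + (current_x - new_mean) * (current_x - mean)  # variance accumulator
    return new_mean, new_variance
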
@@ -240,7 +241,7 @@ def add_experiences(self, curr_all_info: AllBrainInfo, next_all_info: AllBrainIn
                     if stored_info.memories.shape[1] == 0:
                         stored_info.memories = np.zeros((len(stored_info.agents), self.m_size))
                     self.training_buffer[agent_id]['memory'].append(stored_info.memories[idx])
-                if self.is_continuous:
+                if self.is_continuous_action:
                     epsi = stored_take_action_outputs[self.model.epsilon]
                     self.training_buffer[agent_id]['epsilons'].append(epsi[idx])
                 actions = stored_take_action_outputs[self.model.output]
@@ -286,7 +287,7 @@ def process_experiences(self, all_info: AllBrainInfo):
                         if info.memories.shape[1] == 0:
                             info.memories = np.zeros((len(info.vector_observations), self.m_size))
                         feed_dict[self.model.memory_in] = info.memories
-                    if not self.is_continuous and self.use_recurrent:
+                    if not self.is_continuous_action and self.use_recurrent:
                         feed_dict[self.model.prev_action] = np.reshape(info.previous_vector_actions, [-1])
                     value_next = self.sess.run(self.model.value, feed_dict)[l]
                 agent_id = info.agents[l]
@@ -358,7 +359,7 @@ def update_model(self):
                              self.model.advantage: np.array(_buffer['advantages'][start:end]).reshape([-1, 1]),
                              self.model.all_old_probs: np.array(
                                  _buffer['action_probs'][start:end]).reshape([-1, self.brain.vector_action_space_size])}
-                if self.is_continuous:
+                if self.is_continuous_action:
                     feed_dict[self.model.epsilon] = np.array(
                         _buffer['epsilons'][start:end]).reshape([-1, self.brain.vector_action_space_size])
                 else:
@@ -368,7 +369,7 @@ def update_model(self):
                         feed_dict[self.model.prev_action] = np.array(
                             _buffer['prev_action'][start:end]).reshape([-1])
                 if self.use_states:
-                    if self.brain.vector_observation_space_type == "continuous":
+                    if self.is_continuous_observation:
                         feed_dict[self.model.vector_in] = np.array(
                             _buffer['states'][start:end]).reshape(
                             [-1, self.brain.vector_observation_space_size * self.brain.num_stacked_vector_observations])
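
For context on the epsilons fed back in update_model above: with a continuous action space the policy is typically sampled by reparameterization, and storing the noise lets the update recompute exactly the actions that were taken. The sketch below is only under that assumption; the model definition is outside this diff, and all names in it are illustrative.

import numpy as np

# Assumed sampling scheme: action = mu + sigma * epsilon, with epsilon ~ N(0, 1).
rng = np.random.default_rng(0)
mu = np.zeros(2)                      # hypothetical policy mean for one state
sigma = np.ones(2)                    # hypothetical policy standard deviation

epsilon = rng.standard_normal(2)      # the value the trainer stores in _buffer['epsilons']
action_collected = mu + sigma * epsilon

# Replaying the stored epsilon during the PPO update reproduces the same action
# (and hence its probability under the current policy) deterministically.
action_replayed = mu + sigma * epsilon
assert np.allclose(action_collected, action_replayed)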