From e9d34cc075d13c444682b5d4f8c4040919fb9f1f Mon Sep 17 00:00:00 2001 From: Ervin Teng Date: Wed, 22 Apr 2020 17:23:11 -0700 Subject: [PATCH 01/19] Move artifacts to results/ directory --- ml-agents/mlagents/trainers/learn.py | 16 ++++++-------- .../mlagents/trainers/policy/tf_policy.py | 2 +- ml-agents/mlagents/trainers/ppo/trainer.py | 3 +-- ml-agents/mlagents/trainers/sac/trainer.py | 7 +++--- .../mlagents/trainers/trainer/trainer.py | 3 +-- .../mlagents/trainers/trainer_controller.py | 18 +++++++-------- ml-agents/mlagents/trainers/trainer_util.py | 22 +++++++------------ 7 files changed, 29 insertions(+), 42 deletions(-) diff --git a/ml-agents/mlagents/trainers/learn.py b/ml-agents/mlagents/trainers/learn.py index a5cb4677af..f0989d2ff4 100644 --- a/ml-agents/mlagents/trainers/learn.py +++ b/ml-agents/mlagents/trainers/learn.py @@ -320,9 +320,9 @@ def run_training(run_seed: int, options: RunOptions) -> None: :param run_options: Command line arguments for training. """ with hierarchical_timer("run_training.setup"): - model_path = f"./models/{options.run_id}" + write_path = f"./results/{options.run_id}" maybe_init_path = ( - f"./models/{options.initialize_from}" if options.initialize_from else None + f"./results/{options.initialize_from}" if options.initialize_from else None ) summaries_dir = "./summaries" port = options.base_port @@ -330,16 +330,16 @@ def run_training(run_seed: int, options: RunOptions) -> None: # Configure CSV, Tensorboard Writers and StatsReporter # We assume reward and episode length are needed in the CSV. csv_writer = CSVWriter( - summaries_dir, + write_path, required_fields=[ "Environment/Cumulative Reward", "Environment/Episode Length", ], ) handle_existing_directories( - model_path, summaries_dir, options.resume, options.force, maybe_init_path + write_path, options.resume, options.force, maybe_init_path ) - tb_writer = TensorboardWriter(summaries_dir, clear_past_data=not options.resume) + tb_writer = TensorboardWriter(write_path, clear_past_data=not options.resume) gauge_write = GaugeWriter() console_writer = ConsoleWriter() StatsReporter.add_writer(tb_writer) @@ -368,9 +368,8 @@ def run_training(run_seed: int, options: RunOptions) -> None: ) trainer_factory = TrainerFactory( options.trainer_config, - summaries_dir, options.run_id, - model_path, + write_path, options.keep_checkpoints, not options.inference, options.resume, @@ -382,8 +381,7 @@ def run_training(run_seed: int, options: RunOptions) -> None: # Create controller and begin training. 
tc = TrainerController( trainer_factory, - model_path, - summaries_dir, + write_path, options.run_id, options.save_freq, maybe_meta_curriculum, diff --git a/ml-agents/mlagents/trainers/policy/tf_policy.py b/ml-agents/mlagents/trainers/policy/tf_policy.py index 7832d60cd8..0f242140b5 100644 --- a/ml-agents/mlagents/trainers/policy/tf_policy.py +++ b/ml-agents/mlagents/trainers/policy/tf_policy.py @@ -62,7 +62,7 @@ def __init__(self, seed, brain, trainer_parameters, load=False): self.use_continuous_act = brain.vector_action_space_type == "continuous" if self.use_continuous_act: self.num_branches = self.brain.vector_action_space_size[0] - self.model_path = trainer_parameters["model_path"] + self.model_path = trainer_parameters["output_path"] self.initialize_path = trainer_parameters.get("init_path", None) self.keep_checkpoints = trainer_parameters.get("keep_checkpoints", 5) self.graph = tf.Graph() diff --git a/ml-agents/mlagents/trainers/ppo/trainer.py b/ml-agents/mlagents/trainers/ppo/trainer.py index 81ed6ffab7..7b6e6f12d1 100644 --- a/ml-agents/mlagents/trainers/ppo/trainer.py +++ b/ml-agents/mlagents/trainers/ppo/trainer.py @@ -62,9 +62,8 @@ def __init__( "sequence_length", "summary_freq", "use_recurrent", - "summary_path", "memory_size", - "model_path", + "output_path", "reward_signals", ] self._check_param_keys() diff --git a/ml-agents/mlagents/trainers/sac/trainer.py b/ml-agents/mlagents/trainers/sac/trainer.py index 7c6fcfd777..ea2aeca2d4 100644 --- a/ml-agents/mlagents/trainers/sac/trainer.py +++ b/ml-agents/mlagents/trainers/sac/trainer.py @@ -72,9 +72,8 @@ def __init__( "summary_freq", "tau", "use_recurrent", - "summary_path", "memory_size", - "model_path", + "output_path", "reward_signals", ] @@ -136,7 +135,7 @@ def save_replay_buffer(self) -> None: Save the training buffer's update buffer to a pickle file. """ filename = os.path.join( - self.trainer_parameters["model_path"], "last_replay_buffer.hdf5" + self.trainer_parameters["output_path"], "last_replay_buffer.hdf5" ) logger.info("Saving Experience Replay Buffer to {}".format(filename)) with open(filename, "wb") as file_object: @@ -147,7 +146,7 @@ def load_replay_buffer(self) -> None: Loads the last saved replay buffer from a file. 
""" filename = os.path.join( - self.trainer_parameters["model_path"], "last_replay_buffer.hdf5" + self.trainer_parameters["output_path"], "last_replay_buffer.hdf5" ) logger.info("Loading Experience Replay Buffer from {}".format(filename)) with open(filename, "rb+") as file_object: diff --git a/ml-agents/mlagents/trainers/trainer/trainer.py b/ml-agents/mlagents/trainers/trainer/trainer.py index 3128aee081..735b44b8e7 100644 --- a/ml-agents/mlagents/trainers/trainer/trainer.py +++ b/ml-agents/mlagents/trainers/trainer/trainer.py @@ -42,9 +42,8 @@ def __init__( self.brain_name = brain_name self.run_id = run_id self.trainer_parameters = trainer_parameters - self.summary_path = trainer_parameters["summary_path"] self._threaded = trainer_parameters.get("threaded", True) - self._stats_reporter = StatsReporter(self.summary_path) + self._stats_reporter = StatsReporter(brain_name) self.is_training = training self._reward_buffer: Deque[float] = deque(maxlen=reward_buff_cap) self.policy_queues: List[AgentManagerQueue[Policy]] = [] diff --git a/ml-agents/mlagents/trainers/trainer_controller.py b/ml-agents/mlagents/trainers/trainer_controller.py index 950d03cdf1..5780e16cc1 100644 --- a/ml-agents/mlagents/trainers/trainer_controller.py +++ b/ml-agents/mlagents/trainers/trainer_controller.py @@ -30,8 +30,7 @@ class TrainerController(object): def __init__( self, trainer_factory: TrainerFactory, - model_path: str, - summaries_dir: str, + output_path: str, run_id: str, save_freq: int, meta_curriculum: Optional[MetaCurriculum], @@ -41,7 +40,7 @@ def __init__( resampling_interval: Optional[int], ): """ - :param model_path: Path to save the model. + :param output_path: Path to save the model. :param summaries_dir: Folder to save training summaries. :param run_id: The sub-directory name for model and summary statistics :param save_freq: Frequency at which to save model @@ -55,8 +54,7 @@ def __init__( self.trainers: Dict[str, Trainer] = {} self.brain_name_to_identifier: Dict[str, Set] = defaultdict(set) self.trainer_factory = trainer_factory - self.model_path = model_path - self.summaries_dir = summaries_dir + self.output_path = output_path self.logger = get_logger(__name__) self.run_id = run_id self.save_freq = save_freq @@ -119,16 +117,16 @@ def _export_graph(self): self.trainers[brain_name].export_model(name_behavior_id) @staticmethod - def _create_model_path(model_path): + def _create_output_path(output_path): try: - if not os.path.exists(model_path): - os.makedirs(model_path) + if not os.path.exists(output_path): + os.makedirs(output_path) except Exception: raise UnityEnvironmentException( "The folder {} containing the " "generated model could not be " "accessed. 
Please make sure the " - "permissions are set correctly.".format(model_path) + "permissions are set correctly.".format(output_path) ) @timed @@ -207,7 +205,7 @@ def _create_trainers_and_managers( @timed def start_learning(self, env_manager: EnvManager) -> None: - self._create_model_path(self.model_path) + self._create_output_path(self.output_path) tf.reset_default_graph() global_step = 0 last_brain_behavior_ids: Set[str] = set() diff --git a/ml-agents/mlagents/trainers/trainer_util.py b/ml-agents/mlagents/trainers/trainer_util.py index 3fad49a368..7f1aebbe7d 100644 --- a/ml-agents/mlagents/trainers/trainer_util.py +++ b/ml-agents/mlagents/trainers/trainer_util.py @@ -20,9 +20,8 @@ class TrainerFactory: def __init__( self, trainer_config: Any, - summaries_dir: str, run_id: str, - model_path: str, + output_path: str, keep_checkpoints: int, train_model: bool, load_model: bool, @@ -32,9 +31,8 @@ def __init__( multi_gpu: bool = False, ): self.trainer_config = trainer_config - self.summaries_dir = summaries_dir self.run_id = run_id - self.model_path = model_path + self.output_path = output_path self.init_path = init_path self.keep_checkpoints = keep_checkpoints self.train_model = train_model @@ -48,9 +46,8 @@ def generate(self, brain_name: str) -> Trainer: return initialize_trainer( self.trainer_config, brain_name, - self.summaries_dir, self.run_id, - self.model_path, + self.output_path, self.keep_checkpoints, self.train_model, self.load_model, @@ -65,9 +62,8 @@ def generate(self, brain_name: str) -> Trainer: def initialize_trainer( trainer_config: Any, brain_name: str, - summaries_dir: str, run_id: str, - model_path: str, + output_path: str, keep_checkpoints: int, train_model: bool, load_model: bool, @@ -83,9 +79,8 @@ def initialize_trainer( :param trainer_config: Original trainer configuration loaded from YAML :param brain_name: Name of the brain to be associated with trainer - :param summaries_dir: Directory to store trainer summary statistics :param run_id: Run ID to associate with this training run - :param model_path: Path to save the model + :param output_path: Path to save the model and save summary statistics :param keep_checkpoints: How many model checkpoints to keep :param train_model: Whether to train the model (vs. run inference) :param load_model: Whether to load the model or randomly initialize @@ -102,9 +97,8 @@ def initialize_trainer( ) trainer_parameters = trainer_config.get("default", {}).copy() - trainer_parameters["summary_path"] = str(run_id) + "_" + brain_name - trainer_parameters["model_path"] = "{basedir}/{name}".format( - basedir=model_path, name=brain_name + trainer_parameters["output_path"] = "{basedir}/{name}".format( + basedir=output_path, name=brain_name ) if init_path is not None: trainer_parameters["init_path"] = "{basedir}/{name}".format( @@ -209,7 +203,7 @@ def _load_config(fp: TextIO) -> Dict[str, Any]: def handle_existing_directories( - model_path: str, summary_path: str, resume: bool, force: bool, init_path: str = None + model_path: str, resume: bool, force: bool, init_path: str = None ) -> None: """ Validates that if the run_id model exists, we do not overwrite it unless --force is specified. 
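
Before the test updates in the next patch, it helps to see the whole of `handle_existing_directories`, since only its new signature appears in the hunk above. The sketch below is reconstructed from the fragments shown elsewhere in this series and from the expectations in the [PATCH 02/19] tests; it is not a verbatim copy of `trainer_util.py`, and the exception messages are illustrative:

```python
import os


class UnityTrainerException(Exception):
    """Stand-in for mlagents.trainers.exception.UnityTrainerException."""


def handle_existing_directories(
    output_path: str, resume: bool, force: bool, init_path: str = None
) -> None:
    # An existing run may only be continued with --resume or overwritten with
    # --force; conversely, --resume on a run that never wrote output is an error.
    if os.path.isdir(output_path):
        if not resume and not force:
            raise UnityTrainerException(
                "Previous data from this run ID was found. "
                "Use --resume to continue it, or --force to overwrite it."
            )
    elif resume:
        raise UnityTrainerException(
            "Previous data from this run ID was not found. Nothing to resume."
        )
    # --initialize-from must point at a run that has already saved models.
    if init_path is not None and not os.path.isdir(init_path):
        raise UnityTrainerException(
            f"Could not initialize from {init_path}. "
            "Make sure models were saved under that run ID first."
        )
```

After this patch the check covers a single `output_path` rather than separate model and summary directories, which is why the tests that follow drop the old `summary_path` argument.
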
From 6867d388d64db866315fc2081aee4d94c95d4efb Mon Sep 17 00:00:00 2001 From: Ervin Teng Date: Wed, 22 Apr 2020 17:53:15 -0700 Subject: [PATCH 02/19] Fix tests --- .../tests/test_barracuda_converter.py | 6 +- .../mlagents/trainers/tests/test_bcmodule.py | 2 +- .../mlagents/trainers/tests/test_ghost.py | 3 +- .../mlagents/trainers/tests/test_learn.py | 7 +-- .../mlagents/trainers/tests/test_nn_policy.py | 5 +- .../mlagents/trainers/tests/test_policy.py | 2 +- ml-agents/mlagents/trainers/tests/test_ppo.py | 3 +- .../trainers/tests/test_reward_signals.py | 2 +- .../trainers/tests/test_rl_trainer.py | 2 +- ml-agents/mlagents/trainers/tests/test_sac.py | 11 ++-- .../mlagents/trainers/tests/test_simple_rl.py | 6 +- .../trainers/tests/test_trainer_controller.py | 6 +- .../trainers/tests/test_trainer_util.py | 60 +++++++------------ 13 files changed, 41 insertions(+), 74 deletions(-) diff --git a/ml-agents/mlagents/trainers/tests/test_barracuda_converter.py b/ml-agents/mlagents/trainers/tests/test_barracuda_converter.py index c300180e5c..0b4d4d49f0 100644 --- a/ml-agents/mlagents/trainers/tests/test_barracuda_converter.py +++ b/ml-agents/mlagents/trainers/tests/test_barracuda_converter.py @@ -55,8 +55,7 @@ def dummy_config(): memory_size: 8 curiosity_strength: 0.0 curiosity_enc_size: 1 - summary_path: test - model_path: test + output_path: test reward_signals: extrinsic: strength: 1.0 @@ -70,8 +69,7 @@ def dummy_config(): @pytest.mark.parametrize("rnn", [True, False], ids=["rnn", "no_rnn"]) def test_policy_conversion(dummy_config, tmpdir, rnn, visual, discrete): tf.reset_default_graph() - dummy_config["summary_path"] = str(tmpdir) - dummy_config["model_path"] = os.path.join(tmpdir, "test") + dummy_config["output_path"] = os.path.join(tmpdir, "test") policy = create_policy_mock( dummy_config, use_rnn=rnn, use_discrete=discrete, use_visual=visual ) diff --git a/ml-agents/mlagents/trainers/tests/test_bcmodule.py b/ml-agents/mlagents/trainers/tests/test_bcmodule.py index d21c506e51..60d05f96fd 100644 --- a/ml-agents/mlagents/trainers/tests/test_bcmodule.py +++ b/ml-agents/mlagents/trainers/tests/test_bcmodule.py @@ -43,7 +43,7 @@ def ppo_dummy_config(): def create_bc_module(mock_brain, trainer_config, use_rnn, demo_file, tanhresample): # model_path = env.external_brain_names[0] - trainer_config["model_path"] = "testpath" + trainer_config["output_path"] = "testpath" trainer_config["keep_checkpoints"] = 3 trainer_config["use_recurrent"] = use_rnn trainer_config["behavioral_cloning"]["demo_path"] = ( diff --git a/ml-agents/mlagents/trainers/tests/test_ghost.py b/ml-agents/mlagents/trainers/tests/test_ghost.py index f740f35850..000be0da9a 100644 --- a/ml-agents/mlagents/trainers/tests/test_ghost.py +++ b/ml-agents/mlagents/trainers/tests/test_ghost.py @@ -38,8 +38,7 @@ def dummy_config(): memory_size: 8 curiosity_strength: 0.0 curiosity_enc_size: 1 - summary_path: test - model_path: test + output_path: test reward_signals: extrinsic: strength: 1.0 diff --git a/ml-agents/mlagents/trainers/tests/test_learn.py b/ml-agents/mlagents/trainers/tests/test_learn.py index bf8bcbcf60..885f26892e 100644 --- a/ml-agents/mlagents/trainers/tests/test_learn.py +++ b/ml-agents/mlagents/trainers/tests/test_learn.py @@ -42,8 +42,7 @@ def test_run_training( learn.run_training(0, basic_options()) mock_init.assert_called_once_with( trainer_factory_mock.return_value, - "./models/ppo", - "./summaries", + "./results/ppo", "ppo", 50000, None, @@ -52,9 +51,7 @@ def test_run_training( sampler_manager_mock.return_value, None, ) - 
handle_dir_mock.assert_called_once_with( - "./models/ppo", "./summaries", False, False, None - ) + handle_dir_mock.assert_called_once_with("./results/ppo", False, False, None) StatsReporter.writers.clear() # make sure there aren't any writers as added by learn.py diff --git a/ml-agents/mlagents/trainers/tests/test_nn_policy.py b/ml-agents/mlagents/trainers/tests/test_nn_policy.py index 1f669e3b45..7a918bb46c 100644 --- a/ml-agents/mlagents/trainers/tests/test_nn_policy.py +++ b/ml-agents/mlagents/trainers/tests/test_nn_policy.py @@ -39,8 +39,7 @@ def dummy_config(): memory_size: 8 curiosity_strength: 0.0 curiosity_enc_size: 1 - summary_path: test - model_path: test + output_path: test reward_signals: extrinsic: strength: 1.0 @@ -83,7 +82,7 @@ def test_load_save(dummy_config, tmp_path): path1 = os.path.join(tmp_path, "runid1") path2 = os.path.join(tmp_path, "runid2") trainer_params = dummy_config - trainer_params["model_path"] = path1 + trainer_params["output_path"] = path1 policy = create_policy_mock(trainer_params) policy.initialize_or_load() policy.save_model(2000) diff --git a/ml-agents/mlagents/trainers/tests/test_policy.py b/ml-agents/mlagents/trainers/tests/test_policy.py index c179a00bdd..497cc60db4 100644 --- a/ml-agents/mlagents/trainers/tests/test_policy.py +++ b/ml-agents/mlagents/trainers/tests/test_policy.py @@ -14,7 +14,7 @@ def basic_mock_brain(): def basic_params(): - return {"use_recurrent": False, "model_path": "my/path"} + return {"use_recurrent": False, "output_path": "my/path"} class FakePolicy(TFPolicy): diff --git a/ml-agents/mlagents/trainers/tests/test_ppo.py b/ml-agents/mlagents/trainers/tests/test_ppo.py index 803fbd353b..162edb911b 100644 --- a/ml-agents/mlagents/trainers/tests/test_ppo.py +++ b/ml-agents/mlagents/trainers/tests/test_ppo.py @@ -45,8 +45,7 @@ def dummy_config(): memory_size: 10 curiosity_strength: 0.0 curiosity_enc_size: 1 - summary_path: test - model_path: test + output_path: test reward_signals: extrinsic: strength: 1.0 diff --git a/ml-agents/mlagents/trainers/tests/test_reward_signals.py b/ml-agents/mlagents/trainers/tests/test_reward_signals.py index 048765e196..e6acba95df 100644 --- a/ml-agents/mlagents/trainers/tests/test_reward_signals.py +++ b/ml-agents/mlagents/trainers/tests/test_reward_signals.py @@ -105,7 +105,7 @@ def create_optimizer_mock( ) trainer_parameters = trainer_config model_path = "testpath" - trainer_parameters["model_path"] = model_path + trainer_parameters["output_path"] = model_path trainer_parameters["keep_checkpoints"] = 3 trainer_parameters["reward_signals"].update(reward_signal_config) trainer_parameters["use_recurrent"] = use_rnn diff --git a/ml-agents/mlagents/trainers/tests/test_rl_trainer.py b/ml-agents/mlagents/trainers/tests/test_rl_trainer.py index 5484976743..9ee983e331 100644 --- a/ml-agents/mlagents/trainers/tests/test_rl_trainer.py +++ b/ml-agents/mlagents/trainers/tests/test_rl_trainer.py @@ -10,7 +10,7 @@ def dummy_config(): return yaml.safe_load( """ - summary_path: "test/" + output_path: "test/" summary_freq: 1000 max_steps: 100 reward_signals: diff --git a/ml-agents/mlagents/trainers/tests/test_sac.py b/ml-agents/mlagents/trainers/tests/test_sac.py index 2f35a8c0bd..614788ca73 100644 --- a/ml-agents/mlagents/trainers/tests/test_sac.py +++ b/ml-agents/mlagents/trainers/tests/test_sac.py @@ -65,7 +65,7 @@ def create_sac_optimizer_mock(dummy_config, use_rnn, use_discrete, use_visual): trainer_parameters = dummy_config model_path = "testmodel" - trainer_parameters["model_path"] = model_path + 
trainer_parameters["output_path"] = model_path trainer_parameters["keep_checkpoints"] = 3 trainer_parameters["use_recurrent"] = use_rnn policy = NNPolicy( @@ -127,8 +127,7 @@ def test_sac_save_load_buffer(tmpdir, dummy_config): discrete_action_space=DISCRETE_ACTION_SPACE, ) trainer_params = dummy_config - trainer_params["summary_path"] = str(tmpdir) - trainer_params["model_path"] = str(tmpdir) + trainer_params["output_path"] = str(tmpdir) trainer_params["save_replay_buffer"] = True trainer = SACTrainer(mock_brain.brain_name, 1, trainer_params, True, False, 0, 0) policy = trainer.create_policy(mock_brain.brain_name, mock_brain) @@ -155,8 +154,7 @@ def test_add_get_policy(sac_optimizer, dummy_config): mock_optimizer.reward_signals = {} sac_optimizer.return_value = mock_optimizer - dummy_config["summary_path"] = "./summaries/test_trainer_summary" - dummy_config["model_path"] = "./models/test_trainer_models/TestModel" + dummy_config["output_path"] = "./results/test_trainer_models/TestModel" trainer = SACTrainer(brain_params, 0, dummy_config, True, False, 0, "0") policy = mock.Mock(spec=NNPolicy) policy.get_current_step.return_value = 2000 @@ -178,8 +176,7 @@ def test_advance(dummy_config): brain_params = make_brain_parameters( discrete_action=False, visual_inputs=0, vec_obs_size=6 ) - dummy_config["summary_path"] = "./summaries/test_trainer_summary" - dummy_config["model_path"] = "./models/test_trainer_models/TestModel" + dummy_config["output_path"] = "./results/test_trainer_models/TestModel" dummy_config["steps_per_update"] = 20 trainer = SACTrainer(brain_params, 0, dummy_config, True, False, 0, "0") policy = trainer.create_policy(brain_params.brain_name, brain_params) diff --git a/ml-agents/mlagents/trainers/tests/test_simple_rl.py b/ml-agents/mlagents/trainers/tests/test_simple_rl.py index 61d658640d..a41f50c63c 100644 --- a/ml-agents/mlagents/trainers/tests/test_simple_rl.py +++ b/ml-agents/mlagents/trainers/tests/test_simple_rl.py @@ -145,9 +145,8 @@ def _check_environment_trains( env_manager = SimpleEnvManager(env, FloatPropertiesChannel()) trainer_factory = TrainerFactory( trainer_config=trainer_config, - summaries_dir=dir, run_id=run_id, - model_path=dir, + output_path=dir, keep_checkpoints=1, train_model=True, load_model=False, @@ -158,8 +157,7 @@ def _check_environment_trains( tc = TrainerController( trainer_factory=trainer_factory, - summaries_dir=dir, - model_path=dir, + output_path=dir, run_id=run_id, meta_curriculum=meta_curriculum, train=True, diff --git a/ml-agents/mlagents/trainers/tests/test_trainer_controller.py b/ml-agents/mlagents/trainers/tests/test_trainer_controller.py index bb253d4860..ba202ec83f 100644 --- a/ml-agents/mlagents/trainers/tests/test_trainer_controller.py +++ b/ml-agents/mlagents/trainers/tests/test_trainer_controller.py @@ -10,8 +10,7 @@ def basic_trainer_controller(): return TrainerController( trainer_factory=None, - model_path="test_model_path", - summaries_dir="test_summaries_dir", + output_path="test_model_path", run_id="test_run_id", save_freq=100, meta_curriculum=None, @@ -28,8 +27,7 @@ def test_initialization_seed(numpy_random_seed, tensorflow_set_seed): seed = 27 TrainerController( trainer_factory=None, - model_path="", - summaries_dir="", + output_path="", run_id="1", save_freq=1, meta_curriculum=None, diff --git a/ml-agents/mlagents/trainers/tests/test_trainer_util.py b/ml-agents/mlagents/trainers/tests/test_trainer_util.py index 677fc8087e..48649ccfc9 100644 --- a/ml-agents/mlagents/trainers/tests/test_trainer_util.py +++ 
b/ml-agents/mlagents/trainers/tests/test_trainer_util.py @@ -86,9 +86,8 @@ def dummy_bad_config(): def test_initialize_trainer_parameters_override_defaults( BrainParametersMock, dummy_config_with_override ): - summaries_dir = "test_dir" run_id = "testrun" - model_path = "model_dir" + output_path = "model_dir" keep_checkpoints = 1 train_model = True load_model = False @@ -97,8 +96,7 @@ def test_initialize_trainer_parameters_override_defaults( base_config = dummy_config_with_override expected_config = base_config["default"] - expected_config["summary_path"] = f"{run_id}_testbrain" - expected_config["model_path"] = model_path + "/testbrain" + expected_config["output_path"] = output_path + "/testbrain" expected_config["keep_checkpoints"] = keep_checkpoints # Override value from specific brain config @@ -122,9 +120,8 @@ def mock_constructor( with patch.object(PPOTrainer, "__init__", mock_constructor): trainer_factory = trainer_util.TrainerFactory( trainer_config=base_config, - summaries_dir=summaries_dir, run_id=run_id, - model_path=model_path, + output_path=output_path, keep_checkpoints=keep_checkpoints, train_model=train_model, load_model=load_model, @@ -144,9 +141,8 @@ def test_initialize_ppo_trainer(BrainParametersMock, dummy_config): brain_params_mock = BrainParametersMock() BrainParametersMock.return_value.brain_name = "testbrain" external_brains = {"testbrain": BrainParametersMock()} - summaries_dir = "test_dir" run_id = "testrun" - model_path = "model_dir" + output_path = "results_dir" keep_checkpoints = 1 train_model = True load_model = False @@ -155,8 +151,7 @@ def test_initialize_ppo_trainer(BrainParametersMock, dummy_config): base_config = dummy_config expected_config = base_config["default"] - expected_config["summary_path"] = f"{run_id}_testbrain" - expected_config["model_path"] = model_path + "/testbrain" + expected_config["output_path"] = output_path + "/testbrain" expected_config["keep_checkpoints"] = keep_checkpoints def mock_constructor( @@ -173,9 +168,8 @@ def mock_constructor( with patch.object(PPOTrainer, "__init__", mock_constructor): trainer_factory = trainer_util.TrainerFactory( trainer_config=base_config, - summaries_dir=summaries_dir, run_id=run_id, - model_path=model_path, + output_path=output_path, keep_checkpoints=keep_checkpoints, train_model=train_model, load_model=load_model, @@ -192,9 +186,8 @@ def mock_constructor( def test_initialize_invalid_trainer_raises_exception( BrainParametersMock, dummy_bad_config ): - summaries_dir = "test_dir" run_id = "testrun" - model_path = "model_dir" + output_path = "results_dir" keep_checkpoints = 1 train_model = True load_model = False @@ -206,9 +199,8 @@ def test_initialize_invalid_trainer_raises_exception( with pytest.raises(TrainerConfigError): trainer_factory = trainer_util.TrainerFactory( trainer_config=bad_config, - summaries_dir=summaries_dir, run_id=run_id, - model_path=model_path, + output_path=output_path, keep_checkpoints=keep_checkpoints, train_model=train_model, load_model=load_model, @@ -223,9 +215,8 @@ def test_initialize_invalid_trainer_raises_exception( with pytest.raises(TrainerConfigError): trainer_factory = trainer_util.TrainerFactory( trainer_config=bad_config, - summaries_dir=summaries_dir, run_id=run_id, - model_path=model_path, + output_path=output_path, keep_checkpoints=keep_checkpoints, train_model=train_model, load_model=load_model, @@ -240,9 +231,8 @@ def test_initialize_invalid_trainer_raises_exception( with pytest.raises(UnityTrainerException): trainer_factory = trainer_util.TrainerFactory( 
trainer_config=bad_config, - summaries_dir=summaries_dir, run_id=run_id, - model_path=model_path, + output_path=output_path, keep_checkpoints=keep_checkpoints, train_model=train_model, load_model=load_model, @@ -270,9 +260,8 @@ def test_handles_no_default_section(dummy_config): trainer_factory = trainer_util.TrainerFactory( trainer_config=no_default_config, - summaries_dir="test_dir", run_id="testrun", - model_path="model_dir", + output_path="output_path", keep_checkpoints=1, train_model=True, load_model=False, @@ -299,9 +288,8 @@ def test_raise_if_no_config_for_brain(dummy_config): trainer_factory = trainer_util.TrainerFactory( trainer_config=bad_config, - summaries_dir="test_dir", run_id="testrun", - model_path="model_dir", + output_path="output_path", keep_checkpoints=1, train_model=True, load_model=False, @@ -339,33 +327,27 @@ def test_load_config_invalid_yaml(): def test_existing_directories(tmp_path): - model_path = os.path.join(tmp_path, "runid") - # Unused summary path - summary_path = os.path.join(tmp_path, "runid") + output_path = os.path.join(tmp_path, "runid") # Test fresh new unused path - should do nothing. - trainer_util.handle_existing_directories(model_path, summary_path, False, False) + trainer_util.handle_existing_directories(output_path, False, False) # Test resume with fresh path - should throw an exception. with pytest.raises(UnityTrainerException): - trainer_util.handle_existing_directories(model_path, summary_path, True, False) + trainer_util.handle_existing_directories(output_path, True, False) # make a directory - os.mkdir(model_path) + os.mkdir(output_path) # Test try to train w.o. force, should complain with pytest.raises(UnityTrainerException): - trainer_util.handle_existing_directories(model_path, summary_path, False, False) + trainer_util.handle_existing_directories(output_path, False, False) # Test try to train w/ resume - should work - trainer_util.handle_existing_directories(model_path, summary_path, True, False) + trainer_util.handle_existing_directories(output_path, True, False) # Test try to train w/ force - should work - trainer_util.handle_existing_directories(model_path, summary_path, False, True) + trainer_util.handle_existing_directories(output_path, False, True) # Test initialize option init_path = os.path.join(tmp_path, "runid2") with pytest.raises(UnityTrainerException): - trainer_util.handle_existing_directories( - model_path, summary_path, False, True, init_path - ) + trainer_util.handle_existing_directories(output_path, False, True, init_path) os.mkdir(init_path) # Should pass since the directory exists now. 
- trainer_util.handle_existing_directories( - model_path, summary_path, False, True, init_path - ) + trainer_util.handle_existing_directories(output_path, False, True, init_path) From 187a28492393cc3cd4c1e8ae0776d21cc92656f9 Mon Sep 17 00:00:00 2001 From: Ervin Teng Date: Wed, 22 Apr 2020 18:18:27 -0700 Subject: [PATCH 03/19] Fix more tests --- ml-agents/mlagents/trainers/tests/test_ghost.py | 6 ++---- ml-agents/mlagents/trainers/tests/test_nn_policy.py | 3 +-- ml-agents/mlagents/trainers/tests/test_ppo.py | 6 ++---- ml-agents/mlagents/trainers/tests/test_sac.py | 3 +-- 4 files changed, 6 insertions(+), 12 deletions(-) diff --git a/ml-agents/mlagents/trainers/tests/test_ghost.py b/ml-agents/mlagents/trainers/tests/test_ghost.py index 000be0da9a..433988467e 100644 --- a/ml-agents/mlagents/trainers/tests/test_ghost.py +++ b/ml-agents/mlagents/trainers/tests/test_ghost.py @@ -116,8 +116,7 @@ def test_process_trajectory(dummy_config): vector_action_descriptions=[], vector_action_space_type=0, ) - dummy_config["summary_path"] = "./summaries/test_trainer_summary" - dummy_config["model_path"] = "./models/test_trainer_models/TestModel" + dummy_config["output_path"] = "./results/test_trainer_models/TestModel" ppo_trainer = PPOTrainer(brain_name, 0, dummy_config, True, False, 0, "0") controller = GhostController(100) trainer = GhostTrainer( @@ -189,8 +188,7 @@ def test_publish_queue(dummy_config): vector_action_descriptions=[], vector_action_space_type=0, ) - dummy_config["summary_path"] = "./summaries/test_trainer_summary" - dummy_config["model_path"] = "./models/test_trainer_models/TestModel" + dummy_config["output_path"] = "./results/test_trainer_models/TestModel" ppo_trainer = PPOTrainer(brain_name, 0, dummy_config, True, False, 0, "0") controller = GhostController(100) trainer = GhostTrainer( diff --git a/ml-agents/mlagents/trainers/tests/test_nn_policy.py b/ml-agents/mlagents/trainers/tests/test_nn_policy.py index 7a918bb46c..17b15231bd 100644 --- a/ml-agents/mlagents/trainers/tests/test_nn_policy.py +++ b/ml-agents/mlagents/trainers/tests/test_nn_policy.py @@ -143,8 +143,7 @@ def test_normalization(dummy_config): vector_action_descriptions=[], vector_action_space_type=0, ) - dummy_config["summary_path"] = "./summaries/test_trainer_summary" - dummy_config["model_path"] = "./models/test_trainer_models/TestModel" + dummy_config["output_path"] = "./results/test_trainer_models/TestModel" time_horizon = 6 trajectory = make_fake_trajectory( diff --git a/ml-agents/mlagents/trainers/tests/test_ppo.py b/ml-agents/mlagents/trainers/tests/test_ppo.py index 162edb911b..6c546c26c5 100644 --- a/ml-agents/mlagents/trainers/tests/test_ppo.py +++ b/ml-agents/mlagents/trainers/tests/test_ppo.py @@ -310,8 +310,7 @@ def test_process_trajectory(dummy_config): vector_action_descriptions=[], vector_action_space_type=0, ) - dummy_config["summary_path"] = "./summaries/test_trainer_summary" - dummy_config["model_path"] = "./models/test_trainer_models/TestModel" + dummy_config["output_path"] = "./results/test_trainer_models/TestModel" trainer = PPOTrainer(brain_params, 0, dummy_config, True, False, 0, "0") policy = trainer.create_policy(brain_params.brain_name, brain_params) trainer.add_policy(brain_params.brain_name, policy) @@ -369,8 +368,7 @@ def test_add_get_policy(ppo_optimizer, dummy_config): mock_optimizer.reward_signals = {} ppo_optimizer.return_value = mock_optimizer - dummy_config["summary_path"] = "./summaries/test_trainer_summary" - dummy_config["model_path"] = "./models/test_trainer_models/TestModel" + 
dummy_config["output_path"] = "./results/test_trainer_models/TestModel" trainer = PPOTrainer(brain_params, 0, dummy_config, True, False, 0, "0") policy = mock.Mock(spec=NNPolicy) policy.get_current_step.return_value = 2000 diff --git a/ml-agents/mlagents/trainers/tests/test_sac.py b/ml-agents/mlagents/trainers/tests/test_sac.py index 614788ca73..66756f8640 100644 --- a/ml-agents/mlagents/trainers/tests/test_sac.py +++ b/ml-agents/mlagents/trainers/tests/test_sac.py @@ -255,8 +255,7 @@ def test_bad_config(dummy_config): dummy_config["sequence_length"] = 64 dummy_config["batch_size"] = 32 dummy_config["use_recurrent"] = True - dummy_config["summary_path"] = "./summaries/test_trainer_summary" - dummy_config["model_path"] = "./models/test_trainer_models/TestModel" + dummy_config["output_path"] = "./results/test_trainer_models/TestModel" with pytest.raises(UnityTrainerException): _ = SACTrainer(brain_params, 0, dummy_config, True, False, 0, "0") From 1a7bb81cc3df500827519557ca45e4253337a7af Mon Sep 17 00:00:00 2001 From: Ervin Teng Date: Wed, 22 Apr 2020 18:18:44 -0700 Subject: [PATCH 04/19] Move location of timers json as well --- ml-agents/mlagents/trainers/learn.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/ml-agents/mlagents/trainers/learn.py b/ml-agents/mlagents/trainers/learn.py index f0989d2ff4..59695426f8 100644 --- a/ml-agents/mlagents/trainers/learn.py +++ b/ml-agents/mlagents/trainers/learn.py @@ -320,11 +320,13 @@ def run_training(run_seed: int, options: RunOptions) -> None: :param run_options: Command line arguments for training. """ with hierarchical_timer("run_training.setup"): - write_path = f"./results/{options.run_id}" + base_path = "results" + write_path = os.path.join(base_path, options.run_id) maybe_init_path = ( - f"./results/{options.initialize_from}" if options.initialize_from else None + os.path.join(base_path, options.run_id) if options.initialize_from else None ) - summaries_dir = "./summaries" + run_logs_dir = os.path.join(write_path, "run_logs") + os.makedirs(run_logs_dir, exist_ok=True) port = options.base_port # Configure CSV, Tensorboard Writers and StatsReporter @@ -396,7 +398,7 @@ def run_training(run_seed: int, options: RunOptions) -> None: tc.start_learning(env_manager) finally: env_manager.close() - write_timing_tree(summaries_dir, options.run_id) + write_timing_tree(run_logs_dir, options.run_id) def write_timing_tree(summaries_dir: str, run_id: str) -> None: From 8a719979b3370d72e31dc5a332a622d5ac0228b9 Mon Sep 17 00:00:00 2001 From: Ervin Teng Date: Wed, 22 Apr 2020 18:23:09 -0700 Subject: [PATCH 05/19] Write timers to timer.json --- ml-agents/mlagents/trainers/learn.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ml-agents/mlagents/trainers/learn.py b/ml-agents/mlagents/trainers/learn.py index 59695426f8..2885d9cd43 100644 --- a/ml-agents/mlagents/trainers/learn.py +++ b/ml-agents/mlagents/trainers/learn.py @@ -398,11 +398,11 @@ def run_training(run_seed: int, options: RunOptions) -> None: tc.start_learning(env_manager) finally: env_manager.close() - write_timing_tree(run_logs_dir, options.run_id) + write_timing_tree(run_logs_dir) -def write_timing_tree(summaries_dir: str, run_id: str) -> None: - timing_path = f"{summaries_dir}/{run_id}_timers.json" +def write_timing_tree(summaries_dir: str) -> None: + timing_path = f"{summaries_dir}/timers.json" try: with open(timing_path, "w") as f: json.dump(get_timer_tree(), f, indent=4) From e0998946420feb9c060c99086d9086dbfc2e7714 Mon Sep 17 00:00:00 2001 
From: Ervin Teng Date: Wed, 22 Apr 2020 18:55:04 -0700 Subject: [PATCH 06/19] Write out run_options --- ml-agents/mlagents/trainers/learn.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/ml-agents/mlagents/trainers/learn.py b/ml-agents/mlagents/trainers/learn.py index 2885d9cd43..59b8fcef04 100644 --- a/ml-agents/mlagents/trainers/learn.py +++ b/ml-agents/mlagents/trainers/learn.py @@ -1,5 +1,6 @@ # # Unity ML-Agents Toolkit import argparse +import yaml import os import numpy as np @@ -398,11 +399,23 @@ def run_training(run_seed: int, options: RunOptions) -> None: tc.start_learning(env_manager) finally: env_manager.close() + write_run_options(base_path, options) write_timing_tree(run_logs_dir) -def write_timing_tree(summaries_dir: str) -> None: - timing_path = f"{summaries_dir}/timers.json" +def write_run_options(output_dir: str, run_options: RunOptions) -> None: + run_options_path = os.path.join(output_dir, "configuration.yaml") + try: + with open(run_options_path, "w") as f: + yaml.dump(dict(run_options._asdict()), f, sort_keys=False) + except FileNotFoundError: + logger.warning( + f"Unable to save configuration to {run_options_path}. Make sure the directory exists" + ) + + +def write_timing_tree(output_dir: str) -> None: + timing_path = f"{output_dir}/timers.json" try: with open(timing_path, "w") as f: json.dump(get_timer_tree(), f, indent=4) From 3de64da60a95040028e75cfd1ecaa7500870440d Mon Sep 17 00:00:00 2001 From: Ervin Teng Date: Wed, 22 Apr 2020 20:30:02 -0700 Subject: [PATCH 07/19] Upgrade YAML library fix incorrect configuration.yaml location Improve tests --- ml-agents/mlagents/trainers/learn.py | 2 +- ml-agents/mlagents/trainers/tests/test_learn.py | 13 ++++++++++--- ml-agents/setup.py | 2 +- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/ml-agents/mlagents/trainers/learn.py b/ml-agents/mlagents/trainers/learn.py index 59b8fcef04..e1c237ccbf 100644 --- a/ml-agents/mlagents/trainers/learn.py +++ b/ml-agents/mlagents/trainers/learn.py @@ -399,7 +399,7 @@ def run_training(run_seed: int, options: RunOptions) -> None: tc.start_learning(env_manager) finally: env_manager.close() - write_run_options(base_path, options) + write_run_options(write_path, options) write_timing_tree(run_logs_dir) diff --git a/ml-agents/mlagents/trainers/tests/test_learn.py b/ml-agents/mlagents/trainers/tests/test_learn.py index 885f26892e..0eda10936a 100644 --- a/ml-agents/mlagents/trainers/tests/test_learn.py +++ b/ml-agents/mlagents/trainers/tests/test_learn.py @@ -15,6 +15,8 @@ def basic_options(extra_args=None): return parse_command_line(args) +@patch("mlagents.trainers.learn.write_timing_tree") +@patch("mlagents.trainers.learn.write_run_options") @patch("mlagents.trainers.learn.handle_existing_directories") @patch("mlagents.trainers.learn.TrainerFactory") @patch("mlagents.trainers.learn.SamplerManager") @@ -28,6 +30,8 @@ def test_run_training( sampler_manager_mock, trainer_factory_mock, handle_dir_mock, + write_run_options_mock, + write_timing_tree_mock, ): mock_env = MagicMock() mock_env.external_brain_names = [] @@ -39,10 +43,11 @@ def test_run_training( mock_init = MagicMock(return_value=None) with patch.object(TrainerController, "__init__", mock_init): with patch.object(TrainerController, "start_learning", MagicMock()): - learn.run_training(0, basic_options()) + options = basic_options() + learn.run_training(0, options) mock_init.assert_called_once_with( trainer_factory_mock.return_value, - "./results/ppo", + "results/ppo", "ppo", 50000, None, 
@@ -51,7 +56,9 @@ def test_run_training(
             sampler_manager_mock.return_value,
             None,
         )
-        handle_dir_mock.assert_called_once_with("./results/ppo", False, False, None)
+        handle_dir_mock.assert_called_once_with("results/ppo", False, False, None)
+        write_timing_tree_mock.assert_called_once_with("results/ppo/run_logs")
+        write_run_options_mock.assert_called_once_with("results/ppo", options)
     StatsReporter.writers.clear()  # make sure there aren't any writers as added by learn.py

diff --git a/ml-agents/setup.py b/ml-agents/setup.py
index 2882b3cafe..db5a12c2f0 100644
--- a/ml-agents/setup.py
+++ b/ml-agents/setup.py
@@ -60,7 +60,7 @@ def run(self):
         "numpy>=1.13.3,<2.0",
         "Pillow>=4.2.1",
         "protobuf>=3.6",
-        "pyyaml",
+        "pyyaml>=5.1",
         "tensorflow>=1.7,<2.1",
         'pypiwin32==223;platform_system=="Windows"',
     ],

From 44aa3b58e77d06d57ea6855f6c548d3e05b5f67b Mon Sep 17 00:00:00 2001
From: Ervin Teng
Date: Wed, 22 Apr 2020 20:35:03 -0700
Subject: [PATCH 08/19] Update docs

---
 docs/Getting-Started.md                 | 5 ++---
 docs/Learning-Environment-Executable.md | 5 ++---
 docs/Training-ML-Agents.md              | 7 ++++---
 docs/Training-PPO.md                    | 2 +-
 docs/Training-SAC.md                    | 2 +-
 5 files changed, 10 insertions(+), 11 deletions(-)

diff --git a/docs/Getting-Started.md b/docs/Getting-Started.md
index 5c09a919d4..d1a9730026 100644
--- a/docs/Getting-Started.md
+++ b/docs/Getting-Started.md
@@ -179,12 +179,11 @@ INFO:mlagents_envs:Hyperparameters for the PPO Trainer of brain 3DBallLearning:
         sequence_length: 64
         summary_freq: 1000
         use_recurrent: False
-        summary_path: ./summaries/first3DBallRun
         memory_size: 256
         use_curiosity: False
         curiosity_strength: 0.01
         curiosity_enc_size: 128
-        model_path: ./models/first3DBallRun/3DBallLearning
+        output_path: ./results/first3DBallRun/3DBallLearning
 INFO:mlagents.trainers: first3DBallRun: 3DBallLearning: Step: 1000. Mean Reward: 1.242. Std of Reward: 0.746. Training.
 INFO:mlagents.trainers: first3DBallRun: 3DBallLearning: Step: 2000. Mean Reward: 1.319. Std of Reward: 0.693. Training.
 INFO:mlagents.trainers: first3DBallRun: 3DBallLearning: Step: 3000. Mean Reward: 1.804. Std of Reward: 1.056. Training.
@@ -236,7 +235,7 @@ the same command again, appending the `--resume` flag:
 mlagents-learn config/trainer_config.yaml --run-id=first3DBallRun --resume
 ```

-Your trained model will be at `models/<run-identifier>/<behavior_name>.nn` where
+Your trained model will be at `results/<run-identifier>/<behavior_name>.nn` where
 `<behavior_name>` is the name of the `Behavior Name` of the agents corresponding to
 the model. This file corresponds to your model's latest checkpoint. You can
 now embed this trained model into your Agents by following the steps below,
diff --git a/docs/Learning-Environment-Executable.md b/docs/Learning-Environment-Executable.md
index b78110bcfe..26c7fa6bce 100644
--- a/docs/Learning-Environment-Executable.md
+++ b/docs/Learning-Environment-Executable.md
@@ -152,12 +152,11 @@ INFO:mlagents_envs:Hyperparameters for the PPO Trainer of brain Ball3DLearning:
         sequence_length: 64
         summary_freq: 1000
         use_recurrent: False
-        summary_path: ./summaries/first-run-0
         memory_size: 256
         use_curiosity: False
         curiosity_strength: 0.01
         curiosity_enc_size: 128
-        model_path: ./models/first-run-0/Ball3DLearning
+        output_path: ./results/first-run-0/Ball3DLearning
 INFO:mlagents.trainers: first-run-0: Ball3DLearning: Step: 1000. Mean Reward: 1.242. Std of Reward: 0.746. Training.
 INFO:mlagents.trainers: first-run-0: Ball3DLearning: Step: 2000. Mean Reward: 1.319. Std of Reward: 0.693. Training.
 INFO:mlagents.trainers: first-run-0: Ball3DLearning: Step: 3000. Mean Reward: 1.804. Std of Reward: 1.056. Training.
@@ -171,7 +170,7 @@ INFO:mlagents.trainers: first-run-0: Ball3DLearning: Step: 10000. Mean Reward: 2
 ```

 You can press Ctrl+C to stop the training, and your trained model will be at
-`models/<run-identifier>/<behavior_name>.nn`, which corresponds to your model's
+`results/<run-identifier>/<behavior_name>.nn`, which corresponds to your model's
 latest checkpoint. (**Note:** There is a known bug on Windows that causes the
 saving of the model to fail when you early terminate the training, it's
 recommended to wait until Step has reached the max_steps parameter you set in
diff --git a/docs/Training-ML-Agents.md b/docs/Training-ML-Agents.md
index cf34a60d25..46e6934734 100644
--- a/docs/Training-ML-Agents.md
+++ b/docs/Training-ML-Agents.md
@@ -64,16 +64,17 @@ for a sample execution of the `mlagents-learn` command.
 Regardless of which training methods, configurations or hyperparameters you
 provide, the training process will always generate three artifacts:

-1. Summaries (under the `summaries/` folder): these are training metrics that
+1. Summaries (under the `results/<run-identifier>/` folder):
+   these are training metrics that
    are updated throughout the training process. They are helpful to monitor
    your training performance and may help inform how to update your
    hyperparameter values. See [Using TensorBoard](Using-Tensorboard.md) for
    more details on how to visualize the training metrics.
-1. Models (under the `models/` folder): these contain the model checkpoints that
+1. Models (under the `results/<run-identifier>/` folder): these contain the model checkpoints that
    are updated throughout training and the final model file (`.nn`). This final
    model file is generated once either when training completes or is
    interrupted.
-1. Timers file (also under the `summaries/` folder): this contains aggregated
+1. Timers file (also under the `results/` folder): this contains aggregated
    metrics on your training process, including time spent on specific code
    blocks. See [Profiling in Python](Profiling-Python.md) for more information
    on the timers generated.
diff --git a/docs/Training-PPO.md b/docs/Training-PPO.md
index 8f328bc4b6..70a6bf2b9b 100644
--- a/docs/Training-PPO.md
+++ b/docs/Training-PPO.md
@@ -294,7 +294,7 @@ Typical Range: Approximately equal to PPO's `buffer_size`
 `init_path` can be specified to initialize your model from a previous run before starting.
 Note that the prior run should have used the same trainer configurations as the current run,
 and have been saved with the same version of ML-Agents. You should provide the full path
-to the folder where the checkpoints were saved, e.g. `./models/{run-id}/{behavior_name}`.
+to the folder where the checkpoints were saved, e.g. `./results/{run-id}/{behavior_name}`.

 This option is provided in case you want to initialize different behaviors from different runs;
 in most cases, it is sufficient to use the `--initialize-from` CLI parameter to initialize
diff --git a/docs/Training-SAC.md b/docs/Training-SAC.md
index 54d1e921fe..d001504afe 100644
--- a/docs/Training-SAC.md
+++ b/docs/Training-SAC.md
@@ -295,7 +295,7 @@ Typical Range (Discrete): `32` - `512`
 `init_path` can be specified to initialize your model from a previous run before starting.
 Note that the prior run should have used the same trainer configurations as the current run,
 and have been saved with the same version of ML-Agents. You should provide the full path
-to the folder where the checkpoints were saved, e.g. `./models/{run-id}/{behavior_name}`.
+to the folder where the checkpoints were saved, e.g. `./results/{run-id}/{behavior_name}`.
This option is provided in case you want to initialize different behaviors from different runs; in most cases, it is sufficient to use the `--initialize-from` CLI parameter to initialize From d5836be1ee98d8b990178892aa339586da4342f7 Mon Sep 17 00:00:00 2001 From: Ervin Teng Date: Wed, 22 Apr 2020 20:35:16 -0700 Subject: [PATCH 09/19] Update Yamato --- ml-agents/tests/yamato/training_int_tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ml-agents/tests/yamato/training_int_tests.py b/ml-agents/tests/yamato/training_int_tests.py index 5900e83920..e67a5efb4d 100644 --- a/ml-agents/tests/yamato/training_int_tests.py +++ b/ml-agents/tests/yamato/training_int_tests.py @@ -21,7 +21,7 @@ def run_training(python_version, csharp_version): print( f"Running training with python={python_version or latest} and c#={csharp_version or latest}" ) - nn_file_expected = f"./models/{run_id}/3DBall.nn" + nn_file_expected = f"./results/{run_id}/3DBall.nn" if os.path.exists(nn_file_expected): # Should never happen - make sure nothing leftover from an old test. print("Artifacts from previous build found!") From c20e603306aab316f139700ebbc059895471023e Mon Sep 17 00:00:00 2001 From: Ervin Teng Date: Wed, 22 Apr 2020 20:35:32 -0700 Subject: [PATCH 10/19] Update .gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index f5a9a71633..e0ff60ec85 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ # Tensorflow Model Info /models /summaries +/results # Training environments /envs From 6ab3ac84089e73c62383fbd357dc77186663fe11 Mon Sep 17 00:00:00 2001 From: Ervin Teng Date: Wed, 22 Apr 2020 20:36:08 -0700 Subject: [PATCH 11/19] Add comments to gitignore --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index e0ff60ec85..b3aab2b943 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ -# Tensorflow Model Info +# Output Artifacts (Legacy) /models /summaries +# Output Artifacts /results # Training environments From e2ee19848df819a9f9e7ee20cfb2d8aba4375221 Mon Sep 17 00:00:00 2001 From: Ervin Teng Date: Wed, 22 Apr 2020 20:37:53 -0700 Subject: [PATCH 12/19] Update tensorboard docs --- docs/Using-Tensorboard.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/Using-Tensorboard.md b/docs/Using-Tensorboard.md index dd424c20a9..d6297508b5 100644 --- a/docs/Using-Tensorboard.md +++ b/docs/Using-Tensorboard.md @@ -12,7 +12,7 @@ start TensorBoard: 1. Open a terminal or console window: 1. Navigate to the directory where the ML-Agents Toolkit is installed. -1. From the command line run: `tensorboard --logdir=summaries --port=6006` +1. From the command line run: `tensorboard --logdir=results --port=6006` 1. Open a browser window and navigate to [localhost:6006](http://localhost:6006). From c6b019db00991b44d0041c93ad1119c10f536ee5 Mon Sep 17 00:00:00 2001 From: Ervin Teng Date: Wed, 22 Apr 2020 20:39:53 -0700 Subject: [PATCH 13/19] Update changelog and migrating --- com.unity.ml-agents/CHANGELOG.md | 2 ++ docs/Migrating.md | 2 ++ 2 files changed, 4 insertions(+) diff --git a/com.unity.ml-agents/CHANGELOG.md b/com.unity.ml-agents/CHANGELOG.md index 189d0f042b..9588417c1f 100755 --- a/com.unity.ml-agents/CHANGELOG.md +++ b/com.unity.ml-agents/CHANGELOG.md @@ -44,6 +44,8 @@ and this project adheres to `UnityToGymWrapper` and no longer creates the `UnityEnvironment`. 
Instead, the `UnityEnvironment` must be passed as input to the constructor of
   `UnityToGymWrapper`
+- Training artifacts (trained models, summaries) are now found in the `results/`
+  directory. (#3829)

 ### Minor Changes

diff --git a/docs/Migrating.md b/docs/Migrating.md
index ae659feaf3..29eeb01ccf 100644
--- a/docs/Migrating.md
+++ b/docs/Migrating.md
@@ -38,6 +38,8 @@ double-check that the versions are in the same. The versions can be found in
   `UnityToGymWrapper` and no longer creates the `UnityEnvironment`. Instead,
   the `UnityEnvironment` must be passed as input to the constructor of
   `UnityToGymWrapper`
+- Training artifacts (trained models, summaries) are now found under `results/`
+  instead of `summaries/` and `models/`.

 ### Steps to Migrate

From bf843cce200665f9e74c154a6a5f7970195471e7 Mon Sep 17 00:00:00 2001
From: Ervin Teng
Date: Wed, 22 Apr 2020 21:18:29 -0700
Subject: [PATCH 14/19] Use try-catch instead of requiring YAML 5.1

---
 ml-agents/mlagents/trainers/learn.py | 5 ++++-
 ml-agents/setup.py                   | 2 +-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/ml-agents/mlagents/trainers/learn.py b/ml-agents/mlagents/trainers/learn.py
index e1c237ccbf..3bc4846d3e 100644
--- a/ml-agents/mlagents/trainers/learn.py
+++ b/ml-agents/mlagents/trainers/learn.py
@@ -407,7 +407,10 @@ def write_run_options(output_dir: str, run_options: RunOptions) -> None:
     run_options_path = os.path.join(output_dir, "configuration.yaml")
     try:
         with open(run_options_path, "w") as f:
-            yaml.dump(dict(run_options._asdict()), f, sort_keys=False)
+            try:
+                yaml.dump(dict(run_options._asdict()), f, sort_keys=False)
+            except TypeError:  # Older versions of pyyaml don't support sort_keys
+                yaml.dump(dict(run_options._asdict()), f)
     except FileNotFoundError:
         logger.warning(
             f"Unable to save configuration to {run_options_path}. Make sure the directory exists"
         )

diff --git a/ml-agents/setup.py b/ml-agents/setup.py
index db5a12c2f0..2882b3cafe 100644
--- a/ml-agents/setup.py
+++ b/ml-agents/setup.py
@@ -60,7 +60,7 @@ def run(self):
         "numpy>=1.13.3,<2.0",
         "Pillow>=4.2.1",
         "protobuf>=3.6",
-        "pyyaml>=5.1",
+        "pyyaml",
         "tensorflow>=1.7,<2.1",
         'pypiwin32==223;platform_system=="Windows"',
     ],

From 0b7146e6d2642b92d261bbae1619ffda5a7bbe24 Mon Sep 17 00:00:00 2001
From: Ervin Teng
Date: Wed, 22 Apr 2020 21:25:56 -0700
Subject: [PATCH 15/19] Fix issue with output directory check

---
 ml-agents/mlagents/trainers/learn.py        | 11 ++++++-----
 ml-agents/mlagents/trainers/trainer_util.py |  6 +++---
 2 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/ml-agents/mlagents/trainers/learn.py b/ml-agents/mlagents/trainers/learn.py
index 3bc4846d3e..f8519d585b 100644
--- a/ml-agents/mlagents/trainers/learn.py
+++ b/ml-agents/mlagents/trainers/learn.py
@@ -327,9 +327,13 @@ def run_training(run_seed: int, options: RunOptions) -> None:
         os.path.join(base_path, options.initialize_from) if options.initialize_from else None
     )
     run_logs_dir = os.path.join(write_path, "run_logs")
-    os.makedirs(run_logs_dir, exist_ok=True)
     port = options.base_port
+    # Check if directory exists
+    handle_existing_directories(
+        write_path, options.resume, options.force, maybe_init_path
+    )
+    # Make run logs directory
+    os.makedirs(run_logs_dir, exist_ok=True)
     # Configure CSV, Tensorboard Writers and StatsReporter
     # We assume reward and episode length are needed in the CSV.
csv_writer = CSVWriter( @@ -339,9 +343,6 @@ def run_training(run_seed: int, options: RunOptions) -> None: "Environment/Episode Length", ], ) - handle_existing_directories( - write_path, options.resume, options.force, maybe_init_path - ) tb_writer = TensorboardWriter(write_path, clear_past_data=not options.resume) gauge_write = GaugeWriter() console_writer = ConsoleWriter() diff --git a/ml-agents/mlagents/trainers/trainer_util.py b/ml-agents/mlagents/trainers/trainer_util.py index 7f1aebbe7d..8c864919ff 100644 --- a/ml-agents/mlagents/trainers/trainer_util.py +++ b/ml-agents/mlagents/trainers/trainer_util.py @@ -203,7 +203,7 @@ def _load_config(fp: TextIO) -> Dict[str, Any]: def handle_existing_directories( - model_path: str, resume: bool, force: bool, init_path: str = None + output_path: str, resume: bool, force: bool, init_path: str = None ) -> None: """ Validates that if the run_id model exists, we do not overwrite it unless --force is specified. @@ -215,9 +215,9 @@ def handle_existing_directories( :param force: Whether or not the --force flag was passed. """ - model_path_exists = os.path.isdir(model_path) + output_path_exists = os.path.isdir(output_path) - if model_path_exists: + if output_path_exists: if not resume and not force: raise UnityTrainerException( "Previous data from this run ID was found. " From 48ee399a2b8d5b949e6ce3b20c8ca985f33f43f8 Mon Sep 17 00:00:00 2001 From: Ervin Teng Date: Mon, 27 Apr 2020 18:37:39 -0700 Subject: [PATCH 16/19] Clean up some pathing --- ml-agents/mlagents/trainers/learn.py | 2 +- ml-agents/mlagents/trainers/policy/tf_policy.py | 3 ++- ml-agents/mlagents/trainers/trainer_controller.py | 4 ++-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/ml-agents/mlagents/trainers/learn.py b/ml-agents/mlagents/trainers/learn.py index f8519d585b..75b6a57e02 100644 --- a/ml-agents/mlagents/trainers/learn.py +++ b/ml-agents/mlagents/trainers/learn.py @@ -419,7 +419,7 @@ def write_run_options(output_dir: str, run_options: RunOptions) -> None: def write_timing_tree(output_dir: str) -> None: - timing_path = f"{output_dir}/timers.json" + timing_path = os.path.join(output_dir, "timers.json") try: with open(timing_path, "w") as f: json.dump(get_timer_tree(), f, indent=4) diff --git a/ml-agents/mlagents/trainers/policy/tf_policy.py b/ml-agents/mlagents/trainers/policy/tf_policy.py index 0f242140b5..f24f7acb25 100644 --- a/ml-agents/mlagents/trainers/policy/tf_policy.py +++ b/ml-agents/mlagents/trainers/policy/tf_policy.py @@ -1,5 +1,6 @@ from typing import Any, Dict, List, Optional import abc +import os import numpy as np from mlagents.tf_utils import tf from mlagents import tf_utils @@ -366,7 +367,7 @@ def save_model(self, steps): :return: """ with self.graph.as_default(): - last_checkpoint = self.model_path + "/model-" + str(steps) + ".ckpt" + last_checkpoint = os.path.join(self.model_path, f"model-{steps}.ckpt") self.saver.save(self.sess, last_checkpoint) tf.train.write_graph( self.graph, self.model_path, "raw_graph_def.pb", as_text=False diff --git a/ml-agents/mlagents/trainers/trainer_controller.py b/ml-agents/mlagents/trainers/trainer_controller.py index 5780e16cc1..b71f265659 100644 --- a/ml-agents/mlagents/trainers/trainer_controller.py +++ b/ml-agents/mlagents/trainers/trainer_controller.py @@ -123,10 +123,10 @@ def _create_output_path(output_path): os.makedirs(output_path) except Exception: raise UnityEnvironmentException( - "The folder {} containing the " + f"The folder {output_path} containing the " "generated model could not be " "accessed. 
Please make sure the " - "permissions are set correctly.".format(output_path) + "permissions are set correctly." ) @timed From 2fc8b063ef85b24119b4c2878b236e4e153e8dff Mon Sep 17 00:00:00 2001 From: Ervin Teng Date: Mon, 27 Apr 2020 18:38:00 -0700 Subject: [PATCH 17/19] Clean up comment --- ml-agents/mlagents/trainers/trainer_util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ml-agents/mlagents/trainers/trainer_util.py b/ml-agents/mlagents/trainers/trainer_util.py index 8c864919ff..0657a9fd9b 100644 --- a/ml-agents/mlagents/trainers/trainer_util.py +++ b/ml-agents/mlagents/trainers/trainer_util.py @@ -80,7 +80,7 @@ def initialize_trainer( :param trainer_config: Original trainer configuration loaded from YAML :param brain_name: Name of the brain to be associated with trainer :param run_id: Run ID to associate with this training run - :param output_path: Path to save the model and save summary statistics + :param output_path: Path to save the model and summary statistics :param keep_checkpoints: How many model checkpoints to keep :param train_model: Whether to train the model (vs. run inference) :param load_model: Whether to load the model or randomly initialize From 7a8f8fc384ced62577300cd50e740db02b0aa43e Mon Sep 17 00:00:00 2001 From: Ervin Teng Date: Mon, 27 Apr 2020 18:39:09 -0700 Subject: [PATCH 18/19] Clean up pathing in trainer_util --- ml-agents/mlagents/trainers/trainer_util.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/ml-agents/mlagents/trainers/trainer_util.py b/ml-agents/mlagents/trainers/trainer_util.py index 0657a9fd9b..11ca04b797 100644 --- a/ml-agents/mlagents/trainers/trainer_util.py +++ b/ml-agents/mlagents/trainers/trainer_util.py @@ -97,13 +97,9 @@ def initialize_trainer( ) trainer_parameters = trainer_config.get("default", {}).copy() - trainer_parameters["output_path"] = "{basedir}/{name}".format( - basedir=output_path, name=brain_name - ) + trainer_parameters["output_path"] = os.path.join(output_path, brain_name) if init_path is not None: - trainer_parameters["init_path"] = "{basedir}/{name}".format( - basedir=init_path, name=brain_name - ) + trainer_parameters["init_path"] = os.path.join(init_path, brain_name) trainer_parameters["keep_checkpoints"] = keep_checkpoints if brain_name in trainer_config: _brain_key: Any = brain_name From f73f0d336ac61551f66297ed12359a6eb1b8598c Mon Sep 17 00:00:00 2001 From: Jonathan Harper Date: Mon, 27 Apr 2020 18:50:02 -0700 Subject: [PATCH 19/19] Add Player.log to results folder (#3877) Since the default Player.log path would be overwritten on subsequent runs, we should keep the Unity Player logs in the results folder for a training run. This change uses the -logFile CLI option to the Unity Player to set the path. --- com.unity.ml-agents/CHANGELOG.md | 1 + ml-agents-envs/mlagents_envs/environment.py | 30 ++++++++++++++----- .../mlagents_envs/tests/test_envs.py | 12 ++++++++ ml-agents/mlagents/trainers/learn.py | 11 +++++-- .../mlagents/trainers/tests/test_learn.py | 1 + ml-agents/tests/yamato/scripts/run_llapi.py | 17 ++++++++--- 6 files changed, 58 insertions(+), 14 deletions(-) diff --git a/com.unity.ml-agents/CHANGELOG.md b/com.unity.ml-agents/CHANGELOG.md index 9588417c1f..1112a3cc3c 100755 --- a/com.unity.ml-agents/CHANGELOG.md +++ b/com.unity.ml-agents/CHANGELOG.md @@ -76,6 +76,7 @@ and this project adheres to added in `CollectObservations()`. (#3825) - Model updates can now happen asynchronously with environment steps for better performance. 
(#3690)
 - `num_updates` and `train_interval` for SAC were replaced with `steps_per_update`. (#3690)
+- Unity Player logs are now written out to the results directory. (#3877)
 
 ### Bug Fixes
 
diff --git a/ml-agents-envs/mlagents_envs/environment.py b/ml-agents-envs/mlagents_envs/environment.py
index 178ebe1eb6..d5ba9a3bd6 100644
--- a/ml-agents-envs/mlagents_envs/environment.py
+++ b/ml-agents-envs/mlagents_envs/environment.py
@@ -120,8 +120,9 @@ def __init__(
         seed: int = 0,
         no_graphics: bool = False,
         timeout_wait: int = 60,
-        args: Optional[List[str]] = None,
+        additional_args: Optional[List[str]] = None,
         side_channels: Optional[List[SideChannel]] = None,
+        log_folder: Optional[str] = None,
     ):
         """
         Starts a new unity environment and establishes a connection with the environment.
@@ -136,9 +137,11 @@
         :int timeout_wait: Time (in seconds) to wait for connection from environment.
-        :list args: Addition Unity command line arguments
+        :list additional_args: Additional Unity command line arguments
         :list side_channels: Additional side channel for no-rl communication with Unity
+        :str log_folder: Optional folder to write the Unity Player log file into. Requires absolute path.
         """
-        args = args or []
         atexit.register(self._close)
+        self.additional_args = additional_args or []
+        self.no_graphics = no_graphics
         # If base port is not specified, use BASE_ENVIRONMENT_PORT if we have
         # an environment, otherwise DEFAULT_EDITOR_PORT
         if base_port is None:
@@ -164,6 +167,7 @@
                 )
             )
             self.side_channels[_sc.channel_id] = _sc
+        self.log_folder = log_folder
 
         # If the environment name is None, a new environment will not be launched
         # and the communicator will directly try to connect to an existing unity environment.
@@ -174,7 +178,7 @@
                 "the worker-id must be 0 in order to connect with the Editor."
             )
         if file_name is not None:
-            self.executable_launcher(file_name, no_graphics, args)
+            self.executable_launcher(file_name, no_graphics, additional_args)
         else:
             logger.info(
                 f"Listening on port {self.port}. "
@@ -268,6 +272,20 @@ def validate_environment_path(env_path: str) -> Optional[str]:
             launch_string = candidates[0]
         return launch_string
 
+    def executable_args(self) -> List[str]:
+        args: List[str] = []
+        if self.no_graphics:
+            args += ["-nographics", "-batchmode"]
+        args += [UnityEnvironment.PORT_COMMAND_LINE_ARG, str(self.port)]
+        if self.log_folder:
+            log_file_path = os.path.join(
+                self.log_folder, f"Player-{self.worker_id}.log"
+            )
+            args += ["-logFile", log_file_path]
+        # Add in arguments passed explicitly by the user. 
+ args += self.additional_args + return args + def executable_launcher(self, file_name, no_graphics, args): launch_string = self.validate_environment_path(file_name) if launch_string is None: @@ -278,11 +296,7 @@ def executable_launcher(self, file_name, no_graphics, args): else: logger.debug("This is the launch string {}".format(launch_string)) # Launch Unity environment - subprocess_args = [launch_string] - if no_graphics: - subprocess_args += ["-nographics", "-batchmode"] - subprocess_args += [UnityEnvironment.PORT_COMMAND_LINE_ARG, str(self.port)] - subprocess_args += args + subprocess_args = [launch_string] + self.executable_args() try: self.proc1 = subprocess.Popen( subprocess_args, diff --git a/ml-agents-envs/mlagents_envs/tests/test_envs.py b/ml-agents-envs/mlagents_envs/tests/test_envs.py index 1f1fc5dd99..2d0faef9a1 100755 --- a/ml-agents-envs/mlagents_envs/tests/test_envs.py +++ b/ml-agents-envs/mlagents_envs/tests/test_envs.py @@ -49,6 +49,18 @@ def test_port_defaults( assert expected == env.port +@mock.patch("mlagents_envs.environment.UnityEnvironment.executable_launcher") +@mock.patch("mlagents_envs.environment.UnityEnvironment.get_communicator") +def test_log_file_path_is_set(mock_communicator, mock_launcher): + mock_communicator.return_value = MockCommunicator() + env = UnityEnvironment( + file_name="myfile", worker_id=0, log_folder="./some-log-folder-path" + ) + args = env.executable_args() + log_file_index = args.index("-logFile") + assert args[log_file_index + 1] == "./some-log-folder-path/Player-0.log" + + @mock.patch("mlagents_envs.environment.UnityEnvironment.executable_launcher") @mock.patch("mlagents_envs.environment.UnityEnvironment.get_communicator") def test_reset(mock_communicator, mock_launcher): diff --git a/ml-agents/mlagents/trainers/learn.py b/ml-agents/mlagents/trainers/learn.py index 75b6a57e02..1844f0b234 100644 --- a/ml-agents/mlagents/trainers/learn.py +++ b/ml-agents/mlagents/trainers/learn.py @@ -354,7 +354,12 @@ def run_training(run_seed: int, options: RunOptions) -> None: if options.env_path is None: port = UnityEnvironment.DEFAULT_EDITOR_PORT env_factory = create_environment_factory( - options.env_path, options.no_graphics, run_seed, port, options.env_args + options.env_path, + options.no_graphics, + run_seed, + port, + options.env_args, + os.path.abspath(run_logs_dir), # Unity environment requires absolute path ) engine_config = EngineConfig( options.width, @@ -470,6 +475,7 @@ def create_environment_factory( seed: int, start_port: int, env_args: Optional[List[str]], + log_folder: str, ) -> Callable[[int, List[SideChannel]], BaseEnv]: if env_path is not None: launch_string = UnityEnvironment.validate_environment_path(env_path) @@ -489,8 +495,9 @@ def create_unity_environment( seed=env_seed, no_graphics=no_graphics, base_port=start_port, - args=env_args, + additional_args=env_args, side_channels=side_channels, + log_folder=log_folder, ) return create_unity_environment diff --git a/ml-agents/mlagents/trainers/tests/test_learn.py b/ml-agents/mlagents/trainers/tests/test_learn.py index 0eda10936a..1b37f84aee 100644 --- a/ml-agents/mlagents/trainers/tests/test_learn.py +++ b/ml-agents/mlagents/trainers/tests/test_learn.py @@ -70,6 +70,7 @@ def test_bad_env_path(): seed=None, start_port=8000, env_args=None, + log_folder="results/log_folder", ) diff --git a/ml-agents/tests/yamato/scripts/run_llapi.py b/ml-agents/tests/yamato/scripts/run_llapi.py index d298bc9be2..591407f163 100644 --- a/ml-agents/tests/yamato/scripts/run_llapi.py +++ 
b/ml-agents/tests/yamato/scripts/run_llapi.py @@ -17,7 +17,7 @@ def test_run_environment(env_name): file_name=env_name, side_channels=[engine_configuration_channel], no_graphics=True, - args=["-logFile", "-"], + additional_args=["-logFile", "-"], ) try: @@ -94,14 +94,23 @@ def test_closing(env_name): """ try: env1 = UnityEnvironment( - file_name=env_name, base_port=5006, no_graphics=True, args=["-logFile", "-"] + file_name=env_name, + base_port=5006, + no_graphics=True, + additional_args=["-logFile", "-"], ) env1.close() env1 = UnityEnvironment( - file_name=env_name, base_port=5006, no_graphics=True, args=["-logFile", "-"] + file_name=env_name, + base_port=5006, + no_graphics=True, + additional_args=["-logFile", "-"], ) env2 = UnityEnvironment( - file_name=env_name, base_port=5007, no_graphics=True, args=["-logFile", "-"] + file_name=env_name, + base_port=5007, + no_graphics=True, + additional_args=["-logFile", "-"], ) env2.reset() finally:
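
Taken together, these patches let a library user route Unity Player logs into a per-run folder without going through learn.py. Below is a minimal usage sketch against the new constructor parameters; the build path, run ID, and the folder-creation step are illustrative assumptions, not part of the change:

    import os

    from mlagents_envs.environment import UnityEnvironment

    # Hypothetical build path and run ID, for illustration only.
    log_dir = os.path.abspath("./results/my-run/run_logs")  # log_folder requires an absolute path
    os.makedirs(log_dir, exist_ok=True)  # assumption: the environment does not create the folder itself

    env = UnityEnvironment(
        file_name="./builds/3DBall",  # path to a built Unity Player (stand-in name)
        worker_id=0,                  # the Player log lands at <log_dir>/Player-0.log
        no_graphics=True,             # adds -nographics -batchmode to the generated command line
        log_folder=log_dir,
    )
    try:
        env.reset()
    finally:
        env.close()

Because the log file name is derived from worker_id, concurrent workers write Player-0.log, Player-1.log, and so on into the same folder without clobbering one another. Note from executable_args() that user-supplied additional_args are appended after the generated arguments, so an explicit -logFile passed there (as the yamato scripts do) comes later on the Player command line than the log_folder-derived path.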