Merged
Changes from 15 commits
4 changes: 3 additions & 1 deletion .gitignore
@@ -1,6 +1,8 @@
# Tensorflow Model Info
# Output Artifacts (Legacy)
/models
/summaries
# Output Artifacts
/results

# Training environments
/envs
2 changes: 2 additions & 0 deletions com.unity.ml-agents/CHANGELOG.md
@@ -44,6 +44,8 @@ and this project adheres to
`UnityToGymWrapper` and no longer creates the `UnityEnvironment`.
Instead, the `UnityEnvironment` must be passed as input to the
constructor of `UnityToGymWrapper`
- Training artifacts (trained models, summaries) are now found in the `results/`
directory. (#3829)

### Minor Changes

5 changes: 2 additions & 3 deletions docs/Getting-Started.md
@@ -179,12 +179,11 @@ INFO:mlagents_envs:Hyperparameters for the PPO Trainer of brain 3DBallLearning:
sequence_length: 64
summary_freq: 1000
use_recurrent: False
summary_path: ./summaries/first3DBallRun
memory_size: 256
use_curiosity: False
curiosity_strength: 0.01
curiosity_enc_size: 128
model_path: ./models/first3DBallRun/3DBallLearning
output_path: ./results/first3DBallRun/3DBallLearning
INFO:mlagents.trainers: first3DBallRun: 3DBallLearning: Step: 1000. Mean Reward: 1.242. Std of Reward: 0.746. Training.
INFO:mlagents.trainers: first3DBallRun: 3DBallLearning: Step: 2000. Mean Reward: 1.319. Std of Reward: 0.693. Training.
INFO:mlagents.trainers: first3DBallRun: 3DBallLearning: Step: 3000. Mean Reward: 1.804. Std of Reward: 1.056. Training.
@@ -236,7 +235,7 @@ the same command again, appending the `--resume` flag:
mlagents-learn config/trainer_config.yaml --run-id=first3DBallRun --resume
```

Your trained model will be at `models/<run-identifier>/<behavior_name>.nn` where
Your trained model will be at `results/<run-identifier>/<behavior_name>.nn` where
`<behavior_name>` is the name of the `Behavior Name` of the agents corresponding
to the model. This file corresponds to your model's latest checkpoint. You can
now embed this trained model into your Agents by following the steps below,
5 changes: 2 additions & 3 deletions docs/Learning-Environment-Executable.md
@@ -152,12 +152,11 @@ INFO:mlagents_envs:Hyperparameters for the PPO Trainer of brain Ball3DLearning:
sequence_length: 64
summary_freq: 1000
use_recurrent: False
summary_path: ./summaries/first-run-0
memory_size: 256
use_curiosity: False
curiosity_strength: 0.01
curiosity_enc_size: 128
model_path: ./models/first-run-0/Ball3DLearning
output_path: ./results/first-run-0/Ball3DLearning
INFO:mlagents.trainers: first-run-0: Ball3DLearning: Step: 1000. Mean Reward: 1.242. Std of Reward: 0.746. Training.
INFO:mlagents.trainers: first-run-0: Ball3DLearning: Step: 2000. Mean Reward: 1.319. Std of Reward: 0.693. Training.
INFO:mlagents.trainers: first-run-0: Ball3DLearning: Step: 3000. Mean Reward: 1.804. Std of Reward: 1.056. Training.
@@ -171,7 +170,7 @@ INFO:mlagents.trainers: first-run-0: Ball3DLearning: Step: 10000. Mean Reward: 2
```

You can press Ctrl+C to stop the training, and your trained model will be at
`models/<run-identifier>/<behavior_name>.nn`, which corresponds to your model's
`results/<run-identifier>/<behavior_name>.nn`, which corresponds to your model's
latest checkpoint. (**Note:** There is a known bug on Windows that causes the
saving of the model to fail when you terminate training early; it's
recommended to wait until Step has reached the max_steps parameter you set in
2 changes: 2 additions & 0 deletions docs/Migrating.md
@@ -38,6 +38,8 @@ double-check that the versions are the same. The versions can be found in
`UnityToGymWrapper` and no longer creates the `UnityEnvironment`. Instead,
the `UnityEnvironment` must be passed as input to the
constructor of `UnityToGymWrapper`
- Training artifacts (trained models, summaries) are now found under `results/`
instead of `summaries/` and `models/`.

### Steps to Migrate

7 changes: 4 additions & 3 deletions docs/Training-ML-Agents.md
@@ -64,16 +64,17 @@ for a sample execution of the `mlagents-learn` command.
Regardless of which training methods, configurations or hyperparameters you
provide, the training process will always generate three artifacts:

1. Summaries (under the `summaries/` folder): these are training metrics that
1. Summaries (under the `results/<run-identifier>/<behavior-name>` folder):
these are training metrics that
are updated throughout the training process. They are helpful to monitor your
training performance and may help inform how to update your hyperparameter
values. See [Using TensorBoard](Using-Tensorboard.md) for more details on how
to visualize the training metrics.
1. Models (under the `models/` folder): these contain the model checkpoints that
1. Models (under the `results/<run-identifier>/` folder): these contain the model checkpoints that
are updated throughout training and the final model file (`.nn`). This final
model file is generated once either when training completes or is
interrupted.
1. Timers file (also under the `summaries/` folder): this contains aggregated
1. Timers file (also under the `results/<run-identifier>` folder): this contains aggregated
metrics on your training process, including time spent on specific code
blocks. See [Profiling in Python](Profiling-Python.md) for more information
on the timers generated.
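
As a rough, hypothetical illustration of the consolidated layout described above (not part of this change), the following Python sketch resolves where those artifacts would land for a single run. The run ID and behavior name are placeholders, and the event-file names follow TensorFlow's `events.out.tfevents.*` convention.

```python
import glob
import os

# Placeholders for illustration only.
run_id = "first3DBallRun"
behavior_name = "3DBallLearning"

run_dir = os.path.join("results", run_id)

# Final exported model checkpoint, per the docs: results/<run-id>/<behavior_name>.nn
final_model = os.path.join(run_dir, f"{behavior_name}.nn")

# TensorBoard summaries, per the docs: results/<run-id>/<behavior-name>/
event_files = glob.glob(os.path.join(run_dir, behavior_name, "events.*"))

print(f"model:     {final_model} (exists: {os.path.exists(final_model)})")
print(f"summaries: {len(event_files)} event file(s) found")
```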
2 changes: 1 addition & 1 deletion docs/Training-PPO.md
@@ -294,7 +294,7 @@ Typical Range: Approximately equal to PPO's `buffer_size`
`init_path` can be specified to initialize your model from a previous run before starting.
Note that the prior run should have used the same trainer configurations as the current run,
and have been saved with the same version of ML-Agents. You should provide the full path
to the folder where the checkpoints were saved, e.g. `./models/{run-id}/{behavior_name}`.
to the folder where the checkpoints were saved, e.g. `./results/{run-id}/{behavior_name}`.
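
For illustration only, the sketch below shows one way to set `init_path` on a single behavior's entry in an existing trainer configuration file. The config path, behavior name, and run ID are placeholders, and the file is assumed to already contain that behavior's other required settings.

```python
import yaml

# Placeholders: adjust the config path, behavior name, and previous run id.
config_path = "config/trainer_config.yaml"
behavior_name = "3DBallLearning"
previous_run = "previous-run"

with open(config_path) as f:
    config = yaml.safe_load(f)

# Point this behavior at the checkpoint folder saved by the earlier run.
config[behavior_name]["init_path"] = f"./results/{previous_run}/{behavior_name}"

with open(config_path, "w") as f:
    yaml.dump(config, f, default_flow_style=False)
```

As noted below, the `--initialize-from` CLI parameter is usually simpler when all behaviors should start from the same run.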

This option is provided in case you want to initialize different behaviors from different runs;
in most cases, it is sufficient to use the `--initialize-from` CLI parameter to initialize
2 changes: 1 addition & 1 deletion docs/Training-SAC.md
@@ -295,7 +295,7 @@ Typical Range (Discrete): `32` - `512`
`init_path` can be specified to initialize your model from a previous run before starting.
Note that the prior run should have used the same trainer configurations as the current run,
and have been saved with the same version of ML-Agents. You should provide the full path
to the folder where the checkpoints were saved, e.g. `./models/{run-id}/{behavior_name}`.
to the folder where the checkpoints were saved, e.g. `./results/{run-id}/{behavior_name}`.

This option is provided in case you want to initialize different behaviors from different runs;
in most cases, it is sufficient to use the `--initialize-from` CLI parameter to initialize
2 changes: 1 addition & 1 deletion docs/Using-Tensorboard.md
@@ -12,7 +12,7 @@ start TensorBoard:

1. Open a terminal or console window:
1. Navigate to the directory where the ML-Agents Toolkit is installed.
1. From the command line run: `tensorboard --logdir=summaries --port=6006`
1. From the command line run: `tensorboard --logdir=results --port=6006`
1. Open a browser window and navigate to
[localhost:6006](http://localhost:6006).

49 changes: 33 additions & 16 deletions ml-agents/mlagents/trainers/learn.py
@@ -1,5 +1,6 @@
# # Unity ML-Agents Toolkit
import argparse
import yaml

import os
import numpy as np
@@ -320,26 +321,29 @@ def run_training(run_seed: int, options: RunOptions) -> None:
:param run_options: Command line arguments for training.
"""
with hierarchical_timer("run_training.setup"):
model_path = f"./models/{options.run_id}"
base_path = "results"
write_path = os.path.join(base_path, options.run_id)
maybe_init_path = (
f"./models/{options.initialize_from}" if options.initialize_from else None
os.path.join(base_path, options.initialize_from) if options.initialize_from else None
)
summaries_dir = "./summaries"
run_logs_dir = os.path.join(write_path, "run_logs")
port = options.base_port

# Check if directory exists
handle_existing_directories(
write_path, options.resume, options.force, maybe_init_path
)
# Make run logs directory
os.makedirs(run_logs_dir, exist_ok=True)
# Configure CSV, Tensorboard Writers and StatsReporter
# We assume reward and episode length are needed in the CSV.
csv_writer = CSVWriter(
summaries_dir,
write_path,
required_fields=[
"Environment/Cumulative Reward",
"Environment/Episode Length",
],
)
handle_existing_directories(
model_path, summaries_dir, options.resume, options.force, maybe_init_path
)
tb_writer = TensorboardWriter(summaries_dir, clear_past_data=not options.resume)
tb_writer = TensorboardWriter(write_path, clear_past_data=not options.resume)
gauge_write = GaugeWriter()
console_writer = ConsoleWriter()
StatsReporter.add_writer(tb_writer)
@@ -368,9 +372,8 @@ def run_training(run_seed: int, options: RunOptions) -> None:
)
trainer_factory = TrainerFactory(
options.trainer_config,
summaries_dir,
options.run_id,
model_path,
write_path,
options.keep_checkpoints,
not options.inference,
options.resume,
@@ -382,8 +385,7 @@
# Create controller and begin training.
tc = TrainerController(
trainer_factory,
model_path,
summaries_dir,
write_path,
options.run_id,
options.save_freq,
maybe_meta_curriculum,
@@ -398,11 +400,26 @@
tc.start_learning(env_manager)
finally:
env_manager.close()
write_timing_tree(summaries_dir, options.run_id)
write_run_options(write_path, options)
write_timing_tree(run_logs_dir)


def write_run_options(output_dir: str, run_options: RunOptions) -> None:
run_options_path = os.path.join(output_dir, "configuration.yaml")
try:
with open(run_options_path, "w") as f:
try:
yaml.dump(dict(run_options._asdict()), f, sort_keys=False)
except TypeError: # Older versions of pyyaml don't support sort_keys
yaml.dump(dict(run_options._asdict()), f)
except FileNotFoundError:
logger.warning(
f"Unable to save configuration to {run_options_path}. Make sure the directory exists"
)


def write_timing_tree(summaries_dir: str, run_id: str) -> None:
timing_path = f"{summaries_dir}/{run_id}_timers.json"
def write_timing_tree(output_dir: str) -> None:
timing_path = f"{output_dir}/timers.json"
try:
with open(timing_path, "w") as f:
json.dump(get_timer_tree(), f, indent=4)
2 changes: 1 addition & 1 deletion ml-agents/mlagents/trainers/policy/tf_policy.py
@@ -62,7 +62,7 @@ def __init__(self, seed, brain, trainer_parameters, load=False):
self.use_continuous_act = brain.vector_action_space_type == "continuous"
if self.use_continuous_act:
self.num_branches = self.brain.vector_action_space_size[0]
self.model_path = trainer_parameters["model_path"]
self.model_path = trainer_parameters["output_path"]
self.initialize_path = trainer_parameters.get("init_path", None)
self.keep_checkpoints = trainer_parameters.get("keep_checkpoints", 5)
self.graph = tf.Graph()
3 changes: 1 addition & 2 deletions ml-agents/mlagents/trainers/ppo/trainer.py
@@ -62,9 +62,8 @@ def __init__(
"sequence_length",
"summary_freq",
"use_recurrent",
"summary_path",
"memory_size",
"model_path",
"output_path",
"reward_signals",
]
self._check_param_keys()
7 changes: 3 additions & 4 deletions ml-agents/mlagents/trainers/sac/trainer.py
@@ -72,9 +72,8 @@ def __init__(
"summary_freq",
"tau",
"use_recurrent",
"summary_path",
"memory_size",
"model_path",
"output_path",
"reward_signals",
]

@@ -136,7 +135,7 @@ def save_replay_buffer(self) -> None:
Save the training buffer's update buffer to a pickle file.
"""
filename = os.path.join(
self.trainer_parameters["model_path"], "last_replay_buffer.hdf5"
self.trainer_parameters["output_path"], "last_replay_buffer.hdf5"
)
logger.info("Saving Experience Replay Buffer to {}".format(filename))
with open(filename, "wb") as file_object:
@@ -147,7 +146,7 @@ def load_replay_buffer(self) -> None:
Loads the last saved replay buffer from a file.
"""
filename = os.path.join(
self.trainer_parameters["model_path"], "last_replay_buffer.hdf5"
self.trainer_parameters["output_path"], "last_replay_buffer.hdf5"
)
logger.info("Loading Experience Replay Buffer from {}".format(filename))
with open(filename, "rb+") as file_object:
6 changes: 2 additions & 4 deletions ml-agents/mlagents/trainers/tests/test_barracuda_converter.py
@@ -55,8 +55,7 @@ def dummy_config():
memory_size: 8
curiosity_strength: 0.0
curiosity_enc_size: 1
summary_path: test
model_path: test
output_path: test
reward_signals:
extrinsic:
strength: 1.0
@@ -70,8 +69,7 @@
@pytest.mark.parametrize("rnn", [True, False], ids=["rnn", "no_rnn"])
def test_policy_conversion(dummy_config, tmpdir, rnn, visual, discrete):
tf.reset_default_graph()
dummy_config["summary_path"] = str(tmpdir)
dummy_config["model_path"] = os.path.join(tmpdir, "test")
dummy_config["output_path"] = os.path.join(tmpdir, "test")
policy = create_policy_mock(
dummy_config, use_rnn=rnn, use_discrete=discrete, use_visual=visual
)
2 changes: 1 addition & 1 deletion ml-agents/mlagents/trainers/tests/test_bcmodule.py
@@ -43,7 +43,7 @@ def ppo_dummy_config():

def create_bc_module(mock_brain, trainer_config, use_rnn, demo_file, tanhresample):
# model_path = env.external_brain_names[0]
trainer_config["model_path"] = "testpath"
trainer_config["output_path"] = "testpath"
trainer_config["keep_checkpoints"] = 3
trainer_config["use_recurrent"] = use_rnn
trainer_config["behavioral_cloning"]["demo_path"] = (
9 changes: 3 additions & 6 deletions ml-agents/mlagents/trainers/tests/test_ghost.py
@@ -38,8 +38,7 @@ def dummy_config():
memory_size: 8
curiosity_strength: 0.0
curiosity_enc_size: 1
summary_path: test
model_path: test
output_path: test
reward_signals:
extrinsic:
strength: 1.0
@@ -117,8 +116,7 @@ def test_process_trajectory(dummy_config):
vector_action_descriptions=[],
vector_action_space_type=0,
)
dummy_config["summary_path"] = "./summaries/test_trainer_summary"
dummy_config["model_path"] = "./models/test_trainer_models/TestModel"
dummy_config["output_path"] = "./results/test_trainer_models/TestModel"
ppo_trainer = PPOTrainer(brain_name, 0, dummy_config, True, False, 0, "0")
controller = GhostController(100)
trainer = GhostTrainer(
@@ -190,8 +188,7 @@ def test_publish_queue(dummy_config):
vector_action_descriptions=[],
vector_action_space_type=0,
)
dummy_config["summary_path"] = "./summaries/test_trainer_summary"
dummy_config["model_path"] = "./models/test_trainer_models/TestModel"
dummy_config["output_path"] = "./results/test_trainer_models/TestModel"
ppo_trainer = PPOTrainer(brain_name, 0, dummy_config, True, False, 0, "0")
controller = GhostController(100)
trainer = GhostTrainer(
16 changes: 10 additions & 6 deletions ml-agents/mlagents/trainers/tests/test_learn.py
@@ -15,6 +15,8 @@ def basic_options(extra_args=None):
return parse_command_line(args)


@patch("mlagents.trainers.learn.write_timing_tree")
@patch("mlagents.trainers.learn.write_run_options")
@patch("mlagents.trainers.learn.handle_existing_directories")
@patch("mlagents.trainers.learn.TrainerFactory")
@patch("mlagents.trainers.learn.SamplerManager")
@@ -28,6 +30,8 @@ def test_run_training(
sampler_manager_mock,
trainer_factory_mock,
handle_dir_mock,
write_run_options_mock,
write_timing_tree_mock,
):
mock_env = MagicMock()
mock_env.external_brain_names = []
@@ -39,11 +43,11 @@
mock_init = MagicMock(return_value=None)
with patch.object(TrainerController, "__init__", mock_init):
with patch.object(TrainerController, "start_learning", MagicMock()):
learn.run_training(0, basic_options())
options = basic_options()
learn.run_training(0, options)
mock_init.assert_called_once_with(
trainer_factory_mock.return_value,
"./models/ppo",
"./summaries",
"results/ppo",
"ppo",
50000,
None,
@@ -52,9 +56,9 @@
sampler_manager_mock.return_value,
None,
)
handle_dir_mock.assert_called_once_with(
"./models/ppo", "./summaries", False, False, None
)
handle_dir_mock.assert_called_once_with("results/ppo", False, False, None)
write_timing_tree_mock.assert_called_once_with("results/ppo/run_logs")
write_run_options_mock.assert_called_once_with("results/ppo", options)
StatsReporter.writers.clear() # make sure there aren't any writers as added by learn.py

