From e9d34cc075d13c444682b5d4f8c4040919fb9f1f Mon Sep 17 00:00:00 2001 From: Ervin Teng Date: Wed, 22 Apr 2020 17:23:11 -0700 Subject: [PATCH 01/19] Move artifacts to results/ directory --- ml-agents/mlagents/trainers/learn.py | 16 ++++++-------- .../mlagents/trainers/policy/tf_policy.py | 2 +- ml-agents/mlagents/trainers/ppo/trainer.py | 3 +-- ml-agents/mlagents/trainers/sac/trainer.py | 7 +++--- .../mlagents/trainers/trainer/trainer.py | 3 +-- .../mlagents/trainers/trainer_controller.py | 18 +++++++-------- ml-agents/mlagents/trainers/trainer_util.py | 22 +++++++------------ 7 files changed, 29 insertions(+), 42 deletions(-) diff --git a/ml-agents/mlagents/trainers/learn.py b/ml-agents/mlagents/trainers/learn.py index a5cb4677af..f0989d2ff4 100644 --- a/ml-agents/mlagents/trainers/learn.py +++ b/ml-agents/mlagents/trainers/learn.py @@ -320,9 +320,9 @@ def run_training(run_seed: int, options: RunOptions) -> None: :param run_options: Command line arguments for training. """ with hierarchical_timer("run_training.setup"): - model_path = f"./models/{options.run_id}" + write_path = f"./results/{options.run_id}" maybe_init_path = ( - f"./models/{options.initialize_from}" if options.initialize_from else None + f"./results/{options.initialize_from}" if options.initialize_from else None ) summaries_dir = "./summaries" port = options.base_port @@ -330,16 +330,16 @@ def run_training(run_seed: int, options: RunOptions) -> None: # Configure CSV, Tensorboard Writers and StatsReporter # We assume reward and episode length are needed in the CSV. csv_writer = CSVWriter( - summaries_dir, + write_path, required_fields=[ "Environment/Cumulative Reward", "Environment/Episode Length", ], ) handle_existing_directories( - model_path, summaries_dir, options.resume, options.force, maybe_init_path + write_path, options.resume, options.force, maybe_init_path ) - tb_writer = TensorboardWriter(summaries_dir, clear_past_data=not options.resume) + tb_writer = TensorboardWriter(write_path, clear_past_data=not options.resume) gauge_write = GaugeWriter() console_writer = ConsoleWriter() StatsReporter.add_writer(tb_writer) @@ -368,9 +368,8 @@ def run_training(run_seed: int, options: RunOptions) -> None: ) trainer_factory = TrainerFactory( options.trainer_config, - summaries_dir, options.run_id, - model_path, + write_path, options.keep_checkpoints, not options.inference, options.resume, @@ -382,8 +381,7 @@ def run_training(run_seed: int, options: RunOptions) -> None: # Create controller and begin training. 
tc = TrainerController( trainer_factory, - model_path, - summaries_dir, + write_path, options.run_id, options.save_freq, maybe_meta_curriculum, diff --git a/ml-agents/mlagents/trainers/policy/tf_policy.py b/ml-agents/mlagents/trainers/policy/tf_policy.py index 7832d60cd8..0f242140b5 100644 --- a/ml-agents/mlagents/trainers/policy/tf_policy.py +++ b/ml-agents/mlagents/trainers/policy/tf_policy.py @@ -62,7 +62,7 @@ def __init__(self, seed, brain, trainer_parameters, load=False): self.use_continuous_act = brain.vector_action_space_type == "continuous" if self.use_continuous_act: self.num_branches = self.brain.vector_action_space_size[0] - self.model_path = trainer_parameters["model_path"] + self.model_path = trainer_parameters["output_path"] self.initialize_path = trainer_parameters.get("init_path", None) self.keep_checkpoints = trainer_parameters.get("keep_checkpoints", 5) self.graph = tf.Graph() diff --git a/ml-agents/mlagents/trainers/ppo/trainer.py b/ml-agents/mlagents/trainers/ppo/trainer.py index 81ed6ffab7..7b6e6f12d1 100644 --- a/ml-agents/mlagents/trainers/ppo/trainer.py +++ b/ml-agents/mlagents/trainers/ppo/trainer.py @@ -62,9 +62,8 @@ def __init__( "sequence_length", "summary_freq", "use_recurrent", - "summary_path", "memory_size", - "model_path", + "output_path", "reward_signals", ] self._check_param_keys() diff --git a/ml-agents/mlagents/trainers/sac/trainer.py b/ml-agents/mlagents/trainers/sac/trainer.py index 7c6fcfd777..ea2aeca2d4 100644 --- a/ml-agents/mlagents/trainers/sac/trainer.py +++ b/ml-agents/mlagents/trainers/sac/trainer.py @@ -72,9 +72,8 @@ def __init__( "summary_freq", "tau", "use_recurrent", - "summary_path", "memory_size", - "model_path", + "output_path", "reward_signals", ] @@ -136,7 +135,7 @@ def save_replay_buffer(self) -> None: Save the training buffer's update buffer to a pickle file. """ filename = os.path.join( - self.trainer_parameters["model_path"], "last_replay_buffer.hdf5" + self.trainer_parameters["output_path"], "last_replay_buffer.hdf5" ) logger.info("Saving Experience Replay Buffer to {}".format(filename)) with open(filename, "wb") as file_object: @@ -147,7 +146,7 @@ def load_replay_buffer(self) -> None: Loads the last saved replay buffer from a file. 
""" filename = os.path.join( - self.trainer_parameters["model_path"], "last_replay_buffer.hdf5" + self.trainer_parameters["output_path"], "last_replay_buffer.hdf5" ) logger.info("Loading Experience Replay Buffer from {}".format(filename)) with open(filename, "rb+") as file_object: diff --git a/ml-agents/mlagents/trainers/trainer/trainer.py b/ml-agents/mlagents/trainers/trainer/trainer.py index 3128aee081..735b44b8e7 100644 --- a/ml-agents/mlagents/trainers/trainer/trainer.py +++ b/ml-agents/mlagents/trainers/trainer/trainer.py @@ -42,9 +42,8 @@ def __init__( self.brain_name = brain_name self.run_id = run_id self.trainer_parameters = trainer_parameters - self.summary_path = trainer_parameters["summary_path"] self._threaded = trainer_parameters.get("threaded", True) - self._stats_reporter = StatsReporter(self.summary_path) + self._stats_reporter = StatsReporter(brain_name) self.is_training = training self._reward_buffer: Deque[float] = deque(maxlen=reward_buff_cap) self.policy_queues: List[AgentManagerQueue[Policy]] = [] diff --git a/ml-agents/mlagents/trainers/trainer_controller.py b/ml-agents/mlagents/trainers/trainer_controller.py index 950d03cdf1..5780e16cc1 100644 --- a/ml-agents/mlagents/trainers/trainer_controller.py +++ b/ml-agents/mlagents/trainers/trainer_controller.py @@ -30,8 +30,7 @@ class TrainerController(object): def __init__( self, trainer_factory: TrainerFactory, - model_path: str, - summaries_dir: str, + output_path: str, run_id: str, save_freq: int, meta_curriculum: Optional[MetaCurriculum], @@ -41,7 +40,7 @@ def __init__( resampling_interval: Optional[int], ): """ - :param model_path: Path to save the model. + :param output_path: Path to save the model. :param summaries_dir: Folder to save training summaries. :param run_id: The sub-directory name for model and summary statistics :param save_freq: Frequency at which to save model @@ -55,8 +54,7 @@ def __init__( self.trainers: Dict[str, Trainer] = {} self.brain_name_to_identifier: Dict[str, Set] = defaultdict(set) self.trainer_factory = trainer_factory - self.model_path = model_path - self.summaries_dir = summaries_dir + self.output_path = output_path self.logger = get_logger(__name__) self.run_id = run_id self.save_freq = save_freq @@ -119,16 +117,16 @@ def _export_graph(self): self.trainers[brain_name].export_model(name_behavior_id) @staticmethod - def _create_model_path(model_path): + def _create_output_path(output_path): try: - if not os.path.exists(model_path): - os.makedirs(model_path) + if not os.path.exists(output_path): + os.makedirs(output_path) except Exception: raise UnityEnvironmentException( "The folder {} containing the " "generated model could not be " "accessed. 
Please make sure the " - "permissions are set correctly.".format(model_path) + "permissions are set correctly.".format(output_path) ) @timed @@ -207,7 +205,7 @@ def _create_trainers_and_managers( @timed def start_learning(self, env_manager: EnvManager) -> None: - self._create_model_path(self.model_path) + self._create_output_path(self.output_path) tf.reset_default_graph() global_step = 0 last_brain_behavior_ids: Set[str] = set() diff --git a/ml-agents/mlagents/trainers/trainer_util.py b/ml-agents/mlagents/trainers/trainer_util.py index 3fad49a368..7f1aebbe7d 100644 --- a/ml-agents/mlagents/trainers/trainer_util.py +++ b/ml-agents/mlagents/trainers/trainer_util.py @@ -20,9 +20,8 @@ class TrainerFactory: def __init__( self, trainer_config: Any, - summaries_dir: str, run_id: str, - model_path: str, + output_path: str, keep_checkpoints: int, train_model: bool, load_model: bool, @@ -32,9 +31,8 @@ def __init__( multi_gpu: bool = False, ): self.trainer_config = trainer_config - self.summaries_dir = summaries_dir self.run_id = run_id - self.model_path = model_path + self.output_path = output_path self.init_path = init_path self.keep_checkpoints = keep_checkpoints self.train_model = train_model @@ -48,9 +46,8 @@ def generate(self, brain_name: str) -> Trainer: return initialize_trainer( self.trainer_config, brain_name, - self.summaries_dir, self.run_id, - self.model_path, + self.output_path, self.keep_checkpoints, self.train_model, self.load_model, @@ -65,9 +62,8 @@ def generate(self, brain_name: str) -> Trainer: def initialize_trainer( trainer_config: Any, brain_name: str, - summaries_dir: str, run_id: str, - model_path: str, + output_path: str, keep_checkpoints: int, train_model: bool, load_model: bool, @@ -83,9 +79,8 @@ def initialize_trainer( :param trainer_config: Original trainer configuration loaded from YAML :param brain_name: Name of the brain to be associated with trainer - :param summaries_dir: Directory to store trainer summary statistics :param run_id: Run ID to associate with this training run - :param model_path: Path to save the model + :param output_path: Path to save the model and save summary statistics :param keep_checkpoints: How many model checkpoints to keep :param train_model: Whether to train the model (vs. run inference) :param load_model: Whether to load the model or randomly initialize @@ -102,9 +97,8 @@ def initialize_trainer( ) trainer_parameters = trainer_config.get("default", {}).copy() - trainer_parameters["summary_path"] = str(run_id) + "_" + brain_name - trainer_parameters["model_path"] = "{basedir}/{name}".format( - basedir=model_path, name=brain_name + trainer_parameters["output_path"] = "{basedir}/{name}".format( + basedir=output_path, name=brain_name ) if init_path is not None: trainer_parameters["init_path"] = "{basedir}/{name}".format( @@ -209,7 +203,7 @@ def _load_config(fp: TextIO) -> Dict[str, Any]: def handle_existing_directories( - model_path: str, summary_path: str, resume: bool, force: bool, init_path: str = None + model_path: str, resume: bool, force: bool, init_path: str = None ) -> None: """ Validates that if the run_id model exists, we do not overwrite it unless --force is specified. 
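
Before the test updates in the next patch, it helps to see the whole of `handle_existing_directories`, since only its new signature appears in the hunk above. The sketch below is reconstructed from the fragments shown elsewhere in this series and from the expectations in the [PATCH 02/19] tests; it is not a verbatim copy of `trainer_util.py`, and the exception messages are illustrative:

```python
import os


class UnityTrainerException(Exception):
    """Stand-in for mlagents.trainers.exception.UnityTrainerException."""


def handle_existing_directories(
    output_path: str, resume: bool, force: bool, init_path: str = None
) -> None:
    # An existing run may only be continued with --resume or overwritten with
    # --force; conversely, --resume on a run that never wrote output is an error.
    if os.path.isdir(output_path):
        if not resume and not force:
            raise UnityTrainerException(
                "Previous data from this run ID was found. "
                "Use --resume to continue it, or --force to overwrite it."
            )
    elif resume:
        raise UnityTrainerException(
            "Previous data from this run ID was not found. Nothing to resume."
        )
    # --initialize-from must point at a run that has already saved models.
    if init_path is not None and not os.path.isdir(init_path):
        raise UnityTrainerException(
            f"Could not initialize from {init_path}. "
            "Make sure models were saved under that run ID first."
        )
```

After this patch the check covers a single `output_path` rather than separate model and summary directories, which is why the tests that follow drop the old `summary_path` argument.
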
From 6867d388d64db866315fc2081aee4d94c95d4efb Mon Sep 17 00:00:00 2001 From: Ervin Teng Date: Wed, 22 Apr 2020 17:53:15 -0700 Subject: [PATCH 02/19] Fix tests --- .../tests/test_barracuda_converter.py | 6 +- .../mlagents/trainers/tests/test_bcmodule.py | 2 +- .../mlagents/trainers/tests/test_ghost.py | 3 +- .../mlagents/trainers/tests/test_learn.py | 7 +-- .../mlagents/trainers/tests/test_nn_policy.py | 5 +- .../mlagents/trainers/tests/test_policy.py | 2 +- ml-agents/mlagents/trainers/tests/test_ppo.py | 3 +- .../trainers/tests/test_reward_signals.py | 2 +- .../trainers/tests/test_rl_trainer.py | 2 +- ml-agents/mlagents/trainers/tests/test_sac.py | 11 ++-- .../mlagents/trainers/tests/test_simple_rl.py | 6 +- .../trainers/tests/test_trainer_controller.py | 6 +- .../trainers/tests/test_trainer_util.py | 60 +++++++------------ 13 files changed, 41 insertions(+), 74 deletions(-) diff --git a/ml-agents/mlagents/trainers/tests/test_barracuda_converter.py b/ml-agents/mlagents/trainers/tests/test_barracuda_converter.py index c300180e5c..0b4d4d49f0 100644 --- a/ml-agents/mlagents/trainers/tests/test_barracuda_converter.py +++ b/ml-agents/mlagents/trainers/tests/test_barracuda_converter.py @@ -55,8 +55,7 @@ def dummy_config(): memory_size: 8 curiosity_strength: 0.0 curiosity_enc_size: 1 - summary_path: test - model_path: test + output_path: test reward_signals: extrinsic: strength: 1.0 @@ -70,8 +69,7 @@ def dummy_config(): @pytest.mark.parametrize("rnn", [True, False], ids=["rnn", "no_rnn"]) def test_policy_conversion(dummy_config, tmpdir, rnn, visual, discrete): tf.reset_default_graph() - dummy_config["summary_path"] = str(tmpdir) - dummy_config["model_path"] = os.path.join(tmpdir, "test") + dummy_config["output_path"] = os.path.join(tmpdir, "test") policy = create_policy_mock( dummy_config, use_rnn=rnn, use_discrete=discrete, use_visual=visual ) diff --git a/ml-agents/mlagents/trainers/tests/test_bcmodule.py b/ml-agents/mlagents/trainers/tests/test_bcmodule.py index d21c506e51..60d05f96fd 100644 --- a/ml-agents/mlagents/trainers/tests/test_bcmodule.py +++ b/ml-agents/mlagents/trainers/tests/test_bcmodule.py @@ -43,7 +43,7 @@ def ppo_dummy_config(): def create_bc_module(mock_brain, trainer_config, use_rnn, demo_file, tanhresample): # model_path = env.external_brain_names[0] - trainer_config["model_path"] = "testpath" + trainer_config["output_path"] = "testpath" trainer_config["keep_checkpoints"] = 3 trainer_config["use_recurrent"] = use_rnn trainer_config["behavioral_cloning"]["demo_path"] = ( diff --git a/ml-agents/mlagents/trainers/tests/test_ghost.py b/ml-agents/mlagents/trainers/tests/test_ghost.py index f740f35850..000be0da9a 100644 --- a/ml-agents/mlagents/trainers/tests/test_ghost.py +++ b/ml-agents/mlagents/trainers/tests/test_ghost.py @@ -38,8 +38,7 @@ def dummy_config(): memory_size: 8 curiosity_strength: 0.0 curiosity_enc_size: 1 - summary_path: test - model_path: test + output_path: test reward_signals: extrinsic: strength: 1.0 diff --git a/ml-agents/mlagents/trainers/tests/test_learn.py b/ml-agents/mlagents/trainers/tests/test_learn.py index bf8bcbcf60..885f26892e 100644 --- a/ml-agents/mlagents/trainers/tests/test_learn.py +++ b/ml-agents/mlagents/trainers/tests/test_learn.py @@ -42,8 +42,7 @@ def test_run_training( learn.run_training(0, basic_options()) mock_init.assert_called_once_with( trainer_factory_mock.return_value, - "./models/ppo", - "./summaries", + "./results/ppo", "ppo", 50000, None, @@ -52,9 +51,7 @@ def test_run_training( sampler_manager_mock.return_value, None, ) - 
handle_dir_mock.assert_called_once_with( - "./models/ppo", "./summaries", False, False, None - ) + handle_dir_mock.assert_called_once_with("./results/ppo", False, False, None) StatsReporter.writers.clear() # make sure there aren't any writers as added by learn.py diff --git a/ml-agents/mlagents/trainers/tests/test_nn_policy.py b/ml-agents/mlagents/trainers/tests/test_nn_policy.py index 1f669e3b45..7a918bb46c 100644 --- a/ml-agents/mlagents/trainers/tests/test_nn_policy.py +++ b/ml-agents/mlagents/trainers/tests/test_nn_policy.py @@ -39,8 +39,7 @@ def dummy_config(): memory_size: 8 curiosity_strength: 0.0 curiosity_enc_size: 1 - summary_path: test - model_path: test + output_path: test reward_signals: extrinsic: strength: 1.0 @@ -83,7 +82,7 @@ def test_load_save(dummy_config, tmp_path): path1 = os.path.join(tmp_path, "runid1") path2 = os.path.join(tmp_path, "runid2") trainer_params = dummy_config - trainer_params["model_path"] = path1 + trainer_params["output_path"] = path1 policy = create_policy_mock(trainer_params) policy.initialize_or_load() policy.save_model(2000) diff --git a/ml-agents/mlagents/trainers/tests/test_policy.py b/ml-agents/mlagents/trainers/tests/test_policy.py index c179a00bdd..497cc60db4 100644 --- a/ml-agents/mlagents/trainers/tests/test_policy.py +++ b/ml-agents/mlagents/trainers/tests/test_policy.py @@ -14,7 +14,7 @@ def basic_mock_brain(): def basic_params(): - return {"use_recurrent": False, "model_path": "my/path"} + return {"use_recurrent": False, "output_path": "my/path"} class FakePolicy(TFPolicy): diff --git a/ml-agents/mlagents/trainers/tests/test_ppo.py b/ml-agents/mlagents/trainers/tests/test_ppo.py index 803fbd353b..162edb911b 100644 --- a/ml-agents/mlagents/trainers/tests/test_ppo.py +++ b/ml-agents/mlagents/trainers/tests/test_ppo.py @@ -45,8 +45,7 @@ def dummy_config(): memory_size: 10 curiosity_strength: 0.0 curiosity_enc_size: 1 - summary_path: test - model_path: test + output_path: test reward_signals: extrinsic: strength: 1.0 diff --git a/ml-agents/mlagents/trainers/tests/test_reward_signals.py b/ml-agents/mlagents/trainers/tests/test_reward_signals.py index 048765e196..e6acba95df 100644 --- a/ml-agents/mlagents/trainers/tests/test_reward_signals.py +++ b/ml-agents/mlagents/trainers/tests/test_reward_signals.py @@ -105,7 +105,7 @@ def create_optimizer_mock( ) trainer_parameters = trainer_config model_path = "testpath" - trainer_parameters["model_path"] = model_path + trainer_parameters["output_path"] = model_path trainer_parameters["keep_checkpoints"] = 3 trainer_parameters["reward_signals"].update(reward_signal_config) trainer_parameters["use_recurrent"] = use_rnn diff --git a/ml-agents/mlagents/trainers/tests/test_rl_trainer.py b/ml-agents/mlagents/trainers/tests/test_rl_trainer.py index 5484976743..9ee983e331 100644 --- a/ml-agents/mlagents/trainers/tests/test_rl_trainer.py +++ b/ml-agents/mlagents/trainers/tests/test_rl_trainer.py @@ -10,7 +10,7 @@ def dummy_config(): return yaml.safe_load( """ - summary_path: "test/" + output_path: "test/" summary_freq: 1000 max_steps: 100 reward_signals: diff --git a/ml-agents/mlagents/trainers/tests/test_sac.py b/ml-agents/mlagents/trainers/tests/test_sac.py index 2f35a8c0bd..614788ca73 100644 --- a/ml-agents/mlagents/trainers/tests/test_sac.py +++ b/ml-agents/mlagents/trainers/tests/test_sac.py @@ -65,7 +65,7 @@ def create_sac_optimizer_mock(dummy_config, use_rnn, use_discrete, use_visual): trainer_parameters = dummy_config model_path = "testmodel" - trainer_parameters["model_path"] = model_path + 
trainer_parameters["output_path"] = model_path trainer_parameters["keep_checkpoints"] = 3 trainer_parameters["use_recurrent"] = use_rnn policy = NNPolicy( @@ -127,8 +127,7 @@ def test_sac_save_load_buffer(tmpdir, dummy_config): discrete_action_space=DISCRETE_ACTION_SPACE, ) trainer_params = dummy_config - trainer_params["summary_path"] = str(tmpdir) - trainer_params["model_path"] = str(tmpdir) + trainer_params["output_path"] = str(tmpdir) trainer_params["save_replay_buffer"] = True trainer = SACTrainer(mock_brain.brain_name, 1, trainer_params, True, False, 0, 0) policy = trainer.create_policy(mock_brain.brain_name, mock_brain) @@ -155,8 +154,7 @@ def test_add_get_policy(sac_optimizer, dummy_config): mock_optimizer.reward_signals = {} sac_optimizer.return_value = mock_optimizer - dummy_config["summary_path"] = "./summaries/test_trainer_summary" - dummy_config["model_path"] = "./models/test_trainer_models/TestModel" + dummy_config["output_path"] = "./results/test_trainer_models/TestModel" trainer = SACTrainer(brain_params, 0, dummy_config, True, False, 0, "0") policy = mock.Mock(spec=NNPolicy) policy.get_current_step.return_value = 2000 @@ -178,8 +176,7 @@ def test_advance(dummy_config): brain_params = make_brain_parameters( discrete_action=False, visual_inputs=0, vec_obs_size=6 ) - dummy_config["summary_path"] = "./summaries/test_trainer_summary" - dummy_config["model_path"] = "./models/test_trainer_models/TestModel" + dummy_config["output_path"] = "./results/test_trainer_models/TestModel" dummy_config["steps_per_update"] = 20 trainer = SACTrainer(brain_params, 0, dummy_config, True, False, 0, "0") policy = trainer.create_policy(brain_params.brain_name, brain_params) diff --git a/ml-agents/mlagents/trainers/tests/test_simple_rl.py b/ml-agents/mlagents/trainers/tests/test_simple_rl.py index 61d658640d..a41f50c63c 100644 --- a/ml-agents/mlagents/trainers/tests/test_simple_rl.py +++ b/ml-agents/mlagents/trainers/tests/test_simple_rl.py @@ -145,9 +145,8 @@ def _check_environment_trains( env_manager = SimpleEnvManager(env, FloatPropertiesChannel()) trainer_factory = TrainerFactory( trainer_config=trainer_config, - summaries_dir=dir, run_id=run_id, - model_path=dir, + output_path=dir, keep_checkpoints=1, train_model=True, load_model=False, @@ -158,8 +157,7 @@ def _check_environment_trains( tc = TrainerController( trainer_factory=trainer_factory, - summaries_dir=dir, - model_path=dir, + output_path=dir, run_id=run_id, meta_curriculum=meta_curriculum, train=True, diff --git a/ml-agents/mlagents/trainers/tests/test_trainer_controller.py b/ml-agents/mlagents/trainers/tests/test_trainer_controller.py index bb253d4860..ba202ec83f 100644 --- a/ml-agents/mlagents/trainers/tests/test_trainer_controller.py +++ b/ml-agents/mlagents/trainers/tests/test_trainer_controller.py @@ -10,8 +10,7 @@ def basic_trainer_controller(): return TrainerController( trainer_factory=None, - model_path="test_model_path", - summaries_dir="test_summaries_dir", + output_path="test_model_path", run_id="test_run_id", save_freq=100, meta_curriculum=None, @@ -28,8 +27,7 @@ def test_initialization_seed(numpy_random_seed, tensorflow_set_seed): seed = 27 TrainerController( trainer_factory=None, - model_path="", - summaries_dir="", + output_path="", run_id="1", save_freq=1, meta_curriculum=None, diff --git a/ml-agents/mlagents/trainers/tests/test_trainer_util.py b/ml-agents/mlagents/trainers/tests/test_trainer_util.py index 677fc8087e..48649ccfc9 100644 --- a/ml-agents/mlagents/trainers/tests/test_trainer_util.py +++ 
b/ml-agents/mlagents/trainers/tests/test_trainer_util.py @@ -86,9 +86,8 @@ def dummy_bad_config(): def test_initialize_trainer_parameters_override_defaults( BrainParametersMock, dummy_config_with_override ): - summaries_dir = "test_dir" run_id = "testrun" - model_path = "model_dir" + output_path = "model_dir" keep_checkpoints = 1 train_model = True load_model = False @@ -97,8 +96,7 @@ def test_initialize_trainer_parameters_override_defaults( base_config = dummy_config_with_override expected_config = base_config["default"] - expected_config["summary_path"] = f"{run_id}_testbrain" - expected_config["model_path"] = model_path + "/testbrain" + expected_config["output_path"] = output_path + "/testbrain" expected_config["keep_checkpoints"] = keep_checkpoints # Override value from specific brain config @@ -122,9 +120,8 @@ def mock_constructor( with patch.object(PPOTrainer, "__init__", mock_constructor): trainer_factory = trainer_util.TrainerFactory( trainer_config=base_config, - summaries_dir=summaries_dir, run_id=run_id, - model_path=model_path, + output_path=output_path, keep_checkpoints=keep_checkpoints, train_model=train_model, load_model=load_model, @@ -144,9 +141,8 @@ def test_initialize_ppo_trainer(BrainParametersMock, dummy_config): brain_params_mock = BrainParametersMock() BrainParametersMock.return_value.brain_name = "testbrain" external_brains = {"testbrain": BrainParametersMock()} - summaries_dir = "test_dir" run_id = "testrun" - model_path = "model_dir" + output_path = "results_dir" keep_checkpoints = 1 train_model = True load_model = False @@ -155,8 +151,7 @@ def test_initialize_ppo_trainer(BrainParametersMock, dummy_config): base_config = dummy_config expected_config = base_config["default"] - expected_config["summary_path"] = f"{run_id}_testbrain" - expected_config["model_path"] = model_path + "/testbrain" + expected_config["output_path"] = output_path + "/testbrain" expected_config["keep_checkpoints"] = keep_checkpoints def mock_constructor( @@ -173,9 +168,8 @@ def mock_constructor( with patch.object(PPOTrainer, "__init__", mock_constructor): trainer_factory = trainer_util.TrainerFactory( trainer_config=base_config, - summaries_dir=summaries_dir, run_id=run_id, - model_path=model_path, + output_path=output_path, keep_checkpoints=keep_checkpoints, train_model=train_model, load_model=load_model, @@ -192,9 +186,8 @@ def mock_constructor( def test_initialize_invalid_trainer_raises_exception( BrainParametersMock, dummy_bad_config ): - summaries_dir = "test_dir" run_id = "testrun" - model_path = "model_dir" + output_path = "results_dir" keep_checkpoints = 1 train_model = True load_model = False @@ -206,9 +199,8 @@ def test_initialize_invalid_trainer_raises_exception( with pytest.raises(TrainerConfigError): trainer_factory = trainer_util.TrainerFactory( trainer_config=bad_config, - summaries_dir=summaries_dir, run_id=run_id, - model_path=model_path, + output_path=output_path, keep_checkpoints=keep_checkpoints, train_model=train_model, load_model=load_model, @@ -223,9 +215,8 @@ def test_initialize_invalid_trainer_raises_exception( with pytest.raises(TrainerConfigError): trainer_factory = trainer_util.TrainerFactory( trainer_config=bad_config, - summaries_dir=summaries_dir, run_id=run_id, - model_path=model_path, + output_path=output_path, keep_checkpoints=keep_checkpoints, train_model=train_model, load_model=load_model, @@ -240,9 +231,8 @@ def test_initialize_invalid_trainer_raises_exception( with pytest.raises(UnityTrainerException): trainer_factory = trainer_util.TrainerFactory( 
trainer_config=bad_config, - summaries_dir=summaries_dir, run_id=run_id, - model_path=model_path, + output_path=output_path, keep_checkpoints=keep_checkpoints, train_model=train_model, load_model=load_model, @@ -270,9 +260,8 @@ def test_handles_no_default_section(dummy_config): trainer_factory = trainer_util.TrainerFactory( trainer_config=no_default_config, - summaries_dir="test_dir", run_id="testrun", - model_path="model_dir", + output_path="output_path", keep_checkpoints=1, train_model=True, load_model=False, @@ -299,9 +288,8 @@ def test_raise_if_no_config_for_brain(dummy_config): trainer_factory = trainer_util.TrainerFactory( trainer_config=bad_config, - summaries_dir="test_dir", run_id="testrun", - model_path="model_dir", + output_path="output_path", keep_checkpoints=1, train_model=True, load_model=False, @@ -339,33 +327,27 @@ def test_load_config_invalid_yaml(): def test_existing_directories(tmp_path): - model_path = os.path.join(tmp_path, "runid") - # Unused summary path - summary_path = os.path.join(tmp_path, "runid") + output_path = os.path.join(tmp_path, "runid") # Test fresh new unused path - should do nothing. - trainer_util.handle_existing_directories(model_path, summary_path, False, False) + trainer_util.handle_existing_directories(output_path, False, False) # Test resume with fresh path - should throw an exception. with pytest.raises(UnityTrainerException): - trainer_util.handle_existing_directories(model_path, summary_path, True, False) + trainer_util.handle_existing_directories(output_path, True, False) # make a directory - os.mkdir(model_path) + os.mkdir(output_path) # Test try to train w.o. force, should complain with pytest.raises(UnityTrainerException): - trainer_util.handle_existing_directories(model_path, summary_path, False, False) + trainer_util.handle_existing_directories(output_path, False, False) # Test try to train w/ resume - should work - trainer_util.handle_existing_directories(model_path, summary_path, True, False) + trainer_util.handle_existing_directories(output_path, True, False) # Test try to train w/ force - should work - trainer_util.handle_existing_directories(model_path, summary_path, False, True) + trainer_util.handle_existing_directories(output_path, False, True) # Test initialize option init_path = os.path.join(tmp_path, "runid2") with pytest.raises(UnityTrainerException): - trainer_util.handle_existing_directories( - model_path, summary_path, False, True, init_path - ) + trainer_util.handle_existing_directories(output_path, False, True, init_path) os.mkdir(init_path) # Should pass since the directory exists now. 
- trainer_util.handle_existing_directories( - model_path, summary_path, False, True, init_path - ) + trainer_util.handle_existing_directories(output_path, False, True, init_path) From 187a28492393cc3cd4c1e8ae0776d21cc92656f9 Mon Sep 17 00:00:00 2001 From: Ervin Teng Date: Wed, 22 Apr 2020 18:18:27 -0700 Subject: [PATCH 03/19] Fix more tests --- ml-agents/mlagents/trainers/tests/test_ghost.py | 6 ++---- ml-agents/mlagents/trainers/tests/test_nn_policy.py | 3 +-- ml-agents/mlagents/trainers/tests/test_ppo.py | 6 ++---- ml-agents/mlagents/trainers/tests/test_sac.py | 3 +-- 4 files changed, 6 insertions(+), 12 deletions(-) diff --git a/ml-agents/mlagents/trainers/tests/test_ghost.py b/ml-agents/mlagents/trainers/tests/test_ghost.py index 000be0da9a..433988467e 100644 --- a/ml-agents/mlagents/trainers/tests/test_ghost.py +++ b/ml-agents/mlagents/trainers/tests/test_ghost.py @@ -116,8 +116,7 @@ def test_process_trajectory(dummy_config): vector_action_descriptions=[], vector_action_space_type=0, ) - dummy_config["summary_path"] = "./summaries/test_trainer_summary" - dummy_config["model_path"] = "./models/test_trainer_models/TestModel" + dummy_config["output_path"] = "./results/test_trainer_models/TestModel" ppo_trainer = PPOTrainer(brain_name, 0, dummy_config, True, False, 0, "0") controller = GhostController(100) trainer = GhostTrainer( @@ -189,8 +188,7 @@ def test_publish_queue(dummy_config): vector_action_descriptions=[], vector_action_space_type=0, ) - dummy_config["summary_path"] = "./summaries/test_trainer_summary" - dummy_config["model_path"] = "./models/test_trainer_models/TestModel" + dummy_config["output_path"] = "./results/test_trainer_models/TestModel" ppo_trainer = PPOTrainer(brain_name, 0, dummy_config, True, False, 0, "0") controller = GhostController(100) trainer = GhostTrainer( diff --git a/ml-agents/mlagents/trainers/tests/test_nn_policy.py b/ml-agents/mlagents/trainers/tests/test_nn_policy.py index 7a918bb46c..17b15231bd 100644 --- a/ml-agents/mlagents/trainers/tests/test_nn_policy.py +++ b/ml-agents/mlagents/trainers/tests/test_nn_policy.py @@ -143,8 +143,7 @@ def test_normalization(dummy_config): vector_action_descriptions=[], vector_action_space_type=0, ) - dummy_config["summary_path"] = "./summaries/test_trainer_summary" - dummy_config["model_path"] = "./models/test_trainer_models/TestModel" + dummy_config["output_path"] = "./results/test_trainer_models/TestModel" time_horizon = 6 trajectory = make_fake_trajectory( diff --git a/ml-agents/mlagents/trainers/tests/test_ppo.py b/ml-agents/mlagents/trainers/tests/test_ppo.py index 162edb911b..6c546c26c5 100644 --- a/ml-agents/mlagents/trainers/tests/test_ppo.py +++ b/ml-agents/mlagents/trainers/tests/test_ppo.py @@ -310,8 +310,7 @@ def test_process_trajectory(dummy_config): vector_action_descriptions=[], vector_action_space_type=0, ) - dummy_config["summary_path"] = "./summaries/test_trainer_summary" - dummy_config["model_path"] = "./models/test_trainer_models/TestModel" + dummy_config["output_path"] = "./results/test_trainer_models/TestModel" trainer = PPOTrainer(brain_params, 0, dummy_config, True, False, 0, "0") policy = trainer.create_policy(brain_params.brain_name, brain_params) trainer.add_policy(brain_params.brain_name, policy) @@ -369,8 +368,7 @@ def test_add_get_policy(ppo_optimizer, dummy_config): mock_optimizer.reward_signals = {} ppo_optimizer.return_value = mock_optimizer - dummy_config["summary_path"] = "./summaries/test_trainer_summary" - dummy_config["model_path"] = "./models/test_trainer_models/TestModel" + 
dummy_config["output_path"] = "./results/test_trainer_models/TestModel" trainer = PPOTrainer(brain_params, 0, dummy_config, True, False, 0, "0") policy = mock.Mock(spec=NNPolicy) policy.get_current_step.return_value = 2000 diff --git a/ml-agents/mlagents/trainers/tests/test_sac.py b/ml-agents/mlagents/trainers/tests/test_sac.py index 614788ca73..66756f8640 100644 --- a/ml-agents/mlagents/trainers/tests/test_sac.py +++ b/ml-agents/mlagents/trainers/tests/test_sac.py @@ -255,8 +255,7 @@ def test_bad_config(dummy_config): dummy_config["sequence_length"] = 64 dummy_config["batch_size"] = 32 dummy_config["use_recurrent"] = True - dummy_config["summary_path"] = "./summaries/test_trainer_summary" - dummy_config["model_path"] = "./models/test_trainer_models/TestModel" + dummy_config["output_path"] = "./results/test_trainer_models/TestModel" with pytest.raises(UnityTrainerException): _ = SACTrainer(brain_params, 0, dummy_config, True, False, 0, "0") From 1a7bb81cc3df500827519557ca45e4253337a7af Mon Sep 17 00:00:00 2001 From: Ervin Teng Date: Wed, 22 Apr 2020 18:18:44 -0700 Subject: [PATCH 04/19] Move location of timers json as well --- ml-agents/mlagents/trainers/learn.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/ml-agents/mlagents/trainers/learn.py b/ml-agents/mlagents/trainers/learn.py index f0989d2ff4..59695426f8 100644 --- a/ml-agents/mlagents/trainers/learn.py +++ b/ml-agents/mlagents/trainers/learn.py @@ -320,11 +320,13 @@ def run_training(run_seed: int, options: RunOptions) -> None: :param run_options: Command line arguments for training. """ with hierarchical_timer("run_training.setup"): - write_path = f"./results/{options.run_id}" + base_path = "results" + write_path = os.path.join(base_path, options.run_id) maybe_init_path = ( - f"./results/{options.initialize_from}" if options.initialize_from else None + os.path.join(base_path, options.run_id) if options.initialize_from else None ) - summaries_dir = "./summaries" + run_logs_dir = os.path.join(write_path, "run_logs") + os.makedirs(run_logs_dir, exist_ok=True) port = options.base_port # Configure CSV, Tensorboard Writers and StatsReporter @@ -396,7 +398,7 @@ def run_training(run_seed: int, options: RunOptions) -> None: tc.start_learning(env_manager) finally: env_manager.close() - write_timing_tree(summaries_dir, options.run_id) + write_timing_tree(run_logs_dir, options.run_id) def write_timing_tree(summaries_dir: str, run_id: str) -> None: From 8a719979b3370d72e31dc5a332a622d5ac0228b9 Mon Sep 17 00:00:00 2001 From: Ervin Teng Date: Wed, 22 Apr 2020 18:23:09 -0700 Subject: [PATCH 05/19] Write timers to timer.json --- ml-agents/mlagents/trainers/learn.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ml-agents/mlagents/trainers/learn.py b/ml-agents/mlagents/trainers/learn.py index 59695426f8..2885d9cd43 100644 --- a/ml-agents/mlagents/trainers/learn.py +++ b/ml-agents/mlagents/trainers/learn.py @@ -398,11 +398,11 @@ def run_training(run_seed: int, options: RunOptions) -> None: tc.start_learning(env_manager) finally: env_manager.close() - write_timing_tree(run_logs_dir, options.run_id) + write_timing_tree(run_logs_dir) -def write_timing_tree(summaries_dir: str, run_id: str) -> None: - timing_path = f"{summaries_dir}/{run_id}_timers.json" +def write_timing_tree(summaries_dir: str) -> None: + timing_path = f"{summaries_dir}/timers.json" try: with open(timing_path, "w") as f: json.dump(get_timer_tree(), f, indent=4) From e0998946420feb9c060c99086d9086dbfc2e7714 Mon Sep 17 00:00:00 2001 
From: Ervin Teng Date: Wed, 22 Apr 2020 18:55:04 -0700 Subject: [PATCH 06/19] Write out run_options --- ml-agents/mlagents/trainers/learn.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/ml-agents/mlagents/trainers/learn.py b/ml-agents/mlagents/trainers/learn.py index 2885d9cd43..59b8fcef04 100644 --- a/ml-agents/mlagents/trainers/learn.py +++ b/ml-agents/mlagents/trainers/learn.py @@ -1,5 +1,6 @@ # # Unity ML-Agents Toolkit import argparse +import yaml import os import numpy as np @@ -398,11 +399,23 @@ def run_training(run_seed: int, options: RunOptions) -> None: tc.start_learning(env_manager) finally: env_manager.close() + write_run_options(base_path, options) write_timing_tree(run_logs_dir) -def write_timing_tree(summaries_dir: str) -> None: - timing_path = f"{summaries_dir}/timers.json" +def write_run_options(output_dir: str, run_options: RunOptions) -> None: + run_options_path = os.path.join(output_dir, "configuration.yaml") + try: + with open(run_options_path, "w") as f: + yaml.dump(dict(run_options._asdict()), f, sort_keys=False) + except FileNotFoundError: + logger.warning( + f"Unable to save configuration to {run_options_path}. Make sure the directory exists" + ) + + +def write_timing_tree(output_dir: str) -> None: + timing_path = f"{output_dir}/timers.json" try: with open(timing_path, "w") as f: json.dump(get_timer_tree(), f, indent=4) From 3de64da60a95040028e75cfd1ecaa7500870440d Mon Sep 17 00:00:00 2001 From: Ervin Teng Date: Wed, 22 Apr 2020 20:30:02 -0700 Subject: [PATCH 07/19] Upgrade YAML library fix incorrect configuration.yaml location Improve tests --- ml-agents/mlagents/trainers/learn.py | 2 +- ml-agents/mlagents/trainers/tests/test_learn.py | 13 ++++++++++--- ml-agents/setup.py | 2 +- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/ml-agents/mlagents/trainers/learn.py b/ml-agents/mlagents/trainers/learn.py index 59b8fcef04..e1c237ccbf 100644 --- a/ml-agents/mlagents/trainers/learn.py +++ b/ml-agents/mlagents/trainers/learn.py @@ -399,7 +399,7 @@ def run_training(run_seed: int, options: RunOptions) -> None: tc.start_learning(env_manager) finally: env_manager.close() - write_run_options(base_path, options) + write_run_options(write_path, options) write_timing_tree(run_logs_dir) diff --git a/ml-agents/mlagents/trainers/tests/test_learn.py b/ml-agents/mlagents/trainers/tests/test_learn.py index 885f26892e..0eda10936a 100644 --- a/ml-agents/mlagents/trainers/tests/test_learn.py +++ b/ml-agents/mlagents/trainers/tests/test_learn.py @@ -15,6 +15,8 @@ def basic_options(extra_args=None): return parse_command_line(args) +@patch("mlagents.trainers.learn.write_timing_tree") +@patch("mlagents.trainers.learn.write_run_options") @patch("mlagents.trainers.learn.handle_existing_directories") @patch("mlagents.trainers.learn.TrainerFactory") @patch("mlagents.trainers.learn.SamplerManager") @@ -28,6 +30,8 @@ def test_run_training( sampler_manager_mock, trainer_factory_mock, handle_dir_mock, + write_run_options_mock, + write_timing_tree_mock, ): mock_env = MagicMock() mock_env.external_brain_names = [] @@ -39,10 +43,11 @@ def test_run_training( mock_init = MagicMock(return_value=None) with patch.object(TrainerController, "__init__", mock_init): with patch.object(TrainerController, "start_learning", MagicMock()): - learn.run_training(0, basic_options()) + options = basic_options() + learn.run_training(0, options) mock_init.assert_called_once_with( trainer_factory_mock.return_value, - "./results/ppo", + "results/ppo", "ppo", 50000, None, 
@@ -51,7 +56,9 @@ def test_run_training(
             sampler_manager_mock.return_value,
             None,
         )
-        handle_dir_mock.assert_called_once_with("./results/ppo", False, False, None)
+        handle_dir_mock.assert_called_once_with("results/ppo", False, False, None)
+        write_timing_tree_mock.assert_called_once_with("results/ppo/run_logs")
+        write_run_options_mock.assert_called_once_with("results/ppo", options)
     StatsReporter.writers.clear()  # make sure there aren't any writers as added by learn.py

diff --git a/ml-agents/setup.py b/ml-agents/setup.py
index 2882b3cafe..db5a12c2f0 100644
--- a/ml-agents/setup.py
+++ b/ml-agents/setup.py
@@ -60,7 +60,7 @@ def run(self):
         "numpy>=1.13.3,<2.0",
         "Pillow>=4.2.1",
         "protobuf>=3.6",
-        "pyyaml",
+        "pyyaml>=5.1",
         "tensorflow>=1.7,<2.1",
         'pypiwin32==223;platform_system=="Windows"',
     ],

From 44aa3b58e77d06d57ea6855f6c548d3e05b5f67b Mon Sep 17 00:00:00 2001
From: Ervin Teng
Date: Wed, 22 Apr 2020 20:35:03 -0700
Subject: [PATCH 08/19] Update docs

---
 docs/Getting-Started.md                 | 5 ++---
 docs/Learning-Environment-Executable.md | 5 ++---
 docs/Training-ML-Agents.md              | 7 ++++---
 docs/Training-PPO.md                    | 2 +-
 docs/Training-SAC.md                    | 2 +-
 5 files changed, 10 insertions(+), 11 deletions(-)

diff --git a/docs/Getting-Started.md b/docs/Getting-Started.md
index 5c09a919d4..d1a9730026 100644
--- a/docs/Getting-Started.md
+++ b/docs/Getting-Started.md
@@ -179,12 +179,11 @@ INFO:mlagents_envs:Hyperparameters for the PPO Trainer of brain 3DBallLearning:
         sequence_length: 64
         summary_freq: 1000
         use_recurrent: False
-        summary_path: ./summaries/first3DBallRun
         memory_size: 256
         use_curiosity: False
         curiosity_strength: 0.01
         curiosity_enc_size: 128
-        model_path: ./models/first3DBallRun/3DBallLearning
+        output_path: ./results/first3DBallRun/3DBallLearning
 INFO:mlagents.trainers: first3DBallRun: 3DBallLearning: Step: 1000. Mean Reward: 1.242. Std of Reward: 0.746. Training.
 INFO:mlagents.trainers: first3DBallRun: 3DBallLearning: Step: 2000. Mean Reward: 1.319. Std of Reward: 0.693. Training.
 INFO:mlagents.trainers: first3DBallRun: 3DBallLearning: Step: 3000. Mean Reward: 1.804. Std of Reward: 1.056. Training.
@@ -236,7 +235,7 @@ the same command again, appending the `--resume` flag:
 mlagents-learn config/trainer_config.yaml --run-id=first3DBallRun --resume
 ```

-Your trained model will be at `models/<run-identifier>/<behavior_name>.nn` where
+Your trained model will be at `results/<run-identifier>/<behavior_name>.nn` where
 `<behavior_name>` is the name of the `Behavior Name` of the agents corresponding to
 the model. This file corresponds to your model's latest checkpoint. You can
 now embed this trained model into your Agents by following the steps below,
diff --git a/docs/Learning-Environment-Executable.md b/docs/Learning-Environment-Executable.md
index b78110bcfe..26c7fa6bce 100644
--- a/docs/Learning-Environment-Executable.md
+++ b/docs/Learning-Environment-Executable.md
@@ -152,12 +152,11 @@ INFO:mlagents_envs:Hyperparameters for the PPO Trainer of brain Ball3DLearning:
         sequence_length: 64
         summary_freq: 1000
         use_recurrent: False
-        summary_path: ./summaries/first-run-0
         memory_size: 256
         use_curiosity: False
         curiosity_strength: 0.01
         curiosity_enc_size: 128
-        model_path: ./models/first-run-0/Ball3DLearning
+        output_path: ./results/first-run-0/Ball3DLearning
 INFO:mlagents.trainers: first-run-0: Ball3DLearning: Step: 1000. Mean Reward: 1.242. Std of Reward: 0.746. Training.
 INFO:mlagents.trainers: first-run-0: Ball3DLearning: Step: 2000. Mean Reward: 1.319. Std of Reward: 0.693. Training.
 INFO:mlagents.trainers: first-run-0: Ball3DLearning: Step: 3000. Mean Reward: 1.804. Std of Reward: 1.056. Training.
@@ -171,7 +170,7 @@ INFO:mlagents.trainers: first-run-0: Ball3DLearning: Step: 10000. Mean Reward: 2
 ```

 You can press Ctrl+C to stop the training, and your trained model will be at
-`models/<run-identifier>/<behavior_name>.nn`, which corresponds to your model's
+`results/<run-identifier>/<behavior_name>.nn`, which corresponds to your model's
 latest checkpoint. (**Note:** There is a known bug on Windows that causes the
 saving of the model to fail when you early terminate the training, it's
 recommended to wait until Step has reached the max_steps parameter you set in
diff --git a/docs/Training-ML-Agents.md b/docs/Training-ML-Agents.md
index cf34a60d25..46e6934734 100644
--- a/docs/Training-ML-Agents.md
+++ b/docs/Training-ML-Agents.md
@@ -64,16 +64,17 @@ for a sample execution of the `mlagents-learn` command.
 Regardless of which training methods, configurations or hyperparameters you
 provide, the training process will always generate three artifacts:

-1. Summaries (under the `summaries/` folder): these are training metrics that
+1. Summaries (under the `results/<run-identifier>/` folder):
+   these are training metrics that
    are updated throughout the training process. They are helpful to monitor
    your training performance and may help inform how to update your
    hyperparameter values. See [Using TensorBoard](Using-Tensorboard.md) for
    more details on how to visualize the training metrics.
-1. Models (under the `models/` folder): these contain the model checkpoints that
+1. Models (under the `results/<run-identifier>/` folder): these contain the model checkpoints that
    are updated throughout training and the final model file (`.nn`). This final
    model file is generated once either when training completes or is
    interrupted.
-1. Timers file (also under the `summaries/` folder): this contains aggregated
+1. Timers file (also under the `results/` folder): this contains aggregated
    metrics on your training process, including time spent on specific code
    blocks. See [Profiling in Python](Profiling-Python.md) for more information
    on the timers generated.
diff --git a/docs/Training-PPO.md b/docs/Training-PPO.md
index 8f328bc4b6..70a6bf2b9b 100644
--- a/docs/Training-PPO.md
+++ b/docs/Training-PPO.md
@@ -294,7 +294,7 @@ Typical Range: Approximately equal to PPO's `buffer_size`
 `init_path` can be specified to initialize your model from a previous run before starting.
 Note that the prior run should have used the same trainer configurations as the current run,
 and have been saved with the same version of ML-Agents. You should provide the full path
-to the folder where the checkpoints were saved, e.g. `./models/{run-id}/{behavior_name}`.
+to the folder where the checkpoints were saved, e.g. `./results/{run-id}/{behavior_name}`.

 This option is provided in case you want to initialize different behaviors from different runs;
 in most cases, it is sufficient to use the `--initialize-from` CLI parameter to initialize
diff --git a/docs/Training-SAC.md b/docs/Training-SAC.md
index 54d1e921fe..d001504afe 100644
--- a/docs/Training-SAC.md
+++ b/docs/Training-SAC.md
@@ -295,7 +295,7 @@ Typical Range (Discrete): `32` - `512`
 `init_path` can be specified to initialize your model from a previous run before starting.
 Note that the prior run should have used the same trainer configurations as the current run,
 and have been saved with the same version of ML-Agents. You should provide the full path
-to the folder where the checkpoints were saved, e.g. `./models/{run-id}/{behavior_name}`.
+to the folder where the checkpoints were saved, e.g. `./results/{run-id}/{behavior_name}`.
This option is provided in case you want to initialize different behaviors from different runs; in most cases, it is sufficient to use the `--initialize-from` CLI parameter to initialize From d5836be1ee98d8b990178892aa339586da4342f7 Mon Sep 17 00:00:00 2001 From: Ervin Teng Date: Wed, 22 Apr 2020 20:35:16 -0700 Subject: [PATCH 09/19] Update Yamato --- ml-agents/tests/yamato/training_int_tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ml-agents/tests/yamato/training_int_tests.py b/ml-agents/tests/yamato/training_int_tests.py index 5900e83920..e67a5efb4d 100644 --- a/ml-agents/tests/yamato/training_int_tests.py +++ b/ml-agents/tests/yamato/training_int_tests.py @@ -21,7 +21,7 @@ def run_training(python_version, csharp_version): print( f"Running training with python={python_version or latest} and c#={csharp_version or latest}" ) - nn_file_expected = f"./models/{run_id}/3DBall.nn" + nn_file_expected = f"./results/{run_id}/3DBall.nn" if os.path.exists(nn_file_expected): # Should never happen - make sure nothing leftover from an old test. print("Artifacts from previous build found!") From c20e603306aab316f139700ebbc059895471023e Mon Sep 17 00:00:00 2001 From: Ervin Teng Date: Wed, 22 Apr 2020 20:35:32 -0700 Subject: [PATCH 10/19] Update .gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index f5a9a71633..e0ff60ec85 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ # Tensorflow Model Info /models /summaries +/results # Training environments /envs From 6ab3ac84089e73c62383fbd357dc77186663fe11 Mon Sep 17 00:00:00 2001 From: Ervin Teng Date: Wed, 22 Apr 2020 20:36:08 -0700 Subject: [PATCH 11/19] Add comments to gitignore --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index e0ff60ec85..b3aab2b943 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ -# Tensorflow Model Info +# Output Artifacts (Legacy) /models /summaries +# Output Artifacts /results # Training environments From e2ee19848df819a9f9e7ee20cfb2d8aba4375221 Mon Sep 17 00:00:00 2001 From: Ervin Teng Date: Wed, 22 Apr 2020 20:37:53 -0700 Subject: [PATCH 12/19] Update tensorboard docs --- docs/Using-Tensorboard.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/Using-Tensorboard.md b/docs/Using-Tensorboard.md index dd424c20a9..d6297508b5 100644 --- a/docs/Using-Tensorboard.md +++ b/docs/Using-Tensorboard.md @@ -12,7 +12,7 @@ start TensorBoard: 1. Open a terminal or console window: 1. Navigate to the directory where the ML-Agents Toolkit is installed. -1. From the command line run: `tensorboard --logdir=summaries --port=6006` +1. From the command line run: `tensorboard --logdir=results --port=6006` 1. Open a browser window and navigate to [localhost:6006](http://localhost:6006). From c6b019db00991b44d0041c93ad1119c10f536ee5 Mon Sep 17 00:00:00 2001 From: Ervin Teng Date: Wed, 22 Apr 2020 20:39:53 -0700 Subject: [PATCH 13/19] Update changelog and migrating --- com.unity.ml-agents/CHANGELOG.md | 2 ++ docs/Migrating.md | 2 ++ 2 files changed, 4 insertions(+) diff --git a/com.unity.ml-agents/CHANGELOG.md b/com.unity.ml-agents/CHANGELOG.md index 189d0f042b..9588417c1f 100755 --- a/com.unity.ml-agents/CHANGELOG.md +++ b/com.unity.ml-agents/CHANGELOG.md @@ -44,6 +44,8 @@ and this project adheres to `UnityToGymWrapper` and no longer creates the `UnityEnvironment`. 
Instead, the `UnityEnvironment` must be passed as input to the constructor of
   `UnityToGymWrapper`
+- Training artifacts (trained models, summaries) are now found in the `results/`
+  directory. (#3829)

 ### Minor Changes

diff --git a/docs/Migrating.md b/docs/Migrating.md
index ae659feaf3..29eeb01ccf 100644
--- a/docs/Migrating.md
+++ b/docs/Migrating.md
@@ -38,6 +38,8 @@ double-check that the versions are in the same. The versions can be found in
   `UnityToGymWrapper` and no longer creates the `UnityEnvironment`. Instead,
   the `UnityEnvironment` must be passed as input to the constructor of
   `UnityToGymWrapper`
+- Training artifacts (trained models, summaries) are now found under `results/`
+  instead of `summaries/` and `models/`.

 ### Steps to Migrate

From bf843cce200665f9e74c154a6a5f7970195471e7 Mon Sep 17 00:00:00 2001
From: Ervin Teng
Date: Wed, 22 Apr 2020 21:18:29 -0700
Subject: [PATCH 14/19] Use try-catch instead of requiring YAML 5.1

---
 ml-agents/mlagents/trainers/learn.py | 5 ++++-
 ml-agents/setup.py                   | 2 +-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/ml-agents/mlagents/trainers/learn.py b/ml-agents/mlagents/trainers/learn.py
index e1c237ccbf..3bc4846d3e 100644
--- a/ml-agents/mlagents/trainers/learn.py
+++ b/ml-agents/mlagents/trainers/learn.py
@@ -407,7 +407,10 @@ def write_run_options(output_dir: str, run_options: RunOptions) -> None:
     run_options_path = os.path.join(output_dir, "configuration.yaml")
     try:
         with open(run_options_path, "w") as f:
-            yaml.dump(dict(run_options._asdict()), f, sort_keys=False)
+            try:
+                yaml.dump(dict(run_options._asdict()), f, sort_keys=False)
+            except TypeError:  # Older versions of pyyaml don't support sort_keys
+                yaml.dump(dict(run_options._asdict()), f)
     except FileNotFoundError:
         logger.warning(
             f"Unable to save configuration to {run_options_path}. Make sure the directory exists"
         )

diff --git a/ml-agents/setup.py b/ml-agents/setup.py
index db5a12c2f0..2882b3cafe 100644
--- a/ml-agents/setup.py
+++ b/ml-agents/setup.py
@@ -60,7 +60,7 @@ def run(self):
         "numpy>=1.13.3,<2.0",
         "Pillow>=4.2.1",
         "protobuf>=3.6",
-        "pyyaml>=5.1",
+        "pyyaml",
         "tensorflow>=1.7,<2.1",
         'pypiwin32==223;platform_system=="Windows"',
     ],

From 0b7146e6d2642b92d261bbae1619ffda5a7bbe24 Mon Sep 17 00:00:00 2001
From: Ervin Teng
Date: Wed, 22 Apr 2020 21:25:56 -0700
Subject: [PATCH 15/19] Fix issue with output directory check

---
 ml-agents/mlagents/trainers/learn.py        | 11 ++++++-----
 ml-agents/mlagents/trainers/trainer_util.py |  6 +++---
 2 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/ml-agents/mlagents/trainers/learn.py b/ml-agents/mlagents/trainers/learn.py
index 3bc4846d3e..f8519d585b 100644
--- a/ml-agents/mlagents/trainers/learn.py
+++ b/ml-agents/mlagents/trainers/learn.py
@@ -327,9 +327,13 @@ def run_training(run_seed: int, options: RunOptions) -> None:
         os.path.join(base_path, options.initialize_from) if options.initialize_from else None
     )
     run_logs_dir = os.path.join(write_path, "run_logs")
-    os.makedirs(run_logs_dir, exist_ok=True)
     port = options.base_port
+    # Check if directory exists
+    handle_existing_directories(
+        write_path, options.resume, options.force, maybe_init_path
+    )
+    # Make run logs directory
+    os.makedirs(run_logs_dir, exist_ok=True)
     # Configure CSV, Tensorboard Writers and StatsReporter
     # We assume reward and episode length are needed in the CSV.
csv_writer = CSVWriter( @@ -339,9 +343,6 @@ def run_training(run_seed: int, options: RunOptions) -> None: "Environment/Episode Length", ], ) - handle_existing_directories( - write_path, options.resume, options.force, maybe_init_path - ) tb_writer = TensorboardWriter(write_path, clear_past_data=not options.resume) gauge_write = GaugeWriter() console_writer = ConsoleWriter() diff --git a/ml-agents/mlagents/trainers/trainer_util.py b/ml-agents/mlagents/trainers/trainer_util.py index 7f1aebbe7d..8c864919ff 100644 --- a/ml-agents/mlagents/trainers/trainer_util.py +++ b/ml-agents/mlagents/trainers/trainer_util.py @@ -203,7 +203,7 @@ def _load_config(fp: TextIO) -> Dict[str, Any]: def handle_existing_directories( - model_path: str, resume: bool, force: bool, init_path: str = None + output_path: str, resume: bool, force: bool, init_path: str = None ) -> None: """ Validates that if the run_id model exists, we do not overwrite it unless --force is specified. @@ -215,9 +215,9 @@ def handle_existing_directories( :param force: Whether or not the --force flag was passed. """ - model_path_exists = os.path.isdir(model_path) + output_path_exists = os.path.isdir(output_path) - if model_path_exists: + if output_path_exists: if not resume and not force: raise UnityTrainerException( "Previous data from this run ID was found. " From 48ee399a2b8d5b949e6ce3b20c8ca985f33f43f8 Mon Sep 17 00:00:00 2001 From: Ervin Teng Date: Mon, 27 Apr 2020 18:37:39 -0700 Subject: [PATCH 16/19] Clean up some pathing --- ml-agents/mlagents/trainers/learn.py | 2 +- ml-agents/mlagents/trainers/policy/tf_policy.py | 3 ++- ml-agents/mlagents/trainers/trainer_controller.py | 4 ++-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/ml-agents/mlagents/trainers/learn.py b/ml-agents/mlagents/trainers/learn.py index f8519d585b..75b6a57e02 100644 --- a/ml-agents/mlagents/trainers/learn.py +++ b/ml-agents/mlagents/trainers/learn.py @@ -419,7 +419,7 @@ def write_run_options(output_dir: str, run_options: RunOptions) -> None: def write_timing_tree(output_dir: str) -> None: - timing_path = f"{output_dir}/timers.json" + timing_path = os.path.join(output_dir, "timers.json") try: with open(timing_path, "w") as f: json.dump(get_timer_tree(), f, indent=4) diff --git a/ml-agents/mlagents/trainers/policy/tf_policy.py b/ml-agents/mlagents/trainers/policy/tf_policy.py index 0f242140b5..f24f7acb25 100644 --- a/ml-agents/mlagents/trainers/policy/tf_policy.py +++ b/ml-agents/mlagents/trainers/policy/tf_policy.py @@ -1,5 +1,6 @@ from typing import Any, Dict, List, Optional import abc +import os import numpy as np from mlagents.tf_utils import tf from mlagents import tf_utils @@ -366,7 +367,7 @@ def save_model(self, steps): :return: """ with self.graph.as_default(): - last_checkpoint = self.model_path + "/model-" + str(steps) + ".ckpt" + last_checkpoint = os.path.join(self.model_path, f"model-{steps}.ckpt") self.saver.save(self.sess, last_checkpoint) tf.train.write_graph( self.graph, self.model_path, "raw_graph_def.pb", as_text=False diff --git a/ml-agents/mlagents/trainers/trainer_controller.py b/ml-agents/mlagents/trainers/trainer_controller.py index 5780e16cc1..b71f265659 100644 --- a/ml-agents/mlagents/trainers/trainer_controller.py +++ b/ml-agents/mlagents/trainers/trainer_controller.py @@ -123,10 +123,10 @@ def _create_output_path(output_path): os.makedirs(output_path) except Exception: raise UnityEnvironmentException( - "The folder {} containing the " + f"The folder {output_path} containing the " "generated model could not be " "accessed. 
Please make sure the " - "permissions are set correctly.".format(output_path) + "permissions are set correctly." ) @timed From 2fc8b063ef85b24119b4c2878b236e4e153e8dff Mon Sep 17 00:00:00 2001 From: Ervin Teng Date: Mon, 27 Apr 2020 18:38:00 -0700 Subject: [PATCH 17/19] Clean up comment --- ml-agents/mlagents/trainers/trainer_util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ml-agents/mlagents/trainers/trainer_util.py b/ml-agents/mlagents/trainers/trainer_util.py index 8c864919ff..0657a9fd9b 100644 --- a/ml-agents/mlagents/trainers/trainer_util.py +++ b/ml-agents/mlagents/trainers/trainer_util.py @@ -80,7 +80,7 @@ def initialize_trainer( :param trainer_config: Original trainer configuration loaded from YAML :param brain_name: Name of the brain to be associated with trainer :param run_id: Run ID to associate with this training run - :param output_path: Path to save the model and save summary statistics + :param output_path: Path to save the model and summary statistics :param keep_checkpoints: How many model checkpoints to keep :param train_model: Whether to train the model (vs. run inference) :param load_model: Whether to load the model or randomly initialize From 7a8f8fc384ced62577300cd50e740db02b0aa43e Mon Sep 17 00:00:00 2001 From: Ervin Teng Date: Mon, 27 Apr 2020 18:39:09 -0700 Subject: [PATCH 18/19] Clean up pathing in trainer_util --- ml-agents/mlagents/trainers/trainer_util.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/ml-agents/mlagents/trainers/trainer_util.py b/ml-agents/mlagents/trainers/trainer_util.py index 0657a9fd9b..11ca04b797 100644 --- a/ml-agents/mlagents/trainers/trainer_util.py +++ b/ml-agents/mlagents/trainers/trainer_util.py @@ -97,13 +97,9 @@ def initialize_trainer( ) trainer_parameters = trainer_config.get("default", {}).copy() - trainer_parameters["output_path"] = "{basedir}/{name}".format( - basedir=output_path, name=brain_name - ) + trainer_parameters["output_path"] = os.path.join(output_path, brain_name) if init_path is not None: - trainer_parameters["init_path"] = "{basedir}/{name}".format( - basedir=init_path, name=brain_name - ) + trainer_parameters["init_path"] = os.path.join(init_path, brain_name) trainer_parameters["keep_checkpoints"] = keep_checkpoints if brain_name in trainer_config: _brain_key: Any = brain_name From f73f0d336ac61551f66297ed12359a6eb1b8598c Mon Sep 17 00:00:00 2001 From: Jonathan Harper Date: Mon, 27 Apr 2020 18:50:02 -0700 Subject: [PATCH 19/19] Add Player.log to results folder (#3877) Since the default Player.log path would be overwritten on subsequent runs, we should keep the Unity Player logs in the results folder for a training run. This change uses the -logFile CLI option to the Unity Player to set the path. --- com.unity.ml-agents/CHANGELOG.md | 1 + ml-agents-envs/mlagents_envs/environment.py | 30 ++++++++++++++----- .../mlagents_envs/tests/test_envs.py | 12 ++++++++ ml-agents/mlagents/trainers/learn.py | 11 +++++-- .../mlagents/trainers/tests/test_learn.py | 1 + ml-agents/tests/yamato/scripts/run_llapi.py | 17 ++++++++--- 6 files changed, 58 insertions(+), 14 deletions(-) diff --git a/com.unity.ml-agents/CHANGELOG.md b/com.unity.ml-agents/CHANGELOG.md index 9588417c1f..1112a3cc3c 100755 --- a/com.unity.ml-agents/CHANGELOG.md +++ b/com.unity.ml-agents/CHANGELOG.md @@ -76,6 +76,7 @@ and this project adheres to added in `CollectObservations()`. (#3825) - Model updates can now happen asynchronously with environment steps for better performance. 
(#3690)
 - `num_updates` and `train_interval` for SAC were replaced with `steps_per_update`. (#3690)
+- Unity Player logs are now written out to the results directory. (#3877)
 
 ### Bug Fixes
 
diff --git a/ml-agents-envs/mlagents_envs/environment.py b/ml-agents-envs/mlagents_envs/environment.py
index 178ebe1eb6..d5ba9a3bd6 100644
--- a/ml-agents-envs/mlagents_envs/environment.py
+++ b/ml-agents-envs/mlagents_envs/environment.py
@@ -120,8 +120,9 @@ def __init__(
         seed: int = 0,
         no_graphics: bool = False,
         timeout_wait: int = 60,
-        args: Optional[List[str]] = None,
+        additional_args: Optional[List[str]] = None,
         side_channels: Optional[List[SideChannel]] = None,
+        log_folder: Optional[str] = None,
     ):
         """
         Starts a new unity environment and establishes a connection with the environment.
@@ -136,9 +137,11 @@
         :int timeout_wait: Time (in seconds) to wait for connection from environment.
-        :list args: Addition Unity command line arguments
+        :list additional_args: Additional Unity command line arguments
         :list side_channels: Additional side channel for no-rl communication with Unity
+        :str log_folder: Optional folder to write the Unity Player log file into. Requires absolute path.
         """
-        args = args or []
         atexit.register(self._close)
+        self.additional_args = additional_args or []
+        self.no_graphics = no_graphics
         # If base port is not specified, use BASE_ENVIRONMENT_PORT if we have
         # an environment, otherwise DEFAULT_EDITOR_PORT
         if base_port is None:
@@ -164,6 +167,7 @@
                 )
             )
             self.side_channels[_sc.channel_id] = _sc
+        self.log_folder = log_folder
 
         # If the environment name is None, a new environment will not be launched
         # and the communicator will directly try to connect to an existing unity environment.
@@ -174,7 +178,7 @@
                 "the worker-id must be 0 in order to connect with the Editor."
             )
         if file_name is not None:
-            self.executable_launcher(file_name, no_graphics, args)
+            self.executable_launcher(file_name, no_graphics, additional_args)
         else:
             logger.info(
                 f"Listening on port {self.port}. "
@@ -268,6 +272,20 @@ def validate_environment_path(env_path: str) -> Optional[str]:
             launch_string = candidates[0]
         return launch_string
 
+    def executable_args(self) -> List[str]:
+        args: List[str] = []
+        if self.no_graphics:
+            args += ["-nographics", "-batchmode"]
+        args += [UnityEnvironment.PORT_COMMAND_LINE_ARG, str(self.port)]
+        if self.log_folder:
+            log_file_path = os.path.join(
+                self.log_folder, f"Player-{self.worker_id}.log"
+            )
+            args += ["-logFile", log_file_path]
+        # Add in arguments passed explicitly by the user. 
+ args += self.additional_args + return args + def executable_launcher(self, file_name, no_graphics, args): launch_string = self.validate_environment_path(file_name) if launch_string is None: @@ -278,11 +296,7 @@ def executable_launcher(self, file_name, no_graphics, args): else: logger.debug("This is the launch string {}".format(launch_string)) # Launch Unity environment - subprocess_args = [launch_string] - if no_graphics: - subprocess_args += ["-nographics", "-batchmode"] - subprocess_args += [UnityEnvironment.PORT_COMMAND_LINE_ARG, str(self.port)] - subprocess_args += args + subprocess_args = [launch_string] + self.executable_args() try: self.proc1 = subprocess.Popen( subprocess_args, diff --git a/ml-agents-envs/mlagents_envs/tests/test_envs.py b/ml-agents-envs/mlagents_envs/tests/test_envs.py index 1f1fc5dd99..2d0faef9a1 100755 --- a/ml-agents-envs/mlagents_envs/tests/test_envs.py +++ b/ml-agents-envs/mlagents_envs/tests/test_envs.py @@ -49,6 +49,18 @@ def test_port_defaults( assert expected == env.port +@mock.patch("mlagents_envs.environment.UnityEnvironment.executable_launcher") +@mock.patch("mlagents_envs.environment.UnityEnvironment.get_communicator") +def test_log_file_path_is_set(mock_communicator, mock_launcher): + mock_communicator.return_value = MockCommunicator() + env = UnityEnvironment( + file_name="myfile", worker_id=0, log_folder="./some-log-folder-path" + ) + args = env.executable_args() + log_file_index = args.index("-logFile") + assert args[log_file_index + 1] == "./some-log-folder-path/Player-0.log" + + @mock.patch("mlagents_envs.environment.UnityEnvironment.executable_launcher") @mock.patch("mlagents_envs.environment.UnityEnvironment.get_communicator") def test_reset(mock_communicator, mock_launcher): diff --git a/ml-agents/mlagents/trainers/learn.py b/ml-agents/mlagents/trainers/learn.py index 75b6a57e02..1844f0b234 100644 --- a/ml-agents/mlagents/trainers/learn.py +++ b/ml-agents/mlagents/trainers/learn.py @@ -354,7 +354,12 @@ def run_training(run_seed: int, options: RunOptions) -> None: if options.env_path is None: port = UnityEnvironment.DEFAULT_EDITOR_PORT env_factory = create_environment_factory( - options.env_path, options.no_graphics, run_seed, port, options.env_args + options.env_path, + options.no_graphics, + run_seed, + port, + options.env_args, + os.path.abspath(run_logs_dir), # Unity environment requires absolute path ) engine_config = EngineConfig( options.width, @@ -470,6 +475,7 @@ def create_environment_factory( seed: int, start_port: int, env_args: Optional[List[str]], + log_folder: str, ) -> Callable[[int, List[SideChannel]], BaseEnv]: if env_path is not None: launch_string = UnityEnvironment.validate_environment_path(env_path) @@ -489,8 +495,9 @@ def create_unity_environment( seed=env_seed, no_graphics=no_graphics, base_port=start_port, - args=env_args, + additional_args=env_args, side_channels=side_channels, + log_folder=log_folder, ) return create_unity_environment diff --git a/ml-agents/mlagents/trainers/tests/test_learn.py b/ml-agents/mlagents/trainers/tests/test_learn.py index 0eda10936a..1b37f84aee 100644 --- a/ml-agents/mlagents/trainers/tests/test_learn.py +++ b/ml-agents/mlagents/trainers/tests/test_learn.py @@ -70,6 +70,7 @@ def test_bad_env_path(): seed=None, start_port=8000, env_args=None, + log_folder="results/log_folder", ) diff --git a/ml-agents/tests/yamato/scripts/run_llapi.py b/ml-agents/tests/yamato/scripts/run_llapi.py index d298bc9be2..591407f163 100644 --- a/ml-agents/tests/yamato/scripts/run_llapi.py +++ 
b/ml-agents/tests/yamato/scripts/run_llapi.py @@ -17,7 +17,7 @@ def test_run_environment(env_name): file_name=env_name, side_channels=[engine_configuration_channel], no_graphics=True, - args=["-logFile", "-"], + additional_args=["-logFile", "-"], ) try: @@ -94,14 +94,23 @@ def test_closing(env_name): """ try: env1 = UnityEnvironment( - file_name=env_name, base_port=5006, no_graphics=True, args=["-logFile", "-"] + file_name=env_name, + base_port=5006, + no_graphics=True, + additional_args=["-logFile", "-"], ) env1.close() env1 = UnityEnvironment( - file_name=env_name, base_port=5006, no_graphics=True, args=["-logFile", "-"] + file_name=env_name, + base_port=5006, + no_graphics=True, + additional_args=["-logFile", "-"], ) env2 = UnityEnvironment( - file_name=env_name, base_port=5007, no_graphics=True, args=["-logFile", "-"] + file_name=env_name, + base_port=5007, + no_graphics=True, + additional_args=["-logFile", "-"], ) env2.reset() finally:
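
Taken together, these patches let a library user route Unity Player logs into a per-run folder without going through learn.py. Below is a minimal usage sketch against the new constructor parameters; the build path, run ID, and the folder-creation step are illustrative assumptions, not part of the change:

    import os

    from mlagents_envs.environment import UnityEnvironment

    # Hypothetical build path and run ID, for illustration only.
    log_dir = os.path.abspath("./results/my-run/run_logs")  # log_folder requires an absolute path
    os.makedirs(log_dir, exist_ok=True)  # assumption: the environment does not create the folder itself

    env = UnityEnvironment(
        file_name="./builds/3DBall",  # path to a built Unity Player (stand-in name)
        worker_id=0,                  # the Player log lands at <log_dir>/Player-0.log
        no_graphics=True,             # adds -nographics -batchmode to the generated command line
        log_folder=log_dir,
    )
    try:
        env.reset()
    finally:
        env.close()

Because the log file name is derived from worker_id, concurrent workers write Player-0.log, Player-1.log, and so on into the same folder without clobbering one another. Note from executable_args() that user-supplied additional_args are appended after the generated arguments, so an explicit -logFile passed there (as the yamato scripts do) comes later on the Player command line than the log_folder-derived path.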