From 2195bc0ee4c3cd76c897d46b603bdc9c03e58a1e Mon Sep 17 00:00:00 2001 From: Jonathan Harper Date: Thu, 2 Jan 2020 16:20:22 -0800 Subject: [PATCH] Remove the --num-runs option The "num-runs" command-line option provides the ability to run multiple identically-configured training runs in separate processes by running mlagents-learn only once. This is a rarely used ML-Agents feature, but it adds complexity to other parts of the system by adding the need to support multiprocessing and managing of ports for the parallel training runs. It also doesn't provide truly reproducible experiments, since there is no guarantee of resource isolation between the trials. This commit removes the --num-runs option, with the idea that users will manage parallel or sequential runs of the same experiment themselves in the future. --- docs/Training-ML-Agents.md | 4 - ml-agents/mlagents/trainers/learn.py | 74 +++---------------- .../mlagents/trainers/tests/test_learn.py | 11 ++- 3 files changed, 16 insertions(+), 73 deletions(-) diff --git a/docs/Training-ML-Agents.md b/docs/Training-ML-Agents.md index 823f7dc021..e221fd696c 100644 --- a/docs/Training-ML-Agents.md +++ b/docs/Training-ML-Agents.md @@ -114,10 +114,6 @@ environment, you can set the following command line options when invoking the oldest checkpoint is deleted when saving a new checkpoint. Defaults to 5. * `--lesson=`: Specify which lesson to start with when performing curriculum training. Defaults to 0. -* `--num-runs=`: Sets the number of concurrent training sessions to perform. - Default is set to 1. Set to higher values when benchmarking performance and - multiple training sessions is desired. Training sessions are independent, and - do not improve learning performance. * `--num-envs=`: Specifies the number of concurrent Unity environment instances to collect experiences from when training. Defaults to 1. * `--run-id=`: Specifies an identifier for each training run. This diff --git a/ml-agents/mlagents/trainers/learn.py b/ml-agents/mlagents/trainers/learn.py index 25210092da..b2ef55cec5 100644 --- a/ml-agents/mlagents/trainers/learn.py +++ b/ml-agents/mlagents/trainers/learn.py @@ -2,7 +2,6 @@ import logging import argparse -from multiprocessing import Process, Queue import os import glob import shutil @@ -14,7 +13,6 @@ import mlagents_envs from mlagents import tf_utils from mlagents.trainers.trainer_controller import TrainerController -from mlagents.trainers.exception import TrainerError from mlagents.trainers.meta_curriculum import MetaCurriculum from mlagents.trainers.trainer_util import load_config, TrainerFactory from mlagents.trainers.stats import TensorboardWriter, CSVWriter, StatsReporter @@ -29,7 +27,6 @@ class CommandLineOptions(NamedTuple): debug: bool - num_runs: int seed: int env_path: str run_id: str @@ -109,9 +106,6 @@ def parse_command_line(argv: Optional[List[str]] = None) -> CommandLineOptions: default="ppo", help="The directory name for model and summary statistics", ) - parser.add_argument( - "--num-runs", default=1, type=int, help="Number of concurrent training sessions" - ) parser.add_argument( "--save-freq", default=50000, type=int, help="Frequency at which to save model" ) @@ -209,13 +203,9 @@ def parse_command_line(argv: Optional[List[str]] = None) -> CommandLineOptions: return CommandLineOptions.from_argparse(args) -def run_training( - sub_id: int, run_seed: int, options: CommandLineOptions, process_queue: Queue -) -> None: +def run_training(run_seed: int, options: CommandLineOptions) -> None: """ Launches training session. - :param process_queue: Queue used to send signal back to main. - :param sub_id: Unique id for training session. :param options: parsed command line arguments :param run_seed: Random seed used for training. :param run_options: Command line arguments for training. @@ -225,30 +215,16 @@ def run_training( curriculum_folder = options.curriculum_folder # Recognize and use docker volume if one is passed as an argument if not options.docker_target_name: - model_path = "./models/{run_id}-{sub_id}".format( - run_id=options.run_id, sub_id=sub_id - ) + model_path = f"./models/{options.run_id}" summaries_dir = "./summaries" else: - trainer_config_path = "/{docker_target_name}/{trainer_config_path}".format( - docker_target_name=options.docker_target_name, - trainer_config_path=trainer_config_path, - ) + trainer_config_path = f"/{options.docker_target_name}/{trainer_config_path}" if curriculum_folder is not None: - curriculum_folder = "/{docker_target_name}/{curriculum_folder}".format( - docker_target_name=options.docker_target_name, - curriculum_folder=curriculum_folder, - ) - model_path = "/{docker_target_name}/models/{run_id}-{sub_id}".format( - docker_target_name=options.docker_target_name, - run_id=options.run_id, - sub_id=sub_id, - ) - summaries_dir = "/{docker_target_name}/summaries".format( - docker_target_name=options.docker_target_name - ) + curriculum_folder = f"/{options.docker_target_name}/{curriculum_folder}" + model_path = f"/{options.docker_target_name}/models/{options.run_id}" + summaries_dir = f"/{options.docker_target_name}/summaries" trainer_config = load_config(trainer_config_path) - port = options.base_port + (sub_id * options.num_envs) + port = options.base_port # Configure CSV, Tensorboard Writers and StatsReporter # We assume reward and episode length are needed in the CSV. @@ -301,7 +277,7 @@ def run_training( trainer_factory, model_path, summaries_dir, - options.run_id + "-" + str(sub_id), + options.run_id, options.save_freq, maybe_meta_curriculum, options.train_model, @@ -309,8 +285,6 @@ def run_training( sampler_manager, resampling_interval, ) - # Signal that environment has been launched. - process_queue.put(True) # Begin training try: tc.start_learning(env_manager) @@ -461,40 +435,14 @@ def main(): else: # disable noisy warnings from tensorflow. tf_utils.set_warnings_enabled(False) - if options.env_path is None and options.num_runs > 1: - raise TrainerError( - "It is not possible to launch more than one concurrent training session " - "when training from the editor." - ) - jobs = [] run_seed = options.seed if options.cpu: os.environ["CUDA_VISIBLE_DEVICES"] = "-1" - if options.num_runs == 1: - if options.seed == -1: - run_seed = np.random.randint(0, 10000) - run_training(0, run_seed, options, Queue()) - else: - for i in range(options.num_runs): - if options.seed == -1: - run_seed = np.random.randint(0, 10000) - process_queue = Queue() - p = Process(target=run_training, args=(i, run_seed, options, process_queue)) - jobs.append(p) - p.start() - # Wait for signal that environment has successfully launched - while process_queue.get() is not True: - continue - - # Wait for jobs to complete. Otherwise we'll have an extra - # unhandled KeyboardInterrupt if we end early. - try: - for job in jobs: - job.join() - except KeyboardInterrupt: - pass + if options.seed == -1: + run_seed = np.random.randint(0, 10000) + run_training(run_seed, options) # For python debugger to directly run this script diff --git a/ml-agents/mlagents/trainers/tests/test_learn.py b/ml-agents/mlagents/trainers/tests/test_learn.py index b00d5957aa..1ef93ea834 100644 --- a/ml-agents/mlagents/trainers/tests/test_learn.py +++ b/ml-agents/mlagents/trainers/tests/test_learn.py @@ -35,12 +35,12 @@ def test_run_training( mock_init = MagicMock(return_value=None) with patch.object(TrainerController, "__init__", mock_init): with patch.object(TrainerController, "start_learning", MagicMock()): - learn.run_training(0, 0, basic_options(), MagicMock()) + learn.run_training(0, basic_options()) mock_init.assert_called_once_with( trainer_factory_mock.return_value, - "./models/ppo-0", + "./models/ppo", "./summaries", - "ppo-0", + "ppo", 50000, None, False, @@ -69,9 +69,9 @@ def test_docker_target_path( mock_init = MagicMock(return_value=None) with patch.object(TrainerController, "__init__", mock_init): with patch.object(TrainerController, "start_learning", MagicMock()): - learn.run_training(0, 0, options_with_docker_target, MagicMock()) + learn.run_training(0, options_with_docker_target) mock_init.assert_called_once() - assert mock_init.call_args[0][1] == "/dockertarget/models/ppo-0" + assert mock_init.call_args[0][1] == "/dockertarget/models/ppo" assert mock_init.call_args[0][2] == "/dockertarget/summaries" @@ -111,7 +111,6 @@ def test_commandline_args(): "--lesson=3", "--load", "--run-id=myawesomerun", - "--num-runs=3", "--save-freq=123456", "--seed=7890", "--train",