diff --git a/com.unity.ml-agents/CHANGELOG.md b/com.unity.ml-agents/CHANGELOG.md
index fd74b8d4e8..87d0e3f8be 100755
--- a/com.unity.ml-agents/CHANGELOG.md
+++ b/com.unity.ml-agents/CHANGELOG.md
@@ -12,6 +12,7 @@ and this project adheres to
 #### ml-agents / ml-agents-envs / gym-unity (Python)
 ### Minor Changes
 #### com.unity.ml-agents / com.unity.ml-agents.extensions (C#)
+- Added the ability to initialize behaviors from any checkpoint, not just the latest one. (#5525)
 #### ml-agents / ml-agents-envs / gym-unity (Python)
 ### Bug Fixes
 #### com.unity.ml-agents / com.unity.ml-agents.extensions (C#)
diff --git a/docs/Training-Configuration-File.md b/docs/Training-Configuration-File.md
index 46d5fcf29c..6968ea1671 100644
--- a/docs/Training-Configuration-File.md
+++ b/docs/Training-Configuration-File.md
@@ -33,7 +33,7 @@ choice of the trainer (which we review on subsequent sections).
 | `max_steps` | (default = `500000`) Total number of steps (i.e., observation collected and action taken) that must be taken in the environment (or across all environments if using multiple in parallel) before ending the training process. If you have multiple agents with the same behavior name within your environment, all steps taken by those agents will contribute to the same `max_steps` count. <br><br>Typical range: `5e5` - `1e7` |
 | `keep_checkpoints` | (default = `5`) The maximum number of model checkpoints to keep. Checkpoints are saved after the number of steps specified by the `checkpoint_interval` option. Once the maximum number of checkpoints has been reached, the oldest checkpoint is deleted when saving a new checkpoint. |
 | `checkpoint_interval` | (default = `500000`) The number of experiences collected between each checkpoint by the trainer. A maximum of `keep_checkpoints` checkpoints are saved before old ones are deleted. Each checkpoint saves the `.onnx` files in the `results/` folder. |
-| `init_path` | (default = None) Initialize trainer from a previously saved model. Note that the prior run should have used the same trainer configurations as the current run, and have been saved with the same version of ML-Agents. <br><br>You should provide the full path to the folder where the checkpoints were saved, e.g. `./models/{run-id}/{behavior_name}`. This option is provided in case you want to initialize different behaviors from different runs; in most cases, it is sufficient to use the `--initialize-from` CLI parameter to initialize all models from the same run. |
+| `init_path` | (default = None) Initialize trainer from a previously saved model. Note that the prior run should have used the same trainer configurations as the current run, and have been saved with the same version of ML-Agents. <br><br>You can provide either the file name or the full path to the checkpoint, e.g. `{checkpoint_name.pt}` or `./models/{run-id}/{behavior_name}/{checkpoint_name.pt}`. This option is provided in case you want to initialize different behaviors from different runs or initialize from an older checkpoint; in most cases, it is sufficient to use the `--initialize-from` CLI parameter to initialize all models from the same run. |
 | `threaded` | (default = `false`) Allow environments to step while updating the model. This might result in a training speedup, especially when using SAC. For best performance, leave setting to `false` when using self-play. |
 | `hyperparameters -> learning_rate` | (default = `3e-4`) Initial learning rate for gradient descent. Corresponds to the strength of each gradient descent update step. This should typically be decreased if training is unstable, and the reward does not consistently increase. <br><br>Typical range: `1e-5` - `1e-3` |
 | `hyperparameters -> batch_size` | Number of experiences in each iteration of gradient descent. **This should always be multiple times smaller than `buffer_size`**. If you are using continuous actions, this value should be large (on the order of 1000s). If you are using only discrete actions, this value should be smaller (on the order of 10s). <br><br>Typical range: (Continuous - PPO): `512` - `5120`; (Continuous - SAC): `128` - `1024`; (Discrete, PPO & SAC): `32` - `512`. |
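To make the two accepted `init_path` forms concrete, here is a minimal sketch of a trainer configuration exercised through `RunOptions.from_dict` (the same entry point the new test below uses). The behavior names, run ids, and checkpoint file names are hypothetical:

```python
import yaml

from mlagents.trainers.settings import RunOptions

# Hypothetical config: WallJump gives a bare file name, which is resolved
# against the --initialize-from run's behavior folder; PushBlock gives a
# full path, which is used as-is.
config = """
behaviors:
    WallJump:
        trainer_type: ppo
        init_path: WallJump-250000.pt  # file name only
    PushBlock:
        trainer_type: ppo
        init_path: ./results/OldRun/PushBlock/checkpoint.pt  # full path
checkpoint_settings:
    run_id: NewRun
    initialize_from: OldRun
"""
run_options = RunOptions.from_dict(yaml.safe_load(config))
# setup_init_path(run_options.behaviors, "./results/OldRun") would then expand
# WallJump's entry to ./results/OldRun/WallJump/WallJump-250000.pt and leave
# PushBlock's full path untouched (both files must exist, or it raises).
```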
diff --git a/ml-agents/mlagents/trainers/directory_utils.py b/ml-agents/mlagents/trainers/directory_utils.py
index 0e728ddc9d..80379d81e9 100644
--- a/ml-agents/mlagents/trainers/directory_utils.py
+++ b/ml-agents/mlagents/trainers/directory_utils.py
@@ -1,5 +1,7 @@
 import os
 from mlagents.trainers.exception import UnityTrainerException
+from mlagents.trainers.settings import TrainerSettings
+from mlagents.trainers.model_saver.torch_model_saver import DEFAULT_CHECKPOINT_NAME


 def validate_existing_directories(
@@ -13,6 +15,7 @@ def validate_existing_directories(
     :param summary_path: The summary path to be used.
     :param resume: Whether or not the --resume flag was passed.
     :param force: Whether or not the --force flag was passed.
+    :param init_path: Path to the run-id directory to initialize from.
     """
     output_path_exists = os.path.isdir(output_path)
@@ -40,3 +43,34 @@ def validate_existing_directories(
                 init_path
             )
         )
+
+
+def setup_init_path(
+    behaviors: TrainerSettings.DefaultTrainerDict, init_dir: str
+) -> None:
+    """
+    For each behavior, set up the full init_path to the checkpoint file to initialize the policy from.
+    :param behaviors: mapping from behavior_name to TrainerSettings
+    :param init_dir: Path to the run-id directory to initialize from
+    """
+    for behavior_name, ts in behaviors.items():
+        if ts.init_path is None:
+            # set default if None
+            ts.init_path = os.path.join(
+                init_dir, behavior_name, DEFAULT_CHECKPOINT_NAME
+            )
+        elif not os.path.dirname(ts.init_path):
+            # update to full path if just the file name
+            ts.init_path = os.path.join(init_dir, behavior_name, ts.init_path)
+        _validate_init_full_path(ts.init_path)
+
+
+def _validate_init_full_path(init_file: str) -> None:
+    """
+    Validate that the initialization path is an existing `.pt` file.
+    :param init_file: full path to the initialization checkpoint file
+    """
+    if not (os.path.isfile(init_file) and init_file.endswith(".pt")):
+        raise UnityTrainerException(
+            f"Could not initialize from {init_file}. File does not exist or is not a `.pt` file."
+        )
diff --git a/ml-agents/mlagents/trainers/learn.py b/ml-agents/mlagents/trainers/learn.py
index 2a61b0ef5c..e3015ffddc 100644
--- a/ml-agents/mlagents/trainers/learn.py
+++ b/ml-agents/mlagents/trainers/learn.py
@@ -13,7 +13,10 @@
 from mlagents.trainers.trainer_controller import TrainerController
 from mlagents.trainers.environment_parameter_manager import EnvironmentParameterManager
 from mlagents.trainers.trainer import TrainerFactory
-from mlagents.trainers.directory_utils import validate_existing_directories
+from mlagents.trainers.directory_utils import (
+    validate_existing_directories,
+    setup_init_path,
+)
 from mlagents.trainers.stats import StatsReporter
 from mlagents.trainers.cli_utils import parser
 from mlagents_envs.environment import UnityEnvironment
@@ -72,11 +75,14 @@ def run_training(run_seed: int, options: RunOptions) -> None:
     )
     # Make run logs directory
     os.makedirs(run_logs_dir, exist_ok=True)
-    # Load any needed states
+    # Load any needed states in case of resume
    if checkpoint_settings.resume:
         GlobalTrainingStatus.load_state(
             os.path.join(run_logs_dir, "training_status.json")
         )
+    # In case of initialization, set the full init_path for all behaviors
+    elif checkpoint_settings.maybe_init_path is not None:
+        setup_init_path(options.behaviors, checkpoint_settings.maybe_init_path)

     # Configure Tensorboard Writers and StatsReporter
     stats_writers = register_stats_writer_plugins(options)
diff --git a/ml-agents/mlagents/trainers/model_saver/torch_model_saver.py b/ml-agents/mlagents/trainers/model_saver/torch_model_saver.py
index d9e1459eab..e75e50d4cd 100644
--- a/ml-agents/mlagents/trainers/model_saver/torch_model_saver.py
+++ b/ml-agents/mlagents/trainers/model_saver/torch_model_saver.py
@@ -12,6 +12,7 @@

 logger = get_logger(__name__)
+DEFAULT_CHECKPOINT_NAME = "checkpoint.pt"


 class TorchModelSaver(BaseModelSaver):
@@ -55,7 +56,7 @@ def save_checkpoint(self, behavior_name: str, step: int) -> Tuple[str, List[str]]:
         pytorch_ckpt_path = f"{checkpoint_path}.pt"
         export_ckpt_path = f"{checkpoint_path}.onnx"
         torch.save(state_dict, f"{checkpoint_path}.pt")
-        torch.save(state_dict, os.path.join(self.model_path, "checkpoint.pt"))
+        torch.save(state_dict, os.path.join(self.model_path, DEFAULT_CHECKPOINT_NAME))
         self.export(checkpoint_path, behavior_name)
         return export_ckpt_path, [pytorch_ckpt_path]
@@ -75,7 +76,11 @@ def initialize_or_load(self, policy: Optional[TorchPolicy] = None) -> None:
             )
         elif self.load:
             logger.info(f"Resuming from {self.model_path}.")
-            self._load_model(self.model_path, policy, reset_global_steps=reset_steps)
+            self._load_model(
+                os.path.join(self.model_path, DEFAULT_CHECKPOINT_NAME),
+                policy,
+                reset_global_steps=reset_steps,
+            )

     def _load_model(
         self,
@@ -83,8 +88,7 @@ def _load_model(
         policy: Optional[TorchPolicy] = None,
         reset_global_steps: bool = False,
     ) -> None:
-        model_path = os.path.join(load_path, "checkpoint.pt")
-        saved_state_dict = torch.load(model_path)
+        saved_state_dict = torch.load(load_path)
         if policy is None:
             modules = self.modules
             policy = self.policy
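The net effect of the saver changes is that `_load_model` now receives the checkpoint file itself rather than a directory, so any `.pt` snapshot can be loaded, not only the latest `checkpoint.pt`. A minimal sketch of the difference, assuming a hypothetical results folder that must exist for this to run:

```python
import os
import torch

model_dir = "./results/OldRun/WallJump"  # hypothetical path

# Before: _load_model always appended "checkpoint.pt" to the directory it got.
latest = torch.load(os.path.join(model_dir, "checkpoint.pt"))

# After: callers pass the exact file, so an older snapshot works equally well.
older = torch.load(os.path.join(model_dir, "WallJump-250000.pt"))
```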
diff --git a/ml-agents/mlagents/trainers/tests/test_trainer_util.py b/ml-agents/mlagents/trainers/tests/test_trainer_util.py
index e8459d0752..eeaaaa7b94 100644
--- a/ml-agents/mlagents/trainers/tests/test_trainer_util.py
+++ b/ml-agents/mlagents/trainers/tests/test_trainer_util.py
@@ -1,6 +1,7 @@
 import pytest
 import io
 import os
+import yaml
 from unittest.mock import patch

 from mlagents.trainers.trainer import TrainerFactory
@@ -10,7 +11,10 @@
 from mlagents.trainers.settings import RunOptions
 from mlagents.trainers.tests.dummy_config import ppo_dummy_config
 from mlagents.trainers.environment_parameter_manager import EnvironmentParameterManager
-from mlagents.trainers.directory_utils import validate_existing_directories
+from mlagents.trainers.directory_utils import (
+    validate_existing_directories,
+    setup_init_path,
+)


 @pytest.fixture
@@ -137,3 +141,47 @@ def test_existing_directories(tmp_path):
     os.mkdir(init_path)
     # Should pass since the directory exists now.
     validate_existing_directories(output_path, False, True, init_path)
+
+
+@pytest.mark.parametrize("dir_exists", [True, False])
+def test_setup_init_path(tmpdir, dir_exists):
+    """
+    Test that setup_init_path resolves each behavior's init_path to a full
+    checkpoint path, and raises when the checkpoint files do not exist.
+    """
+    test_yaml = """
+    behaviors:
+        BigWallJump:
+            init_path: BigWallJump-6540981.pt  # file name only
+            trainer_type: ppo
+        MediumWallJump:
+            init_path: {}/test_setup_init_path_results/test_run_id/MediumWallJump/checkpoint.pt  # full path
+            trainer_type: ppo
+        SmallWallJump:
+            trainer_type: ppo
+    checkpoint_settings:
+        run_id: test_run_id
+        initialize_from: test_run_id
+    """.format(
+        tmpdir
+    )
+    run_options = RunOptions.from_dict(yaml.safe_load(test_yaml))
+    if dir_exists:
+        init_path = tmpdir.mkdir("test_setup_init_path_results").mkdir("test_run_id")
+        big = init_path.mkdir("BigWallJump").join("BigWallJump-6540981.pt")
+        big.write("content")
+        med = init_path.mkdir("MediumWallJump").join("checkpoint.pt")
+        med.write("content")
+        small = init_path.mkdir("SmallWallJump").join("checkpoint.pt")
+        small.write("content")
+
+        setup_init_path(run_options.behaviors, init_path)
+        assert run_options.behaviors["BigWallJump"].init_path == big
+        assert run_options.behaviors["MediumWallJump"].init_path == med
+        assert run_options.behaviors["SmallWallJump"].init_path == small
+    else:
+        # don't make the dirs; resolution should fail validation
+        with pytest.raises(UnityTrainerException):
+            setup_init_path(
+                run_options.behaviors, run_options.checkpoint_settings.maybe_init_path
+            )
diff --git a/ml-agents/mlagents/trainers/tests/torch/saver/test_saver.py b/ml-agents/mlagents/trainers/tests/torch/saver/test_saver.py
index d8d3467654..ac89c56ca7 100644
--- a/ml-agents/mlagents/trainers/tests/torch/saver/test_saver.py
+++ b/ml-agents/mlagents/trainers/tests/torch/saver/test_saver.py
@@ -9,7 +9,10 @@
 from mlagents.trainers.ppo.optimizer_torch import TorchPPOOptimizer
 from mlagents.trainers.sac.optimizer_torch import TorchSACOptimizer
 from mlagents.trainers.poca.optimizer_torch import TorchPOCAOptimizer
-from mlagents.trainers.model_saver.torch_model_saver import TorchModelSaver
+from mlagents.trainers.model_saver.torch_model_saver import (
+    TorchModelSaver,
+    DEFAULT_CHECKPOINT_NAME,
+)
 from mlagents.trainers.settings import (
     TrainerSettings,
     NetworkSettings,
@@ -62,7 +65,7 @@ def test_load_save_policy(tmp_path):
     assert policy2.get_current_step() == 2000

     # Try initialize from path 1
-    trainer_params.init_path = path1
+    trainer_params.init_path = os.path.join(path1, DEFAULT_CHECKPOINT_NAME)
     model_saver3 = TorchModelSaver(trainer_params, path2)
     policy3 = create_policy_mock(trainer_params)
     model_saver3.register(policy3)
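The new test can be run on its own with `pytest ml-agents/mlagents/trainers/tests/test_trainer_util.py -k test_setup_init_path`, which covers both the resolved-paths case and the missing-checkpoint failure case.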
diff --git a/ml-agents/mlagents/trainers/trainer/trainer_factory.py b/ml-agents/mlagents/trainers/trainer/trainer_factory.py
index 9d351e3b5c..90f1aabef0 100644
--- a/ml-agents/mlagents/trainers/trainer/trainer_factory.py
+++ b/ml-agents/mlagents/trainers/trainer/trainer_factory.py
@@ -66,7 +66,6 @@ def generate(self, behavior_name: str) -> Trainer:
             self.ghost_controller,
             self.seed,
             self.param_manager,
-            self.init_path,
             self.multi_gpu,
         )

@@ -80,7 +79,6 @@ def _initialize_trainer(
         ghost_controller: GhostController,
         seed: int,
         param_manager: EnvironmentParameterManager,
-        init_path: str = None,
         multi_gpu: bool = False,
     ) -> Trainer:
         """
@@ -96,12 +94,9 @@ def _initialize_trainer(
         :param ghost_controller: The object that coordinates ghost trainers
         :param seed: The random seed to use
         :param param_manager: EnvironmentParameterManager, used to determine a reward buffer length for PPOTrainer
-        :param init_path: Path from which to load model, if different from model_path.
         :return:
         """
         trainer_artifact_path = os.path.join(output_path, brain_name)
-        if init_path is not None:
-            trainer_settings.init_path = os.path.join(init_path, brain_name)
         min_lesson_length = param_manager.get_minimum_reward_buffer_size(brain_name)
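Taken together, the common workflow is unchanged: `mlagents-learn config.yaml --run-id=NewRun --initialize-from=OldRun` (run ids hypothetical) still initializes every behavior from its latest `checkpoint.pt`, because `init_path` is now fully resolved in `setup_init_path` before the trainer factory runs, which is why the factory no longer needs the `init_path` parameter. A per-behavior `init_path` in the config (either a bare checkpoint file name or a full path to a `.pt` file) selects any earlier checkpoint instead, failing fast if the file is missing or is not a `.pt` file.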