import ray
from ray import tune
import os
import argparse
import yaml

from bootstrap.run import run


def train_func(config):
    """Run one bootstrap training for a single grid-search trial.

    Ray Tune calls this once per hyper-parameter combination. The
    bookkeeping keys ``option_file``, ``run_dir`` and ``exp_dir_prefix``
    are popped from ``config``; every remaining key is treated as an
    option override (e.g. ``optimizer.lr``) and its value is also
    appended to the experiment directory name, so each trial logs into
    its own directory.

    Args:
        config: trial configuration produced by ``build_tune_config``.
    """
    option_path = config.pop("option_file")
    # Tune workers start in their own sandbox directory; go back to the
    # original run directory so relative paths in the options file resolve.
    os.chdir(config.pop("run_dir"))
    exp_dir = config.pop("exp_dir_prefix")

    override_options = {
        "resume": "last",  # resume an interrupted trial instead of restarting it
    }

    for name, value in config.items():
        override_options[name] = value
        # isinstance (not type(...) == list) also accepts list subclasses
        if isinstance(value, list):
            value_str = ",".join(str(x) for x in value)
        else:
            value_str = str(value)
        # suffix the exp dir with the option's last path component and value,
        # e.g. "--lr_0.1--seed_1337"
        exp_dir += f"--{name.split('.')[-1]}_{value_str}"

    override_options["exp.dir"] = exp_dir
    run(path_opts=option_path, override_options=override_options, run_parser=False)
def build_tune_config(option_path):
    """Build a Ray Tune config from the ``gridsearch`` section of an options file.

    Each entry under ``gridsearch`` maps an option path (e.g.
    ``optimizer.lr``) to a list of values to try; every list becomes a
    ``tune.grid_search``. The bookkeeping keys consumed by ``train_func``
    (option file, working directory, experiment-dir prefix) are added as
    constants.

    Args:
        option_path: path to the options yaml file.

    Returns:
        tuple: (config dict for ``tune.run``, experiment name).
    """
    with open(option_path, "r") as yaml_file:
        # safe_load: options files are plain data; bare yaml.load is a
        # TypeError on PyYAML >= 6 and unsafe on older versions.
        options = yaml.safe_load(yaml_file)
    config = {}
    for key, values in options["gridsearch"].items():
        config[key] = tune.grid_search(values)
    config["exp_dir_prefix"] = options["exp"]["dir"]
    config["option_file"] = option_path
    config["run_dir"] = os.getcwd()
    return config, config["exp_dir_prefix"]


def grid(path_opts, cpu_per_trial=2, gpu_per_trial=0.5):
    """Run a grid search over every option combination declared in ``path_opts``.

    Args:
        path_opts: path to the options yaml file (must contain a
            ``gridsearch`` section).
        cpu_per_trial: cpus reserved for each training.
        gpu_per_trial: fraction of a gpu reserved for each training
            (e.g. 0.5 schedules two trainings per gpu).
    """
    config, name = build_tune_config(path_opts)
    ray.init()
    tune.run(
        train_func,
        name=name,
        # stop={"avg_inc_acc": 100},
        config=config,
        resources_per_trial={"cpu": cpu_per_trial, "gpu": gpu_per_trial},
        local_dir="ray_results",
    )

    # TODO: tune analysis to get best results.
    # For this, we need to extract the best score for each experiment.
    # analysis = tune.run(
    #     train_mnist, config={"lr": tune.grid_search([0.001, 0.01, 0.1])})
    # print("Best config: ", analysis.get_best_config(metric="mean_accuracy"))


def main():
    """CLI entry point: ``python -m bootstrap.grid -o <options> [--gpu G] [--cpu C]``."""
    parser = argparse.ArgumentParser()
    parser.add_argument("-o", "--path_opts", required=True, help="Main file")
    # "--gpu-per-trial" / "--cpu-per-trial" aliases keep the flags used by
    # tests/test_grid.py working; argparse maps them all onto args.gpu/args.cpu.
    parser.add_argument(
        "-g",
        "--gpu",
        "--gpu-per-trial",
        type=float,
        default=0.5,
        help="Percentage of gpu needed for one training",
    )
    parser.add_argument(
        "-c",
        "--cpu",
        "--cpu-per-trial",
        type=float,
        default=2,
        # fixed copy-paste: this flag is the cpu count, not a gpu percentage
        help="Number of cpus needed for one training",
    )
    args = parser.parse_args()
    grid(args.path_opts, args.cpu, args.gpu)


if __name__ == "__main__":
    main()
build if it has not been built before if not Options.__instance: Options.__instance = object.__new__(Options) @@ -178,7 +178,7 @@ def __new__(cls, source=None, arguments_callback=None, lock=False, run_parser=Tr if run_parser: fullopt_parser = Options.HelpParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) - fullopt_parser.add_argument('-o', '--path_opts', type=str, required=True) + fullopt_parser.add_argument('-o', '--path_opts', type=str) Options.__instance.add_options(fullopt_parser, options_dict) arguments = fullopt_parser.parse_args() @@ -201,6 +201,10 @@ def __new__(cls, source=None, arguments_callback=None, lock=False, run_parser=Tr else: Options.__instance.options = options_dict + if override_options is not None: + for key, value in override_options.items(): + Options.__instance.options[key] = value + if lock: Options.__instance.lock() return Options.__instance diff --git a/bootstrap/run.py b/bootstrap/run.py index d3f3b02..e2f805d 100755 --- a/bootstrap/run.py +++ b/bootstrap/run.py @@ -53,9 +53,9 @@ def init_logs_options_files(exp_dir, resume=None): Logger(exp_dir, name=logs_name) -def run(path_opts=None): +def run(path_opts=None, override_options=None, run_parser=True): # first call to Options() load the options yaml file from --path_opts command line argument if path_opts=None - Options(path_opts) + Options(path_opts, override_options=override_options, run_parser=run_parser) # init options and exp dir for logging init_experiment_directory(Options()['exp']['dir'], Options()['exp']['resume']) diff --git a/tests/options-grid.yaml b/tests/options-grid.yaml new file mode 100644 index 0000000..cbd93d4 --- /dev/null +++ b/tests/options-grid.yaml @@ -0,0 +1,51 @@ +exp: + dir: logs/myproject/1_exp + resume: # last, best_[...], or empty (from scratch) +dataset: + import: myproject.datasets.factory + name: myproject + dir: data/myproject + train_split: train + eval_split: val + nb_threads: 4 + batch_size: 64 + nb_items: 100 +model: + name: default + 
from os import path as osp
import os
import shutil
import sys
from bootstrap.new import new_project
from tests.test_options import reset_options_instance
from bootstrap.grid import main as main_grid


def test_grid(tmpdir):
    """End-to-end grid search: 2 lrs x 2 seeds -> 4 fully-populated exp dirs."""
    new_project("MyProject", tmpdir)
    code_dir = osp.join(tmpdir, "myproject.bootstrap.pytorch")
    path_opts = osp.join(code_dir, "myproject/options/options-grid.yaml")
    shutil.copy("tests/options-grid.yaml", path_opts)
    os.chdir(code_dir)

    # one directory per (lr, seed) combination, suffix built by train_func
    expected_exp_dirs = [
        "logs/myproject/1_exp--lr_0.1--seed_1337",
        "logs/myproject/1_exp--lr_0.1--seed_42",
        "logs/myproject/1_exp--lr_0.001--seed_1337",
        "logs/myproject/1_exp--lr_0.001--seed_42",
    ]

    # path needed to change import
    # https://stackoverflow.com/questions/23619595/pythons-os-chdir-function-isnt-working
    sys.path.insert(0, code_dir)
    reset_options_instance()
    sys.argv += ["--path_opts", path_opts]
    # fixed: the bootstrap.grid parser defines --gpu/--cpu (previously the
    # test passed --gpu-per-trial/--cpu-per-trial, which argparse rejects)
    sys.argv += ["--gpu", "0.0"]
    sys.argv += ["--cpu", "0.5"]
    main_grid()

    # every experiment directory must contain the full set of artifacts
    fnames = [
        "ckpt_best_accuracy_engine.pth.tar",
        "ckpt_best_loss_optimizer.pth.tar",
        "logs.txt",
        "ckpt_best_accuracy_model.pth.tar",
        "ckpt_last_engine.pth.tar",
        "options.yaml",
        "ckpt_best_accuracy_optimizer.pth.tar",
        "ckpt_last_model.pth.tar",
        "view.html",
        "ckpt_best_loss_engine.pth.tar",
        "ckpt_last_optimizer.pth.tar",
        "ckpt_best_loss_model.pth.tar",
        "logs.json",
    ]
    for exp_dir in expected_exp_dirs:
        for fname in fnames:
            file_path = osp.join(code_dir, f"{exp_dir}/{fname}")
            assert osp.isfile(file_path)