diff --git a/CHANGELOG.md b/CHANGELOG.md index 25f9d24..7a566d2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Moved documentation from GitHub Pages to Read the Docs. This allows to more easily manage docs for different versions. +### Added +- Support for Numpy 2. + + ## [3.0.0] - 2024-08-19 ### Changed diff --git a/examples/basic/main.py b/examples/basic/main.py index 616d194..4d1f69e 100644 --- a/examples/basic/main.py +++ b/examples/basic/main.py @@ -5,6 +5,8 @@ from cluster_utils import exit_for_resume, finalize_job, initialize_job +random_generator = np.random.default_rng() + def fn_to_optimize(*, u, v, w, x, y, sharp_penalty, tuple_input=None): """ @@ -35,7 +37,7 @@ def fn_to_optimize(*, u, v, w, x, y, sharp_penalty, tuple_input=None): if sharp_penalty and x > 3.20: result += 1 - if np.random.rand() < 0.1: + if random_generator.random() < 0.1: raise ValueError("10 percent of all jobs die here on purpose") return result @@ -43,14 +45,14 @@ def fn_to_optimize(*, u, v, w, x, y, sharp_penalty, tuple_input=None): if __name__ == "__main__": # Error before update_params (has separate handling) - if np.random.rand() < 0.05: + if random_generator.random() < 0.05: raise ValueError("5 percent of all jobs die early for testing") params = initialize_job() # simulate that the jobs take some time max_sleep_time = params.get("max_sleep_time", 10) - time.sleep(np.random.randint(0, max_sleep_time)) + time.sleep(random_generator.integers(0, max_sleep_time)) result_file = os.path.join(params.working_dir, "result.npy") os.makedirs(params.working_dir, exist_ok=True) @@ -66,7 +68,7 @@ def fn_to_optimize(*, u, v, w, x, y, sharp_penalty, tuple_input=None): if "test_resume" in params and params.test_resume: exit_for_resume() - noisy_result = noiseless_result + 0.5 * np.random.normal() + noisy_result = noiseless_result + 0.5 * random_generator.normal() metrics = {"result": noisy_result, 
"noiseless_result": noiseless_result} finalize_job(metrics, params) print(noiseless_result) diff --git a/examples/basic/main_no_fail.py b/examples/basic/main_no_fail.py index efdaac0..9115bf3 100644 --- a/examples/basic/main_no_fail.py +++ b/examples/basic/main_no_fail.py @@ -7,6 +7,8 @@ from cluster_utils import exit_for_resume, finalize_job, initialize_job +random_generator = np.random.default_rng() + def fn_to_optimize(*, u, v, w, x, y, sharp_penalty, tuple_input=None): """ @@ -45,7 +47,7 @@ def fn_to_optimize(*, u, v, w, x, y, sharp_penalty, tuple_input=None): # simulate that the jobs take some time max_sleep_time = params.get("max_sleep_time", 10) - time.sleep(np.random.randint(0, max_sleep_time)) + time.sleep(random_generator.integers(0, max_sleep_time)) result_file = os.path.join(params.working_dir, "result.npy") os.makedirs(params.working_dir, exist_ok=True) @@ -61,7 +63,7 @@ def fn_to_optimize(*, u, v, w, x, y, sharp_penalty, tuple_input=None): if "test_resume" in params and params.test_resume: exit_for_resume() - noisy_result = noiseless_result + 0.5 * np.random.normal() + noisy_result = noiseless_result + 0.5 * random_generator.normal() metrics = {"result": noisy_result, "noiseless_result": noiseless_result} finalize_job(metrics, params) print(noiseless_result) diff --git a/examples/basic/main_with_decorator.py b/examples/basic/main_with_decorator.py index cc03c77..82c12df 100644 --- a/examples/basic/main_with_decorator.py +++ b/examples/basic/main_with_decorator.py @@ -5,6 +5,8 @@ from cluster_utils import cluster_main, exit_for_resume +random_generator = np.random.default_rng() + def fn_to_optimize(*, u, v, w, x, y, sharp_penalty, tuple_input=None): """ @@ -35,7 +37,7 @@ def fn_to_optimize(*, u, v, w, x, y, sharp_penalty, tuple_input=None): if sharp_penalty and x > 3.20: result += 1 - if np.random.rand() < 0.1: + if random_generator.random() < 0.1: raise ValueError("10 percent of all jobs die here on purpose") return result @@ -51,7 +53,7 @@ def 
main(working_dir, id, **kwargs): # noqa A002 test_resume = kwargs["test_resume"] # simulate that the jobs take some time - time.sleep(np.random.randint(0, 10)) + time.sleep(random_generator.integers(0, 10)) result_file = os.path.join(working_dir, "result.npy") # here we do a little simulation for checkpointing and resuming @@ -66,7 +68,7 @@ def main(working_dir, id, **kwargs): # noqa A002 if test_resume: exit_for_resume() - noisy_result = noiseless_result + 0.5 * np.random.normal() + noisy_result = noiseless_result + 0.5 * random_generator.normal() metrics = {"result": noisy_result, "noiseless_result": noiseless_result} return metrics diff --git a/pyproject.toml b/pyproject.toml index cc9098a..f0855ab 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -53,7 +53,7 @@ Issues = "https://github.com/martius-lab/cluster_utils/issues" runner = [ "colorama", "gitpython>=3.0.5", - "numpy<2", + "numpy", "pandas[output_formatting]>=2.0.3", "scipy", "tqdm", @@ -140,7 +140,7 @@ lint.select = [ # "ARG", # unused-arguments # "PD", # pandas-vet # "PL", # pylint - # "NPY", # numpy + "NPY", # numpy "RUF100", # unused 'noqa' directive ] lint.ignore = [ diff --git a/src/cluster_utils/base/utils.py b/src/cluster_utils/base/utils.py index 27fc371..700b585 100644 --- a/src/cluster_utils/base/utils.py +++ b/src/cluster_utils/base/utils.py @@ -1,8 +1,13 @@ import contextlib import textwrap +import numpy as np + from cluster_utils.base import constants +# global random generator that is returned by get_rng() +_global_random_generator = None + class OptionalDependencyNotFoundError(ModuleNotFoundError): """Error to throw if an optional dependency is not found. 
@@ -69,3 +74,12 @@ def flatten_nested_string_dict(nested_dict, prepend=""): yield sub else: yield prepend + str(key), value + + +def get_rng() -> np.random.Generator: + """Get reference to a global random generator (created on first call).""" + global _global_random_generator + if _global_random_generator is None: + _global_random_generator = np.random.default_rng() + + return _global_random_generator diff --git a/src/cluster_utils/client/__init__.py b/src/cluster_utils/client/__init__.py index be5a8c0..d271945 100644 --- a/src/cluster_utils/client/__init__.py +++ b/src/cluster_utils/client/__init__.py @@ -208,7 +208,12 @@ def add_cmd_params(orig_dict): add_cmd_line_params(orig_dict, args.parameter_overwrites) if args.parameter_dict: - parameter_dict = ast.literal_eval(args.parameter_file_or_dict) + try: + parameter_dict = ast.literal_eval(args.parameter_file_or_dict) + except ValueError as e: + msg = f"Error while evaluating {args.parameter_file_or_dict}. Error: {e}" + raise ValueError(msg) from e + if not isinstance(parameter_dict, dict): msg = ( "'parameter_file_or_dict' must be a dictionary" diff --git a/src/cluster_utils/server/data_analysis.py b/src/cluster_utils/server/data_analysis.py index 08a3298..49db286 100644 --- a/src/cluster_utils/server/data_analysis.py +++ b/src/cluster_utils/server/data_analysis.py @@ -7,6 +7,7 @@ import pandas as pd from cluster_utils.base import constants +from cluster_utils.base.utils import get_rng DISTR_BASE_COLORS = [ (0.99, 0.7, 0.18), @@ -171,7 +172,7 @@ def performance_gain_for_iteration(clf, df_for_iter, params, metric, minimum): for param in params: copy_df = df.copy() - copy_df[param] = np.random.permutation(copy_df[param]) + copy_df[param] = get_rng().permutation(copy_df[param]) ys = clf.predict(copy_df[params]) diffs = ys - copy_df[metric] error = np.mean(np.abs(diffs)) diff --git a/src/cluster_utils/server/distributions.py b/src/cluster_utils/server/distributions.py index 0a1bc65..ee49abc 100644 --- 
a/src/cluster_utils/server/distributions.py +++ b/src/cluster_utils/server/distributions.py @@ -7,6 +7,7 @@ import scipy.stats from cluster_utils.base import constants +from cluster_utils.base.utils import get_rng from .utils import check_valid_param_name @@ -123,7 +124,7 @@ def prepare_samples(self, howmany): ) # a momentum term 3/4 if not (self.lower <= mean_to_use <= self.upper): mean_to_use = self.mean - self.samples = np.random.normal(size=howmany) * self.std + mean_to_use + self.samples = get_rng().normal(size=howmany) * self.std + mean_to_use super().prepare_samples(howmany) def plot(self): @@ -173,7 +174,7 @@ def prepare_samples(self, howmany): if not (self.lower <= log_mean_to_use <= self.upper): log_mean_to_use = self.log_mean self.samples = np.exp( - np.random.normal(size=howmany) * self.log_std + log_mean_to_use + get_rng().normal(size=howmany) * self.log_std + log_mean_to_use ) super().prepare_samples(howmany) @@ -245,7 +246,16 @@ def prepare_samples(self, howmany): howmany = min( 10, howmany ) # HACK: for smart rounding a reasonable sample size is needed - self.samples = np.random.choice(self.option_list, p=self.probs, size=howmany) + + _samples = get_rng().choice(self.option_list, p=self.probs, size=howmany) + # choice() returns a numpy array, implicitly converting value types to numpy + # types (e.g. native bool becomes np.bool). This causes trouble later on, as + # the parameters passed to the job script are parsed with ast.literal_eval, + # which can only handle native types. Converting back to list here should also + # get us back to native types for the elements (at least successfully tested + # with bool). + self.samples = _samples.tolist() + super().prepare_samples(howmany) def plot(self):