Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Moved documentation from GitHub Pages to Read the Docs. This makes it easier
  to manage docs for different versions.

### Added
- Support for Numpy 2.


## [3.0.0] - 2024-08-19

### Changed
Expand Down
10 changes: 6 additions & 4 deletions examples/basic/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@

from cluster_utils import exit_for_resume, finalize_job, initialize_job

random_generator = np.random.default_rng()


def fn_to_optimize(*, u, v, w, x, y, sharp_penalty, tuple_input=None):
"""
Expand Down Expand Up @@ -35,22 +37,22 @@ def fn_to_optimize(*, u, v, w, x, y, sharp_penalty, tuple_input=None):
if sharp_penalty and x > 3.20:
result += 1

if np.random.rand() < 0.1:
if random_generator.random() < 0.1:
raise ValueError("10 percent of all jobs die here on purpose")

return result


if __name__ == "__main__":
# Error before update_params (has separate handling)
if np.random.rand() < 0.05:
if random_generator.random() < 0.05:
raise ValueError("5 percent of all jobs die early for testing")

params = initialize_job()

# simulate that the jobs take some time
max_sleep_time = params.get("max_sleep_time", 10)
time.sleep(np.random.randint(0, max_sleep_time))
time.sleep(random_generator.integers(0, max_sleep_time))

result_file = os.path.join(params.working_dir, "result.npy")
os.makedirs(params.working_dir, exist_ok=True)
Expand All @@ -66,7 +68,7 @@ def fn_to_optimize(*, u, v, w, x, y, sharp_penalty, tuple_input=None):
if "test_resume" in params and params.test_resume:
exit_for_resume()

noisy_result = noiseless_result + 0.5 * np.random.normal()
noisy_result = noiseless_result + 0.5 * random_generator.normal()
metrics = {"result": noisy_result, "noiseless_result": noiseless_result}
finalize_job(metrics, params)
print(noiseless_result)
6 changes: 4 additions & 2 deletions examples/basic/main_no_fail.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@

from cluster_utils import exit_for_resume, finalize_job, initialize_job

random_generator = np.random.default_rng()


def fn_to_optimize(*, u, v, w, x, y, sharp_penalty, tuple_input=None):
"""
Expand Down Expand Up @@ -45,7 +47,7 @@ def fn_to_optimize(*, u, v, w, x, y, sharp_penalty, tuple_input=None):

# simulate that the jobs take some time
max_sleep_time = params.get("max_sleep_time", 10)
time.sleep(np.random.randint(0, max_sleep_time))
time.sleep(random_generator.integers(0, max_sleep_time))

result_file = os.path.join(params.working_dir, "result.npy")
os.makedirs(params.working_dir, exist_ok=True)
Expand All @@ -61,7 +63,7 @@ def fn_to_optimize(*, u, v, w, x, y, sharp_penalty, tuple_input=None):
if "test_resume" in params and params.test_resume:
exit_for_resume()

noisy_result = noiseless_result + 0.5 * np.random.normal()
noisy_result = noiseless_result + 0.5 * random_generator.normal()
metrics = {"result": noisy_result, "noiseless_result": noiseless_result}
finalize_job(metrics, params)
print(noiseless_result)
8 changes: 5 additions & 3 deletions examples/basic/main_with_decorator.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@

from cluster_utils import cluster_main, exit_for_resume

random_generator = np.random.default_rng()


def fn_to_optimize(*, u, v, w, x, y, sharp_penalty, tuple_input=None):
"""
Expand Down Expand Up @@ -35,7 +37,7 @@ def fn_to_optimize(*, u, v, w, x, y, sharp_penalty, tuple_input=None):
if sharp_penalty and x > 3.20:
result += 1

if np.random.rand() < 0.1:
if random_generator.random() < 0.1:
raise ValueError("10 percent of all jobs die here on purpose")

return result
Expand All @@ -51,7 +53,7 @@ def main(working_dir, id, **kwargs): # noqa A002
test_resume = kwargs["test_resume"]

# simulate that the jobs take some time
time.sleep(np.random.randint(0, 10))
time.sleep(random_generator.integers(0, 10))
result_file = os.path.join(working_dir, "result.npy")

# here we do a little simulation for checkpointing and resuming
Expand All @@ -66,7 +68,7 @@ def main(working_dir, id, **kwargs): # noqa A002
if test_resume:
exit_for_resume()

noisy_result = noiseless_result + 0.5 * np.random.normal()
noisy_result = noiseless_result + 0.5 * random_generator.normal()
metrics = {"result": noisy_result, "noiseless_result": noiseless_result}
return metrics

Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ Issues = "https://github.com/martius-lab/cluster_utils/issues"
runner = [
"colorama",
"gitpython>=3.0.5",
"numpy<2",
"numpy",
"pandas[output_formatting]>=2.0.3",
"scipy",
"tqdm",
Expand Down Expand Up @@ -140,7 +140,7 @@ lint.select = [
# "ARG", # unused-arguments
# "PD", # pandas-vet
# "PL", # pylint
# "NPY", # numpy
"NPY", # numpy
"RUF100", # unused 'noqa' directive
]
lint.ignore = [
Expand Down
14 changes: 14 additions & 0 deletions src/cluster_utils/base/utils.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,13 @@
import contextlib
import textwrap

import numpy as np

from cluster_utils.base import constants

# global random generator that is returned by get_rng()
_global_random_generator = None


class OptionalDependencyNotFoundError(ModuleNotFoundError):
"""Error to throw if an optional dependency is not found.
Expand Down Expand Up @@ -69,3 +74,12 @@ def flatten_nested_string_dict(nested_dict, prepend=""):
yield sub
else:
yield prepend + str(key), value


def get_rng() -> np.random.Generator:
    """Return the process-wide random generator, creating it on first use.

    All callers share the same :class:`numpy.random.Generator` instance; it
    is constructed lazily with ``np.random.default_rng()`` the first time
    this function is called.
    """
    global _global_random_generator

    # Fast path: generator already exists, hand back the shared instance.
    if _global_random_generator is not None:
        return _global_random_generator

    # First call: create and cache the shared generator.
    _global_random_generator = np.random.default_rng()
    return _global_random_generator
7 changes: 6 additions & 1 deletion src/cluster_utils/client/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,12 @@ def add_cmd_params(orig_dict):
add_cmd_line_params(orig_dict, args.parameter_overwrites)

if args.parameter_dict:
parameter_dict = ast.literal_eval(args.parameter_file_or_dict)
try:
parameter_dict = ast.literal_eval(args.parameter_file_or_dict)
except ValueError as e:
msg = f"Error while evaluating {args.parameter_file_or_dict}. Error: {e}"
raise ValueError(msg) from e

if not isinstance(parameter_dict, dict):
msg = (
"'parameter_file_or_dict' must be a dictionary"
Expand Down
3 changes: 2 additions & 1 deletion src/cluster_utils/server/data_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import pandas as pd

from cluster_utils.base import constants
from cluster_utils.base.utils import get_rng

DISTR_BASE_COLORS = [
(0.99, 0.7, 0.18),
Expand Down Expand Up @@ -171,7 +172,7 @@ def performance_gain_for_iteration(clf, df_for_iter, params, metric, minimum):

for param in params:
copy_df = df.copy()
copy_df[param] = np.random.permutation(copy_df[param])
copy_df[param] = get_rng().permutation(copy_df[param])
ys = clf.predict(copy_df[params])
diffs = ys - copy_df[metric]
error = np.mean(np.abs(diffs))
Expand Down
16 changes: 13 additions & 3 deletions src/cluster_utils/server/distributions.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import scipy.stats

from cluster_utils.base import constants
from cluster_utils.base.utils import get_rng

from .utils import check_valid_param_name

Expand Down Expand Up @@ -123,7 +124,7 @@ def prepare_samples(self, howmany):
) # a momentum term 3/4
if not (self.lower <= mean_to_use <= self.upper):
mean_to_use = self.mean
self.samples = np.random.normal(size=howmany) * self.std + mean_to_use
self.samples = get_rng().normal(size=howmany) * self.std + mean_to_use
super().prepare_samples(howmany)

def plot(self):
Expand Down Expand Up @@ -173,7 +174,7 @@ def prepare_samples(self, howmany):
if not (self.lower <= log_mean_to_use <= self.upper):
log_mean_to_use = self.log_mean
self.samples = np.exp(
np.random.normal(size=howmany) * self.log_std + log_mean_to_use
get_rng().normal(size=howmany) * self.log_std + log_mean_to_use
)
super().prepare_samples(howmany)

Expand Down Expand Up @@ -245,7 +246,16 @@ def prepare_samples(self, howmany):
howmany = min(
10, howmany
) # HACK: for smart rounding a reasonable sample size is needed
self.samples = np.random.choice(self.option_list, p=self.probs, size=howmany)

_samples = get_rng().choice(self.option_list, p=self.probs, size=howmany)
# choice() returns a numpy array, implicitly converting value types to numpy
# types (e.g. native bool becomes np.bool_). This causes trouble later on, as
# the parameters passed to the job script are parsed with ast.literal_eval,
# which can only handle native types. Converting back to a list here should
# also get us back to native types for the elements (at least successfully
# tested with bool).
self.samples = _samples.tolist()

super().prepare_samples(howmany)

def plot(self):
Expand Down