Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Moved documentation from GitHub Pages to Read the Docs. This makes it easier
  to manage docs for different versions.

### Added
- Support for Numpy 2.


## [3.0.0] - 2024-08-19

### Changed
Expand Down
10 changes: 6 additions & 4 deletions examples/basic/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@

from cluster_utils import exit_for_resume, finalize_job, initialize_job

random_generator = np.random.default_rng()


def fn_to_optimize(*, u, v, w, x, y, sharp_penalty, tuple_input=None):
"""
Expand Down Expand Up @@ -35,22 +37,22 @@ def fn_to_optimize(*, u, v, w, x, y, sharp_penalty, tuple_input=None):
if sharp_penalty and x > 3.20:
result += 1

if np.random.rand() < 0.1:
if random_generator.random() < 0.1:
raise ValueError("10 percent of all jobs die here on purpose")

return result


if __name__ == "__main__":
# Error before update_params (has separate handling)
if np.random.rand() < 0.05:
if random_generator.random() < 0.05:
raise ValueError("5 percent of all jobs die early for testing")

params = initialize_job()

# simulate that the jobs take some time
max_sleep_time = params.get("max_sleep_time", 10)
time.sleep(np.random.randint(0, max_sleep_time))
time.sleep(random_generator.integers(0, max_sleep_time))

result_file = os.path.join(params.working_dir, "result.npy")
os.makedirs(params.working_dir, exist_ok=True)
Expand All @@ -66,7 +68,7 @@ def fn_to_optimize(*, u, v, w, x, y, sharp_penalty, tuple_input=None):
if "test_resume" in params and params.test_resume:
exit_for_resume()

noisy_result = noiseless_result + 0.5 * np.random.normal()
noisy_result = noiseless_result + 0.5 * random_generator.normal()
metrics = {"result": noisy_result, "noiseless_result": noiseless_result}
finalize_job(metrics, params)
print(noiseless_result)
6 changes: 4 additions & 2 deletions examples/basic/main_no_fail.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@

from cluster_utils import exit_for_resume, finalize_job, initialize_job

random_generator = np.random.default_rng()


def fn_to_optimize(*, u, v, w, x, y, sharp_penalty, tuple_input=None):
"""
Expand Down Expand Up @@ -45,7 +47,7 @@ def fn_to_optimize(*, u, v, w, x, y, sharp_penalty, tuple_input=None):

# simulate that the jobs take some time
max_sleep_time = params.get("max_sleep_time", 10)
time.sleep(np.random.randint(0, max_sleep_time))
time.sleep(random_generator.integers(0, max_sleep_time))

result_file = os.path.join(params.working_dir, "result.npy")
os.makedirs(params.working_dir, exist_ok=True)
Expand All @@ -61,7 +63,7 @@ def fn_to_optimize(*, u, v, w, x, y, sharp_penalty, tuple_input=None):
if "test_resume" in params and params.test_resume:
exit_for_resume()

noisy_result = noiseless_result + 0.5 * np.random.normal()
noisy_result = noiseless_result + 0.5 * random_generator.normal()
metrics = {"result": noisy_result, "noiseless_result": noiseless_result}
finalize_job(metrics, params)
print(noiseless_result)
8 changes: 5 additions & 3 deletions examples/basic/main_with_decorator.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@

from cluster_utils import cluster_main, exit_for_resume

random_generator = np.random.default_rng()


def fn_to_optimize(*, u, v, w, x, y, sharp_penalty, tuple_input=None):
"""
Expand Down Expand Up @@ -35,7 +37,7 @@ def fn_to_optimize(*, u, v, w, x, y, sharp_penalty, tuple_input=None):
if sharp_penalty and x > 3.20:
result += 1

if np.random.rand() < 0.1:
if random_generator.random() < 0.1:
raise ValueError("10 percent of all jobs die here on purpose")

return result
Expand All @@ -51,7 +53,7 @@ def main(working_dir, id, **kwargs): # noqa A002
test_resume = kwargs["test_resume"]

# simulate that the jobs take some time
time.sleep(np.random.randint(0, 10))
time.sleep(random_generator.integers(0, 10))
result_file = os.path.join(working_dir, "result.npy")

# here we do a little simulation for checkpointing and resuming
Expand All @@ -66,7 +68,7 @@ def main(working_dir, id, **kwargs): # noqa A002
if test_resume:
exit_for_resume()

noisy_result = noiseless_result + 0.5 * np.random.normal()
noisy_result = noiseless_result + 0.5 * random_generator.normal()
metrics = {"result": noisy_result, "noiseless_result": noiseless_result}
return metrics

Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ Issues = "https://github.com/martius-lab/cluster_utils/issues"
runner = [
"colorama",
"gitpython>=3.0.5",
"numpy<2",
"numpy",
"pandas[output_formatting]>=2.0.3",
"scipy",
"tqdm",
Expand Down Expand Up @@ -140,7 +140,7 @@ lint.select = [
# "ARG", # unused-arguments
# "PD", # pandas-vet
# "PL", # pylint
# "NPY", # numpy
"NPY", # numpy
"RUF100", # unused 'noqa' directive
]
lint.ignore = [
Expand Down
14 changes: 14 additions & 0 deletions src/cluster_utils/base/utils.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,13 @@
import contextlib
import textwrap

import numpy as np

from cluster_utils.base import constants

# global random generator that is returned by get_rng()
_global_random_generator = None


class OptionalDependencyNotFoundError(ModuleNotFoundError):
"""Error to throw if an optional dependency is not found.
Expand Down Expand Up @@ -69,3 +74,12 @@ def flatten_nested_string_dict(nested_dict, prepend=""):
yield sub
else:
yield prepend + str(key), value


def get_rng() -> np.random.Generator:
    """Return the process-wide random generator, creating it on first use.

    All callers share the same :class:`numpy.random.Generator` instance; it
    is constructed lazily with ``np.random.default_rng()`` the first time
    this function is called.
    """
    global _global_random_generator

    # Fast path: generator already exists, hand back the shared instance.
    if _global_random_generator is not None:
        return _global_random_generator

    # First call: create and cache the shared generator.
    _global_random_generator = np.random.default_rng()
    return _global_random_generator
7 changes: 6 additions & 1 deletion src/cluster_utils/client/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,12 @@ def add_cmd_params(orig_dict):
add_cmd_line_params(orig_dict, args.parameter_overwrites)

if args.parameter_dict:
parameter_dict = ast.literal_eval(args.parameter_file_or_dict)
try:
parameter_dict = ast.literal_eval(args.parameter_file_or_dict)
except ValueError as e:
msg = f"Error while evaluating {args.parameter_file_or_dict}. Error: {e}"
raise ValueError(msg) from e

if not isinstance(parameter_dict, dict):
msg = (
"'parameter_file_or_dict' must be a dictionary"
Expand Down
3 changes: 2 additions & 1 deletion src/cluster_utils/server/data_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import pandas as pd

from cluster_utils.base import constants
from cluster_utils.base.utils import get_rng

DISTR_BASE_COLORS = [
(0.99, 0.7, 0.18),
Expand Down Expand Up @@ -171,7 +172,7 @@ def performance_gain_for_iteration(clf, df_for_iter, params, metric, minimum):

for param in params:
copy_df = df.copy()
copy_df[param] = np.random.permutation(copy_df[param])
copy_df[param] = get_rng().permutation(copy_df[param])
ys = clf.predict(copy_df[params])
diffs = ys - copy_df[metric]
error = np.mean(np.abs(diffs))
Expand Down
16 changes: 13 additions & 3 deletions src/cluster_utils/server/distributions.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import scipy.stats

from cluster_utils.base import constants
from cluster_utils.base.utils import get_rng

from .utils import check_valid_param_name

Expand Down Expand Up @@ -123,7 +124,7 @@ def prepare_samples(self, howmany):
) # a momentum term 3/4
if not (self.lower <= mean_to_use <= self.upper):
mean_to_use = self.mean
self.samples = np.random.normal(size=howmany) * self.std + mean_to_use
self.samples = get_rng().normal(size=howmany) * self.std + mean_to_use
super().prepare_samples(howmany)

def plot(self):
Expand Down Expand Up @@ -173,7 +174,7 @@ def prepare_samples(self, howmany):
if not (self.lower <= log_mean_to_use <= self.upper):
log_mean_to_use = self.log_mean
self.samples = np.exp(
np.random.normal(size=howmany) * self.log_std + log_mean_to_use
get_rng().normal(size=howmany) * self.log_std + log_mean_to_use
)
super().prepare_samples(howmany)

Expand Down Expand Up @@ -245,7 +246,16 @@ def prepare_samples(self, howmany):
howmany = min(
10, howmany
) # HACK: for smart rounding a reasonable sample size is needed
self.samples = np.random.choice(self.option_list, p=self.probs, size=howmany)

_samples = get_rng().choice(self.option_list, p=self.probs, size=howmany)
# choice() returns a numpy array, implicitly converting value types to numpy
# types (e.g. native bool becomes np.bool_). This causes trouble later on, as
# the parameters passed to the job script are parsed with ast.literal_eval,
# which can only handle native types. Converting back to a list here should
# also get us back to native types for the elements (at least successfully
# tested with bool).
self.samples = _samples.tolist()

super().prepare_samples(howmany)

def plot(self):
Expand Down