diff --git a/autosklearn/automl.py b/autosklearn/automl.py index 8c0f4717dc..d8c624d06a 100644 --- a/autosklearn/automl.py +++ b/autosklearn/automl.py @@ -62,7 +62,7 @@ convert_if_sparse, ) from autosklearn.data.xy_data_manager import XYDataManager -from autosklearn.ensemble_builder import EnsembleBuilderManager +from autosklearn.ensemble_building import EnsembleBuilderManager from autosklearn.ensembles.singlebest_ensemble import SingleBest from autosklearn.evaluation import ExecuteTaFuncWithQueue, get_cost_of_crash from autosklearn.evaluation.abstract_evaluator import _fit_and_suppress_warnings @@ -303,6 +303,8 @@ def __init__( self._label_num = None self._parser = None self._can_predict = False + self._read_at_most = None + self._max_ensemble_build_iterations = None self.models_: Optional[dict] = None self.cv_models_: Optional[dict] = None self.ensemble_ = None @@ -808,9 +810,9 @@ def fit( max_models_on_disc=self._max_models_on_disc, seed=self._seed, precision=self.precision, - max_iterations=None, - read_at_most=np.inf, - ensemble_memory_limit=self._memory_limit, + max_iterations=self._max_ensemble_build_iterations, + read_at_most=self._read_at_most, + memory_limit=self._memory_limit, random_state=self._seed, logger_port=self._logger_port, pynisher_context=self._multiprocessing_context, @@ -923,7 +925,7 @@ def fit( ) result = proc_ensemble.futures.pop().result() if result: - ensemble_history, _, _, _, _ = result + ensemble_history, _ = result self.ensemble_performance_history.extend(ensemble_history) self._logger.info("Ensemble script finished, continue shutdown.") @@ -1524,8 +1526,8 @@ def fit_ensemble( seed=self._seed, precision=precision if precision else self.precision, max_iterations=1, - read_at_most=np.inf, - ensemble_memory_limit=self._memory_limit, + read_at_most=None, + memory_limit=self._memory_limit, random_state=self._seed, logger_port=self._logger_port, pynisher_context=self._multiprocessing_context, @@ -1538,7 +1540,7 @@ def fit_ensemble( "Error building the ensemble - please check the log file and command " "line output for error messages." 
) - self.ensemble_performance_history, _, _, _, _ = result + self.ensemble_performance_history, _ = result self._ensemble_size = ensemble_size self._load_models() @@ -2096,6 +2098,15 @@ def has_key(rv, key): return ensemble_dict + def has_ensemble(self) -> bool: + """ + Returns + ------- + bool + Whether this AutoML instance has an ensemble + """ + return self.ensemble_ is not None + def _create_search_space( self, tmp_dir: str, @@ -2154,7 +2165,7 @@ def fit( y: SUPPORTED_TARGET_TYPES | spmatrix, X_test: Optional[SUPPORTED_FEAT_TYPES] = None, y_test: Optional[SUPPORTED_TARGET_TYPES | spmatrix] = None, - feat_type: Optional[list[bool]] = None, + feat_type: Optional[list[str]] = None, dataset_name: Optional[str] = None, only_return_configuration_space: bool = False, load_models: bool = True, @@ -2244,7 +2255,7 @@ def fit( y: SUPPORTED_TARGET_TYPES | spmatrix, X_test: Optional[SUPPORTED_FEAT_TYPES] = None, y_test: Optional[SUPPORTED_TARGET_TYPES | spmatrix] = None, - feat_type: Optional[list[bool]] = None, + feat_type: Optional[list[str]] = None, dataset_name: Optional[str] = None, only_return_configuration_space: bool = False, load_models: bool = True, diff --git a/autosklearn/ensemble_builder.py b/autosklearn/ensemble_builder.py deleted file mode 100644 index 3dec9828ef..0000000000 --- a/autosklearn/ensemble_builder.py +++ /dev/null @@ -1,1637 +0,0 @@ -# -*- encoding: utf-8 -*- -from typing import List, Optional, Tuple, Union - -import glob -import gzip -import logging.handlers -import math -import multiprocessing -import numbers -import os -import pickle -import re -import shutil -import time -import traceback -import zlib - -import dask.distributed -import numpy as np -import pandas as pd -import pynisher -from smac.callbacks import IncorporateRunResultCallback -from smac.optimizer.smbo import SMBO -from smac.runhistory.runhistory import RunInfo, RunValue -from smac.tae.base import StatusType - -from autosklearn.automl_common.common.ensemble_building.abstract_ensemble import ( # noqa: E501 - AbstractEnsemble, -) -from autosklearn.automl_common.common.utils.backend import Backend -from autosklearn.constants import BINARY_CLASSIFICATION -from autosklearn.ensembles.ensemble_selection import EnsembleSelection -from autosklearn.metrics import Scorer, calculate_losses, calculate_scores -from autosklearn.util.logging_ import get_named_client_logger -from autosklearn.util.parallel import preload_modules - -Y_ENSEMBLE = 0 -Y_VALID = 1 -Y_TEST = 2 - -MODEL_FN_RE = r"_([0-9]*)_([0-9]*)_([0-9]{1,3}\.[0-9]*)\.npy" - - -class EnsembleBuilderManager(IncorporateRunResultCallback): - def __init__( - self, - start_time: float, - time_left_for_ensembles: float, - backend: Backend, - dataset_name: str, - task: int, - metric: Scorer, - ensemble_size: int, - ensemble_nbest: int, - max_models_on_disc: Union[float, int], - seed: int, - precision: int, - max_iterations: Optional[int], - read_at_most: int, - ensemble_memory_limit: Optional[int], - random_state: Union[int, np.random.RandomState], - logger_port: int = logging.handlers.DEFAULT_TCP_LOGGING_PORT, - pynisher_context: str = "fork", - ): - """SMAC callback to handle ensemble building - - Parameters - ---------- - start_time: int - the time when this job was started, to account for any latency in job - allocation. - - time_left_for_ensemble: int - How much time is left for the task. 
Job should finish within this - allocated time - - backend: util.backend.Backend - backend to write and read files - - dataset_name: str - name of dataset - - task_type: int - type of ML task - - metric: str - name of metric to compute the loss of the given predictions - - ensemble_size: int - maximal size of ensemble - - ensemble_nbest: int/float - if int: consider only the n best prediction - if float: consider only this fraction of the best models - Both wrt to validation predictions - If performance_range_threshold > 0, might return less models - - max_models_on_disc: int - Defines the maximum number of models that are kept in the disc. - - If int, it must be greater or equal than 1, and dictates the max - number of models to keep. - - If float, it will be interpreted as the max megabytes allowed of - disc space. That is, if the number of ensemble candidates require more - disc space than this float value, the worst models will be deleted to - keep within this budget. Models and predictions of the worst-performing - models will be deleted then. - - If None, the feature is disabled. It defines an upper bound on the - models that can be used in the ensemble. - - seed: int - random seed - - max_iterations: int - maximal number of iterations to run this script - (default None --> deactivated) - - precision: [16,32,64,128] - precision of floats to read the predictions - - memory_limit: Optional[int] - memory limit in mb. If ``None``, no memory limit is enforced. - - read_at_most: int - read at most n new prediction files in each iteration - - logger_port: int - port that receives logging records - - pynisher_context: str - The multiprocessing context for pynisher. One of spawn/fork/forkserver. - - """ - self.start_time = start_time - self.time_left_for_ensembles = time_left_for_ensembles - self.backend = backend - self.dataset_name = dataset_name - self.task = task - self.metric = metric - self.ensemble_size = ensemble_size - self.ensemble_nbest = ensemble_nbest - self.max_models_on_disc = max_models_on_disc - self.seed = seed - self.precision = precision - self.max_iterations = max_iterations - self.read_at_most = read_at_most - self.ensemble_memory_limit = ensemble_memory_limit - self.random_state = random_state - self.logger_port = logger_port - self.pynisher_context = pynisher_context - - # Store something similar to SMAC's runhistory - self.history = [] - - # We only submit new ensembles when there is not an active ensemble job - self.futures = [] - - # The last criteria is the number of iterations - self.iteration = 0 - - # Keep track of when we started to know when we need to finish! 
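
Editorial example, not part of the patch: the callback above gates ensemble submission on three conditions that are easy to lose in the class body, namely the wall-clock budget, the optional iteration cap, and whether a previous ensemble job is still pending. A minimal, self-contained sketch of that gating; the class and attribute names are simplified stand-ins, not the real EnsembleBuilderManager.

from __future__ import annotations

import time

class _SubmissionGate:
    """Toy stand-in mirroring the checks in EnsembleBuilderManager.build_ensemble."""

    def __init__(self, time_left_for_ensembles: float, max_iterations: int | None):
        self.start_time = time.time()
        self.time_left_for_ensembles = time_left_for_ensembles
        self.max_iterations = max_iterations
        self.iteration = 0
        self.futures: list = []  # at most one pending ensemble job at a time

    def should_submit(self) -> bool:
        elapsed = time.time() - self.start_time
        if self.time_left_for_ensembles < elapsed:
            return False  # wall-clock budget exhausted
        if self.max_iterations is not None and self.max_iterations <= self.iteration:
            return False  # iteration budget exhausted
        return len(self.futures) == 0  # only submit when no job is in flight

gate = _SubmissionGate(time_left_for_ensembles=60.0, max_iterations=None)
assert gate.should_submit()
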
- self.start_time = time.time() - - def __call__( - self, - smbo: "SMBO", - run_info: RunInfo, - result: RunValue, - time_left: float, - ): - """ - Returns - ------- - List[Tuple[int, float, float, float]]: - A list with the performance history of this ensemble, of the form - [(pandas_timestamp, train_performance, val_performance, test_performance)] - """ - if result.status in (StatusType.STOP, StatusType.ABORT) or smbo._stop: - return - self.build_ensemble(smbo.tae_runner.client) - - def build_ensemble( - self, dask_client: dask.distributed.Client, unit_test: bool = False - ) -> None: - - # The second criteria is elapsed time - elapsed_time = time.time() - self.start_time - - logger = get_named_client_logger( - name="EnsembleBuilder", - port=self.logger_port, - ) - - # First test for termination conditions - if self.time_left_for_ensembles < elapsed_time: - logger.info( - "Terminate ensemble building as not time is left (run for {}s)".format( - elapsed_time - ), - ) - return - if self.max_iterations is not None and self.max_iterations <= self.iteration: - logger.info( - "Terminate ensemble building because of max iterations:" - f" {self.max_iterations} of {self.iteration}" - ) - return - - if len(self.futures) != 0: - if self.futures[0].done(): - result = self.futures.pop().result() - if result: - ensemble_history, self.ensemble_nbest, _, _, _ = result - logger.debug( - "iteration={} @ elapsed_time={} has history={}".format( - self.iteration, - elapsed_time, - ensemble_history, - ) - ) - self.history.extend(ensemble_history) - - # Only submit new jobs if the previous ensemble job finished - if len(self.futures) == 0: - - # Add the result of the run - # On the next while iteration, no references to - # ensemble builder object, so it should be garbage collected to - # save memory while waiting for resources - # Also, notice how ensemble nbest is returned, so we don't waste - # iterations testing if the deterministic predictions size can - # be fitted in memory - try: - # Submit a Dask job from this job, to properly - # see it in the dask diagnostic dashboard - # Notice that the forked ensemble_builder_process will - # wait for the below function to be done - self.futures.append( - dask_client.submit( - fit_and_return_ensemble, - backend=self.backend, - dataset_name=self.dataset_name, - task_type=self.task, - metric=self.metric, - ensemble_size=self.ensemble_size, - ensemble_nbest=self.ensemble_nbest, - max_models_on_disc=self.max_models_on_disc, - seed=self.seed, - precision=self.precision, - memory_limit=self.ensemble_memory_limit, - read_at_most=self.read_at_most, - random_state=self.random_state, - end_at=self.start_time + self.time_left_for_ensembles, - iteration=self.iteration, - return_predictions=False, - priority=100, - pynisher_context=self.pynisher_context, - logger_port=self.logger_port, - unit_test=unit_test, - ) - ) - - logger.info( - "{}/{} Started Ensemble builder job at {} for iteration {}.".format( - # Log the client to make sure we - # remain connected to the scheduler - self.futures[0], - dask_client, - time.strftime("%Y.%m.%d-%H.%M.%S"), - self.iteration, - ), - ) - self.iteration += 1 - except Exception as e: - exception_traceback = traceback.format_exc() - error_message = repr(e) - logger.critical(exception_traceback) - logger.critical(error_message) - - -def fit_and_return_ensemble( - backend: Backend, - dataset_name: str, - task_type: str, - metric: Scorer, - ensemble_size: int, - ensemble_nbest: int, - max_models_on_disc: Union[float, int], - seed: int, - precision: 
int, - read_at_most: int, - end_at: float, - iteration: int, - return_predictions: bool, - pynisher_context: str, - logger_port: int = logging.handlers.DEFAULT_TCP_LOGGING_PORT, - unit_test: bool = False, - memory_limit: Optional[int] = None, - random_state: Optional[Union[int, np.random.RandomState]] = None, -) -> Tuple[ - List[Tuple[int, float, float, float]], - int, - Optional[np.ndarray], - Optional[np.ndarray], - Optional[np.ndarray], -]: - """ - - A short function to fit and create an ensemble. It is just a wrapper to easily send - a request to dask to create an ensemble and clean the memory when finished - - Parameters - ---------- - backend: util.backend.Backend - backend to write and read files - - dataset_name: str - name of dataset - - metric: str - name of metric to compute the loss of the given predictions - - task_type: int - type of ML task - - ensemble_size: int - maximal size of ensemble (passed to autosklearn.ensemble.ensemble_selection) - - ensemble_nbest: int/float - if int: consider only the n best prediction - if float: consider only this fraction of the best models - Both wrt to validation predictions - If performance_range_threshold > 0, might return less models - - max_models_on_disc: int - Defines the maximum number of models that are kept in the disc. - - If int, it must be greater or equal than 1, and dictates the max number of - models to keep. - - If float, it will be interpreted as the max megabytes allowed of disc space. - That is, if the number of ensemble candidates require more disc space than - this float value, the worst models will be deleted to keep within this - budget. Models and predictions of the worst-performing models will be - deleted then. - - If None, the feature is disabled. - It defines an upper bound on the models that can be used in the ensemble. - - seed: int - random seed - - precision: [16,32,64,128] - precision of floats to read the predictions - - read_at_most: int - read at most n new prediction files in each iteration - - end_at: float - At what time the job must finish. Needs to be the endtime and not the - time left because we do not know when dask schedules the job. - - iteration: int - The current iteration - - pynisher_context: str - Context to use for multiprocessing, can be either fork, spawn or forkserver. - - logger_port: int = DEFAULT_TCP_LOGGING_PORT - The port where the logging server is listening to. - - unit_test: bool = False - Turn on unit testing mode. This currently makes fit_ensemble raise a - MemoryError. Having this is very bad coding style, but I did not find a way - to make unittest.mock work through the pynisher with all spawn contexts. - If you know a better solution, please let us know by opening an issue. - - memory_limit: Optional[int] = None - memory limit in mb. If ``None``, no memory limit is enforced. - - random_state: Optional[int | RandomState] = None - A random state used for the ensemble selection process. 
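
Editorial example, not part of the patch: fit_and_return_ensemble exists so that a plain module-level function, rather than a bound method, is what gets shipped to the dask worker; once it returns, the worker holds no reference to the manager and the builder's memory can be reclaimed. A rough sketch of that submit-and-collect pattern, assuming dask.distributed is installed; build_once is a made-up stand-in, not the real function.

from __future__ import annotations

from dask.distributed import Client

def build_once(iteration: int) -> tuple[list, int]:
    # Stand-in for fit_and_return_ensemble: construct a builder, run one
    # iteration, and hand back (ensemble_history, ensemble_nbest).
    history = [{"iteration": iteration}]
    return history, 50

if __name__ == "__main__":
    client = Client(processes=False)  # small in-process cluster for the demo
    future = client.submit(build_once, 0, priority=100)
    history, nbest = future.result()  # the manager instead polls .done() and pops
    client.close()
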
- - Returns - ------- - List[Tuple[int, float, float, float]] - A list with the performance history of this ensemble, of the form - [(pandas_timestamp, train_performance, val_performance, test_performance)] - """ - result = EnsembleBuilder( - backend=backend, - dataset_name=dataset_name, - task_type=task_type, - metric=metric, - ensemble_size=ensemble_size, - ensemble_nbest=ensemble_nbest, - max_models_on_disc=max_models_on_disc, - seed=seed, - precision=precision, - memory_limit=memory_limit, - read_at_most=read_at_most, - random_state=random_state, - logger_port=logger_port, - unit_test=unit_test, - ).run( - end_at=end_at, - iteration=iteration, - return_predictions=return_predictions, - pynisher_context=pynisher_context, - ) - return result - - -class EnsembleBuilder(object): - def __init__( - self, - backend: Backend, - dataset_name: str, - task_type: int, - metric: Scorer, - ensemble_size: int = 10, - ensemble_nbest: Union[int, float] = 100, - max_models_on_disc: int = 100, - performance_range_threshold: float = 0, - seed: int = 1, - precision: int = 32, - memory_limit: Optional[int] = 1024, - read_at_most: int = 5, - logger_port: int = logging.handlers.DEFAULT_TCP_LOGGING_PORT, - random_state: Optional[Union[int, np.random.RandomState]] = None, - unit_test: bool = False, - ): - """ - Constructor - - Parameters - ---------- - backend: util.backend.Backend - backend to write and read files - dataset_name: str - name of dataset - task_type: int - type of ML task - metric: str - name of metric to compute the loss of the given predictions - ensemble_size: int = 10 - maximal size of ensemble (passed to autosklearn.ensemble.ensemble_selection) - ensemble_nbest: int | float = 100 - if int: consider only the n best prediction - if float: consider only this fraction of the best models - Both with respect to the validation predictions - If performance_range_threshold > 0, might return less models - max_models_on_disc: int = 100 - Defines the maximum number of models that are kept in the disc. - If int, it must be greater or equal than 1, and dictates the max number of - models to keep. - If float, it will be interpreted as the max megabytes allowed of disc space. - That is, if the number of ensemble candidates require more disc space than - this float value, the worst models are deleted to keep within this budget. - Models and predictions of the worst-performing models will be deleted then. - If None, the feature is disabled. - It defines an upper bound on the models that can be used in the ensemble. - performance_range_threshold: float = 0 - Keep only models that are better than: - dummy + (best - dummy)*performance_range_threshold - E.g dummy=2, best=4, thresh=0.5 --> only consider models with loss > 3 - Will at most return the minimum between ensemble_nbest models, - and max_models_on_disc. Might return less - seed: int = 1 - random seed that is used as part of the filename - precision: int in [16,32,64,128] = 32 - precision of floats to read the predictions - memory_limit: Optional[int] = 1024 - memory limit in mb. If ``None``, no memory limit is enforced. - read_at_most: int = 5 - read at most n new prediction files in each iteration - logger_port: int = DEFAULT_TCP_LOGGING_PORT - port that receives logging records - random_state: Optional[int | RandomState] = None - An int or RandomState object used for generating the ensemble. - unit_test: bool = False - Turn on unit testing mode. This currently makes fit_ensemble raise - a MemoryError. 
Having this is very bad coding style, but I did not find a - way to make unittest.mock work through the pynisher with all spawn contexts. - If you know a better solution, please let us know by opening an issue. - """ - - super(EnsembleBuilder, self).__init__() - - self.backend = backend # communication with filesystem - self.dataset_name = dataset_name - self.task_type = task_type - self.metric = metric - self.ensemble_size = ensemble_size - self.performance_range_threshold = performance_range_threshold - - if isinstance(ensemble_nbest, numbers.Integral) and ensemble_nbest < 1: - raise ValueError( - "Integer ensemble_nbest has to be larger 1: %s" % ensemble_nbest - ) - elif not isinstance(ensemble_nbest, numbers.Integral): - if ensemble_nbest < 0 or ensemble_nbest > 1: - raise ValueError( - "Float ensemble_nbest best has to be >= 0 and <= 1: %s" - % ensemble_nbest - ) - - self.ensemble_nbest = ensemble_nbest - - # max_models_on_disc can be a float, in such case we need to - # remember the user specified Megabytes and translate this to - # max number of ensemble models. max_resident_models keeps the - # maximum number of models in disc - if max_models_on_disc is not None and max_models_on_disc < 0: - raise ValueError("max_models_on_disc has to be a positive number or None") - self.max_models_on_disc = max_models_on_disc - self.max_resident_models = None - - self.seed = seed - self.precision = precision - self.memory_limit = memory_limit - self.read_at_most = read_at_most - self.random_state = random_state - self.unit_test = unit_test - - # Setup the logger - self.logger_port = logger_port - self.logger = get_named_client_logger( - name="EnsembleBuilder", - port=self.logger_port, - ) - - if ensemble_nbest == 1: - self.logger.debug( - "Behaviour depends on int/float: %s, %s (ensemble_nbest, type)" - % (ensemble_nbest, type(ensemble_nbest)) - ) - - self.start_time = 0 - self.model_fn_re = re.compile(MODEL_FN_RE) - - self.last_hash = None # hash of ensemble training data - self.y_true_ensemble = None - self.SAVE2DISC = True - - # already read prediction files - # {"file name": { - # "ens_loss": float - # "mtime_ens": str, - # "mtime_valid": str, - # "mtime_test": str, - # "seed": int, - # "num_run": int, - # }} - self.read_losses = {} - # {"file_name": { - # Y_ENSEMBLE: np.ndarray - # Y_VALID: np.ndarray - # Y_TEST: np.ndarray - # } - # } - self.read_preds = {} - - # Depending on the dataset dimensions, - # regenerating every iteration, the predictions - # losses for self.read_preds - # is too computationally expensive - # As the ensemble builder is stateless - # (every time the ensemble builder gets resources - # from dask, it builds this object from scratch) - # we save the state of this dictionary to memory - # and read it if available - self.ensemble_memory_file = os.path.join( - self.backend.internals_directory, "ensemble_read_preds.pkl" - ) - if os.path.exists(self.ensemble_memory_file): - try: - with (open(self.ensemble_memory_file, "rb")) as memory: - self.read_preds, self.last_hash = pickle.load(memory) - except Exception as e: - self.logger.warning( - "Could not load the previous iterations of ensemble_builder" - " predictions. This might impact the quality of the run." 
- f" Exception={e} {traceback.format_exc()}" - ) - self.ensemble_loss_file = os.path.join( - self.backend.internals_directory, "ensemble_read_losses.pkl" - ) - if os.path.exists(self.ensemble_loss_file): - try: - with (open(self.ensemble_loss_file, "rb")) as memory: - self.read_losses = pickle.load(memory) - except Exception as e: - self.logger.warning( - "Could not load the previous iterations of ensemble_builder losses." - "This might impact the quality of the run. Exception={} {}".format( - e, - traceback.format_exc(), - ) - ) - - # hidden feature which can be activated via an environment variable. - # This keeps all models and predictions which have ever been a candidate. - # This is necessary to post-hoc compute the whole ensemble building trajectory. - self._has_been_candidate = set() - - self.validation_performance_ = np.inf - - # Track the ensemble performance - datamanager = self.backend.load_datamanager() - self.y_valid = datamanager.data.get("Y_valid") - self.y_test = datamanager.data.get("Y_test") - del datamanager - self.ensemble_history = [] - - def run( - self, - iteration: int, - pynisher_context: str, - time_left: Optional[float] = None, - end_at: Optional[float] = None, - time_buffer=5, - return_predictions: bool = False, - ): - - if time_left is None and end_at is None: - raise ValueError("Must provide either time_left or end_at.") - elif time_left is not None and end_at is not None: - raise ValueError("Cannot provide both time_left and end_at.") - - self.logger = get_named_client_logger( - name="EnsembleBuilder", - port=self.logger_port, - ) - - process_start_time = time.time() - while True: - - if time_left is not None: - time_elapsed = time.time() - process_start_time - time_left -= time_elapsed - else: - current_time = time.time() - if current_time > end_at: - break - else: - time_left = end_at - current_time - - wall_time_in_s = int(time_left - time_buffer) - if wall_time_in_s < 1: - break - context = multiprocessing.get_context(pynisher_context) - preload_modules(context) - - safe_ensemble_script = pynisher.enforce_limits( - wall_time_in_s=wall_time_in_s, - mem_in_mb=self.memory_limit, - logger=self.logger, - context=context, - )(self.main) - safe_ensemble_script(time_left, iteration, return_predictions) - if safe_ensemble_script.exit_status is pynisher.MemorylimitException: - # if ensemble script died because of memory error, - # reduce nbest to reduce memory consumption and try it again - - # ATTENTION: main will start from scratch; - # all data structures are empty again - try: - os.remove(self.ensemble_memory_file) - except: # noqa E722 - pass - - if ( - isinstance(self.ensemble_nbest, numbers.Integral) - and self.ensemble_nbest <= 1 - ): - if self.read_at_most == 1: - self.logger.error( - "Memory Exception -- Unable to further reduce the number" - " of ensemble members and can no further limit the number" - " of ensemble members loaded per iteration, please restart" - " Auto-sklearn with a higher value for the argument" - f" `memory_limit` (current limit is {self.memory_limit}MB)." - " The ensemble builder will keep running to delete files" - " from disk in case this was enabled.", - ) - self.ensemble_nbest = 0 - else: - self.read_at_most = 1 - self.logger.warning( - "Memory Exception -- Unable to further reduce the number of" - " ensemble members. Now reducing the number of predictions" - " per call to read at most to 1." 
- ) - else: - if isinstance(self.ensemble_nbest, numbers.Integral): - self.ensemble_nbest = max(1, int(self.ensemble_nbest / 2)) - else: - self.ensemble_nbest = self.ensemble_nbest / 2 - self.logger.warning( - "Memory Exception -- restart with " - "less ensemble_nbest: %d" % self.ensemble_nbest - ) - return [], self.ensemble_nbest, None, None, None - else: - return safe_ensemble_script.result - - return [], self.ensemble_nbest, None, None, None - - def main(self, time_left, iteration, return_predictions): - - # Pynisher jobs inside dask 'forget' - # the logger configuration. So we have to set it up - # accordingly - self.logger = get_named_client_logger( - name="EnsembleBuilder", - port=self.logger_port, - ) - - self.start_time = time.time() - train_pred, valid_pred, test_pred = None, None, None - - used_time = time.time() - self.start_time - self.logger.debug( - "Starting iteration %d, time left: %f", - iteration, - time_left - used_time, - ) - - # populates self.read_preds and self.read_losses - if not self.compute_loss_per_model(): - if return_predictions: - return ( - self.ensemble_history, - self.ensemble_nbest, - train_pred, - valid_pred, - test_pred, - ) - else: - return self.ensemble_history, self.ensemble_nbest, None, None, None - - # Only the models with the n_best predictions are candidates - # to be in the ensemble - candidate_models = self.get_n_best_preds() - if not candidate_models: # no candidates yet - if return_predictions: - return ( - self.ensemble_history, - self.ensemble_nbest, - train_pred, - valid_pred, - test_pred, - ) - else: - return self.ensemble_history, self.ensemble_nbest, None, None, None - - # populates predictions in self.read_preds - # reduces selected models if file reading failed - n_sel_valid, n_sel_test = self.get_valid_test_preds( - selected_keys=candidate_models - ) - - # If valid/test predictions loaded, then reduce candidate models to this set - if ( - len(n_sel_test) != 0 - and len(n_sel_valid) != 0 - and len(set(n_sel_valid).intersection(set(n_sel_test))) == 0 - ): - # Both n_sel_* have entries, but there is no overlap, this is critical - self.logger.error( - "n_sel_valid and n_sel_test are not empty, but do not overlap" - ) - if return_predictions: - return ( - self.ensemble_history, - self.ensemble_nbest, - train_pred, - valid_pred, - test_pred, - ) - else: - return self.ensemble_history, self.ensemble_nbest, None, None, None - - # If any of n_sel_* is not empty and overlaps with candidate_models, - # then ensure candidate_models AND n_sel_test are sorted the same - candidate_models_set = set(candidate_models) - if candidate_models_set.intersection(n_sel_valid).intersection(n_sel_test): - candidate_models = sorted( - list( - candidate_models_set.intersection(n_sel_valid).intersection( - n_sel_test - ) - ) - ) - n_sel_test = candidate_models - n_sel_valid = candidate_models - elif candidate_models_set.intersection(n_sel_valid): - candidate_models = sorted( - list(candidate_models_set.intersection(n_sel_valid)) - ) - n_sel_valid = candidate_models - elif candidate_models_set.intersection(n_sel_test): - candidate_models = sorted( - list(candidate_models_set.intersection(n_sel_test)) - ) - n_sel_test = candidate_models - else: - # This has to be the case - n_sel_test = [] - n_sel_valid = [] - - if os.environ.get("ENSEMBLE_KEEP_ALL_CANDIDATES"): - for candidate in candidate_models: - self._has_been_candidate.add(candidate) - - # train ensemble - ensemble = self.fit_ensemble(selected_keys=candidate_models) - - # Save the ensemble for later use in 
the main auto-sklearn module! - if ensemble is not None and self.SAVE2DISC: - self.backend.save_ensemble(ensemble, iteration, self.seed) - - # Delete files of non-candidate models - can only be done after fitting the - # ensemble and saving it to disc so we do not accidentally delete models in - # the previous ensemble - if self.max_resident_models is not None: - self._delete_excess_models(selected_keys=candidate_models) - - # Save the read losses status for the next iteration - with open(self.ensemble_loss_file, "wb") as memory: - pickle.dump(self.read_losses, memory) - - if ensemble is not None: - train_pred = self.predict( - set_="train", - ensemble=ensemble, - selected_keys=candidate_models, - n_preds=len(candidate_models), - index_run=iteration, - ) - # We can't use candidate_models here, as n_sel_* might be empty - valid_pred = self.predict( - set_="valid", - ensemble=ensemble, - selected_keys=n_sel_valid, - n_preds=len(candidate_models), - index_run=iteration, - ) - # TODO if predictions fails, build the model again during the - # next iteration! - test_pred = self.predict( - set_="test", - ensemble=ensemble, - selected_keys=n_sel_test, - n_preds=len(candidate_models), - index_run=iteration, - ) - - # Add a score to run history to see ensemble progress - self._add_ensemble_trajectory(train_pred, valid_pred, test_pred) - - # The loaded predictions and hash can only be saved after the ensemble has been - # built, because the hash is computed during the construction of the ensemble - with open(self.ensemble_memory_file, "wb") as memory: - pickle.dump((self.read_preds, self.last_hash), memory) - - if return_predictions: - return ( - self.ensemble_history, - self.ensemble_nbest, - train_pred, - valid_pred, - test_pred, - ) - else: - return self.ensemble_history, self.ensemble_nbest, None, None, None - - def get_disk_consumption(self, pred_path): - """ - gets the cost of a model being on disc - """ - - match = self.model_fn_re.search(pred_path) - if not match: - raise ValueError("Invalid path format %s" % pred_path) - _seed = int(match.group(1)) - _num_run = int(match.group(2)) - _budget = float(match.group(3)) - - stored_files_for_run = os.listdir( - self.backend.get_numrun_directory(_seed, _num_run, _budget) - ) - stored_files_for_run = [ - os.path.join( - self.backend.get_numrun_directory(_seed, _num_run, _budget), file_name - ) - for file_name in stored_files_for_run - ] - this_model_cost = sum([os.path.getsize(path) for path in stored_files_for_run]) - - # get the megabytes - return round(this_model_cost / math.pow(1024, 2), 2) - - def compute_loss_per_model(self): - """ - Compute the loss of the predictions on ensemble building data set; - populates self.read_preds and self.read_losses - """ - - self.logger.debug("Read ensemble data set predictions") - - if self.y_true_ensemble is None: - try: - self.y_true_ensemble = self.backend.load_targets_ensemble() - except FileNotFoundError: - self.logger.debug( - "Could not find true targets on ensemble data set: %s", - traceback.format_exc(), - ) - return False - - pred_path = os.path.join( - glob.escape(self.backend.get_runs_directory()), - "%d_*_*" % self.seed, - "predictions_ensemble_%s_*_*.npy*" % self.seed, - ) - y_ens_files = glob.glob(pred_path) - y_ens_files = [ - y_ens_file - for y_ens_file in y_ens_files - if y_ens_file.endswith(".npy") or y_ens_file.endswith(".npy.gz") - ] - self.y_ens_files = y_ens_files - # no validation predictions so far -- no files - if len(self.y_ens_files) == 0: - self.logger.debug( - "Found no prediction 
files on ensemble data set:" " %s" % pred_path - ) - return False - - # First sort files chronologically - to_read = [] - for y_ens_fn in self.y_ens_files: - match = self.model_fn_re.search(y_ens_fn) - _seed = int(match.group(1)) - _num_run = int(match.group(2)) - _budget = float(match.group(3)) - mtime = os.path.getmtime(y_ens_fn) - - to_read.append([y_ens_fn, match, _seed, _num_run, _budget, mtime]) - - n_read_files = 0 - # Now read file wrt to num_run - for y_ens_fn, match, _seed, _num_run, _budget, mtime in sorted( - to_read, key=lambda x: x[5] - ): - if self.read_at_most and n_read_files >= self.read_at_most: - # limit the number of files that will be read - # to limit memory consumption - break - - if not y_ens_fn.endswith(".npy") and not y_ens_fn.endswith(".npy.gz"): - self.logger.info( - "Error loading file (not .npy or .npy.gz): %s", y_ens_fn - ) - continue - - if not self.read_losses.get(y_ens_fn): - self.read_losses[y_ens_fn] = { - "ens_loss": np.inf, - "mtime_ens": 0, - "mtime_valid": 0, - "mtime_test": 0, - "seed": _seed, - "num_run": _num_run, - "budget": _budget, - "disc_space_cost_mb": None, - # Lazy keys so far: - # 0 - not loaded - # 1 - loaded and in memory - # 2 - loaded but dropped again - # 3 - deleted from disk due to space constraints - "loaded": 0, - } - if not self.read_preds.get(y_ens_fn): - self.read_preds[y_ens_fn] = { - Y_ENSEMBLE: None, - Y_VALID: None, - Y_TEST: None, - } - - if self.read_losses[y_ens_fn]["mtime_ens"] == mtime: - # same time stamp; nothing changed; - continue - - # actually read the predictions and compute their respective loss - try: - y_ensemble = self._read_np_fn(y_ens_fn) - loss = calculate_losses( - solution=self.y_true_ensemble, - prediction=y_ensemble, - task_type=self.task_type, - metrics=[self.metric], - scoring_functions=None, - )[self.metric.name] - - if np.isfinite(self.read_losses[y_ens_fn]["ens_loss"]): - self.logger.debug( - "Changing ensemble loss for file %s from %f to %f " - "because file modification time changed? %f - %f", - y_ens_fn, - self.read_losses[y_ens_fn]["ens_loss"], - loss, - self.read_losses[y_ens_fn]["mtime_ens"], - os.path.getmtime(y_ens_fn), - ) - - self.read_losses[y_ens_fn]["ens_loss"] = loss - - # It is not needed to create the object here - # To save memory, we just compute the loss. - self.read_losses[y_ens_fn]["mtime_ens"] = os.path.getmtime(y_ens_fn) - self.read_losses[y_ens_fn]["loaded"] = 2 - self.read_losses[y_ens_fn][ - "disc_space_cost_mb" - ] = self.get_disk_consumption(y_ens_fn) - - n_read_files += 1 - - except Exception: - self.logger.warning( - "Error loading %s: %s", - y_ens_fn, - traceback.format_exc(), - ) - self.read_losses[y_ens_fn]["ens_loss"] = np.inf - - self.logger.debug( - "Done reading %d new prediction files. Loaded %d predictions in " "total.", - n_read_files, - np.sum([pred["loaded"] > 0 for pred in self.read_losses.values()]), - ) - return True - - def get_n_best_preds(self): - """ - get best n predictions (i.e., keys of self.read_losses) - according to the loss on the "ensemble set" - n: self.ensemble_nbest - - Side effects: - ->Define the n-best models to use in ensemble - ->Only the best models are loaded - ->Any model that is not best is candidate to deletion - if max models in disc is exceeded. 
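
Editorial example, not part of the patch: the docstring above compresses the selection rule quite a bit. An integer ensemble_nbest is a hard cap, a float is a fraction of the available candidates, and at least one model always survives. A small sketch of that arithmetic, mirroring the keep_nbest computation in the body below; the function name and sample values are invented for illustration.

def n_to_keep(n_candidates: int, ensemble_nbest) -> int:
    """How many of the sorted candidates get loaded, before disc limits apply."""
    if isinstance(ensemble_nbest, int):
        return min(ensemble_nbest, n_candidates)
    # float: keep this fraction of the best models, but never fewer than one
    return max(1, min(n_candidates, int(n_candidates * ensemble_nbest)))

assert n_to_keep(10, 50) == 10    # int cap larger than the pool -> keep everything
assert n_to_keep(10, 3) == 3      # int cap -> top 3 by ensemble loss
assert n_to_keep(10, 0.25) == 2   # float -> top 25% of the pool
assert n_to_keep(3, 0.1) == 1     # always keep at least one model
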
- """ - - sorted_keys = self._get_list_of_sorted_preds() - - # number of models available - num_keys = len(sorted_keys) - # remove all that are at most as good as random - # note: dummy model must have run_id=1 (there is no run_id=0) - dummy_losses = list(filter(lambda x: x[2] == 1, sorted_keys)) - # number of dummy models - num_dummy = len(dummy_losses) - dummy_loss = dummy_losses[0] - self.logger.debug("Use %f as dummy loss" % dummy_loss[1]) - - # sorted_keys looks like: (k, v["ens_loss"], v["num_run"]) - # On position 1 we have the loss of a minimization problem. - # keep only the predictions with a loss smaller than the dummy - # prediction - sorted_keys = filter(lambda x: x[1] < dummy_loss[1], sorted_keys) - - # remove Dummy Classifier - sorted_keys = list(filter(lambda x: x[2] > 1, sorted_keys)) - if not sorted_keys: - # no model left; try to use dummy loss (num_run==0) - # log warning when there are other models but not better than dummy model - if num_keys > num_dummy: - self.logger.warning( - "No models better than random - using Dummy loss!" - "Number of models besides current dummy model: %d. " - "Number of dummy models: %d", - num_keys - 1, - num_dummy, - ) - sorted_keys = [ - (k, v["ens_loss"], v["num_run"]) - for k, v in self.read_losses.items() - if v["seed"] == self.seed and v["num_run"] == 1 - ] - # reload predictions if losses changed over time and a model is - # considered to be in the top models again! - if not isinstance(self.ensemble_nbest, numbers.Integral): - # Transform to number of models to keep. Keep at least one - keep_nbest = max( - 1, min(len(sorted_keys), int(len(sorted_keys) * self.ensemble_nbest)) - ) - self.logger.debug( - "Library pruning: using only top %f percent of the models for ensemble " - "(%d out of %d)", - self.ensemble_nbest * 100, - keep_nbest, - len(sorted_keys), - ) - else: - # Keep only at most ensemble_nbest - keep_nbest = min(self.ensemble_nbest, len(sorted_keys)) - self.logger.debug( - "Library Pruning: using for ensemble only " - " %d (out of %d) models" % (keep_nbest, len(sorted_keys)) - ) - - # If max_models_on_disc is None, do nothing - # One can only read at most max_models_on_disc models - if self.max_models_on_disc is not None: - if not isinstance(self.max_models_on_disc, numbers.Integral): - consumption = [ - [ - v["ens_loss"], - v["disc_space_cost_mb"], - ] - for v in self.read_losses.values() - if v["disc_space_cost_mb"] is not None - ] - max_consumption = max(c[1] for c in consumption) - - # We are pessimistic with the consumption limit indicated by - # max_models_on_disc by 1 model. Such model is assumed to spend - # max_consumption megabytes - if ( - sum(c[1] for c in consumption) + max_consumption - ) > self.max_models_on_disc: - - # just leave the best -- smaller is better! 
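
# Editorial sketch, not part of the patch: the megabyte-budget branch right here
# charges the budget with one extra worst-case model (max_consumption) and then
# walks the cumulative sum over the best-first ordering to decide how many
# models may stay on disc.  The numbers below are invented for illustration.
import numpy as np

max_models_on_disc = 100.0                       # float => interpreted as MB
consumption = sorted([(0.20, 30.0), (0.10, 25.0), (0.40, 35.0), (0.30, 20.0)])
sizes = [mb for _, mb in consumption]            # best model (lowest loss) first
max_consumption = max(sizes)

cumulative = np.cumsum(sizes) + max_consumption  # pessimistic running total
n_fit = int(np.argmax(cumulative > max_models_on_disc))
max_resident_models = max(1, n_fit)              # always keep at least one model

print(cumulative)           # [ 60.  90. 110. 145.]
print(max_resident_models)  # 2 -> only the two best models are kept
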
- # This list is in descending order, to preserve the best models - sorted_cum_consumption = ( - np.cumsum([c[1] for c in list(sorted(consumption))]) - + max_consumption - ) - max_models = np.argmax( - sorted_cum_consumption > self.max_models_on_disc - ) - - # Make sure that at least 1 model survives - self.max_resident_models = max(1, max_models) - self.logger.warning( - "Limiting num of models via float max_models_on_disc={}" - " as accumulated={} worst={} num_models={}".format( - self.max_models_on_disc, - (sum(c[1] for c in consumption) + max_consumption), - max_consumption, - self.max_resident_models, - ) - ) - else: - self.max_resident_models = None - else: - self.max_resident_models = self.max_models_on_disc - - if ( - self.max_resident_models is not None - and keep_nbest > self.max_resident_models - ): - self.logger.debug( - "Restricting the number of models to %d instead of %d due to argument " - "max_models_on_disc", - self.max_resident_models, - keep_nbest, - ) - keep_nbest = self.max_resident_models - - # consider performance_range_threshold - if self.performance_range_threshold > 0: - best_loss = sorted_keys[0][1] - worst_loss = dummy_loss[1] - worst_loss -= (worst_loss - best_loss) * self.performance_range_threshold - if sorted_keys[keep_nbest - 1][1] > worst_loss: - # We can further reduce number of models - # since worst model is worse than thresh - for i in range(0, keep_nbest): - # Look at most at keep_nbest models, - # but always keep at least one model - current_loss = sorted_keys[i][1] - if current_loss >= worst_loss: - self.logger.debug( - "Dynamic Performance range: " - "Further reduce from %d to %d models", - keep_nbest, - max(1, i), - ) - keep_nbest = max(1, i) - break - ensemble_n_best = keep_nbest - - # reduce to keys - sorted_keys = list(map(lambda x: x[0], sorted_keys)) - - # remove loaded predictions for non-winning models - for k in sorted_keys[ensemble_n_best:]: - if k in self.read_preds: - self.read_preds[k][Y_ENSEMBLE] = None - self.read_preds[k][Y_VALID] = None - self.read_preds[k][Y_TEST] = None - if self.read_losses[k]["loaded"] == 1: - self.logger.debug( - "Dropping model %s (%d,%d) with loss %f.", - k, - self.read_losses[k]["seed"], - self.read_losses[k]["num_run"], - self.read_losses[k]["ens_loss"], - ) - self.read_losses[k]["loaded"] = 2 - - # Load the predictions for the winning - for k in sorted_keys[:ensemble_n_best]: - if ( - k not in self.read_preds or self.read_preds[k][Y_ENSEMBLE] is None - ) and self.read_losses[k]["loaded"] != 3: - self.read_preds[k][Y_ENSEMBLE] = self._read_np_fn(k) - # No need to load valid and test here because they are loaded - # only if the model ends up in the ensemble - self.read_losses[k]["loaded"] = 1 - - # return keys of self.read_losses with lowest losses - return sorted_keys[:ensemble_n_best] - - def get_valid_test_preds( - self, selected_keys: List[str] - ) -> Tuple[List[str], List[str]]: - """Get valid and test predictions from disc and store them in self.read_preds - Parameters - --------- - selected_keys: list - list of selected keys of self.read_preds - - Return - ------ - success_keys: - all keys in selected keys for which we could read the valid and - test predictions - """ - success_keys_valid = [] - success_keys_test = [] - - for k in selected_keys: - valid_fn = glob.glob( - os.path.join( - glob.escape(self.backend.get_runs_directory()), - "%d_%d_%s" - % ( - self.read_losses[k]["seed"], - self.read_losses[k]["num_run"], - self.read_losses[k]["budget"], - ), - "predictions_valid_%d_%d_%s.npy*" - % ( - 
self.read_losses[k]["seed"], - self.read_losses[k]["num_run"], - self.read_losses[k]["budget"], - ), - ) - ) - valid_fn = [ - vfn - for vfn in valid_fn - if vfn.endswith(".npy") or vfn.endswith(".npy.gz") - ] - test_fn = glob.glob( - os.path.join( - glob.escape(self.backend.get_runs_directory()), - "%d_%d_%s" - % ( - self.read_losses[k]["seed"], - self.read_losses[k]["num_run"], - self.read_losses[k]["budget"], - ), - "predictions_test_%d_%d_%s.npy*" - % ( - self.read_losses[k]["seed"], - self.read_losses[k]["num_run"], - self.read_losses[k]["budget"], - ), - ) - ) - test_fn = [ - tfn - for tfn in test_fn - if tfn.endswith(".npy") or tfn.endswith(".npy.gz") - ] - - if len(valid_fn) == 0: - # self.logger.debug("Not found validation prediction file " - # "(although ensemble predictions available): " - # "%s" % valid_fn) - pass - else: - valid_fn = valid_fn[0] - if ( - self.read_losses[k]["mtime_valid"] == os.path.getmtime(valid_fn) - and k in self.read_preds - and self.read_preds[k][Y_VALID] is not None - ): - success_keys_valid.append(k) - continue - try: - y_valid = self._read_np_fn(valid_fn) - self.read_preds[k][Y_VALID] = y_valid - success_keys_valid.append(k) - self.read_losses[k]["mtime_valid"] = os.path.getmtime(valid_fn) - except Exception: - self.logger.warning( - "Error loading %s: %s", valid_fn, traceback.format_exc() - ) - - if len(test_fn) == 0: - # self.logger.debug("Not found test prediction file (although " - # "ensemble predictions available):%s" % - # test_fn) - pass - else: - test_fn = test_fn[0] - if ( - self.read_losses[k]["mtime_test"] == os.path.getmtime(test_fn) - and k in self.read_preds - and self.read_preds[k][Y_TEST] is not None - ): - success_keys_test.append(k) - continue - try: - y_test = self._read_np_fn(test_fn) - self.read_preds[k][Y_TEST] = y_test - success_keys_test.append(k) - self.read_losses[k]["mtime_test"] = os.path.getmtime(test_fn) - except Exception: - self.logger.warning( - "Error loading %s: %s", test_fn, traceback.format_exc() - ) - - return success_keys_valid, success_keys_test - - def fit_ensemble(self, selected_keys: list): - """ - Parameters - --------- - selected_keys: list - list of selected keys of self.read_losses - - Returns - ------- - ensemble: EnsembleSelection - trained Ensemble - """ - if self.unit_test: - raise MemoryError() - - predictions_train = [self.read_preds[k][Y_ENSEMBLE] for k in selected_keys] - include_num_runs = [ - ( - self.read_losses[k]["seed"], - self.read_losses[k]["num_run"], - self.read_losses[k]["budget"], - ) - for k in selected_keys - ] - - # check hash if ensemble training data changed - current_hash = "".join( - [ - str(zlib.adler32(predictions_train[i].data.tobytes())) - for i in range(len(predictions_train)) - ] - ) - if self.last_hash == current_hash: - self.logger.debug( - "No new model predictions selected -- skip ensemble building " - "-- current performance: %f", - self.validation_performance_, - ) - - return None - self.last_hash = current_hash - - ensemble = EnsembleSelection( - ensemble_size=self.ensemble_size, - task_type=self.task_type, - metric=self.metric, - random_state=self.random_state, - ) - - try: - self.logger.debug( - "Fitting the ensemble on %d models.", - len(predictions_train), - ) - start_time = time.time() - ensemble.fit(predictions_train, self.y_true_ensemble, include_num_runs) - end_time = time.time() - self.logger.debug( - "Fitting the ensemble took %.2f seconds.", - end_time - start_time, - ) - self.logger.info(ensemble) - self.validation_performance_ = min( - 
self.validation_performance_, - ensemble.get_validation_performance(), - ) - - except ValueError: - self.logger.error("Caught ValueError: %s", traceback.format_exc()) - return None - except IndexError: - self.logger.error("Caught IndexError: %s" + traceback.format_exc()) - return None - finally: - # Explicitly free memory - del predictions_train - - return ensemble - - def predict( - self, - set_: str, - ensemble: AbstractEnsemble, - selected_keys: list, - n_preds: int, - index_run: int, - ): - """Save preditions on ensemble, validation and test data on disc - - Parameters - ---------- - set_: ["valid","test"] - data split name - ensemble: EnsembleSelection - trained Ensemble - selected_keys: list - list of selected keys of self.read_losses - n_preds: int - number of prediction models used for ensemble building - same number of predictions on valid and test are necessary - index_run: int - n-th time that ensemble predictions are written to disc - - Return - ------ - y: np.ndarray - """ - self.logger.debug("Predicting the %s set with the ensemble!", set_) - - if set_ == "valid": - pred_set = Y_VALID - elif set_ == "test": - pred_set = Y_TEST - else: - pred_set = Y_ENSEMBLE - predictions = [self.read_preds[k][pred_set] for k in selected_keys] - - if n_preds == len(predictions): - y = ensemble.predict(predictions) - if self.task_type == BINARY_CLASSIFICATION: - y = y[:, 1] - return y - else: - self.logger.info( - "Found inconsistent number of predictions and models (%d vs " - "%d) for subset %s", - len(predictions), - n_preds, - set_, - ) - return None - - def _add_ensemble_trajectory(self, train_pred, valid_pred, test_pred): - """ - Records a snapshot of how the performance look at a given training - time. - - Parameters - ---------- - ensemble: EnsembleSelection - The ensemble selection object to record - valid_pred: np.ndarray - The predictions on the validation set using ensemble - test_pred: np.ndarray - The predictions on the test set using ensemble - - """ - if self.task_type == BINARY_CLASSIFICATION: - if len(train_pred.shape) == 1 or train_pred.shape[1] == 1: - train_pred = np.vstack( - ((1 - train_pred).reshape((1, -1)), train_pred.reshape((1, -1))) - ).transpose() - if valid_pred is not None and ( - len(valid_pred.shape) == 1 or valid_pred.shape[1] == 1 - ): - valid_pred = np.vstack( - ((1 - valid_pred).reshape((1, -1)), valid_pred.reshape((1, -1))) - ).transpose() - if test_pred is not None and ( - len(test_pred.shape) == 1 or test_pred.shape[1] == 1 - ): - test_pred = np.vstack( - ((1 - test_pred).reshape((1, -1)), test_pred.reshape((1, -1))) - ).transpose() - - performance_stamp = { - "Timestamp": pd.Timestamp.now(), - "ensemble_optimization_score": calculate_scores( - solution=self.y_true_ensemble, - prediction=train_pred, - task_type=self.task_type, - metrics=[self.metric], - scoring_functions=None, - )[self.metric.name], - } - if valid_pred is not None: - # TODO: valid_pred are a legacy from competition manager - # and this if never happens. 
Re-evaluate Y_valid support - performance_stamp["ensemble_val_score"] = calculate_scores( - solution=self.y_valid, - prediction=valid_pred, - task_type=self.task_type, - metrics=[self.metric], - scoring_functions=None, - )[self.metric.name] - - # In case test_pred was provided - if test_pred is not None: - performance_stamp["ensemble_test_score"] = calculate_scores( - solution=self.y_test, - prediction=test_pred, - task_type=self.task_type, - metrics=[self.metric], - scoring_functions=None, - )[self.metric.name] - - self.ensemble_history.append(performance_stamp) - - def _get_list_of_sorted_preds(self): - """ - Returns a list of sorted predictions in descending order - Losses are taken from self.read_losses. - - Parameters - ---------- - None - - Return - ------ - sorted_keys: list - """ - # Sort by loss - smaller is better! - sorted_keys = list( - sorted( - [(k, v["ens_loss"], v["num_run"]) for k, v in self.read_losses.items()], - # Sort by loss as priority 1 and then by num_run on a ascending order - # We want small num_run first - key=lambda x: (x[1], x[2]), - ) - ) - return sorted_keys - - def _delete_excess_models(self, selected_keys: List[str]): - """ - Deletes models excess models on disc. self.max_models_on_disc - defines the upper limit on how many models to keep. - Any additional model with a worst loss than the top - self.max_models_on_disc is deleted. - - """ - - # Loop through the files currently in the directory - for pred_path in self.y_ens_files: - - # Do not delete candidates - if pred_path in selected_keys: - continue - - if pred_path in self._has_been_candidate: - continue - - match = self.model_fn_re.search(pred_path) - _seed = int(match.group(1)) - _num_run = int(match.group(2)) - _budget = float(match.group(3)) - - # Do not delete the dummy prediction - if _num_run == 1: - continue - - numrun_dir = self.backend.get_numrun_directory(_seed, _num_run, _budget) - try: - os.rename(numrun_dir, numrun_dir + ".old") - shutil.rmtree(numrun_dir + ".old") - self.logger.info("Deleted files of non-candidate model %s", pred_path) - self.read_losses[pred_path]["disc_space_cost_mb"] = None - self.read_losses[pred_path]["loaded"] = 3 - self.read_losses[pred_path]["ens_loss"] = np.inf - except Exception as e: - self.logger.error( - "Failed to delete files of non-candidate model %s due" - " to error %s", - pred_path, - e, - ) - - def _read_np_fn(self, path): - - # Support for string precision - if isinstance(self.precision, str): - precision = int(self.precision) - self.logger.warning("Interpreted str-precision as {}".format(precision)) - else: - precision = self.precision - - if path.endswith("gz"): - open_method = gzip.open - elif path.endswith("npy"): - open_method = open - else: - raise ValueError("Unknown filetype %s" % path) - with open_method(path, "rb") as fp: - if precision == 16: - predictions = np.load(fp, allow_pickle=True).astype(dtype=np.float16) - elif precision == 32: - predictions = np.load(fp, allow_pickle=True).astype(dtype=np.float32) - elif precision == 64: - predictions = np.load(fp, allow_pickle=True).astype(dtype=np.float64) - else: - predictions = np.load(fp, allow_pickle=True) - return predictions diff --git a/autosklearn/ensemble_building/__init__.py b/autosklearn/ensemble_building/__init__.py new file mode 100644 index 0000000000..4c63165e1b --- /dev/null +++ b/autosklearn/ensemble_building/__init__.py @@ -0,0 +1,5 @@ +from autosklearn.ensemble_building.builder import EnsembleBuilder +from autosklearn.ensemble_building.manager import EnsembleBuilderManager 
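
Editorial note, not part of the patch: since the old top-level module is deleted in this diff, downstream imports move to the package whose __init__ starts here. A short usage sketch, valid only once this revision is installed.

# Before this change:
#   from autosklearn.ensemble_builder import EnsembleBuilderManager
# After this change, the same class plus the split-out pieces come from:
from autosklearn.ensemble_building import EnsembleBuilder, EnsembleBuilderManager, Run

# The builder also now reports back a 2-tuple (ensemble_history, nbest), which
# is why automl.py above unpacks `ensemble_history, _ = result` instead of the
# old 5-tuple.
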
+from autosklearn.ensemble_building.run import Run + +__all__ = ["EnsembleBuilder", "EnsembleBuilderManager", "Run"] diff --git a/autosklearn/ensemble_building/builder.py b/autosklearn/ensemble_building/builder.py new file mode 100644 index 0000000000..fdd9a31b05 --- /dev/null +++ b/autosklearn/ensemble_building/builder.py @@ -0,0 +1,926 @@ +from __future__ import annotations + +from typing import Any, Iterable, Sequence, cast + +import logging.handlers +import multiprocessing +import numbers +import os +import pickle +import time +import traceback +from itertools import accumulate +from pathlib import Path + +import numpy as np +import pandas as pd +import pynisher + +from autosklearn.automl_common.common.utils.backend import Backend +from autosklearn.data.xy_data_manager import XYDataManager +from autosklearn.ensemble_building.run import Run, RunID +from autosklearn.ensembles.ensemble_selection import EnsembleSelection +from autosklearn.metrics import Scorer, calculate_losses, calculate_scores +from autosklearn.util.disk import rmtree +from autosklearn.util.functional import cut, findwhere, split +from autosklearn.util.logging_ import get_named_client_logger +from autosklearn.util.parallel import preload_modules + +CANDIDATES_FILENAME = "previous_ensemble_building_candidates.pkl" + + +class EnsembleBuilder: + """Builds ensembles out of runs that exist in the Backend + + This is used by EnsembleBuilderManager and created in a dask-client + every time a run finishes and there is currently no EnsembleBuilder active. + """ + + def __init__( + self, + backend: Backend, + dataset_name: str, + task_type: int, + metric: Scorer, + ensemble_size: int = 50, + ensemble_nbest: int | float = 50, + max_models_on_disc: int | float | None = 100, + performance_range_threshold: float = 0, + seed: int = 1, + precision: int = 32, + memory_limit: int | None = 1024, + read_at_most: int | None = None, + logger_port: int = logging.handlers.DEFAULT_TCP_LOGGING_PORT, + random_state: int | np.random.RandomState | None = None, + ): + """ + Parameters + ---------- + backend: Backend + backend to write and read files + + dataset_name: str + name of dataset + + task_type: int + type of ML task + + metric: str + name of metric to compute the loss of the given predictions + + ensemble_size: int = 50 + maximal size of ensemble (passed to autosklearn.ensemble.ensemble_selection) + + ensemble_nbest: int | float = 50 + + * int: consider only the n best prediction (> 0) + + * float: consider only this fraction of the best, between (0, 1) + + Both with respect to the validation predictions. + If performance_range_threshold > 0, might return less models + + max_models_on_disc: int | float | None = 100 + Defines the maximum number of models that are kept in the disc. + It defines an upper bound on the models that can be used in the ensemble. + + * int: and dictates the max number of models to keep. (>= 1) + + * float: it will be interpreted as the max megabytes allowed of disc space. + If the number of ensemble candidates require more disc space than + this float value, the worst models are deleted to keep within this budget. + Models and predictions of the worst-performing models will be deleted then. + + * None: the feature is disabled. + + performance_range_threshold: float = 0 + Will at most return the minimum between ensemble_nbest models, + and max_models_on_disc. 
Might return less + + Keep only models that are better than: + + x = performance_range_threshold + x * dummy + + E.g dummy=2, best=4, thresh=0.5 --> only consider models with loss > 3 + + seed: int = 1 + random seed that is used as part of the filename + + precision: int [16 | 32 | 64 | 128] = 32 + precision of floats to read the predictions + + memory_limit: int | None = 1024 + memory limit in mb. If ``None``, no memory limit is enforced. + + read_at_most: int | None = None + read at most n new prediction files in each iteration. If `None`, will read + the predictions and calculate losses for all runs that require it. + + + logger_port: int = DEFAULT_TCP_LOGGING_PORT + port that receives logging records + + random_state: int | RandomState | None = None + An int or RandomState object used for generating the ensemble. + """ + if isinstance(ensemble_nbest, int) and ensemble_nbest < 1: + raise ValueError(f"int ensemble_nbest ({ensemble_nbest}) must be (>1)") + + if isinstance(ensemble_nbest, float) and not (0 <= ensemble_nbest <= 1): + raise ValueError(f"float ensemble_nbest ({ensemble_nbest}) not in (0,1)") + + if max_models_on_disc is not None and max_models_on_disc < 0: + raise ValueError("max_models_on_disc must be positive or None") + + if read_at_most is not None and (read_at_most < 1 or read_at_most == np.inf): + raise ValueError("Read at most must be int greater than 1 or None") + + # Setup the logger + self.logger = get_named_client_logger(name="EnsembleBuilder", port=logger_port) + self.logger_port = logger_port + + # Log the behaviour + if ensemble_nbest == 1: + t = type(ensemble_nbest) + self.logger.debug(f"Using behaviour when {t} for {ensemble_nbest}:{t}") + + self.seed = seed + self.metric = metric + self.backend = backend + self.precision = precision + self.task_type = task_type + self.memory_limit = memory_limit + self.read_at_most = read_at_most + self.random_state = random_state + self.dataset_name = dataset_name + self.ensemble_size = ensemble_size + self.ensemble_nbest = ensemble_nbest + self.performance_range_threshold = performance_range_threshold + + # Decide if self.max_models_on_disc is a memory limit or model limit + self.max_models_on_disc: int | None = None + self.model_memory_limit: float | None = None + + if isinstance(max_models_on_disc, int): + self.max_models_on_disc = max_models_on_disc + elif isinstance(max_models_on_disc, float): + self.model_memory_limit = max_models_on_disc + + # The starting time of the procedure + self.start_time: float = 0.0 + + # Track the ensemble performance + self.ensemble_history: list[dict[str, Any]] = [] + + # Keep running knowledge of its validation performance + self.validation_performance_ = np.inf + + # Data we may need + datamanager: XYDataManager = self.backend.load_datamanager() + self._y_test: np.ndarray | None = datamanager.data.get("Y_test", None) + self._y_ensemble: np.ndarray | None = None + + @property + def previous_candidates_path(self) -> Path: + """Path to the cached losses we store between runs""" + return Path(self.backend.internals_directory) / CANDIDATES_FILENAME + + def previous_candidates(self) -> dict[RunID, Run]: + """Load any previous candidates that were saved from previous runs + + Returns + ------- + dict[RunID, Run] + A dictionary from RunId's to the previous candidates + """ + if self.previous_candidates_path.exists(): + with self.previous_candidates_path.open("rb") as f: + return pickle.load(f) + else: + return {} + + def available_runs(self) -> dict[RunID, Run]: + """Get a dictionary of all 
available runs on the filesystem + + Returns + ------- + dict[RunID, Run] + A dictionary from RunId's to the available runs + """ + runs_dir = Path(self.backend.get_runs_directory()) + runs = iter(Run(path=dir) for dir in runs_dir.iterdir() if Run.valid(dir)) + return {run.id: run for run in runs} + + def targets(self, kind: str = "ensemble") -> np.ndarray | None: + """The ensemble targets used for training the ensemble + + It will attempt to load and cache them in memory but + return None if it can't. + + Returns + ------- + np.ndarray | None + The ensemble targets, if they can be loaded + """ + if kind == "ensemble": + if self._y_ensemble is None: + if os.path.exists(self.backend._get_targets_ensemble_filename()): + self._y_ensemble = self.backend.load_targets_ensemble() + return self._y_ensemble + + elif kind == "test": + return self._y_test + + else: + raise NotImplementedError(kind) + + def run( + self, + iteration: int, + pynisher_context: str | None = None, + time_left: float | None = None, + end_at: float | None = None, + time_buffer: int = 5, + ) -> tuple[list[dict[str, Any]], int | float]: + """Run the ensemble building process + + Parameters + ---------- + iteration : int + What iteration to associate with this run + + pynisher_context : str | None = None + The pynisher context to run in. If None, defaults to + multiprocessing.get_context(None) + + time_left : float | None = None + How much time should be left for this run. Either this or `end_at` must + be provided. + + end_at : float | None = None + When this run should end. Either this or `time_left` must be provided. + + time_buffer : int = 5 + How much extra time to add as a buffer to this run. This means there is + always some amount of time to do something useful. + + Returns + ------- + (ensemble_history, nbest) + """ + if time_left is None and end_at is None: + raise ValueError("Must provide either time_left or end_at.") + + elif time_left is not None and end_at is not None: + raise ValueError("Cannot provide both time_left and end_at.") + + if not self.logger: + self.logger = get_named_client_logger( + name="EnsembleBuilder", + port=self.logger_port, + ) + + process_start_time = time.time() + while True: + + if time_left is not None: + time_elapsed = time.time() - process_start_time + time_left -= time_elapsed + else: + assert end_at is not None + current_time = time.time() + if current_time > end_at: + break + else: + time_left = end_at - current_time + + wall_time_in_s = int(time_left - time_buffer) + if wall_time_in_s < 1: + break + + context = multiprocessing.get_context(pynisher_context) + preload_modules(context) + + safe_ensemble_script = pynisher.enforce_limits( + wall_time_in_s=wall_time_in_s, + mem_in_mb=self.memory_limit, + logger=self.logger, + context=context, + )(self.main) + + safe_ensemble_script(time_left, iteration) + + status = safe_ensemble_script.exit_status + if isinstance(status, pynisher.MemorylimitException): + # if ensemble script died because of memory error, + # reduce nbest to reduce memory consumption and try it again + + # ATTENTION: main will start from scratch; + # all data structures are empty again + try: + self.previous_candidates_path.unlink() + except: # noqa E722 + pass + + if ( + isinstance(self.ensemble_nbest, numbers.Integral) + and self.ensemble_nbest <= 1 + ): + if self.read_at_most == 1: + self.logger.error( + "Memory Exception -- Unable to further reduce the number" + " of ensemble members and can no further limit the number" + " of ensemble members loaded per iteration, 
please restart" + " Auto-sklearn with a higher value for the argument" + f" `memory_limit` (current limit is {self.memory_limit}MB)." + " The ensemble builder will keep running to delete files" + " from disk in case this was enabled.", + ) + self.ensemble_nbest = 0 + else: + self.read_at_most = 1 + self.logger.warning( + "Memory Exception -- Unable to further reduce the number of" + " ensemble members. Now reducing the number of predictions" + " per call to read at most to 1." + ) + else: + if isinstance(self.ensemble_nbest, numbers.Integral): + self.ensemble_nbest = max(1, int(self.ensemble_nbest / 2)) + else: + self.ensemble_nbest = self.ensemble_nbest / 2 + self.logger.warning( + "Memory Exception -- restart with " + "less ensemble_nbest: %d" % self.ensemble_nbest + ) + return [], self.ensemble_nbest + elif isinstance(status, pynisher.AnythingException): + return ([], self.ensemble_nbest) + else: + return safe_ensemble_script.result + + return [], self.ensemble_nbest + + def main( + self, + time_left: float | None = None, + iteration: int = 0, + ) -> tuple[list[dict[str, Any]], int | float]: + """Run the main loop of ensemble building + + The process is: + * Load all available runs + previous candidates (if any) + * Update the loss of those that require + * From these runs, get a list of candidates + * Save candidates + * Delete models that are not candidates + * Build an ensemble from the candidates if there are new candidates + + Parameters + ---------- + time_left : float | None = None + How much time is left for this run + + iteration : int = 0 + The iteration of this run + + Returns + ------- + (ensemble_history: list[dict[str, Any]], nbest: int | float) + """ + # Pynisher jobs inside dask 'forget' the logger configuration. + # So we have to set it up accordingly + self.logger = get_named_client_logger( + name="EnsembleBuilder", + port=self.logger_port, + ) + + if time_left is not None: + self.start_time = time.time() + used_time = time.time() - self.start_time + left_for_iter = time_left - used_time + itr = iteration if str(iteration) is not None else "" + self.logger.debug(f"Starting iteration {itr}, time left: {left_for_iter}") + + # Can't load data, exit early + if not os.path.exists(self.backend._get_targets_ensemble_filename()): + self.logger.debug(f"No targets for ensemble: {traceback.format_exc()}") + raise RuntimeError("No targets for ensemble") + + # We will delete runs once we are complete + deletable_runs: set[Run] = set() + + # Load in information from previous candidates and also runs + available_runs = self.available_runs() + + # Update runs with information of available previous candidates + previous_candidates = self.previous_candidates() + available_runs.update(previous_candidates) + + # We just need the values now, not the key value pairs {run.id: Run} + runs = list(available_runs.values()) + + if len(runs) == 0: + self.logger.debug("Found no runs") + raise RuntimeError("Found no runs") + + # Calculate the loss for those that require it + requires_update = self.requires_loss_update(runs) + if self.read_at_most is not None: + requires_update = requires_update[: self.read_at_most] + + for run in requires_update: + run.record_modified_times() # So we don't count as modified next time + run.loss = self.loss(run, kind="ensemble") + + # Get the dummy and real runs + dummies, candidates = split(runs, by=lambda r: r.is_dummy()) + + # We see if we need to delete any of the real runs before we waste compute + # on evaluating their candidacy for ensemble building + if 
any(candidates): + candidates, to_delete = self.requires_deletion( + candidates, + max_models=self.max_models_on_disc, + memory_limit=self.model_memory_limit, + ) + + # If there are no candidates left, we just keep the best one + if not any(candidates): + best = min(to_delete, key=lambda r: (r.loss, r.num_run)) + candidates = [best] + to_delete.remove(best) + + if any(to_delete): + self.logger.info( + f"Deleting runs {to_delete} due to" + f" max_models={self.max_models_on_disc} and/or" + f" memory_limit={self.model_memory_limit}" + ) + deletable_runs.update(to_delete) + + # If there are any candidates, perform candidates selection + if any(candidates): + candidates, to_delete = self.candidate_selection( + runs=candidates, + dummies=dummies, + better_than_dummy=True, + nbest=self.ensemble_nbest, + performance_range_threshold=self.performance_range_threshold, + ) + if any(to_delete): + self.logger.info( + f"Deleting runs {to_delete} due to" + f" nbest={self.ensemble_nbest} and/or" + f" performance_range_threshold={self.performance_range_threshold}" + ) + deletable_runs.update(to_delete) + else: + candidates = dummies + self.logger.warning("No runs were available to build an ensemble from") + + # In case we record test predictions and not every model has test predictions, + # only use the subset of models that has predictions for both the test set and + # the ensemble optimization set. + candidates_set = set(candidates) + test_subset = {r for r in candidates if r.pred_path("test").exists()} + + if len(test_subset) > 0: + candidates = sorted(test_subset, key=lambda r: r.id) + test_models = candidates + + to_delete = candidates_set - test_subset + if any(to_delete): + self.logger.info( + f"Deleting runs {to_delete} due to runs not" + ' having "test_predictions" while others do not:' + f"\nHave test_predictions = {test_subset}" + f"\nNo test_predictions = {to_delete}" + ) + deletable_runs.update(to_delete) + + else: + candidates = sorted(candidates_set, key=lambda r: r.id) + test_models = [] + + # Save the candidates for the next round + with self.previous_candidates_path.open("wb") as f: + pickle.dump({run.id: run for run in candidates}, f) + + # If there was any change from the previous run, either in terms of + # runs or one of those runs had its loss updated, then we need to + # fit the ensemble builder + previous_candidate_ids = set(previous_candidates) + current_candidate_ids = set(run.id for run in candidates) + difference = previous_candidate_ids ^ current_candidate_ids + + was_updated_candidates = list(run in candidates for run in requires_update) + + if not any(difference) and not any(was_updated_candidates): + self.logger.info("All ensemble candidates the same, no update required") + return self.ensemble_history, self.ensemble_nbest + + targets = cast(np.ndarray, self.targets("ensemble")) # Sure they exist + + ensemble = self.fit_ensemble( + candidates, + targets=targets, + size=self.ensemble_size, + task=self.task_type, + metric=self.metric, + precision=self.precision, + random_state=self.random_state, + ) + + self.logger.info(str(ensemble)) + ens_perf = ensemble.get_validation_performance() + self.validation_performance_ = min(self.validation_performance_, ens_perf) + self.backend.save_ensemble( + ensemble=ensemble, idx=iteration, seed=self.seed # type: ignore + ) + + performance_stamp = {"Timestamp": pd.Timestamp.now()} + + for kind, score_name, models in [ + ("ensemble", "optimization", candidates), + ("test", "test", test_models), + ]: + if len(models) == 0: + continue + + 
pred_targets = self.targets(kind) + if pred_targets is None: + self.logger.warning(f"No ensemble targets for {kind}") + continue + + run_preds = [r.predictions(kind, precision=self.precision) for r in models] + pred = ensemble.predict(run_preds) + + score = calculate_scores( + solution=pred_targets, + prediction=pred, + task_type=self.task_type, + metrics=[self.metric], + scoring_functions=None, + )[self.metric.name] + performance_stamp[f"ensemble_{score_name}_score"] = score + self.ensemble_history.append(performance_stamp) + + # Lastly, delete any runs that need to be deleted. We save this as the last step + # so that we have an ensemble saved that is up to date. If we do not do so, + # there could be runs deleted that are in the previous ensemble and we do not + # manage to update the ensemble due to a crash or the process being killed + # before it could be updated + self.delete_runs(deletable_runs) + + return self.ensemble_history, self.ensemble_nbest + + def requires_loss_update( + self, + runs: Sequence[Run], + ) -> list[Run]: + """Find the runs whose loss needs to be (re)calculated + + Parameters + ---------- + runs : Sequence[Run] + The runs to process + + Returns + ------- + list[Run] + The runs that require a loss to be calculated + """ + queue = [] + for run in sorted(runs, key=lambda run: run.recorded_mtimes["ensemble"]): + if run.loss == np.inf: + queue.append(run) + + elif run.was_modified(): + self.logger.debug(f"{run.id} had its predictions modified?") + queue.append(run) + + return queue + + def candidate_selection( + self, + runs: Sequence[Run], + dummies: Run | list[Run], + *, + better_than_dummy: bool = False, + nbest: int | float | None = None, + performance_range_threshold: float | None = None, + ) -> tuple[list[Run], set[Run]]: + """Get a list of candidates from `runs`, guaranteeing at least one + + Applies a set of reductions in order of parameters to reach a set of final + candidates. + + Expects at least one `dummies` run. + + Parameters + ---------- + runs : Sequence[Run] + The runs to evaluate candidates from. + + dummies: Run | Sequence[Run] + The dummy run(s) to compare against + + better_than_dummy: bool = False + Whether the run must be better than the best dummy run to be a candidate. + In the case where there are no candidates left, the dummies will then be + used. + + nbest : int | float | None + The nbest models to select. If `int`, acts as an absolute limit. + If `float`, acts as a fraction of the available candidates. + + performance_range_threshold : float | None + A number in (0, 1) to select candidates from.
Expects a dummy run for worst + + Returns + ------- + (candidates: list[Run], discarded: set[Run]) + A tuple of runs that are candidates and also those that didn't make it + """ + if isinstance(dummies, Run): + dummies = [dummies] + + assert len(dummies) > 0 and len(runs) > 0, "At least 1 real run and dummy run" + + all_discarded: set[Run] = set() + + # We filter out all runs that don't have any predictions for the ensemble + candidates, discarded = split( + runs, by=lambda run: run.pred_path("ensemble").exists() + ) + all_discarded.update(discarded) + + if len(candidates) == 0: + self.logger.debug("No runs with predictions on ensemble set, using dummies") + return dummies, all_discarded + + for run in discarded: + self.logger.warning(f"Have no ensemble predictions for {run}") + + # Get all the ones that have a tangible loss + candidates, discarded = split( + candidates, + by=lambda r: r.loss < np.inf, + ) + all_discarded.update(discarded) + + if len(candidates) == 0: + self.logger.debug("No runs with a usable loss, using dummies") + return dummies, all_discarded + + if better_than_dummy: + dummies = sorted(dummies, key=lambda r: r.loss) + dummy_cutoff = dummies[0].loss + self.logger.debug(f"Using {dummy_cutoff} to filter candidates") + + candidates, discarded = split( + candidates, + by=lambda r: r.loss < dummy_cutoff, + ) + all_discarded.update(discarded) + + # If there are no real candidates left, use the dummies + if len(candidates) == 0: + self.logger.warning( + "No models better than random - using Dummy loss!" + f"\n\tModels besides current dummy model: {len(candidates)}" + f"\n\tDummy models: {len(dummies)}", + ) + return dummies, all_discarded + + # Sort the candidates so that they are ordered by best loss, using num_run for tie + candidates = sorted(candidates, key=lambda r: (r.loss, r.num_run)) + + if nbest is not None: + # Determine how many to keep, always keeping one + if isinstance(nbest, float): + nkeep = int(len(candidates) * nbest) + else: + nkeep = nbest + + candidates, discarded = cut(candidates, nkeep) + self.logger.info(f"Discarding {len(discarded)}/{len(candidates)} runs") + + # Always preserve at least one, the best + if len(candidates) == 0: + candidates, discarded = cut(discarded, 1) + self.logger.warning("nbest too aggressive, using single best") + + all_discarded.update(discarded) + + if performance_range_threshold is not None: + x = performance_range_threshold + worst = dummies[0].loss + best = candidates[0].loss + + cutoff = x * best + (1 - x) * worst + + candidates, discarded = cut(candidates, where=lambda r: r.loss >= cutoff) + + # Always preserve at least one, the best + if len(candidates) == 0: + candidates, discarded = cut(discarded, 1) + self.logger.warning("No models in performance range, using single best") + + all_discarded.update(discarded) + + return candidates, all_discarded + + def fit_ensemble( + self, + runs: list[Run], + targets: np.ndarray, + *, + size: int | None = None, + task: int | None = None, + metric: Scorer | None = None, + precision: int | None = None, + random_state: int | np.random.RandomState | None = None, + ) -> EnsembleSelection: + """Fit an ensemble from the provided runs.
+ + Note + ---- + Expects all runs to have the "ensemble" predictions present + + Parameters + ---------- + runs: list[Run] + List of runs to build an ensemble from + + targets: np.ndarray + The targets to build the ensemble with + + size: int | None = None + The size of the ensemble to build + + task: int | None = None + The kind of task performed + + metric: Scorer | None = None + The metric to use when comparing run predictions to the targets + + precision: int | None = None + The precision with which to load run predictions + + random_state: int | RandomState | None = None + The random state to use + + Returns + ------- + ensemble: EnsembleSelection + The trained ensemble + """ + task = task if task is not None else self.task_type + size = size if size is not None else self.ensemble_size + metric = metric if metric is not None else self.metric + rs = random_state if random_state is not None else self.random_state + + ensemble = EnsembleSelection( + ensemble_size=size, + task_type=task, + metric=metric, + random_state=rs, + ) + + self.logger.debug(f"Fitting ensemble on {len(runs)} models") + start_time = time.time() + + precision = precision if precision is not None else self.precision + predictions_train = [ + run.predictions("ensemble", precision=precision) for run in runs + ] + + ensemble.fit( + predictions=predictions_train, + labels=targets, + identifiers=[run.id for run in runs], + ) + + duration = time.time() - start_time + self.logger.debug(f"Fitting the ensemble took {duration} seconds.") + return ensemble + + def requires_deletion( + self, + runs: Sequence[Run], + *, + max_models: int | None = None, + memory_limit: float | None = None, + ) -> tuple[list[Run], set[Run]]: + """Cut a list of runs into those to keep and those to delete + + If neither params are specified, this method should do nothing. + + Parameters + ---------- + runs : Sequence[Run] + The runs to check + + max_models : int | None = None + The maximum amount of models to have on disk. 
Leave `None` for no effect + + memory_limit : float | None = None + The memory limit in MB, leave `None` for no effect + + Returns + ------- + (keep: list[Run], delete: set[Run]) + The list of runs to keep and those to delete + """ + if memory_limit is None and max_models is None: + return list(runs), set() + + # Start with keep all runs and dummies, deleteing None + keep = sorted(runs, key=lambda r: (r.loss, r.num_run)) + delete: set[Run] = set() + + if max_models is not None and max_models < len(runs): + keep, to_delete = cut(keep, max_models) + + if any(to_delete): + delete.update(to_delete) + + if memory_limit is not None: + largest = max(runs, key=lambda r: r.mem_usage) + cutoff = memory_limit - largest.mem_usage + + accumulated_mem_usage = accumulate(r.mem_usage for r in runs) + + cutpoint = findwhere(accumulated_mem_usage, lambda mem: mem > cutoff) + keep, to_delete = cut(keep, cutpoint) + + if any(to_delete): + self.logger.warning( + "Limiting num of models via `memory_limit`" + f" memory_limit={memory_limit}" + f" cutoff={cutoff}" + f" largest={largest.mem_usage}" + f" remaining={len(keep)}" + f" discarded={len(to_delete)}" + ) + delete.update(to_delete) + + return keep, delete + + def loss(self, run: Run, kind: str = "ensemble") -> float: + """Calculate the loss for a run + + Parameters + ---------- + run: Run + The run to calculate the loss for + + Returns + ------- + float + The loss for the run + """ + targets = self.targets(kind) + if targets is None: + self.logger.error(f"No targets of {kind}") + return np.inf + + try: + predictions = run.predictions(kind, precision=self.precision) + loss: float = calculate_losses( # type: ignore + solution=targets, + prediction=predictions, + task_type=self.task_type, + metrics=[self.metric], + )[self.metric.name] + except Exception as e: + self.logger.error(f"Error getting loss {run}:{e}{traceback.format_exc()}") + loss = np.inf + finally: + return loss + + def delete_runs(self, runs: Iterable[Run]) -> None: + """Delete runs + + Will not delete dummy runs + + Parameters + ---------- + runs : Sequence[Run] + The runs to delete + """ + items = iter(run for run in runs if not run.is_dummy() and run.dir.exists()) + for run in items: + try: + rmtree(run.dir, atomic=True) + self.logger.info(f"Deleted files for {run}") + except Exception as e: + self.logger.error(f"Failed to delete files for {run}: \n{e}") diff --git a/autosklearn/ensemble_building/manager.py b/autosklearn/ensemble_building/manager.py new file mode 100644 index 0000000000..46ab291bc2 --- /dev/null +++ b/autosklearn/ensemble_building/manager.py @@ -0,0 +1,370 @@ +from __future__ import annotations + +from typing import Any + +import logging.handlers +import time +import traceback + +import dask.distributed +import numpy as np +from smac.callbacks import IncorporateRunResultCallback +from smac.optimizer.smbo import SMBO +from smac.runhistory.runhistory import RunInfo, RunValue +from smac.tae.base import StatusType + +from autosklearn.automl_common.common.utils.backend import Backend +from autosklearn.ensemble_building.builder import EnsembleBuilder +from autosklearn.metrics import Scorer +from autosklearn.util.logging_ import get_named_client_logger + + +class EnsembleBuilderManager(IncorporateRunResultCallback): + def __init__( + self, + backend: Backend, + dataset_name: str, + task: int, + metric: Scorer, + time_left_for_ensembles: float = np.inf, + max_iterations: int | None = None, + pynisher_context: str = "fork", + ensemble_size: int = 50, + ensemble_nbest: int | float = 50, 
+ max_models_on_disc: int | float | None = None, + seed: int = 1, + precision: int = 32, + memory_limit: int | None = None, + read_at_most: int | None = None, + logger_port: int = logging.handlers.DEFAULT_TCP_LOGGING_PORT, + random_state: int | np.random.RandomState | None = None, + start_time: float | None = None, + ): + """SMAC callback to handle ensemble building + + Parameters + ---------- + backend: Backend + backend to write and read files + + dataset_name: str + name of dataset + + task: int + Type of ML task + + metric: Scorer + Metric to compute the loss of the given predictions + + time_left_for_ensembles: float = np.inf + How much time is left for the task in seconds. + Job should finish within this allocated time + + max_iterations: int | None = None + maximal number of iterations to run this script. None indicates no limit + on iterations. + + pynisher_context: "spawn" | "fork" | "forkserver" = "fork" + The multiprocessing context for pynisher. + + ensemble_size: int = 50 + maximal size of ensemble + + ensemble_nbest: int | float = 50 + If int: consider only the n best predictions + If float: consider only this fraction of the best models + + max_models_on_disc: int | float | None = None + Defines the maximum number of models that are kept on disc. + + If int, it must be greater than or equal to 1, and dictates the max + number of models to keep. + + If float, it will be interpreted as the max megabytes allowed of + disc space. That is, if the number of ensemble candidates requires more + disc space than this float value, the worst models will be deleted to + keep within this budget. Models and predictions of the worst-performing + models will be deleted then. + + If None, the feature is disabled. It defines an upper bound on the + models that can be used in the ensemble. + + seed: int = 1 + Seed used for the individual runs + + precision: 16 | 32 | 64 | 128 = 32 + Precision of floats to read the predictions + + memory_limit: int | None = None + Memory limit in mb. If ``None``, no memory limit is enforced. + + read_at_most: int | None = None + read at most n new prediction files in each iteration. If `None`, will read + the predictions and calculate losses for all runs that require it. + + logger_port: int = DEFAULT_TCP_LOGGING_PORT + Port that receives logging records + + random_state: int | RandomState | None = None + A random state used for the ensemble selection process. + + start_time: float | None = None + DISABLED: Just using time.time() to set it + The time when this job was started, to account for any latency in job + allocation. + """ + self.time_left_for_ensembles = time_left_for_ensembles + self.backend = backend + self.dataset_name = dataset_name + self.task = task + self.metric = metric + self.ensemble_size = ensemble_size + self.ensemble_nbest = ensemble_nbest + self.max_models_on_disc = max_models_on_disc + self.seed = seed + self.precision = precision + self.max_iterations = max_iterations + self.read_at_most = read_at_most + self.memory_limit = memory_limit + self.random_state = random_state + self.logger_port = logger_port + self.pynisher_context = pynisher_context + + # Store something similar to SMAC's runhistory + self.history: list[dict[str, Any]] = [] + + # We only submit new ensembles when there is not an active ensemble job + self.futures: list[dask.distributed.Future] = [] + + # The last criterion is the number of iterations + self.iteration = 0 + + # Keep track of when we started to know when we need to finish!
+ self.start_time = time.time() + + def __call__( + self, + smbo: "SMBO", + run_info: RunInfo, + result: RunValue, + time_left: float, + ) -> None: + """ + Returns + ------- + List[Tuple[int, float, float, float]]: + A list with the performance history of this ensemble, of the form + [(pandas_timestamp, train_performance, val_performance, test_performance)] + """ + if result.status in (StatusType.STOP, StatusType.ABORT) or smbo._stop: + return + self.build_ensemble(smbo.tae_runner.client) + + def build_ensemble( + self, + dask_client: dask.distributed.Client, + ) -> None: + """Build the ensemble + + Parameters + ---------- + dask_client: dask.distributed.Client + The dask client to use + """ + # The second criteria is elapsed time + elapsed_time = time.time() - self.start_time + + logger = get_named_client_logger( + name="EnsembleBuilder", + port=self.logger_port, + ) + + # First test for termination conditions + if self.time_left_for_ensembles < elapsed_time: + logger.info( + "Terminate ensemble building as not time is left (run for {}s)".format( + elapsed_time + ), + ) + return + if self.max_iterations is not None and self.max_iterations <= self.iteration: + logger.info( + "Terminate ensemble building because of max iterations:" + f" {self.max_iterations} of {self.iteration}" + ) + return + + if len(self.futures) != 0: + if self.futures[0].done(): + result = self.futures.pop().result() + if result: + ensemble_history, self.ensemble_nbest = result + logger.debug( + f"iteration={self.iteration} @ elapsed_time={elapsed_time}" + f" has history={ensemble_history}" + ) + self.history.extend(ensemble_history) + + # Only submit new jobs if the previous ensemble job finished + if len(self.futures) == 0: + + # Add the result of the run + # On the next while iteration, no references to + # ensemble builder object, so it should be garbage collected to + # save memory while waiting for resources + # Also, notice how ensemble nbest is returned, so we don't waste + # iterations testing if the deterministic predictions size can + # be fitted in memory + try: + # Submit a Dask job from this job, to properly + # see it in the dask diagnostic dashboard + # Notice that the forked ensemble_builder_process will + # wait for the below function to be done + self.futures.append( + dask_client.submit( + EnsembleBuilderManager.fit_and_return_ensemble, + backend=self.backend, + dataset_name=self.dataset_name, + task_type=self.task, + metric=self.metric, + ensemble_size=self.ensemble_size, + ensemble_nbest=self.ensemble_nbest, + max_models_on_disc=self.max_models_on_disc, + seed=self.seed, + precision=self.precision, + memory_limit=self.memory_limit, + read_at_most=self.read_at_most, + random_state=self.random_state, + end_at=self.start_time + self.time_left_for_ensembles, + iteration=self.iteration, + pynisher_context=self.pynisher_context, + logger_port=self.logger_port, + ) + ) + + logger.info( + "{}/{} Started Ensemble builder job at {} for iteration {}.".format( + # Log the client to make sure we + # remain connected to the scheduler + self.futures[0], + dask_client, + time.strftime("%Y.%m.%d-%H.%M.%S"), + self.iteration, + ), + ) + self.iteration += 1 + except Exception as e: + exception_traceback = traceback.format_exc() + error_message = repr(e) + logger.critical(exception_traceback) + logger.critical(error_message) + + @staticmethod + def fit_and_return_ensemble( + iteration: int, + end_at: float, + backend: Backend, + dataset_name: str, + task_type: int, + metric: Scorer, + pynisher_context: str, + 
ensemble_size: int = 50, + ensemble_nbest: int | float = 50, + max_models_on_disc: int | float | None = None, + seed: int = 1, + precision: int = 32, + memory_limit: int | None = None, + read_at_most: int | None = None, + logger_port: int = logging.handlers.DEFAULT_TCP_LOGGING_PORT, + random_state: int | np.random.RandomState | None = None, + ) -> tuple[list[dict[str, Any]], int | float]: + """ + A short function to fit and create an ensemble. It is just a wrapper to easily + send a request to dask to create an ensemble and clean the memory when finished + + Parameters + ---------- + iteration: int + The current iteration + + end_at: float + At what time the job must finish. Needs to be the endtime and not the + time left because we do not know when dask schedules the job. + + backend: Backend + Backend to write and read files + + dataset_name: str + name of dataset + + task_type: int + type of ML task + + metric: Scorer + Metric to compute the loss of the given predictions + + pynisher_context: "fork" | "spawn" | "forkserver" = "fork" + Context to use for multiprocessing, can be either fork, spawn or forkserver. + + ensemble_size: int = 50 + Maximal size of ensemble + + ensemble_nbest: int | float = 50 + If int: consider only the n best prediction + If float: consider only this fraction of the best models + + max_models_on_disc: int | float | None = 100 + Defines the maximum number of models that are kept in the disc. + + If int, it must be greater or equal than 1, and dictates the max number of + models to keep. + + If float, it will be interpreted as the max megabytes allowed of disc space. + That is, if the number of ensemble candidates require more disc space than + this float value, the worst models will be deleted to keep within this + budget. Models and predictions of the worst-performing models will be + deleted then. + + If None, the feature is disabled. + + seed: int = 1 + Seed used for training the models in the backend + + precision: 16 | 32 | 64 | 128 = 32 + Precision of floats to read the predictions + + memory_limit: int | None = None + Memory limit in mb. If ``None``, no memory limit is enforced. + + read_at_most: int | None = None + read at most n new prediction files in each iteration. If `None`, will read + the predictions and calculate losses for all runs that require it. + + logger_port: int = DEFAULT_TCP_LOGGING_PORT + The port where the logging server is listening to. + + random_state: int | RandomState | None = None + A random state used for the ensemble selection process. 
+ + Returns + ------- + (ensemble_history: list[dict[str, Any]], nbest: int | float) + The ensemble history and the nbest chosen members + """ + result = EnsembleBuilder( + backend=backend, + dataset_name=dataset_name, + task_type=task_type, + metric=metric, + ensemble_size=ensemble_size, + ensemble_nbest=ensemble_nbest, + max_models_on_disc=max_models_on_disc, + seed=seed, + precision=precision, + memory_limit=memory_limit, + read_at_most=read_at_most, + random_state=random_state, + logger_port=logger_port, + ).run( + end_at=end_at, + iteration=iteration, + pynisher_context=pynisher_context, + ) + return result diff --git a/autosklearn/ensemble_building/run.py b/autosklearn/ensemble_building/run.py new file mode 100644 index 0000000000..fa73f91e45 --- /dev/null +++ b/autosklearn/ensemble_building/run.py @@ -0,0 +1,181 @@ +from __future__ import annotations + +from typing import Tuple + +import re +from pathlib import Path + +import numpy as np + +from autosklearn.util.disk import sizeof + +RunID = Tuple[int, int, float] + + +class Run: + """Class for storing information about a run used during ensemble building. + + Note + ---- + This is for internal use by the EnsembleBuilder and not for general usage. + """ + + # For matching prediction files + RE_MODEL_PREDICTION_FILE = ( + r"^predictions_ensemble_([0-9]*)_([0-9]*)_([0-9]{1,3}\.[0-9]*).npy$" + ) + + # For matching run directories + RE_MODEL_DIR = r"^([0-9]*)_([0-9]*)_([0-9]{1,3}\.[0-9]*)$" + + def __init__(self, path: Path) -> None: + """Creates a Run from a path pointing to the directory of a run + + Parameters + ---------- + path: Path + Expects something like /path/to/{seed}_{numrun}_{budget} + """ + name = path.name + seed, num_run, budget = name.split("_") + + self.dir = path + self.seed = int(seed) + self.num_run = int(num_run) + self.budget = float(budget) + + self.loss: float = np.inf + self._mem_usage: float | None = None + + # Items that will be delete when the run is saved back to file + self._cache: dict[str, np.ndarray] = {} + + # The recorded time of ensemble/test predictions modified + self.recorded_mtimes: dict[str, float] = {} + self.record_modified_times() + + @property + def mem_usage(self) -> float: + """The memory usage of this run based on it's directory""" + if self._mem_usage is None: + self._mem_usage = round(sizeof(self.dir, unit="MB"), 2) + + return self._mem_usage + + def is_dummy(self) -> bool: + """Whether this run is a dummy run or not""" + return self.num_run == 1 + + def was_modified(self) -> bool: + """Query for when the ens file was last modified""" + recorded = self.recorded_mtimes.get("ensemble") + last = self.pred_path().stat().st_mtime + return recorded != last + + def pred_path(self, kind: str = "ensemble") -> Path: + """Get the path to certain predictions""" + fname = f"predictions_{kind}_{self.seed}_{self.num_run}_{self.budget}.npy" + return self.dir / fname + + def record_modified_times(self) -> None: + """Records the last time each prediction file type was modified, if it exists""" + self.recorded_mtimes = {} + for kind in ["ensemble", "test"]: + path = self.pred_path(kind) # type: ignore + if path.exists(): + self.recorded_mtimes[kind] = path.stat().st_mtime + + def has_predictions(self, kind: str = "ensemble") -> bool: + """ + Parameters + ---------- + kind: "ensemble" | "test" = "ensemble" + The kind of predictions to query for + + Returns + ------- + bool + Whether this run has the kind of predictions queried for + """ + return self.pred_path(kind).exists() + + def predictions( + self, + 
kind: str = "ensemble", + precision: int | None = None, + ) -> np.ndarray: + """Load the predictions for this run + + Parameters + ---------- + kind : "ensemble" | "test" + The kind of predictions to load + + precision : int | None = None + If given, cast the loaded predictions to this float precision + (one of 16, 32 or 64) + + Returns + ------- + np.ndarray + The loaded predictions + """ + key = f"predictions_{kind}" + if key in self._cache: + return self._cache[key] + + path = self.pred_path(kind) + + with path.open("rb") as f: + # TODO: We should probably remove this requirement. I'm not sure why model + # predictions are being saved as pickled + predictions = np.load(f, allow_pickle=True) + + if precision: + dtypes: dict[int, type] = {16: np.float16, 32: np.float32, 64: np.float64} + dtype = dtypes.get(precision, None) + + if dtype is not None: + predictions = predictions.astype(dtype=dtype, copy=False) + + self._cache[key] = predictions + return predictions + + def __getstate__(self) -> dict: + """Remove the cache when pickling.""" + state = self.__dict__.copy() + del state["_cache"] + return state + + def __setstate__(self, state: dict) -> None: + """Reset state and instantiate a blank cache.""" + self.__dict__.update(state) + self._cache = {} + + @property + def id(self) -> RunID: + """Get the three components of its id""" + return self.seed, self.num_run, self.budget + + def __hash__(self) -> int: + return hash(self.id) + + def __repr__(self) -> str: + return f"Run(id={self.id}, loss={self.loss})" + + def __eq__(self, other: object) -> bool: + return isinstance(other, Run) and other.id == self.id + + @staticmethod + def valid(path: Path) -> bool: + """Check whether a path looks like a valid run directory + + Parameters + ---------- + path: Path + The path to check + + Returns + ------- + bool + Whether the path is a valid run dir + """ + return re.match(Run.RE_MODEL_DIR, path.name) is not None diff --git a/autosklearn/smbo.py b/autosklearn/smbo.py index 6326d3a36f..583a09e34d 100644 --- a/autosklearn/smbo.py +++ b/autosklearn/smbo.py @@ -33,7 +33,7 @@ TASK_TYPES_TO_STRING, ) from autosklearn.data.abstract_data_manager import AbstractDataManager -from autosklearn.ensemble_builder import EnsembleBuilderManager +from autosklearn.ensemble_building import EnsembleBuilderManager from autosklearn.evaluation import ExecuteTaFuncWithQueue, get_cost_of_crash from autosklearn.metalearning.metafeatures.metafeatures import ( calculate_all_metafeatures_encoded_labels, diff --git a/autosklearn/util/disk.py b/autosklearn/util/disk.py new file mode 100644 index 0000000000..279a88f0ea --- /dev/null +++ b/autosklearn/util/disk.py @@ -0,0 +1,103 @@ +from __future__ import annotations + +from typing import Any + +import math +import shutil +import tempfile +import uuid +from pathlib import Path + +sizes = { + "B": 0, + "KB": 1, + "MB": 2, + "GB": 3, + "TB": 4, +} + + +def sizeof(path: Path | str, unit: str = "B") -> float: + """Get the size of some path object + + Parameters + ---------- + path : Path | str + The path of the file or directory to get the size of + + unit : "B" | "KB" | "MB" | "GB" | "TB" = "B" + What unit to get the answer in + + Returns + ------- + float + The size of the folder/file in the given units + """ + if unit not in sizes: + raise ValueError(f"Not a known unit {unit}") + + if not isinstance(path, Path): + path = Path(path) + + if path.is_file(): + size = path.stat().st_size + else: + size = sum(f.stat().st_size for f in path.glob("**/*") if f.is_file()) + + power = sizes[unit] + return size / math.pow(1024, power) + + +def rmtree( + path: Path | str, + *, + atomic: bool =
False, + tmp: bool | Path | str = False, + **kwargs: Any, +) -> None: + """Delete a file or directory + + Parameters + ---------- + path: Path | str + The path to delete + + atomic: bool = False + Whether to delete the file/folder atomically. This is done by first + using a `move` before `rmtree`. + + The `move` is not guaranteed to be atomic if moving between + different file systems which can happen when moving to /tmp, + depending on the OS and setup. + + The deletion part is not atomic. + + * https://docs.python.org/3/library/shutil.html#shutil.move + + tmp: bool | Path | str = False + If bool, this defines whether atomic should use the tmp dir + for it's move. Otherwise, a path can be specified to use + + **kwargs + Forwarded to `rmtree` + * https://docs.python.org/3/library/shutil.html#shutil.rmtree + """ + if isinstance(path, str): + path = Path(path) + + if atomic: + if tmp is True: + dir = Path(tempfile.gettempdir()) + uid = uuid.uuid4() + mvpath = dir / f"autosklearn-{path.name}.old_{uid}" + + elif tmp is False: + uid = uuid.uuid4() + mvpath = path.parent / f"{path.name}.old_{uid}" + else: + mvpath = tmp if isinstance(tmp, Path) else Path(tmp) + + shutil.move(str(path), str(mvpath)) + shutil.rmtree(mvpath, **kwargs) + else: + shutil.rmtree(path, **kwargs) diff --git a/autosklearn/util/functional.py b/autosklearn/util/functional.py index 55f38ddf5d..e48f018305 100644 --- a/autosklearn/util/functional.py +++ b/autosklearn/util/functional.py @@ -1,54 +1,188 @@ -from typing import Optional +from __future__ import annotations -import numpy as np +from typing import Callable, Iterable, TypeVar +from functools import reduce +from itertools import chain, tee -def normalize(x: np.ndarray, axis: Optional[int] = None) -> np.ndarray: - """Normalizes an array along an axis +T = TypeVar("T") - Note - ---- - TODO: Only works for positive numbers + +def intersection(*items: Iterable[T]) -> set[T]: + """Does an intersection over all collection of items ..code:: python - x = np.ndarray([ - [1, 1, 1], - [2, 2, 2], - [7, 7, 7], - ]) + ans = intersection(["a", "b", "c"], "ab", ("b", "c")) + + items = [(1, 2, 3), (2, 3), (4, 5)] + ans = intesection(*items) + + Parameters + ---------- + *items : Iterable[T] + A list of lists + + Returns + ------- + Set[T] + The intersection of all items + """ + if len(items) == 0: + return set() + + return set(reduce(lambda s1, s2: set(s1) & set(s2), items, items[0])) - print(normalize(x, axis=0)) - np.ndarray([ - [.1, .1, .1] - [.2, .2, .2] - [.7, .7, .7] - ]) +def cut( + itr: Iterable[T], + where: int | Callable[[T], bool], +) -> tuple[list[T], list[T]]: + """Cut a list in two at a given index or predicate - print(normalize(x, axis=1)) + Parameters + ---------- + lst : Iterable[T] + An iterable of items - np.ndarray([ - [.333, .333, .333] - [.333, .333, .333] - [.333, .333, .333] - ]) + at : int | Callable[[T], bool] + Where to split at, either an index or a predicate + + Returns + ------- + tuple[list[T], list[T]] + The split items + """ + if isinstance(where, int): + lst = list(itr) + return lst[:where], lst[where:] + + a = [] + itr2 = iter(itr) + broke = False + for x in itr2: + if not where(x): + a.append(x) + else: + broke = True + break + + if broke: + return a, [x] + list(itr2) + else: + return a, [] + + +def split( + lst: Iterable[T], + by: Callable[[T], bool], +) -> tuple[list[T], list[T]]: + """Split a list in two based on a predicate. 
Note ---- - Does not account for 0 sums along an axis + First element can not contain None + + Parameters + ---------- + lst : Iterable[T] + The iterator to split + + by : Callable[[T], bool] + The predicate to split it on + + Returns + ------- + (a: list[T], b: list[T]) + a is where the func is True and b is where the func was False. + """ + a = [] + b = [] + for x in lst: + if by(x): + a.append(x) + else: + b.append(x) + + return a, b + + +def bound(val: float, bounds: tuple[float, float]) -> float: + """Bounds a value between a low and high + + .. code:: python + + x = bound(14, low=0, high=13.1) + # x == 13.1 + + Parameters + ---------- + val : float + The value to bound + + bounds: tuple[foat, float] + The bounds to bound the value between (low, high) + + Returns + ------- + float + The bounded value + """ + return max(bounds[0], min(val, bounds[1])) + + +def findwhere(itr: Iterable[T], func: Callable[[T], bool], *, default: int = -1) -> int: + """Find the index of the next occurence where func is True. Parameters ---------- - x : np.ndarray - The array to normalize + itr : Iterable[T] + The iterable to search over - axis : Optional[int] = None - The axis to normalize across + func : Callable[[T], bool] + The function to use + + default : int = -1 + The default value to give if no value was found where func was True Returns ------- - np.ndarray - The normalized array + int + The first index where func was True """ - return x / x.sum(axis=axis, keepdims=True) + return next((i for i, t in enumerate(itr) if func(t)), default) + + +def pairs(itr: Iterable[T]) -> Iterable[tuple[T, T]]: + """An iterator over pairs of items in the iterator + + ..code:: python + + # Check if sorted + if all(a < b for a, b in pairs(items)): + ... + + Parameters + ---------- + itr : Iterable[T] + An itr of items + + Returns + ------- + Iterable[tuple[T, T]] + An itr of sequential pairs of the items + """ + itr1, itr2 = tee(itr) + + # Skip first item + _ = next(itr2) + + # Check there is a second element + peek = next(itr2, None) + if peek is None: + raise ValueError("Can't create a pair from iterable with 1 item") + + # Put it back in + itr2 = chain([peek], itr2) + + return iter((a, b) for a, b in zip(itr1, itr2)) diff --git a/pyproject.toml b/pyproject.toml index 7fa3bc4111..5a131b5ec9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ [tool.pytest.ini_options] testpaths = ["test"] minversion = "3.7" -#addopts = "--cov=autosklearn" +addopts = "--forked" [tool.coverage.run] branch = true @@ -96,7 +96,6 @@ module = [ "autosklearn.automl", "autosklearn.smbo", "autosklearn.experimental.askl2", - "autosklearn.ensemble_builder", "autosklearn.ensembles.singlebest_ensemble", "autosklearn.ensembles.ensemble_selection", "autosklearn.evaluation", #__init__ diff --git a/setup.py b/setup.py index e182cd716b..84af24be33 100644 --- a/setup.py +++ b/setup.py @@ -35,7 +35,7 @@ "pytest-cov", "pytest-xdist", "pytest-timeout", - "pytest-cases", + "pytest-cases>=3.6.11", "mypy", "isort", "black", diff --git a/test/fixtures/automl.py b/test/fixtures/automl.py index abf31d304d..726424b9a0 100644 --- a/test/fixtures/automl.py +++ b/test/fixtures/automl.py @@ -7,7 +7,7 @@ from autosklearn.automl import AutoML, AutoMLClassifier, AutoMLRegressor from autosklearn.automl_common.common.utils.backend import Backend -from pytest import fixture +from pytest import FixtureRequest, fixture from unittest.mock import Mock from test.conftest import DEFAULT_SEED @@ -16,6 +16,7 @@ def _create_automl( automl_type: Type[AutoML] = AutoML, 
+ _id: str | None = None, **kwargs: Any, ) -> AutoML: """ @@ -25,6 +26,10 @@ def _create_automl( automl_type : Type[AutoML] = AutoML The type of AutoML object to use + _id: str | None = None + If no dask client is provided, a unique id is required to create one + so that it can be shut down after the test ends + **kwargs: Any Options to pass on to the AutoML type for construction @@ -39,12 +44,22 @@ def _create_automl( "per_run_time_limit": 5, "seed": DEFAULT_SEED, "n_jobs": 2, + "ensemble_size": 10, + "ensemble_nbest": 10, + "max_models_on_disc": 10, + "initial_configurations_via_metalearning": 5, } + # If a temp directory was explicitly passed, don't delete it automatically + # Normally the `tmp_path` fixutre will delete it anyways + if "temporary_directory" in kwargs: + test_defaults["delete_tmp_folder_after_terminate"] = False + opts: Dict[str, Any] = {**test_defaults, **kwargs} if "dask_client" not in opts: - client = create_test_dask_client(n_workers=opts["n_jobs"]) + assert _id is not None + client = create_test_dask_client(id=_id, n_workers=opts["n_jobs"]) opts["dask_client"] = client auto = automl_type(**opts) @@ -52,21 +67,21 @@ def _create_automl( @fixture -def make_automl() -> Callable[..., Tuple[AutoML, Callable]]: +def make_automl(request: FixtureRequest) -> Callable[..., Tuple[AutoML, Callable]]: """See `_create_automl`""" - yield partial(_create_automl, automl_type=AutoML) + yield partial(_create_automl, automl_type=AutoML, _id=request.node.nodeid) @fixture -def make_automl_classifier() -> Callable[..., AutoMLClassifier]: +def make_automl_classifier(request: FixtureRequest) -> Callable[..., AutoMLClassifier]: """See `_create_automl`""" - yield partial(_create_automl, automl_type=AutoMLClassifier) + yield partial(_create_automl, automl_type=AutoMLClassifier, _id=request.node.nodeid) @fixture -def make_automl_regressor() -> Callable[..., AutoMLRegressor]: +def make_automl_regressor(request: FixtureRequest) -> Callable[..., AutoMLRegressor]: """See `_create_automl`""" - yield partial(_create_automl, automl_type=AutoMLRegressor) + yield partial(_create_automl, automl_type=AutoMLRegressor, _id=request.node.nodeid) class AutoMLStub(AutoML): diff --git a/test/fixtures/backend.py b/test/fixtures/backend.py index 3ee4626199..2dfcc4d472 100644 --- a/test/fixtures/backend.py +++ b/test/fixtures/backend.py @@ -1,12 +1,64 @@ -from typing import Callable, Union +from __future__ import annotations + +from typing import Callable import os +from distutils.dir_util import copy_tree from pathlib import Path from autosklearn.automl_common.common.utils.backend import Backend, create +from autosklearn.data.xy_data_manager import XYDataManager from pytest import fixture +HERE = Path(__file__).parent.resolve() +DATAPATH = HERE.parent / "data" + + +def copy_backend(old: Backend | Path | str, new: Backend | Path | str) -> Backend: + """Transfers a backend to a new path + + Parameters + ---------- + old_backend: Backend | Path | str + The backend to transfer from + + new_path: Backend | Path | str + Where to place the new backend + + Returns + ------- + Backend + The new backend with the contents of the old + """ + if isinstance(new, str): + new_backend = create( + temporary_directory=new, + output_directory=None, + prefix="auto-sklearn", + ) + elif isinstance(new, Path): + new_backend = create( + temporary_directory=str(new), + output_directory=None, + prefix="auto-sklearn", + ) + else: + new_backend = new + + dst = new_backend.temporary_directory + + if isinstance(old, str): + src = old + elif 
isinstance(old, Path): + src = str(old) + else: + src = old.temporary_directory + + copy_tree(src, dst) + + return new_backend + # TODO Update to return path once everything can use a path @fixture @@ -26,7 +78,7 @@ def tmp_dir(tmp_path: Path) -> str: @fixture -def make_backend() -> Callable[..., Backend]: +def make_backend(tmp_path: Path) -> Callable[..., Backend]: """Make a backend Parameters @@ -34,21 +86,43 @@ def make_backend() -> Callable[..., Backend]: path: Union[str, Path] The path to place the backend at + template: Optional[Path] = None + Setup with a pre-existing layout if not None + Returns ------- Backend The created backend object """ # TODO redo once things use paths - def _make(path: Union[str, Path]) -> Backend: - _path = Path(path) if not isinstance(path, Path) else path - assert not _path.exists() - - backend = create( - temporary_directory=str(_path), - output_directory=None, - prefix="auto-sklearn", - ) + def _make( + path: str | Path | None = None, + template: Path | Backend | None = None, + datamanager: XYDataManager | None = None, + ) -> Backend: + if template is not None and datamanager is not None: + raise ValueError("Does not support template and datamanager") + + if path is None: + _path = Path(tmp_path) / "backend" + elif isinstance(path, str): + _path = Path(path) + else: + _path = path + + assert not _path.exists(), "Path exists, Try passing path / 'backend'" + + if template is not None: + backend = copy_backend(old=template, new=_path) + else: + backend = create( + temporary_directory=str(_path), + output_directory=None, + prefix="auto-sklearn", + ) + + if datamanager is not None: + backend.save_datamanager(datamanager) return backend diff --git a/test/fixtures/caching.py b/test/fixtures/caching.py index 936d7c35a9..d8be21aca4 100644 --- a/test/fixtures/caching.py +++ b/test/fixtures/caching.py @@ -1,20 +1,23 @@ from __future__ import annotations -from typing import Any, Callable, Optional +from typing import Any, Callable +import os import pickle import shutil -import traceback +import tempfile from functools import partial from pathlib import Path -from autosklearn.automl import AutoML +from filelock import FileLock from pytest import FixtureRequest from pytest_cases import fixture from test.conftest import AUTOSKLEARN_CACHE_NAME +LOCK_DIR = Path(tempfile.gettempdir()) + class Cache: """Used for the below fixtures. @@ -73,8 +76,10 @@ def __init__(self, key: str, cache_dir: Path, verbose: int = 0): verbose : int = 0 Whether to be verbose or not. 
Currently only has one level (> 0) """ + self.key = key self.dir = cache_dir / key self.verbose = verbose > 0 + self._lock: FileLock = None def items(self) -> list[Path]: """Get any paths associated to items in this dir""" @@ -87,7 +92,7 @@ def path(self, name: str) -> Path: """Path to an item for this cache""" return self.dir / name - def _load(self, name: str) -> Any: + def load(self, name: str) -> Any: """Load an item from the cache with a given name""" if self.verbose: print(f"Loading cached item {self.path(name)}") @@ -95,7 +100,7 @@ def _load(self, name: str) -> Any: with self.path(name).open("rb") as f: return pickle.load(f) - def _save(self, item: Any, name: str) -> None: + def save(self, item: Any, name: str) -> None: """Dump an item to cache with a name""" if self.verbose: print(f"Saving cached item {self.path(name)}") @@ -108,53 +113,28 @@ def reset(self) -> None: shutil.rmtree(self.dir) self.dir.mkdir() + def __enter__(self): + if int(os.environ.get("PYTEST_XDIST_WORKER_COUNT", 1)) <= 1: + return self + else: + path = LOCK_DIR / f"{self.key}.lock" + self._lock = FileLock(path) + self._lock.acquire(poll_interval=1.0) + if self.verbose: + print(f"locked cache {path}") -class AutoMLCache(Cache): - def save(self, model: AutoML) -> None: - """Save the model""" - self._save(model, "model") - - def model(self) -> Optional[AutoML]: - """Returns the saved model if it can. - - In the case of an issue loading an existing model file, it will delete - this cache item. - """ - if "model" not in self: - return None - - # Try to load the model, if there was an issue, delete all cached items - # for the model and return None - try: - model = self._load("model") - except Exception: - model = None - print(traceback.format_exc()) - self.reset() - finally: - return model + return self - def backend_path(self) -> Path: - """The path for the backend of the automl model""" - return self.path("backend") + def __exit__(self, *args, **kwargs): + if self._lock is not None: + self._lock.release() @fixture -def cache(request: FixtureRequest) -> Callable[[str], Cache]: +def make_cache(request: FixtureRequest) -> Callable[[str], Cache]: """Gives the access to a cache.""" pytest_cache = request.config.cache assert pytest_cache is not None cache_dir = pytest_cache.mkdir(AUTOSKLEARN_CACHE_NAME) return partial(Cache, cache_dir=cache_dir) - - -@fixture -def automl_cache(request: FixtureRequest) -> Callable[[str], AutoMLCache]: - """Gives access to an automl cache""" - pytest_cache = request.config.cache - assert pytest_cache is not None - - cache_dir = pytest_cache.mkdir(AUTOSKLEARN_CACHE_NAME) - verbosity = request.config.getoption("verbose") - return partial(AutoMLCache, cache_dir=cache_dir, verbose=verbosity) diff --git a/test/fixtures/dask.py b/test/fixtures/dask.py index 0c1f112800..40a113e9d6 100644 --- a/test/fixtures/dask.py +++ b/test/fixtures/dask.py @@ -2,6 +2,8 @@ from typing import Callable +from functools import partial + from dask.distributed import Client, get_client from pytest import FixtureRequest, fixture @@ -13,31 +15,36 @@ @fixture(autouse=True) def clean_up_any_dask_clients(request: FixtureRequest) -> None: """Auto injected fixture to close dask clients after each test""" - yield - if any(active_clients): - for adr in list(active_clients.keys()): - if request.config.getoption("verbose") > 1: - print(f"\nFixture closing dask_client at {adr}") + yield # Give control to the function + + # Initiate cleanup + id = request.node.nodeid + if id in active_clients: + if 
request.config.getoption("verbose") > 1: + print(f"\nFixture closing dask_client for {id}") - close = active_clients[adr] - close() - del active_clients[adr] + close = active_clients[id] + close() -def create_test_dask_client(n_workers: int = 2) -> Client: +def create_test_dask_client( + id: str, + n_workers: int = 2, +) -> Client: """Factory to make a Dask client and a function to close it - them + them. Parameters ---------- + id: str + An id to associate with this dask client + n_workers: int = 2 - inside asklea - inside AutoML. Returns ------- - Client, Callable - The client and a function to call to close that client + Client + The client """ # Workers are in subprocesses to not create deadlocks with the pynisher # and logging. @@ -57,13 +64,13 @@ def close() -> None: except Exception: pass - active_clients[adr] = close + active_clients[id] = close return client @fixture -def make_dask_client() -> Callable[[int], Client]: +def make_dask_client(request: FixtureRequest) -> Callable[[int], Client]: """Factory to make a Dask client and a function to close it Parameters @@ -76,7 +83,7 @@ def make_dask_client() -> Callable[[int], Client]: Client, Callable The client and a function to call to close that client """ - return create_test_dask_client + return partial(create_test_dask_client, id=request.node.nodeid) # TODO remove in favour of make_dask_client diff --git a/test/fixtures/datasets.py b/test/fixtures/datasets.py index 39d948e5a9..569241ac51 100644 --- a/test/fixtures/datasets.py +++ b/test/fixtures/datasets.py @@ -17,7 +17,6 @@ from autosklearn.data.validation import SUPPORTED_FEAT_TYPES, SUPPORTED_TARGET_TYPES from autosklearn.data.xy_data_manager import XYDataManager from autosklearn.pipeline.util import get_dataset -from autosklearn.util.functional import normalize from pytest import fixture @@ -41,7 +40,6 @@ def astype( @fixture def make_sklearn_dataset() -> Callable: """ - Parameters ---------- name : str = "iris" @@ -62,6 +60,12 @@ def make_sklearn_dataset() -> Callable: make_binary : bool = False Whether to force the data into being binary + task: Optional[int] = None + The task of the data, required for the datamanager + + feat_type: Optional[Dict | str] = None + The features types for the data if making a XYDataManager + as_datamanager: bool = False Wether to return the information as an XYDataManager @@ -77,9 +81,9 @@ def _make( train_size_maximum: int = 150, make_multilabel: bool = False, make_binary: bool = False, - as_datamanager: bool = False, task: Optional[int] = None, feat_type: Optional[Dict | str] = None, + as_datamanager: bool = False, ) -> Any: X, y, Xt, yt = get_dataset( dataset=name, @@ -93,6 +97,7 @@ def _make( if not as_datamanager: return (X, y, Xt, yt) else: + assert task is not None and feat_type is not None if isinstance(feat_type, str): feat_type = {i: feat_type for i in range(X.shape[1])} @@ -128,7 +133,7 @@ def _make_binary_data( weights = np.ones_like(classes) / len(classes) assert len(weights) == len(classes) - weights = normalize(np.asarray(weights)) + weights = weights / np.sum(weights, keepdims=True) X = rs.rand(*dims) y = rs.choice([0, 1], dims[0], p=weights) @@ -157,7 +162,7 @@ def _make_multiclass_data( weights = np.ones_like(classes) / len(classes) assert len(weights) == len(classes) - weights = normalize(np.asarray(weights)) + weights = weights / np.sum(weights, keepdims=True) X = rs.rand(*dims) y = rs.choice(classes, dims[0], p=weights) @@ -188,7 +193,7 @@ def _make_multilabel_data( weights = np.ones(classes.shape[0]) / len(classes) assert 
len(weights) == len(classes) - weights = normalize(np.asarray(weights)) + weights = weights / np.sum(weights, keepdims=True) X = rs.rand(*dims) diff --git a/test/fixtures/ensemble_building.py b/test/fixtures/ensemble_building.py new file mode 100644 index 0000000000..42dd7fbb9a --- /dev/null +++ b/test/fixtures/ensemble_building.py @@ -0,0 +1,223 @@ +from __future__ import annotations + +from typing import Any, Callable + +import math +import pickle +import sys +from pathlib import Path + +import numpy as np + +from autosklearn.automl import AutoML +from autosklearn.automl_common.common.utils.backend import Backend +from autosklearn.constants import BINARY_CLASSIFICATION +from autosklearn.data.xy_data_manager import XYDataManager +from autosklearn.ensemble_building import EnsembleBuilder, EnsembleBuilderManager, Run +from autosklearn.metrics import Scorer, accuracy + +from pytest_cases import fixture + +from test.conftest import DEFAULT_SEED + + +@fixture +def make_run(tmp_path: Path) -> Callable[..., Run]: + def _make( + id: int | None = None, + dummy: bool = False, + backend: Backend | None = None, + seed: int = DEFAULT_SEED, + modified: bool = False, + budget: float = 0.0, + loss: float | None = None, + model_size: int | None = None, + mem_usage: float | None = None, + predictions: str | list[str] | dict[str, np.ndarray] | None = "ensemble", + ) -> Run: + if dummy: + assert id is None + id = 1 + loss = loss if loss is not None else 50_000 + + if id is None: + id = np.random.randint(sys.maxsize) + + model_id = f"{seed}_{id}_{budget}" + + # Use this backend to set things up + if backend is not None: + runsdir = Path(backend.get_runs_directory()) + else: + runsdir = tmp_path + + dir = runsdir / model_id + + if not dir.exists(): + dir.mkdir() + + # Populate if None + if isinstance(predictions, str): + predictions = [predictions] + + # Convert to dict + if isinstance(predictions, list): + preds = np.asarray([[0]]) + predictions = {kind: preds for kind in predictions} + + # Write them + if isinstance(predictions, dict): + for kind, val in predictions.items(): + fname = f"predictions_{kind}_{seed}_{id}_{budget}.npy" + with (dir / fname).open("wb") as f: + np.save(f, val) + + run = Run(dir) + + if modified: + assert predictions is not None, "Can only modify if predictions" + for k, v in run.recorded_mtimes.items(): + run.recorded_mtimes[k] = v + 1e-4 + + if loss is not None: + run.loss = loss + + if mem_usage is not None: + run._mem_usage = mem_usage + + # MB + if model_size is not None: + n_bytes = int(model_size * math.pow(1024, 2)) + model_path = dir / f"{seed}.{id}.{budget}.model" + with model_path.open("wb") as f: + f.write(bytearray(n_bytes)) + + return run + + return _make + + +@fixture +def make_ensemble_builder( + make_backend: Callable[..., Backend], + make_sklearn_dataset: Callable[..., XYDataManager], +) -> Callable[..., EnsembleBuilder]: + def _make( + *, + automl: AutoML | None = None, + previous_candidates: list[Run] | None = None, + backend: Backend | None = None, + dataset_name: str = "TEST", + task_type: int = BINARY_CLASSIFICATION, + metric: Scorer = accuracy, + **kwargs: Any, + ) -> EnsembleBuilder: + + if automl: + backend = automl._backend + dataset_name = automl._dataset_name + task_type = automl._task + metric = automl._metrics[0] + kwargs = { + "ensemble_size": automl._ensemble_size, + "ensemble_nbest": automl._ensemble_nbest, + "max_models_on_disc": automl._max_models_on_disc, + "precision": automl.precision, + "read_at_most": automl._read_at_most, + "memory_limit": 
automl._memory_limit, + "logger_port": automl._logger_port, + } + + if backend is None: + backend = make_backend() + + # If there's no datamanager, just try populate it with some generic one, + if not Path(backend._get_datamanager_pickle_filename()).exists(): + datamanager = make_sklearn_dataset( + name="breast_cancer", + task=BINARY_CLASSIFICATION, + feat_type="numerical", # They're all numerical + as_datamanager=True, + ) + backend.save_datamanager(datamanager) + + # Annoyingly, some places use datamanger, some places use the file + # Hence, we take the y_train of the datamanager and use that as the + # the targets + if "Y_train" in datamanager.data: + backend.save_targets_ensemble(datamanager.data["Y_train"]) + + builder = EnsembleBuilder( + backend=backend, + dataset_name=dataset_name, + task_type=task_type, + metric=metric, + **kwargs, + ) + + if previous_candidates is not None: + with builder.previous_candidates_path.open("wb") as f: + pickle.dump({run.id: run for run in previous_candidates}, f) + + return builder + + return _make + + +@fixture +def make_ensemble_builder_manager( + make_backend: Callable[..., Backend], + make_sklearn_dataset: Callable[..., XYDataManager], +) -> Callable[..., EnsembleBuilderManager]: + """Use `make_run` to create runs for this manager + + .. code:: python + + def test_x(make_run, make_ensemble_builder_manager): + manager = make_ensemble_builder(...) + + # Will use the backend to place runs correctly + runs = make_run(predictions={"ensemble": ...}, backend=manager.backend) + + # ... test stuff + + + """ + + def _make( + *, + backend: Backend | None = None, + dataset_name: str = "TEST", + task: int = BINARY_CLASSIFICATION, + metric: Scorer = accuracy, + random_state: int | np.random.RandomState | None = DEFAULT_SEED, + **kwargs: Any, + ) -> EnsembleBuilderManager: + if backend is None: + backend = make_backend() + + if not Path(backend._get_datamanager_pickle_filename()).exists(): + datamanager = make_sklearn_dataset( + name="breast_cancer", + task=BINARY_CLASSIFICATION, + feat_type="numerical", # They're all numerical + as_datamanager=True, + ) + backend.save_datamanager(datamanager) + + # Annoyingly, some places use datamanger, some places use the file + # Hence, we take the y_train of the datamanager and use that as the + # the targets + if "Y_train" in datamanager.data: + backend.save_targets_ensemble(datamanager.data["Y_train"]) + + return EnsembleBuilderManager( + backend=backend, + dataset_name=dataset_name, + task=task, + metric=metric, + random_state=random_state, + **kwargs, + ) + + return _make diff --git a/test/mocks/logging.py b/test/mocks/logging.py index e61ca2c870..50e6578489 100644 --- a/test/mocks/logging.py +++ b/test/mocks/logging.py @@ -30,9 +30,11 @@ def __init__( self.port = port or MOCKPORT # Overwrite the logging implementations with mocks - attrs = ["debug", "info", "warning", "error", "exception", "critical", "log"] - for attr in attrs: - setattr(self, attr, Mock(return_value=None)) - - # This mock logger is enabled for all levels - setattr(self, "isEnabledFor", Mock(return_value=True)) + self.debug = Mock(return_value=None) # type: ignore + self.info = Mock(return_value=None) # type: ignore + self.warning = Mock(return_value=None) # type: ignore + self.error = Mock(return_value=None) # type: ignore + self.exception = Mock(return_value=None) # type: ignore + self.critical = Mock(return_value=None) # type: ignore + self.log = Mock(return_value=None) # type: ignore + self.isEnabledFor = Mock(return_value=True) # type: ignore 
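The file-locked Cache from test/fixtures/caching.py above is what keeps the cached AutoML cases below safe under pytest-xdist: entering the cache acquires a per-key FileLock whenever more than one xdist worker is active, so only one worker builds and pickles a given model while the others wait and then read it back. A minimal sketch of the intended pattern, using only the fixture API shown in this diff; `make_model` is a hypothetical stand-in for the concrete `make_automl_*` factories used by the real cases that follow:

def case_example(make_cache, make_model):
    # __enter__ takes a per-key FileLock only when PYTEST_XDIST_WORKER_COUNT > 1
    with make_cache("case_example") as cache:
        if "model" not in cache:
            # The first worker to hold the lock builds into the cache's backend dir
            model = make_model(temporary_directory=cache.path("backend"))
            cache.save(model, "model")
        # Every worker then loads the pickled model back out of the cache
        return cache.load("model")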
diff --git a/test/test_automl/cases.py b/test/test_automl/cases.py index a3dfa00dba..a6779d1b08 100644 --- a/test/test_automl/cases.py +++ b/test/test_automl/cases.py @@ -14,7 +14,10 @@ {fitted} - If the automl case has been fitted {cv, holdout} - Whether explicitly cv or holdout was used {no_ensemble} - Fit with no ensemble size + {cached} - If the resulting case is then cached """ +from __future__ import annotations + from typing import Callable, Tuple from pathlib import Path @@ -22,10 +25,12 @@ import numpy as np from autosklearn.automl import AutoMLClassifier, AutoMLRegressor +from autosklearn.automl_common.common.utils.backend import Backend from pytest_cases import case, parametrize -from test.fixtures.caching import AutoMLCache +from test.fixtures.backend import copy_backend +from test.fixtures.caching import Cache @case(tags=["classifier"]) @@ -53,148 +58,180 @@ def case_regressor( # ################################### # The following are fitted and cached # ################################### -@case(tags=["classifier", "fitted", "holdout"]) +@case(tags=["classifier", "fitted", "holdout", "cached"]) @parametrize("dataset", ["iris"]) -def case_classifier_fitted_holdout( - automl_cache: Callable[[str], AutoMLCache], +def case_classifier_fitted_holdout_iterative( dataset: str, + make_cache: Callable[[str], Cache], + make_backend: Callable[..., Backend], make_automl_classifier: Callable[..., AutoMLClassifier], make_sklearn_dataset: Callable[..., Tuple[np.ndarray, ...]], ) -> AutoMLClassifier: """Case of a holdout fitted classifier""" resampling_strategy = "holdout-iterative-fit" - cache = automl_cache(f"case_classifier_{resampling_strategy}_{dataset}") + key = f"case_classifier_{resampling_strategy}_{dataset}" + + # This locks the cache for this item while we check, required for pytest-xdist + with make_cache(key) as cache: + if "model" not in cache: + # Make the model in the cache + model = make_automl_classifier( + temporary_directory=cache.path("backend"), + delete_tmp_folder_after_terminate=False, + resampling_strategy=resampling_strategy, + ) - model = cache.model() - if model is not None: - return model + X, y, Xt, yt = make_sklearn_dataset(name=dataset) + model.fit(X, y, dataset_name=dataset) - X, y, Xt, yt = make_sklearn_dataset(name=dataset) + # Save the model + cache.save(model, "model") - model = make_automl_classifier( - temporary_directory=cache.path("backend"), - delete_tmp_folder_after_terminate=False, - resampling_strategy=resampling_strategy, - ) - model.fit(X, y, dataset_name=dataset) + # Try the model from the cache + model = cache.load("model") + model._backend = copy_backend(old=model._backend, new=make_backend()) - cache.save(model) return model -@case(tags=["classifier", "fitted", "cv"]) +@case(tags=["classifier", "fitted", "cv", "cached"]) @parametrize("dataset", ["iris"]) def case_classifier_fitted_cv( - automl_cache: Callable[[str], AutoMLCache], + make_cache: Callable[[str], Cache], dataset: str, + make_backend: Callable[..., Backend], make_automl_classifier: Callable[..., AutoMLClassifier], make_sklearn_dataset: Callable[..., Tuple[np.ndarray, ...]], ) -> AutoMLClassifier: """Case of a fitted cv AutoMLClassifier""" resampling_strategy = "cv" - cache = automl_cache(f"case_classifier_{resampling_strategy}_{dataset}") - - model = cache.model() - if model is not None: - return model - - X, y, Xt, yt = make_sklearn_dataset(name=dataset) - model = make_automl_classifier( - time_left_for_this_task=60, # We include some extra time for cv - per_run_time_limit=10, - 
resampling_strategy=resampling_strategy, - temporary_directory=cache.path("backend"), - delete_tmp_folder_after_terminate=False, - ) - model.fit(X, y, dataset_name=dataset) - - cache.save(model) + + key = f"case_classifier_{resampling_strategy}_{dataset}" + + # This locks the cache for this item while we check, required for pytest-xdist + with make_cache(key) as cache: + if "model" not in cache: + model = make_automl_classifier( + resampling_strategy=resampling_strategy, + temporary_directory=cache.path("backend"), + delete_tmp_folder_after_terminate=False, + time_left_for_this_task=60, # Give some more for CV + per_run_time_limit=10, + ) + + X, y, Xt, yt = make_sklearn_dataset(name=dataset) + model.fit(X, y, dataset_name=dataset) + + cache.save(model, "model") + + # Try the model from the cache + model = cache.load("model") + model._backend = copy_backend(old=model._backend, new=make_backend()) + return model -@case(tags=["regressor", "fitted", "holdout"]) +@case(tags=["regressor", "fitted", "holdout", "cached"]) @parametrize("dataset", ["boston"]) def case_regressor_fitted_holdout( - automl_cache: Callable[[str], AutoMLCache], + make_cache: Callable[[str], Cache], dataset: str, + make_backend: Callable[..., Backend], make_automl_regressor: Callable[..., AutoMLRegressor], make_sklearn_dataset: Callable[..., Tuple[np.ndarray, ...]], ) -> AutoMLRegressor: - """Case of fitted regressor with cv resampling""" + """Case of fitted regressor with holdout""" resampling_strategy = "holdout" - cache = automl_cache(f"case_regressor_{resampling_strategy}_{dataset}") - model = cache.model() - if model is not None: - return model + key = f"case_regressor_{resampling_strategy}_{dataset}" + + # This locks the cache for this item while we check, required for pytest-xdist + with make_cache(key) as cache: + if "model" not in cache: + model = make_automl_regressor( + temporary_directory=cache.path("backend"), + resampling_strategy=resampling_strategy, + delete_tmp_folder_after_terminate=False, + ) + + X, y, Xt, yt = make_sklearn_dataset(name=dataset) + model.fit(X, y, dataset_name=dataset) + + cache.save(model, "model") - X, y, Xt, yt = make_sklearn_dataset(name=dataset) - model = make_automl_regressor( - resampling_strategy=resampling_strategy, - temporary_directory=cache.path("backend"), - delete_tmp_folder_after_terminate=False, - ) - model.fit(X, y, dataset_name=dataset) + # Try the model from the cache + model = cache.load("model") + model._backend = copy_backend(old=model._backend, new=make_backend()) - cache.save(model) return model -@case(tags=["regressor", "fitted", "cv"]) +@case(tags=["regressor", "fitted", "cv", "cached"]) @parametrize("dataset", ["boston"]) def case_regressor_fitted_cv( - automl_cache: Callable[[str], AutoMLCache], + make_cache: Callable[[str], Cache], dataset: str, + make_backend: Callable[..., Backend], make_automl_regressor: Callable[..., AutoMLRegressor], make_sklearn_dataset: Callable[..., Tuple[np.ndarray, ...]], ) -> AutoMLRegressor: """Case of fitted regressor with cv resampling""" resampling_strategy = "cv" + key = f"case_regressor_{resampling_strategy}_{dataset}" - cache = automl_cache(f"case_regressor_{resampling_strategy}_{dataset}") - model = cache.model() - if model is not None: - return model + # This locks the cache for this item while we check, required for pytest-xdist + with make_cache(key) as cache: - X, y, Xt, yt = make_sklearn_dataset(name=dataset) + if "model" not in cache: + model = make_automl_regressor( + temporary_directory=cache.path("backend"), + 
resampling_strategy=resampling_strategy, + delete_tmp_folder_after_terminate=False, + time_left_for_this_task=60, # Some extra time for CV + per_run_time_limit=10, + ) - model = make_automl_regressor( - time_left_for_this_task=60, # Give some extra time for cv - per_run_time_limit=10, - temporary_directory=cache.path("backend"), - delete_tmp_folder_after_terminate=False, - resampling_strategy=resampling_strategy, - ) - model.fit(X, y, dataset_name=dataset) + X, y, Xt, yt = make_sklearn_dataset(name=dataset) + model.fit(X, y, dataset_name=dataset) + + cache.save(model, "model") + + # Try the model from the cache + model = cache.load("model") + model._backend = copy_backend(old=model._backend, new=make_backend()) - cache.save(model) return model -@case(tags=["classifier", "fitted", "no_ensemble"]) +@case(tags=["classifier", "fitted", "no_ensemble", "cached"]) @parametrize("dataset", ["iris"]) def case_classifier_fitted_no_ensemble( - automl_cache: Callable[[str], AutoMLCache], + make_cache: Callable[[str], Cache], dataset: str, + make_backend: Callable[..., Backend], make_automl_classifier: Callable[..., AutoMLClassifier], make_sklearn_dataset: Callable[..., Tuple[np.ndarray, ...]], ) -> AutoMLClassifier: """Case of a fitted classifier but enemble_size was set to 0""" - cache = automl_cache(f"case_classifier_fitted_no_ensemble_{dataset}") + key = f"case_classifier_fitted_no_ensemble_{dataset}" + + # This locks the cache for this item while we check, required for pytest-xdist + with make_cache(key) as cache: + + if "model" not in cache: + model = make_automl_classifier( + temporary_directory=cache.path("backend"), + delete_tmp_folder_after_terminate=False, + ensemble_size=0, + ) - model = cache.model() - if model is not None: - return model + X, y, Xt, yt = make_sklearn_dataset(name=dataset) + model.fit(X, y, dataset_name=dataset) - X, y, Xt, yt = make_sklearn_dataset(name=dataset) + cache.save(model, "model") - model = make_automl_classifier( - temporary_directory=cache.path("backend"), - delete_tmp_folder_after_terminate=False, - ensemble_size=0, - ) - model.fit(X, y, dataset_name=dataset) + model = cache.load("model") + model._backend = copy_backend(old=model._backend, new=make_backend()) - cache.save(model) return model diff --git a/test/test_automl/test_outputs.py b/test/test_automl/test_outputs.py index 458347c145..ba005019bd 100644 --- a/test/test_automl/test_outputs.py +++ b/test/test_automl/test_outputs.py @@ -1,6 +1,7 @@ from pathlib import Path from autosklearn.automl import AutoML +from autosklearn.ensemble_building.builder import CANDIDATES_FILENAME from pytest import mark from pytest_cases import parametrize_with_cases @@ -67,10 +68,9 @@ def test_paths_created_with_ensemble(automl: AutoML) -> None: expected = [ partial / fixture for fixture in ( - "ensemble_read_preds.pkl", - "ensemble_read_losses.pkl", "ensembles", "ensemble_history.json", + CANDIDATES_FILENAME, ) ] @@ -80,6 +80,12 @@ def test_paths_created_with_ensemble(automl: AutoML) -> None: @parametrize_with_cases("automl", cases=cases, has_tag="fitted") def test_at_least_one_model_and_predictions(automl: AutoML) -> None: + """ + Expects + ------- + * There should be at least one models saved + * Each model saved should have predictions for the ensemble + """ assert automl._backend is not None runs_dir = Path(automl._backend.get_runs_directory()) @@ -100,6 +106,11 @@ def test_at_least_one_model_and_predictions(automl: AutoML) -> None: @parametrize_with_cases("automl", cases=cases, filter=has_ensemble) def 
test_at_least_one_ensemble(automl: AutoML) -> None: + """ + Expects + ------- + * There should be at least one ensemble generated + """ assert automl._backend is not None ens_dir = Path(automl._backend.get_ensemble_dir()) diff --git a/test/test_automl/test_post_fit.py b/test/test_automl/test_post_fit.py index 674a452d02..7cc6dafb6e 100644 --- a/test/test_automl/test_post_fit.py +++ b/test/test_automl/test_post_fit.py @@ -20,7 +20,10 @@ def test_holdout_loaded_models(automl: AutoML) -> None: * The cv_models_ attr should remain None """ assert automl.ensemble_ is not None - assert set(automl.models_.keys()) == set(automl.ensemble_.identifiers_) + + ensemble_identifiers = automl.ensemble_.get_selected_model_identifiers() + + assert set(automl.models_.keys()) == set(ensemble_identifiers) assert automl.cv_models_ is None @@ -39,8 +42,11 @@ def test_cv_loaded_models(automl: AutoML) -> None: * The cv_models_ should contain the identifiers of what's in the ensemble """ assert automl.ensemble_ is not None - assert set(automl.models_.keys()) == set(automl.ensemble_.identifiers_) - assert set(automl.cv_models_.keys()) == set(automl.ensemble_.identifiers_) + + ensemble_identifiers = automl.ensemble_.get_selected_model_identifiers() + + assert set(automl.models_.keys()) == set(ensemble_identifiers) + assert set(automl.cv_models_.keys()) == set(ensemble_identifiers) @parametrize_with_cases("automl", cases=cases, has_tag=["fitted", "no_ensemble"]) diff --git a/test/test_ensemble_builder/data/.auto-sklearn/predictions_ensemble_true.npy b/test/test_ensemble_builder/data/.auto-sklearn/predictions_ensemble_true.npy deleted file mode 100644 index fee3160c86..0000000000 Binary files a/test/test_ensemble_builder/data/.auto-sklearn/predictions_ensemble_true.npy and /dev/null differ diff --git a/test/test_ensemble_builder/data/.auto-sklearn/runs/0_1_0.0/0.1.0.0.model b/test/test_ensemble_builder/data/.auto-sklearn/runs/0_1_0.0/0.1.0.0.model deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/test/test_ensemble_builder/data/.auto-sklearn/runs/0_1_0.0/predictions_ensemble_0_1_0.0.npy b/test/test_ensemble_builder/data/.auto-sklearn/runs/0_1_0.0/predictions_ensemble_0_1_0.0.npy deleted file mode 100644 index 1b2320113d..0000000000 Binary files a/test/test_ensemble_builder/data/.auto-sklearn/runs/0_1_0.0/predictions_ensemble_0_1_0.0.npy and /dev/null differ diff --git a/test/test_ensemble_builder/data/.auto-sklearn/runs/0_1_0.0/predictions_test_0_1_0.0.npy b/test/test_ensemble_builder/data/.auto-sklearn/runs/0_1_0.0/predictions_test_0_1_0.0.npy deleted file mode 100644 index 1b2320113d..0000000000 Binary files a/test/test_ensemble_builder/data/.auto-sklearn/runs/0_1_0.0/predictions_test_0_1_0.0.npy and /dev/null differ diff --git a/test/test_ensemble_builder/data/.auto-sklearn/runs/0_1_0.0/predictions_valid_0_1_0.0.npy b/test/test_ensemble_builder/data/.auto-sklearn/runs/0_1_0.0/predictions_valid_0_1_0.0.npy deleted file mode 100644 index 1b2320113d..0000000000 Binary files a/test/test_ensemble_builder/data/.auto-sklearn/runs/0_1_0.0/predictions_valid_0_1_0.0.npy and /dev/null differ diff --git a/test/test_ensemble_builder/data/.auto-sklearn/runs/0_2_0.0/0.2.0.0.model b/test/test_ensemble_builder/data/.auto-sklearn/runs/0_2_0.0/0.2.0.0.model deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/test/test_ensemble_builder/data/.auto-sklearn/runs/0_2_0.0/predictions_ensemble_0_2_0.0.npy b/test/test_ensemble_builder/data/.auto-sklearn/runs/0_2_0.0/predictions_ensemble_0_2_0.0.npy deleted file 
mode 100644 index fee3160c86..0000000000 Binary files a/test/test_ensemble_builder/data/.auto-sklearn/runs/0_2_0.0/predictions_ensemble_0_2_0.0.npy and /dev/null differ diff --git a/test/test_ensemble_builder/data/.auto-sklearn/runs/0_2_0.0/predictions_test_0_2_0.0.np b/test/test_ensemble_builder/data/.auto-sklearn/runs/0_2_0.0/predictions_test_0_2_0.0.np deleted file mode 100644 index fee3160c86..0000000000 Binary files a/test/test_ensemble_builder/data/.auto-sklearn/runs/0_2_0.0/predictions_test_0_2_0.0.np and /dev/null differ diff --git a/test/test_ensemble_builder/data/.auto-sklearn/runs/0_2_0.0/predictions_test_0_2_0.0.npy b/test/test_ensemble_builder/data/.auto-sklearn/runs/0_2_0.0/predictions_test_0_2_0.0.npy deleted file mode 100644 index fee3160c86..0000000000 Binary files a/test/test_ensemble_builder/data/.auto-sklearn/runs/0_2_0.0/predictions_test_0_2_0.0.npy and /dev/null differ diff --git a/test/test_ensemble_builder/data/.auto-sklearn/runs/0_2_0.0/predictions_valid_0_2_0.0.npy b/test/test_ensemble_builder/data/.auto-sklearn/runs/0_2_0.0/predictions_valid_0_2_0.0.npy deleted file mode 100644 index fee3160c86..0000000000 Binary files a/test/test_ensemble_builder/data/.auto-sklearn/runs/0_2_0.0/predictions_valid_0_2_0.0.npy and /dev/null differ diff --git a/test/test_ensemble_builder/data/.auto-sklearn/runs/0_3_100.0/0.3.0.0.model b/test/test_ensemble_builder/data/.auto-sklearn/runs/0_3_100.0/0.3.0.0.model deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/test/test_ensemble_builder/data/.auto-sklearn/runs/0_3_100.0/0.3.100.0.model b/test/test_ensemble_builder/data/.auto-sklearn/runs/0_3_100.0/0.3.100.0.model deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/test/test_ensemble_builder/data/.auto-sklearn/runs/0_3_100.0/predictions_ensemble_0_3_100.0.npy b/test/test_ensemble_builder/data/.auto-sklearn/runs/0_3_100.0/predictions_ensemble_0_3_100.0.npy deleted file mode 100644 index fee3160c86..0000000000 Binary files a/test/test_ensemble_builder/data/.auto-sklearn/runs/0_3_100.0/predictions_ensemble_0_3_100.0.npy and /dev/null differ diff --git a/test/test_ensemble_builder/data/.auto-sklearn/runs/0_3_100.0/predictions_test_0_3_100.0.npy b/test/test_ensemble_builder/data/.auto-sklearn/runs/0_3_100.0/predictions_test_0_3_100.0.npy deleted file mode 100644 index fee3160c86..0000000000 Binary files a/test/test_ensemble_builder/data/.auto-sklearn/runs/0_3_100.0/predictions_test_0_3_100.0.npy and /dev/null differ diff --git a/test/test_ensemble_builder/data/.auto-sklearn/runs/0_3_100.0/predictions_valid_0_3_100.0.npy b/test/test_ensemble_builder/data/.auto-sklearn/runs/0_3_100.0/predictions_valid_0_3_100.0.npy deleted file mode 100644 index fee3160c86..0000000000 Binary files a/test/test_ensemble_builder/data/.auto-sklearn/runs/0_3_100.0/predictions_valid_0_3_100.0.npy and /dev/null differ diff --git a/test/test_ensemble_builder/ensemble_utils.py b/test/test_ensemble_builder/ensemble_utils.py deleted file mode 100644 index 7a3cd7f252..0000000000 --- a/test/test_ensemble_builder/ensemble_utils.py +++ /dev/null @@ -1,116 +0,0 @@ -import os -import shutil - -import numpy as np - -from autosklearn.automl_common.common.ensemble_building.abstract_ensemble import ( - AbstractEnsemble, -) -from autosklearn.ensemble_builder import EnsembleBuilder -from autosklearn.metrics import make_scorer - -import unittest -import unittest.mock - - -def scorer_function(a, b): - return 0.9 - - -MockMetric = make_scorer("mock", scorer_function) - - -class BackendMock(object): - 
def __init__(self, target_directory): - this_directory = os.path.abspath(os.path.dirname(__file__)) - shutil.copytree( - os.path.join(this_directory, "data"), os.path.join(target_directory) - ) - self.temporary_directory = target_directory - self.internals_directory = os.path.join( - self.temporary_directory, ".auto-sklearn" - ) - - def load_datamanager(self): - manager = unittest.mock.Mock() - manager.__reduce__ = lambda self: (unittest.mock.MagicMock, ()) - array = np.load( - os.path.join( - self.temporary_directory, - ".auto-sklearn", - "runs", - "0_3_100.0", - "predictions_test_0_3_100.0.npy", - ) - ) - manager.data.get.return_value = array - return manager - - def load_targets_ensemble(self): - with open( - os.path.join( - self.temporary_directory, - ".auto-sklearn", - "predictions_ensemble_true.npy", - ), - "rb", - ) as fp: - y = np.load(fp, allow_pickle=True) - return y - - def save_ensemble(self, ensemble, index_run, seed): - return - - def save_predictions_as_txt(self, predictions, subset, idx, prefix, precision): - return - - def get_runs_directory(self) -> str: - return os.path.join(self.temporary_directory, ".auto-sklearn", "runs") - - def get_numrun_directory(self, seed: int, num_run: int, budget: float) -> str: - return os.path.join( - self.get_runs_directory(), "%d_%d_%s" % (seed, num_run, budget) - ) - - def get_model_filename(self, seed: int, idx: int, budget: float) -> str: - return "%s.%s.%s.model" % (seed, idx, budget) - - -def compare_read_preds(read_preds1, read_preds2): - """ - compares read_preds attribute. An alternative to - assert Dict Equal as it contains np arrays, so we have - to use np testing utilities accordingly - """ - - # Both arrays should have the same splits - assert set(read_preds1.keys()) == set(read_preds2.keys()) - - for k, v in read_preds1.items(): - - # Each split should have the same elements - assert set(read_preds1[k].keys()) == set(read_preds2[k].keys()) - - # This level contains the scores/ensmebles/etc - for actual_k, actual_v in read_preds1[k].items(): - - # If it is a numpy array, make sure it is the same - if type(actual_v) is np.ndarray: - np.testing.assert_array_equal(actual_v, read_preds2[k][actual_k]) - else: - assert actual_v == read_preds2[k][actual_k] - - -class EnsembleBuilderMemMock(EnsembleBuilder): - def fit_ensemble(self, selected_keys): - return True - - def predict( - self, - set_: str, - ensemble: AbstractEnsemble, - selected_keys: list, - n_preds: int, - index_run: int, - ): - np.ones([10000000, 1000000]) diff --git a/test/test_ensemble_builder/test_ensemble.py b/test/test_ensemble_builder/test_ensemble.py deleted file mode 100644 index 469f617fb0..0000000000 --- a/test/test_ensemble_builder/test_ensemble.py +++ /dev/null @@ -1,780 +0,0 @@ -import os -import pickle -import shutil -import sys -import time - -import dask.distributed -import numpy as np -import pandas as pd -from smac.runhistory.runhistory import RunHistory, RunKey, RunValue - -from autosklearn.constants import BINARY_CLASSIFICATION, MULTILABEL_CLASSIFICATION -from autosklearn.ensemble_builder import ( - Y_ENSEMBLE, - Y_TEST, - Y_VALID, - EnsembleBuilder, - EnsembleBuilderManager, -) -from autosklearn.ensembles.singlebest_ensemble import SingleBest -from autosklearn.metrics import accuracy, log_loss, roc_auc - -import pytest -import unittest.mock - -this_directory = os.path.dirname(__file__) -sys.path.append(this_directory) -from ensemble_utils import ( # noqa (E402: module level import not at top of file) - BackendMock, - EnsembleBuilderMemMock, - 
MockMetric, - compare_read_preds, -) - - -@pytest.fixture(scope="function") -def ensemble_backend(request): - test_id = "%s_%s" % (request.module.__name__, request.node.name) - test_dir = os.path.join(this_directory, test_id) - - try: - shutil.rmtree(test_dir) - except: # noqa E722 - pass - - # Make sure the folders we wanna create do not already exist. - backend = BackendMock(test_dir) - - def get_finalizer(ensemble_backend): - def session_run_at_end(): - try: - shutil.rmtree(test_dir) - except: # noqa E722 - pass - - return session_run_at_end - - request.addfinalizer(get_finalizer(backend)) - - return backend - - -@pytest.fixture(scope="function") -def ensemble_run_history(request): - - run_history = RunHistory() - run_history._add( - RunKey( - config_id=3, instance_id='{"task_id": "breast_cancer"}', seed=1, budget=3.0 - ), - RunValue( - cost=0.11347517730496459, - time=0.21858787536621094, - status=None, - starttime=time.time(), - endtime=time.time(), - additional_info={ - "duration": 0.20323538780212402, - "num_run": 3, - "configuration_origin": "Random Search", - }, - ), - status=None, - origin=None, - ) - run_history._add( - RunKey( - config_id=6, instance_id='{"task_id": "breast_cancer"}', seed=1, budget=6.0 - ), - RunValue( - cost=2 * 0.11347517730496459, - time=2 * 0.21858787536621094, - status=None, - starttime=time.time(), - endtime=time.time(), - additional_info={ - "duration": 0.20323538780212402, - "num_run": 6, - "configuration_origin": "Random Search", - }, - ), - status=None, - origin=None, - ) - return run_history - - -def testRead(ensemble_backend): - - ensbuilder = EnsembleBuilder( - backend=ensemble_backend, - dataset_name="TEST", - task_type=BINARY_CLASSIFICATION, - metric=roc_auc, - seed=0, # important to find the test files - ) - - success = ensbuilder.compute_loss_per_model() - assert success, str(ensbuilder.read_preds) - assert len(ensbuilder.read_preds) == 3, ensbuilder.read_preds.keys() - assert len(ensbuilder.read_losses) == 3, ensbuilder.read_losses.keys() - - filename = os.path.join( - ensemble_backend.temporary_directory, - ".auto-sklearn/runs/0_1_0.0/predictions_ensemble_0_1_0.0.npy", - ) - assert ensbuilder.read_losses[filename]["ens_loss"] == 0.5 - - filename = os.path.join( - ensemble_backend.temporary_directory, - ".auto-sklearn/runs/0_2_0.0/predictions_ensemble_0_2_0.0.npy", - ) - assert ensbuilder.read_losses[filename]["ens_loss"] == 0.0 - - -@pytest.mark.parametrize( - "ensemble_nbest,max_models_on_disc,exp", - ( - (1, None, 1), - (1.0, None, 2), - (0.1, None, 1), - (0.9, None, 1), - (1, 2, 1), - (2, 1, 1), - ), -) -def testNBest(ensemble_backend, ensemble_nbest, max_models_on_disc, exp): - ensbuilder = EnsembleBuilder( - backend=ensemble_backend, - dataset_name="TEST", - task_type=BINARY_CLASSIFICATION, - metric=roc_auc, - seed=0, # important to find the test files - ensemble_nbest=ensemble_nbest, - max_models_on_disc=max_models_on_disc, - ) - - ensbuilder.compute_loss_per_model() - sel_keys = ensbuilder.get_n_best_preds() - - assert len(sel_keys) == exp - - fixture = os.path.join( - ensemble_backend.temporary_directory, - ".auto-sklearn/runs/0_2_0.0/predictions_ensemble_0_2_0.0.npy", - ) - assert sel_keys[0] == fixture - - -@pytest.mark.parametrize( - "test_case,exp", - [ - # If None, no reduction - (None, 2), - # If Int, limit only on exceed - (4, 2), - (1, 1), - # If Float, translate float to # models. - # below, mock of each file is 100 Mb and 4 files .model and .npy (test/val/pred) - # per run (except for run3, there they are 5). 
Now, it takes 500MB for run 3 and - # another 500 MB of slack because we keep as much space as the largest model - # available as slack - (1499.0, 1), - (1500.0, 2), - (9999.0, 2), - ], -) -def testMaxModelsOnDisc(ensemble_backend, test_case, exp): - ensemble_nbest = 4 - ensbuilder = EnsembleBuilder( - backend=ensemble_backend, - dataset_name="TEST", - task_type=BINARY_CLASSIFICATION, - metric=roc_auc, - seed=0, # important to find the test files - ensemble_nbest=ensemble_nbest, - max_models_on_disc=test_case, - ) - - with unittest.mock.patch("os.path.getsize") as mock: - mock.return_value = 100 * 1024 * 1024 - ensbuilder.compute_loss_per_model() - sel_keys = ensbuilder.get_n_best_preds() - assert len(sel_keys) == exp, test_case - - -def testMaxModelsOnDisc2(ensemble_backend): - # Test for Extreme scenarios - # Make sure that the best predictions are kept - ensbuilder = EnsembleBuilder( - backend=ensemble_backend, - dataset_name="TEST", - task_type=BINARY_CLASSIFICATION, - metric=roc_auc, - seed=0, # important to find the test files - ensemble_nbest=50, - max_models_on_disc=10000.0, - ) - ensbuilder.read_preds = {} - for i in range(50): - ensbuilder.read_losses["pred" + str(i)] = { - "ens_loss": -i * 10, - "num_run": i, - "loaded": 1, - "seed": 1, - "disc_space_cost_mb": 50 * i, - } - ensbuilder.read_preds["pred" + str(i)] = {Y_ENSEMBLE: True} - sel_keys = ensbuilder.get_n_best_preds() - assert ["pred49", "pred48", "pred47"] == sel_keys - - # Make sure at least one model is kept alive - ensbuilder.max_models_on_disc = 0.0 - sel_keys = ensbuilder.get_n_best_preds() - assert ["pred49"] == sel_keys - - -@pytest.mark.parametrize( - "performance_range_threshold,exp", - ((0.0, 4), (0.1, 4), (0.3, 3), (0.5, 2), (0.6, 2), (0.8, 1), (1.0, 1), (1, 1)), -) -def testPerformanceRangeThreshold(ensemble_backend, performance_range_threshold, exp): - ensbuilder = EnsembleBuilder( - backend=ensemble_backend, - dataset_name="TEST", - task_type=BINARY_CLASSIFICATION, - metric=roc_auc, - seed=0, # important to find the test files - ensemble_nbest=100, - performance_range_threshold=performance_range_threshold, - ) - ensbuilder.read_losses = { - "A": {"ens_loss": -1, "num_run": 1, "loaded": -1, "seed": 1}, - "B": {"ens_loss": -2, "num_run": 2, "loaded": -1, "seed": 1}, - "C": {"ens_loss": -3, "num_run": 3, "loaded": -1, "seed": 1}, - "D": {"ens_loss": -4, "num_run": 4, "loaded": -1, "seed": 1}, - "E": {"ens_loss": -5, "num_run": 5, "loaded": -1, "seed": 1}, - } - ensbuilder.read_preds = { - key: {key_2: True for key_2 in (Y_ENSEMBLE, Y_VALID, Y_TEST)} - for key in ensbuilder.read_losses - } - sel_keys = ensbuilder.get_n_best_preds() - - assert len(sel_keys) == exp - - -@pytest.mark.parametrize( - "performance_range_threshold,ensemble_nbest,exp", - ( - (0.0, 1, 1), - (0.0, 1.0, 4), - (0.1, 2, 2), - (0.3, 4, 3), - (0.5, 1, 1), - (0.6, 10, 2), - (0.8, 0.5, 1), - (1, 1.0, 1), - ), -) -def testPerformanceRangeThresholdMaxBest( - ensemble_backend, performance_range_threshold, ensemble_nbest, exp -): - ensbuilder = EnsembleBuilder( - backend=ensemble_backend, - dataset_name="TEST", - task_type=BINARY_CLASSIFICATION, - metric=roc_auc, - seed=0, # important to find the test files - ensemble_nbest=ensemble_nbest, - performance_range_threshold=performance_range_threshold, - max_models_on_disc=None, - ) - ensbuilder.read_losses = { - "A": {"ens_loss": -1, "num_run": 1, "loaded": -1, "seed": 1}, - "B": {"ens_loss": -2, "num_run": 2, "loaded": -1, "seed": 1}, - "C": {"ens_loss": -3, "num_run": 3, "loaded": -1, "seed": 1}, - 
"D": {"ens_loss": -4, "num_run": 4, "loaded": -1, "seed": 1}, - "E": {"ens_loss": -5, "num_run": 5, "loaded": -1, "seed": 1}, - } - ensbuilder.read_preds = { - key: {key_2: True for key_2 in (Y_ENSEMBLE, Y_VALID, Y_TEST)} - for key in ensbuilder.read_losses - } - sel_keys = ensbuilder.get_n_best_preds() - - assert len(sel_keys) == exp - - -def testFallBackNBest(ensemble_backend): - - ensbuilder = EnsembleBuilder( - backend=ensemble_backend, - dataset_name="TEST", - task_type=BINARY_CLASSIFICATION, - metric=roc_auc, - seed=0, # important to find the test files - ensemble_nbest=1, - ) - - ensbuilder.compute_loss_per_model() - print() - print(ensbuilder.read_preds.keys()) - print(ensbuilder.read_losses.keys()) - print(ensemble_backend.temporary_directory) - - filename = os.path.join( - ensemble_backend.temporary_directory, - ".auto-sklearn/runs/0_2_0.0/predictions_ensemble_0_2_0.0.npy", - ) - ensbuilder.read_losses[filename]["ens_loss"] = -1 - - filename = os.path.join( - ensemble_backend.temporary_directory, - ".auto-sklearn/runs/0_3_100.0/predictions_ensemble_0_3_100.0.npy", - ) - ensbuilder.read_losses[filename]["ens_loss"] = -1 - - filename = os.path.join( - ensemble_backend.temporary_directory, - ".auto-sklearn/runs/0_1_0.0/predictions_ensemble_0_1_0.0.npy", - ) - ensbuilder.read_losses[filename]["ens_loss"] = -1 - - sel_keys = ensbuilder.get_n_best_preds() - - fixture = os.path.join( - ensemble_backend.temporary_directory, - ".auto-sklearn/runs/0_1_0.0/predictions_ensemble_0_1_0.0.npy", - ) - assert len(sel_keys) == 1 - assert sel_keys[0] == fixture - - -def testGetValidTestPreds(ensemble_backend): - - ensbuilder = EnsembleBuilder( - backend=ensemble_backend, - dataset_name="TEST", - task_type=BINARY_CLASSIFICATION, - metric=roc_auc, - seed=0, # important to find the test files - ensemble_nbest=1, - ) - - ensbuilder.compute_loss_per_model() - - # d1 is a dummt prediction. d2 and d3 have the same prediction with - # different name. 
num_run=2 is selected when doing sorted() - d1 = os.path.join( - ensemble_backend.temporary_directory, - ".auto-sklearn/runs/0_1_0.0/predictions_ensemble_0_1_0.0.npy", - ) - d2 = os.path.join( - ensemble_backend.temporary_directory, - ".auto-sklearn/runs/0_2_0.0/predictions_ensemble_0_2_0.0.npy", - ) - d3 = os.path.join( - ensemble_backend.temporary_directory, - ".auto-sklearn/runs/0_3_100.0/predictions_ensemble_0_3_100.0.npy", - ) - - sel_keys = ensbuilder.get_n_best_preds() - assert len(sel_keys) == 1 - ensbuilder.get_valid_test_preds(selected_keys=sel_keys) - - # Number of read files should be three and - # predictions_ensemble_0_4_0.0.npy must not be in there - assert len(ensbuilder.read_preds) == 3 - assert ( - os.path.join( - ensemble_backend.temporary_directory, - ".auto-sklearn/runs/0_4_0.0/predictions_ensemble_0_4_0.0.npy", - ) - not in ensbuilder.read_preds - ) - - # not selected --> should still be None - assert ensbuilder.read_preds[d1][Y_VALID] is None - assert ensbuilder.read_preds[d1][Y_TEST] is None - assert ensbuilder.read_preds[d3][Y_VALID] is None - assert ensbuilder.read_preds[d3][Y_TEST] is None - - # selected --> read valid and test predictions - assert ensbuilder.read_preds[d2][Y_VALID] is not None - assert ensbuilder.read_preds[d2][Y_TEST] is not None - - -def testEntireEnsembleBuilder(ensemble_backend): - - ensbuilder = EnsembleBuilder( - backend=ensemble_backend, - dataset_name="TEST", - task_type=BINARY_CLASSIFICATION, - metric=roc_auc, - seed=0, # important to find the test files - ensemble_nbest=2, - ) - ensbuilder.SAVE2DISC = False - - ensbuilder.compute_loss_per_model() - - d2 = os.path.join( - ensemble_backend.temporary_directory, - ".auto-sklearn/runs/0_2_0.0/predictions_ensemble_0_2_0.0.npy", - ) - - sel_keys = ensbuilder.get_n_best_preds() - assert len(sel_keys) > 0 - - ensemble = ensbuilder.fit_ensemble(selected_keys=sel_keys) - print(ensemble, sel_keys) - - n_sel_valid, n_sel_test = ensbuilder.get_valid_test_preds(selected_keys=sel_keys) - - # both valid and test prediction files are available - assert len(n_sel_valid) > 0 - assert n_sel_valid == n_sel_test - - y_valid = ensbuilder.predict( - set_="valid", - ensemble=ensemble, - selected_keys=n_sel_valid, - n_preds=len(sel_keys), - index_run=1, - ) - y_test = ensbuilder.predict( - set_="test", - ensemble=ensemble, - selected_keys=n_sel_test, - n_preds=len(sel_keys), - index_run=1, - ) - - # predictions for valid and test are the same - # --> should results in the same predictions - np.testing.assert_array_almost_equal(y_valid, y_test) - - # since d2 provides perfect predictions - # it should get a higher weight - # so that y_valid should be exactly y_valid_d2 - y_valid_d2 = ensbuilder.read_preds[d2][Y_VALID][:, 1] - np.testing.assert_array_almost_equal(y_valid, y_valid_d2) - - -def test_main(ensemble_backend): - - ensbuilder = EnsembleBuilder( - backend=ensemble_backend, - dataset_name="TEST", - task_type=MULTILABEL_CLASSIFICATION, # Multilabel Classification - metric=roc_auc, - seed=0, # important to find the test files - ensemble_nbest=2, - max_models_on_disc=None, - ) - ensbuilder.SAVE2DISC = False - - run_history, ensemble_nbest, _, _, _ = ensbuilder.main( - time_left=np.inf, - iteration=1, - return_predictions=False, - ) - - assert len(ensbuilder.read_preds) == 3 - assert ensbuilder.last_hash is not None - assert ensbuilder.y_true_ensemble is not None - - # Make sure the run history is ok - - # We expect at least 1 element to be in the ensemble - assert len(run_history) > 0 - - # As the data loader 
loads the same val/train/test - # we expect 1.0 as score and all keys available - expected_performance = { - "ensemble_val_score": 1.0, - "ensemble_test_score": 1.0, - "ensemble_optimization_score": 1.0, - } - - # Make sure that expected performance is a subset of the run history - assert all(item in run_history[0].items() for item in expected_performance.items()) - assert "Timestamp" in run_history[0] - assert isinstance(run_history[0]["Timestamp"], pd.Timestamp) - - assert os.path.exists( - os.path.join(ensemble_backend.internals_directory, "ensemble_read_preds.pkl") - ), os.listdir(ensemble_backend.internals_directory) - assert os.path.exists( - os.path.join(ensemble_backend.internals_directory, "ensemble_read_losses.pkl") - ), os.listdir(ensemble_backend.internals_directory) - - -def test_run_end_at(ensemble_backend): - with unittest.mock.patch("pynisher.enforce_limits") as pynisher_mock: - ensbuilder = EnsembleBuilder( - backend=ensemble_backend, - dataset_name="TEST", - task_type=MULTILABEL_CLASSIFICATION, # Multilabel Classification - metric=roc_auc, - seed=0, # important to find the test files - ensemble_nbest=2, - max_models_on_disc=None, - ) - ensbuilder.SAVE2DISC = False - - current_time = time.time() - - ensbuilder.run( - end_at=current_time + 10, iteration=1, pynisher_context="forkserver" - ) - # 4 seconds left because: 10 seconds - 5 seconds overhead - little overhead - # but then rounded to an integer - assert pynisher_mock.call_args_list[0][1]["wall_time_in_s"], 4 - - -def testLimit(ensemble_backend): - ensbuilder = EnsembleBuilderMemMock( - backend=ensemble_backend, - dataset_name="TEST", - task_type=BINARY_CLASSIFICATION, - metric=roc_auc, - seed=0, # important to find the test files - ensemble_nbest=10, - # small to trigger MemoryException - memory_limit=100, - ) - ensbuilder.SAVE2DISC = False - - read_losses_file = os.path.join( - ensemble_backend.internals_directory, "ensemble_read_losses.pkl" - ) - read_preds_file = os.path.join( - ensemble_backend.internals_directory, "ensemble_read_preds.pkl" - ) - - def mtime_mock(filename): - mtimes = { - "predictions_ensemble_0_1_0.0.npy": 0, - "predictions_valid_0_1_0.0.npy": 0.1, - "predictions_test_0_1_0.0.npy": 0.2, - "predictions_ensemble_0_2_0.0.npy": 1, - "predictions_valid_0_2_0.0.npy": 1.1, - "predictions_test_0_2_0.0.npy": 1.2, - "predictions_ensemble_0_3_100.0.npy": 2, - "predictions_valid_0_3_100.0.npy": 2.1, - "predictions_test_0_3_100.0.npy": 2.2, - } - return mtimes[os.path.split(filename)[1]] - - with unittest.mock.patch( - "logging.getLogger" - ) as get_logger_mock, unittest.mock.patch( - "logging.config.dictConfig" - ) as _, unittest.mock.patch( - "os.path.getmtime" - ) as mtime: - logger_mock = unittest.mock.Mock() - logger_mock.handlers = [] - get_logger_mock.return_value = logger_mock - mtime.side_effect = mtime_mock - - ensbuilder.run(time_left=1000, iteration=0, pynisher_context="fork") - assert os.path.exists(read_losses_file) - assert not os.path.exists(read_preds_file) - assert logger_mock.warning.call_count == 1 - ensbuilder.run(time_left=1000, iteration=0, pynisher_context="fork") - assert os.path.exists(read_losses_file) - assert not os.path.exists(read_preds_file) - assert logger_mock.warning.call_count == 2 - ensbuilder.run(time_left=1000, iteration=0, pynisher_context="fork") - assert os.path.exists(read_losses_file) - assert not os.path.exists(read_preds_file) - assert logger_mock.warning.call_count == 3 - - # it should try to reduce ensemble_nbest until it also failed at 2 - assert 
ensbuilder.ensemble_nbest == 1 - - ensbuilder.run(time_left=1000, iteration=0, pynisher_context="fork") - assert os.path.exists(read_losses_file) - assert not os.path.exists(read_preds_file) - assert logger_mock.warning.call_count == 4 - - # it should next reduce the number of models to read at most - assert ensbuilder.read_at_most == 1 - - # And then it still runs, but basically won't do anything any more except for - # raising error messages via the logger - ensbuilder.run(time_left=1000, iteration=0, pynisher_context="fork") - assert os.path.exists(read_losses_file) - assert not os.path.exists(read_preds_file) - assert logger_mock.warning.call_count == 4 - - # In the previous assert, reduction is tried until failure - # So that means we should have more than 1 memoryerror message - assert logger_mock.error.call_count >= 1, "{}".format( - logger_mock.error.call_args_list - ) - for i in range(len(logger_mock.error.call_args_list)): - assert "Memory Exception -- Unable to further reduce" in str( - logger_mock.error.call_args_list[i] - ) - - -def test_read_pickle_read_preds(ensemble_backend): - """ - This procedure test that we save the read predictions before - destroying the ensemble builder and that we are able to read - them safely after - """ - ensbuilder = EnsembleBuilder( - backend=ensemble_backend, - dataset_name="TEST", - task_type=MULTILABEL_CLASSIFICATION, # Multilabel Classification - metric=roc_auc, - seed=0, # important to find the test files - ensemble_nbest=2, - max_models_on_disc=None, - ) - ensbuilder.SAVE2DISC = False - - ensbuilder.main(time_left=np.inf, iteration=1, return_predictions=False) - - # Check that the memory was created - ensemble_memory_file = os.path.join( - ensemble_backend.internals_directory, "ensemble_read_preds.pkl" - ) - assert os.path.exists(ensemble_memory_file) - - # Make sure we pickle the correct read preads and hash - with (open(ensemble_memory_file, "rb")) as memory: - read_preds, last_hash = pickle.load(memory) - - compare_read_preds(read_preds, ensbuilder.read_preds) - assert last_hash == ensbuilder.last_hash - - ensemble_memory_file = os.path.join( - ensemble_backend.internals_directory, "ensemble_read_losses.pkl" - ) - assert os.path.exists(ensemble_memory_file) - - # Make sure we pickle the correct read scores - with (open(ensemble_memory_file, "rb")) as memory: - read_losses = pickle.load(memory) - - compare_read_preds(read_losses, ensbuilder.read_losses) - - # Then create a new instance, which should automatically read this file - ensbuilder2 = EnsembleBuilder( - backend=ensemble_backend, - dataset_name="TEST", - task_type=MULTILABEL_CLASSIFICATION, # Multilabel Classification - metric=roc_auc, - seed=0, # important to find the test files - ensemble_nbest=2, - max_models_on_disc=None, - ) - compare_read_preds(ensbuilder2.read_preds, ensbuilder.read_preds) - compare_read_preds(ensbuilder2.read_losses, ensbuilder.read_losses) - assert ensbuilder2.last_hash == ensbuilder.last_hash - - -@pytest.mark.parametrize("metric", [log_loss, accuracy]) -@unittest.mock.patch("os.path.exists") -def test_get_identifiers_from_run_history( - exists, metric, ensemble_run_history, ensemble_backend -): - exists.return_value = True - ensemble = SingleBest( - metric=log_loss, - seed=1, - run_history=ensemble_run_history, - backend=ensemble_backend, - ) - - # Just one model - assert len(ensemble.identifiers_) == 1 - - # That model must be the best - seed, num_run, budget = ensemble.identifiers_[0] - assert num_run == 3 - assert seed == 1 - assert budget == 3.0 - 
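The real-run and nbest-remembered manager tests that follow, like the loss/nbest tests deleted above, are superseded by the fixture-driven tests in the new test/test_ensemble_builder/test_ensemble_builder.py added later in this diff. Roughly, the same "keep only the n best runs" scenario is now written against make_run and make_ensemble_builder; the sketch below is illustrative only and mirrors the fixture signatures shown in this diff rather than claiming the builder's final API:

def test_nbest_keeps_single_best(make_run, make_ensemble_builder):
    builder = make_ensemble_builder()
    dummy = make_run(dummy=True, loss=100)
    runs = [make_run(loss=n) for n in range(5)]
    # candidate_selection replaces the old compute_loss_per_model / get_n_best_preds flow
    candidates, _ = builder.candidate_selection(runs, dummy, nbest=1)
    assert len(candidates) == 1
    assert candidates[0].loss == 0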
- -def test_ensemble_builder_process_realrun(dask_client_single_worker, ensemble_backend): - manager = EnsembleBuilderManager( - start_time=time.time(), - time_left_for_ensembles=1000, - backend=ensemble_backend, - dataset_name="Test", - task=BINARY_CLASSIFICATION, - metric=MockMetric, - ensemble_size=50, - ensemble_nbest=10, - max_models_on_disc=None, - seed=0, - precision=32, - max_iterations=1, - read_at_most=np.inf, - ensemble_memory_limit=None, - random_state=0, - ) - manager.build_ensemble(dask_client_single_worker) - future = manager.futures.pop() - dask.distributed.wait([future]) # wait for the ensemble process to finish - result = future.result() - history, _, _, _, _ = result - - assert "ensemble_optimization_score" in history[0] - assert history[0]["ensemble_optimization_score"] == 0.9 - assert "ensemble_val_score" in history[0] - assert history[0]["ensemble_val_score"] == 0.9 - assert "ensemble_test_score" in history[0] - assert history[0]["ensemble_test_score"] == 0.9 - - -def test_ensemble_builder_nbest_remembered( - ensemble_backend, - dask_client_single_worker, -): - """ - Makes sure ensemble builder returns the size of the ensemble that pynisher allowed - This way, we can remember it and not waste more time trying big ensemble sizes - """ - - manager = EnsembleBuilderManager( - start_time=time.time(), - time_left_for_ensembles=1000, - backend=ensemble_backend, - dataset_name="Test", - task=MULTILABEL_CLASSIFICATION, - metric=roc_auc, - ensemble_size=50, - ensemble_nbest=10, - max_models_on_disc=None, - seed=0, - precision=32, - read_at_most=np.inf, - ensemble_memory_limit=1000, - random_state=0, - max_iterations=None, - ) - - manager.build_ensemble(dask_client_single_worker, unit_test=True) - future = manager.futures[0] - dask.distributed.wait([future]) # wait for the ensemble process to finish - assert future.result() == ([], 5, None, None, None) - file_path = os.path.join( - ensemble_backend.internals_directory, "ensemble_read_preds.pkl" - ) - assert not os.path.exists(file_path) - - manager.build_ensemble(dask_client_single_worker, unit_test=True) - - future = manager.futures[0] - dask.distributed.wait([future]) # wait for the ensemble process to finish - assert not os.path.exists(file_path) - assert future.result() == ([], 2, None, None, None) diff --git a/test/test_ensemble_builder/test_ensemble_builder.py b/test/test_ensemble_builder/test_ensemble_builder.py new file mode 100644 index 0000000000..7c5e593864 --- /dev/null +++ b/test/test_ensemble_builder/test_ensemble_builder.py @@ -0,0 +1,638 @@ +from __future__ import annotations + +from typing import Callable + +import random +import time +from pathlib import Path + +import numpy as np + +from autosklearn.automl_common.common.utils.backend import Backend +from autosklearn.ensemble_building import EnsembleBuilder, Run +from autosklearn.util.functional import bound, pairs + +import pytest +from pytest_cases import fixture, parametrize +from unittest.mock import patch + +from test.conftest import DEFAULT_SEED + + +@fixture +def builder(make_ensemble_builder: Callable[..., EnsembleBuilder]) -> EnsembleBuilder: + """A default ensemble builder""" + return make_ensemble_builder() + + +@parametrize("kind", ["ensemble", "test"]) +def test_targets(builder: EnsembleBuilder, kind: str) -> None: + """ + Expects + ------- + * Should be able to load each of the targets + """ + assert builder.targets(kind) is not None + + +def test_available_runs(builder: EnsembleBuilder) -> None: + """ + Expects + ------- + * Should be able to 
read runs from the backends rundir where runs are tagged + {seed}_{numrun}_{budget} + """ + runsdir = Path(builder.backend.get_runs_directory()) + + ids = {(0, i, 0.0) for i in range(1, 10)} + paths = [runsdir / f"{s}_{n}_{b}" for s, n, b in ids] + + for path in paths: + path.mkdir() + + available_runs = builder.available_runs() + + assert len(available_runs) == len(ids) + for run_id in available_runs.keys(): + assert run_id in ids + + +def test_available_runs_with_bad_dir_contained(builder: EnsembleBuilder) -> None: + """ + Expects + ------- + * Should ignore dirs that aren't in format + """ + runsdir = Path(builder.backend.get_runs_directory()) + + ids = {(0, i, 0.0) for i in range(1, 10)} + paths = [runsdir / f"{s}_{n}_{b}" for s, n, b in ids] + + bad_path = runsdir / "Im_a_bad_path" + + for path in paths + [bad_path]: + path.mkdir() + + available_runs = builder.available_runs() + assert len(available_runs) == len(paths) + + +def test_requires_loss_update_with_modified_runs( + builder: EnsembleBuilder, + make_run: Callable[..., Run], +) -> None: + """ + Expects + ------- + * Should include runs that were modified, even if they have a loss + """ + run_okay = [make_run(loss=1) for _ in range(5)] + run_modified = [make_run(loss=1, modified=True) for _ in range(5)] + + runs = run_okay + run_modified + + requires_update = builder.requires_loss_update(runs) + + assert set(run_modified) == set(requires_update) + + +def test_requires_loss_update_with_no_loss( + builder: EnsembleBuilder, + make_run: Callable[..., Run], +) -> None: + """ + Expects + ------- + * Should include runs that have no loss + """ + run_okay = [make_run(loss=10) for _ in range(5)] + run_no_loss = [make_run() for _ in range(5)] + + runs = run_okay + run_no_loss + + requires_update = builder.requires_loss_update(runs) + + assert set(run_no_loss) == set(requires_update) + + +def test_candidates_no_filters( + builder: EnsembleBuilder, make_run: Callable[..., Run] +) -> None: + """ + Expects + ------- + * Should have nothing in common between candidates and discarded + * Should not filter out any viable runs if no filters set. 
Here a viable run + has a loss and ensemble predictions + """ + dummy = make_run(dummy=True) + runs = [make_run(loss=n) for n in range(10)] + + candidates, discarded = builder.candidate_selection( + runs, + dummy, + better_than_dummy=False, + nbest=None, + performance_range_threshold=None, + ) + + assert len(set(candidates) & discarded) == 0 + assert len(candidates) == len(runs) + assert len(discarded) == 0 + + +def test_candidates_filters_runs_with_no_predictions( + builder: EnsembleBuilder, make_run: Callable[..., Run] +) -> None: + """ + Expects + ------- + * Should have nothing in common between candidates and discarded + * Should filter out runs with no "ensemble" predictions + """ + bad_runs = [make_run(predictions=None) for _ in range(5)] + dummy = make_run(dummy=True, loss=2) + good_run = make_run(predictions="ensemble", loss=1) + + runs = bad_runs + [good_run] + + candidates, discarded = builder.candidate_selection(runs, dummy) + + assert len(set(candidates) & discarded) == 0 + assert len(candidates) == 1 + assert len(discarded) == len(bad_runs) + assert candidates[0].pred_path("ensemble").exists() + + +def test_candidates_filters_runs_with_no_loss( + builder: EnsembleBuilder, make_run: Callable[..., Run] +) -> None: + """ + Expects + ------- + * Should filter out runs with no loss + """ + bad_runs = [make_run(loss=None) for _ in range(5)] + dummy_run = make_run(dummy=True, loss=2) + good_run = make_run(loss=1) + + runs = bad_runs + [good_run] + + candidates, discarded = builder.candidate_selection(runs, dummy_run) + + assert len(candidates) == 1 + assert len(discarded) == len(bad_runs) + assert candidates[0].loss == 1 + + +def test_candidates_filters_out_better_than_dummy( + builder: EnsembleBuilder, + make_run: Callable[..., Run], +) -> None: + """ + Expects + ------- + * Should filter out runs worse than dummy + """ + bad_runs = [make_run(loss=1) for _ in range(2)] + dummy_run = make_run(dummy=True, loss=0) + good_runs = [make_run(loss=-1) for _ in range(3)] + + runs = bad_runs + good_runs + + candidates, discarded = builder.candidate_selection( + runs, dummy_run, better_than_dummy=True + ) + + assert set(candidates) + + assert len(candidates) == 3 + assert all(run.loss < dummy_run.loss for run in candidates) + + assert len(discarded) == 2 + assert all(run.loss >= dummy_run.loss for run in discarded) + + +def test_candidates_uses_dummy_if_no_candidates_better( + builder: EnsembleBuilder, + make_run: Callable[..., Run], +) -> None: + """ + Expects + ------- + * If no run is better than a dummy run, the candidates will then consist + of the dummy runs. 
+ """ + runs = [make_run(loss=10) for _ in range(10)] + dummies = [make_run(dummy=True, loss=0) for _ in range(2)] + + candidates, discarded = builder.candidate_selection( + runs, + dummies, + better_than_dummy=True, + ) + + assert len(candidates) == 2 + assert all(run.is_dummy() for run in candidates) + + +@parametrize("nbest", [0, 1, 5, 1000]) +def test_candidates_nbest_int( + builder: EnsembleBuilder, + make_run: Callable[..., Run], + nbest: int, +) -> None: + """ + Expects + ------- + * Should only select the nbest candidates + * They should be ordered by loss + """ + n = 10 + expected = int(bound(nbest, bounds=(1, n))) + + dummy = make_run(dummy=True) + runs = [make_run(loss=i) for i in range(n)] + candidates, discarded = builder.candidate_selection(runs, dummy, nbest=nbest) + + assert len(candidates) == expected + + if len(candidates) > 1: + assert all(a.loss <= b.loss for a, b in pairs(candidates)) + + if any(discarded): + worst_candidate = candidates[-1] + assert all(worst_candidate.loss <= d.loss for d in discarded) + + +@parametrize("nbest", [0.0, 0.25, 0.5, 1.0]) +def test_candidates_nbest_float( + builder: EnsembleBuilder, + make_run: Callable[..., Run], + nbest: float, +) -> None: + """ + Expects + ------- + * Should select nbest percentage of candidates + * They should be ordered by loss + """ + n = 10 + expected = int(bound(nbest * n, bounds=(1, n))) + + dummy = make_run(dummy=True, loss=0) + runs = [make_run(id=i, loss=i) for i in range(2, n + 2)] + candidates, discarded = builder.candidate_selection(runs, dummy, nbest=nbest) + + assert len(candidates) == expected + + if len(candidates) > 1: + assert all(a.loss <= b.loss for a, b in pairs(candidates)) + + if any(discarded): + worst_candidate = candidates[-1] + assert all(worst_candidate.loss <= d.loss for d in discarded) + + +@parametrize("threshold", [0.0, 0.25, 0.5, 1.0]) +def test_candidates_performance_range_threshold( + builder: EnsembleBuilder, + make_run: Callable[..., Run], + threshold: float, +) -> None: + """ + Expects + ------- + * Should select runs that are `threshold` between the dummy loss and the best loss + This value is captured in `boundary`. 
+ """ + worst_loss = 100 + best_loss = 0 + dummy_loss = 50 + + boundary = threshold * best_loss + (1 - threshold) * dummy_loss + + dummy = make_run(dummy=True, loss=dummy_loss) + runs = [make_run(loss=loss) for loss in np.linspace(best_loss, worst_loss, 101)] + + candidates, discarded = builder.candidate_selection( + runs, + dummy, + performance_range_threshold=threshold, + ) + + assert len(candidates) > 0 + + # When no run is better than threshold, we just get 1 candidate, + # Make sure it's the best + if len(candidates) == 1: + assert all(r.loss >= candidates[0].loss for r in discarded) + + else: + for run in candidates: + assert run.loss < boundary + + for run in discarded: + assert run.loss >= boundary + + +def test_requires_deletion_does_nothing_without_params( + builder: EnsembleBuilder, + make_run: Callable[..., Run], +) -> None: + """ + Expects + ------- + * All runs should be kept + """ + runs = [make_run() for _ in range(10)] + + keep, delete = builder.requires_deletion( + runs, + max_models=None, + memory_limit=None, + ) + + assert set(runs) == set(keep) + assert len(delete) == 0 + + +@parametrize("max_models", [0, 1, 2, 5]) +def test_requires_deletion_max_models( + builder: EnsembleBuilder, + max_models: int, + make_run: Callable[..., Run], +) -> None: + """ + Expects + ------- + * Should keep exactly as many models as `max_models` + * Should not have any in common between keep and delete + """ + runs = [make_run() for _ in range(10)] + keep, delete = builder.requires_deletion(runs=runs, max_models=max_models) + + assert len(keep) == max_models + assert len(delete) == len(runs) - max_models + + assert not any(set(keep) & set(delete)) + + +@parametrize("memory_limit, expected", [(0, 0), (100, 0), (200, 1), (5000, 49)]) +def test_requires_memory_limit( + builder: EnsembleBuilder, + make_run: Callable[..., Run], + memory_limit: int, + expected: int, +) -> None: + """ + Expects + ------- + * Should keep the expected amount of models + * The kept models should be sorted by lowest loss + * Should not have any models in common between keep and delete + * All models kept should be better than those deleted + """ + runs = [make_run(mem_usage=100, loss=-n) for n in range(50)] + random.shuffle(runs) + + keep, delete = builder.requires_deletion(runs=runs, memory_limit=memory_limit) + + # The cutoff for memory is (memory_limit - largest) + # E.g. 
+ # 5 models at 100 ea = 500mb usage + # largest = 100mb + # memory_limit = 400mb + # cutoff = memory_limit - largest (400mb - 100mb) = 300mb + # We can store 300mb which means the 3 best models + assert len(keep) == expected + assert len(delete) == len(runs) - expected + + assert not any(set(keep) & set(delete)) + + if len(keep) > 2: + assert all(a.loss <= b.loss for a, b in pairs(keep)) + + best_deleted = min(r.loss for r in delete) + assert not any(run.loss > best_deleted for run in keep) + + +@parametrize("kind", ["ensemble", "test"]) +def test_loss_with_no_ensemble_targets( + builder: EnsembleBuilder, + make_run: Callable[..., Run], + kind: str, +) -> None: + """ + Expects + ------- + * Should give a loss of np.inf if run has no predictions of a given kind + """ + run = make_run(predictions=None) + + assert builder.loss(run, kind=kind) == np.inf + + +@parametrize("kind", ["ensemble", "test"]) +def test_loss_with_targets( + builder: EnsembleBuilder, + make_run: Callable[..., Run], + kind: str, +) -> None: + """ + Expects + ------- + * Should give a loss < np.inf if the predictions exist + """ + targets = builder.targets(kind) + + run = make_run(predictions={kind: targets}) + + assert builder.loss(run, kind) < np.inf + + +def test_delete_runs(builder: EnsembleBuilder, make_run: Callable[..., Run]) -> None: + """ + Expects + ------- + * Should delete runs so they cannot be found again by the ensemble builder + """ + runs = [make_run(backend=builder.backend) for _ in range(5)] + assert all(run.dir.exists() for run in runs) + + builder.delete_runs(runs) + assert not any(run.dir.exists() for run in runs) + + loaded = builder.available_runs() + assert len(loaded) == 0 + + +def test_delete_runs_does_not_delete_dummy( + builder: EnsembleBuilder, + make_run: Callable[..., Run], +) -> None: + """ + Expects + ------- + * Should delete the normal runs but leave the dummy runs on disk + """ + backend = builder.backend + normal_runs = [make_run(backend=backend) for _ in range(5)] + dummy_runs = [make_run(dummy=True, seed=i, backend=backend) for i in range(2)] + + runs = normal_runs + dummy_runs + assert all(run.dir.exists() for run in runs) + + builder.delete_runs(runs) + assert not any(run.dir.exists() for run in normal_runs) + assert all(dummy.dir.exists() for dummy in dummy_runs) + + loaded = builder.available_runs() + assert set(loaded.values()) == set(dummy_runs) + + +def test_fit_ensemble_produces_ensemble( + builder: EnsembleBuilder, + make_run: Callable[..., Run], +) -> None: + """ + Expects + ------- + * Should produce an ensemble if all runs have predictions + """ + targets = builder.targets("ensemble") + assert targets is not None + + predictions = targets + runs = [make_run(predictions={"ensemble": predictions}) for _ in range(10)] + + ensemble = builder.fit_ensemble(runs, targets) + + assert ensemble is not None + + +def test_fit_with_error_gives_no_ensemble( + builder: EnsembleBuilder, + make_run: Callable[..., Run], +) -> None: + """ + Expects + ------- + * A run without predictions will raise an error and cause `fit_ensemble` to fail, + as it requires all runs to have valid predictions + """ + targets = builder.targets("ensemble") + assert targets is not None + + predictions = targets + + runs = [make_run(predictions={"ensemble": predictions}) for _ in range(10)] + bad_run = make_run(predictions=None) + + runs.append(bad_run) + + with pytest.raises(FileNotFoundError): + builder.fit_ensemble(runs, targets) + + +@parametrize("time_buffer", [1, 5]) +@parametrize("duration", [10, 20]) +def test_run_end_at(builder: EnsembleBuilder,
time_buffer: int, duration: int) -> None: + """ + Expects + ------- + * The limits enforced by pynisher should account for the time_buffer and duration + to run for + a little bit of overhead that gets rounded to a second. + """ + with patch("pynisher.enforce_limits") as pynisher_mock: + builder.run( + end_at=time.time() + duration, + iteration=1, + time_buffer=time_buffer, + pynisher_context="forkserver", + ) + # The 1 comes from the small overhead in conjunction with rounding down + expected = duration - time_buffer - 1 + assert pynisher_mock.call_args_list[0][1]["wall_time_in_s"] == expected + + +def test_deletion_will_not_break_current_ensemble( + make_backend: Callable[..., Backend], + make_ensemble_builder: Callable[..., EnsembleBuilder], + make_run: Callable[..., Run], +) -> None: + """ + Expects + ------- + * When running the builder, its previous ensemble should not have its runs deleted + until a new ensemble is built. + """ + # Make a builder with this backend and limit it to only allow 10 models on disc + builder = make_ensemble_builder( + max_models_on_disc=10, + seed=DEFAULT_SEED, + ) + + # Stick a dummy run and 10 bad runs into the backend + datamanager = builder.backend.load_datamanager() + targets = datamanager.data["Y_train"] + + bad_predictions = {"ensemble": np.zeros_like(targets)} + good_predictions = {"ensemble": targets} + + make_run(dummy=True, loss=10000, backend=builder.backend) + bad_runs = [ + make_run(backend=builder.backend, predictions=bad_predictions) + for _ in range(10) + ] + + ens_dir = Path(builder.backend.get_ensemble_dir()) + + # Make sure there's no ensemble and run with the candidates available + assert not ens_dir.exists() + builder.main(time_left=100) + + # Make sure an ensemble was built + assert ens_dir.exists() + first_builder_contents = set(ens_dir.iterdir()) + + # Create 10 new and better runs and put them in the backend + new_runs = [ + make_run(backend=builder.backend, predictions=good_predictions) + for _ in range(10) + ] + + # Now we make `save_ensemble` crash so that even though we run the builder, it does + # not manage to save the new ensemble + with patch.object(builder.backend, "save_ensemble", side_effect=ValueError): + try: + builder.main(time_left=100) + except Exception: + pass + + # Ensure that no new ensemble was created + second_builder_contents = set(ens_dir.iterdir()) + assert first_builder_contents == second_builder_contents + + # Now we make sure that the ensemble there still has access to all the bad models + # that it contained from the first run, even though the second crashed. + available_runs = builder.available_runs().values() + for run in bad_runs + new_runs: + assert run in available_runs + + # As a sanity check, run the builder one more time without crashing and make + # sure the bad runs are removed and the good ones kept.
+ # We remove its previous candidates so that it won't remember them + # and will fit a new ensemble + builder.previous_candidates_path.unlink() + builder.main(time_left=100) + available_runs = builder.available_runs().values() + + for run in bad_runs: + assert run not in available_runs + + for run in new_runs: + assert run in available_runs diff --git a/test/test_ensemble_builder/test_ensemble_builder_real.py b/test/test_ensemble_builder/test_ensemble_builder_real.py new file mode 100644 index 0000000000..fdad0a45c9 --- /dev/null +++ b/test/test_ensemble_builder/test_ensemble_builder_real.py @@ -0,0 +1,98 @@ +""" +This file tests the ensemble builder with real runs generated from running AutoML +""" +from __future__ import annotations + +from typing import Callable + +from autosklearn.automl import AutoML +from autosklearn.ensemble_building.builder import EnsembleBuilder + +from pytest_cases import parametrize_with_cases +from pytest_cases.filters import has_tag +from unittest.mock import MagicMock, patch + +import test.test_automl.cases as cases + + +@parametrize_with_cases( + "automl", + cases=cases, + filter=has_tag("fitted") & ~has_tag("no_ensemble"), +) +def case_real_runs( + automl: AutoML, + make_ensemble_builder: Callable[..., EnsembleBuilder], +) -> EnsembleBuilder: + """Uses real runs from a fitted automl instance which have an ensemble + + This will create the ensemble builder using the AutoML instance's parameters. + This includes ensemble_nbest, ensemble_size, etc... + """ + builder = make_ensemble_builder(automl=automl) + return builder + + +@parametrize_with_cases("builder", cases=case_real_runs) +def test_run_builds_valid_ensemble(builder: EnsembleBuilder) -> None: + """ + Expects + ------- + * Using the same builder as used in the real run should result in the same + candidate models for the ensemble.
+ * Check that there is no overlap between candidate models and those deleted + * The generated ensemble should not be empty + * If any runs were deleted, there should be no overlap between those deleted and + those in the ensemble + * If any runs were deleted, they should all be worse than those in the ensemble + """ + # We need to clear previous candidates so the ensemble builder is presented with + # only "new" runs and has no information of previous candidates + if builder.previous_candidates_path.exists(): + builder.previous_candidates_path.unlink() + + # So we can capture the saved ensemble + mock_save = MagicMock() + builder.backend.save_ensemble = mock_save # type: ignore + + # So we can capture what was deleted + mock_delete = MagicMock() + builder.delete_runs = mock_delete # type: ignore + + # So we can capture the candidate runs used, while still wrapping the actual fitting + with patch.object(builder, "fit_ensemble", wraps=builder.fit_ensemble) as mock_fit: + history, nbest = builder.main() + + # Check the ensemble was fitted once + mock_save.assert_called_once() + _, kwargs = mock_save.call_args + ens = kwargs["ensemble"] # `backend.save_ensemble(ensemble=ens, ...)` + ensemble_ids = set(ens.get_selected_model_identifiers()) + assert len(ensemble_ids) > 0 + + # Check that the ids of runs in the ensemble were all candidates + candidates = mock_fit.call_args[0][0] # `fit_ensemble(candidates, ...)` + candidate_ids = {run.id for run in candidates} + assert ensemble_ids <= candidate_ids + + # Could be the case no run is deleted + if not mock_delete.called: + return + + args, _ = mock_delete.call_args + deleted = args[0] # `delete_runs(runs)` + + # If we deleted runs, we better make sure of a few things + if len(deleted) > 0: + deleted_ids = {run.id for run in deleted} + + # Make sure there's no overlap between candidates/ensemble and those deleted + assert not any(deleted_ids & candidate_ids) + assert not any(deleted_ids & ensemble_ids) + + # Make sure the worst kept candidate is at least as good as the best deleted run + best_deleted = min(deleted, key=lambda r: (r.loss, r.num_run)) + worst_candidate = max(candidates, key=lambda r: (r.loss, r.num_run)) + + a = (worst_candidate.loss, worst_candidate.num_run) + b = (best_deleted.loss, best_deleted.num_run) + assert a <= b diff --git a/test/test_ensemble_builder/test_manager.py b/test/test_ensemble_builder/test_manager.py new file mode 100644 index 0000000000..6e4039ca86 --- /dev/null +++ b/test/test_ensemble_builder/test_manager.py @@ -0,0 +1,29 @@ +from __future__ import annotations + +from typing import Callable + +from autosklearn.automl import AutoML +from autosklearn.ensemble_building import EnsembleBuilderManager + +from pytest_cases import parametrize_with_cases + +import test.test_automl.cases as cases +from test.conftest import DEFAULT_SEED + + +@parametrize_with_cases("automl", cases=cases, has_tag="fitted") +def case_real_runs( + automl: AutoML, + make_ensemble_builder_manager: Callable[..., EnsembleBuilderManager], +) -> EnsembleBuilderManager: + """Uses real runs from a fitted automl instance""" + manager = make_ensemble_builder_manager( + backend=automl._backend, + metric=automl._metrics[0], + task=automl._task, + dataset_name=automl._dataset_name, + seed=automl._seed, + logger_port=automl._logger_port, + random_state=DEFAULT_SEED, + ) + return manager diff --git a/test/test_ensemble_builder/test_run.py b/test/test_ensemble_builder/test_run.py new file mode 100644 index 0000000000..5c494208ca --- /dev/null +++ b/test/test_ensemble_builder/test_run.py @@ -0,0 +1,169 @@ +from
__future__ import annotations + +from typing import Callable + +import pickle +import time +from pathlib import Path + +import numpy as np + +from autosklearn.ensemble_building import Run + +from pytest_cases import parametrize + + +def test_is_dummy(make_run: Callable[..., Run]) -> None: + """ + Expects + ------- + * We expect runs with a num_run (id) of 1 to be a dummy + """ + run = make_run(id=1) + assert run.is_dummy() + + run = make_run(id=2) + assert not run.is_dummy() + + +def test_was_modified(make_run: Callable[..., Run]) -> None: + """ + Expects + ------- + * Should properly indicate when a file was modified + """ + run = make_run() + assert not run.was_modified() + + time.sleep(0.2) # Just to give some time after creation + path = run.pred_path("ensemble") + path.touch() + + assert run.was_modified() + + +def test_record_modified_times_with_was_modified(make_run: Callable[..., Run]) -> None: + """ + Expects + ------- + * Updating the recorded times should not trigger `was_modified` + * Should update the recorded times so `was_modified` will give False after being + updated + """ + run = make_run() + path = run.pred_path("ensemble") + + time.sleep(0.2) + run.record_modified_times() + assert not run.was_modified() + + time.sleep(0.2) + path.touch() + assert run.was_modified() + + time.sleep(0.2) + run.record_modified_times() + assert not run.was_modified() + + +def test_predictions_pickled(make_run: Callable[..., Run]) -> None: + """ + Expects + ------- + * Should be able to load pickled predictions + + Note + ---- + * Not sure this should be supported + """ + run = make_run(predictions=[]) + x = np.array([0]) + + path = run.pred_path("ensemble") + with path.open("wb") as f: + pickle.dump(x, f) + + assert run.predictions("ensemble") is not None + + +@parametrize( + "precision, expected", [(16, np.float16), (32, np.float32), (64, np.float64)] +) +def test_predictions_precision( + make_run: Callable[..., Run], precision: int, expected: type +) -> None: + """ + Expects + ------- + * Loading predictions with a given precision should load the expected type + """ + run = make_run() + assert run.predictions(precision=precision).dtype == expected + + +def test_caching(make_run: Callable[..., Run]) -> None: + """ + Expects + ------- + * Attempting to load the same predictions again will cause the result to be cached + * Unloading the cache will cause it to reload and reread the predictions + """ + run = make_run() + + assert len(run._cache) == 0 + first_load = run.predictions() # Should cache result + assert len(run._cache) == 1 + + cache_load = run.predictions() # Should use the cached result + assert len(run._cache) == 1 + + # They should be the same object + assert id(first_load) == id(cache_load) + + pickled_run = pickle.dumps(run) + unpickled_run = pickle.loads(pickled_run) + + assert len(unpickled_run._cache) == 0 + unpickled_load = unpickled_run.predictions() # Should have reloaded it + assert len(unpickled_run._cache) == 1 + + # Should not be the same object as before once pickled + assert id(unpickled_load) != id(first_load) + + +def test_equality(make_run: Callable[..., Run]) -> None: + """ + Expects + ------- + * Two runs with the same ids should be considered equal + * Otherwise, they should be considered different + """ + r1 = make_run(id=1, budget=49.3, seed=3) + r2 = make_run(id=1, budget=49.3, seed=3) + + assert r1 == r2 + + r3 = make_run(id=1, budget=0.0, seed=3) + + assert r1 != r3 + assert r2 != r3 + + +@parametrize( + "name, expected", + [ + ("0_0_0.0", True), +
("1_152_64.24", True), + ("123412_3462_100.0", True), + ("tmp_sf12198", False), + ("tmp_0_0_0.0", False), + ], +) +def test_valid(name: str, expected: bool) -> None: + """ + Expects + ------- + * Should be able to correctly consider valid run dir names + """ + path = Path(name) + assert Run.valid(path) == expected diff --git a/test/test_estimators/test_estimators.py b/test/test_estimators/test_estimators.py index 4dd13d4c17..8e3134e3a3 100644 --- a/test/test_estimators/test_estimators.py +++ b/test/test_estimators/test_estimators.py @@ -28,7 +28,7 @@ import autosklearn.pipeline.util as putil from autosklearn.automl import AutoMLClassifier from autosklearn.data.validation import InputValidator -from autosklearn.ensemble_builder import MODEL_FN_RE +from autosklearn.ensemble_building.run import Run from autosklearn.estimators import ( AutoSklearnClassifier, AutoSklearnEstimator, @@ -60,6 +60,8 @@ def test_fit_n_jobs(tmp_dir): Y_train += 1 Y_test += 1 + n_jobs = 2 + class get_smac_object_wrapper: def __call__(self, *args, **kwargs): self.n_jobs = kwargs["n_jobs"] @@ -78,49 +80,52 @@ def __call__(self, *args, **kwargs): per_run_time_limit=5, tmp_folder=os.path.join(tmp_dir, "backend"), seed=1, - initial_configurations_via_metalearning=0, - ensemble_size=5, - n_jobs=2, - include={"classifier": ["sgd"], "feature_preprocessor": ["no_preprocessing"]}, + n_jobs=n_jobs, get_smac_object_callback=get_smac_object_wrapper_instance, max_models_on_disc=None, ) - automl.fit(X_train, Y_train) # Test that the argument is correctly passed to SMAC - assert getattr(get_smac_object_wrapper_instance, "n_jobs") == 2 - assert getattr(get_smac_object_wrapper_instance, "dask_n_jobs") == 2 - assert getattr(get_smac_object_wrapper_instance, "dask_client_n_jobs") == 2 + assert get_smac_object_wrapper_instance.n_jobs == n_jobs + assert get_smac_object_wrapper_instance.dask_n_jobs == n_jobs + assert get_smac_object_wrapper_instance.dask_client_n_jobs == n_jobs - available_num_runs = set() - for run_key, run_value in automl.automl_.runhistory_.data.items(): + runhistory_data = automl.automl_.runhistory_.data + + successful_runs = { + run_value.additional_info["num_run"] + for run_value in runhistory_data.values() if ( run_value.additional_info is not None and "num_run" in run_value.additional_info - ): - available_num_runs.add(run_value.additional_info["num_run"]) + and run_value.status == StatusType.SUCCESS + ) + } + available_predictions = set() predictions = glob.glob( os.path.join( - automl.automl_._backend.get_runs_directory(), - "*", - "predictions_ensemble*.npy", + automl.automl_._backend.get_runs_directory(), "*", "predictions_ensemble_*" ) ) seeds = set() for prediction in predictions: prediction = os.path.split(prediction)[1] - match = re.match(MODEL_FN_RE, prediction.replace("predictions_ensemble", "")) + match = re.match(Run.RE_MODEL_PREDICTION_FILE, prediction) if match: - num_run = int(match.group(2)) - available_predictions.add(num_run) - seed = int(match.group(1)) - seeds.add(seed) + print(match) + seed, num_run, _ = match.groups() + available_predictions.add(int(num_run)) + seeds.add(int(seed)) # Remove the dummy prediction, it is not part of the runhistory - available_predictions.remove(1) - assert available_num_runs.issubset(available_predictions) + if 1 in available_predictions: + available_predictions.remove(1) + + # Make sure all predictions available are associated with a successful run + # Don't want a rogue prediction file + assert available_predictions <= successful_runs assert len(seeds) == 1 diff 
--git a/test/test_util/test_trials_callback.py b/test/test_util/test_trials_callback.py index b1328b9489..e20f0abc40 100644 --- a/test/test_util/test_trials_callback.py +++ b/test/test_util/test_trials_callback.py @@ -62,7 +62,6 @@ def test_trials_callback_execution(self): time_left_for_this_task=30, initial_configurations_via_metalearning=0, per_run_time_limit=10, - memory_limit=1024, delete_tmp_folder_after_terminate=False, n_jobs=1, include={"feature_preprocessor": ["pca"], "classifier": ["sgd"]},
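Note on helpers: the `bound` and `pairs` functions used in the nbest candidate tests above are not defined anywhere in this diff and are assumed to come from the local test utilities. The following is only a minimal sketch of their presumed behavior (clipping a value into a range, and yielding consecutive overlapping pairs), not the actual implementation:

from typing import Iterable, Iterator, List, Tuple, TypeVar

T = TypeVar("T")


def bound(value: float, bounds: Tuple[float, float]) -> float:
    """Clip `value` into the inclusive range given by `bounds` (assumed behavior)."""
    low, high = bounds
    return max(low, min(value, high))


def pairs(items: Iterable[T]) -> Iterator[Tuple[T, T]]:
    """Yield consecutive overlapping pairs, e.g. [1, 2, 3] -> (1, 2), (2, 3) (assumed behavior)."""
    listed: List[T] = list(items)
    return zip(listed, listed[1:])


# Example usage mirroring the assertions in the nbest tests:
# int(bound(0.25 * 10, bounds=(1, 10))) == 2
# all(a <= b for a, b in pairs([1, 2, 3]))  # True for a sorted sequence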