Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 5 additions & 6 deletions autosklearn/automl.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@
AbstractMultiObjectiveEnsemble,
)
from autosklearn.ensembles.ensemble_selection import EnsembleSelection
from autosklearn.ensembles.singlebest_ensemble import SingleBest
from autosklearn.ensembles.singlebest_ensemble import SingleBestFromRunhistory
from autosklearn.evaluation import ExecuteTaFuncWithQueue, get_cost_of_crash
from autosklearn.evaluation.abstract_evaluator import _fit_and_suppress_warnings
from autosklearn.evaluation.train_evaluator import TrainEvaluator, _fit_with_budget
Expand Down Expand Up @@ -1666,7 +1666,7 @@ def _load_best_individual_model(self):
return None

# SingleBest contains the best model found by AutoML
ensemble = SingleBest(
ensemble = SingleBestFromRunhistory(
metrics=self._metrics,
task_type=self._task,
seed=self._seed,
Expand All @@ -1693,13 +1693,12 @@ def _load_pareto_set(self) -> Sequence[VotingClassifier | VotingRegressor]:
raise ValueError("Pareto set only available if ensemble can be loaded.")

if isinstance(self.ensemble_, AbstractMultiObjectiveEnsemble):
pareto_set = self.ensemble_.get_pareto_set()
pareto_set = self.ensemble_.pareto_set
else:
self._logger.warning(
"Pareto set not available for single objective ensemble "
"method. The Pareto set will only include the single ensemble "
"constructed by %s",
type(self.ensemble_),
f"constructed by {type(self.ensemble_)},"
)
pareto_set = [self.ensemble_]

Expand Down Expand Up @@ -2148,7 +2147,7 @@ def show_models(self) -> dict[int, Any]:

ensemble_dict = {}

if self._ensemble_class is not None:
if self._ensemble_class is None:
warnings.warn(
"No models in the ensemble. Kindly provide an ensemble class."
)
Expand Down
10 changes: 9 additions & 1 deletion autosklearn/ensembles/__init__.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,18 @@
from .abstract_ensemble import AbstractEnsemble, AbstractMultiObjectiveEnsemble
from .ensemble_selection import EnsembleSelection
from .singlebest_ensemble import SingleBest
from .multiobjective_dummy_ensemble import MultiObjectiveDummyEnsemble
from .singlebest_ensemble import (
SingleBest,
SingleBestFromRunhistory,
SingleModelEnsemble,
)

__all__ = [
"AbstractEnsemble",
"AbstractMultiObjectiveEnsemble",
"EnsembleSelection",
"SingleBestFromRunhistory",
"SingleBest",
"SingleModelEnsemble",
"MultiObjectiveDummyEnsemble",
]
43 changes: 33 additions & 10 deletions autosklearn/ensembles/abstract_ensemble.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from __future__ import annotations

from abc import ABC, abstractmethod
from typing import Dict, List, Sequence, Tuple, Union
from typing import Any, Dict, List, Sequence, Tuple, Union

import numpy as np

Expand All @@ -18,19 +18,29 @@ def __init__(
self,
task_type: int,
metrics: Sequence[Scorer] | Scorer,
random_state: int | np.random.RandomState | None,
backend: Backend,
random_state: int | np.random.RandomState | None = None,
):
pass

def __getstate__(self) -> Dict[str, Any]:
# A user-defined metric cannot be serialized reliably:
# when pickled, the metric won't be the same as the
# one defined in __main__. We don't use the metric
# in the EnsembleSelection, so dropping it from the
# pickled state should be fine.
return {key: value for key, value in self.__dict__.items() if key != "metrics"}

@abstractmethod
def fit(
self,
base_models_predictions: np.ndarray | List[np.ndarray],
X_data: SUPPORTED_FEAT_TYPES,
true_targets: np.ndarray,
model_identifiers: List[Tuple[int, int, float]],
runs: Sequence[Run],
X_data: SUPPORTED_FEAT_TYPES | None = None,
) -> "AbstractEnsemble":
"""Fit an ensemble given predictions of base models and targets.

Expand Down Expand Up @@ -79,7 +89,7 @@ def predict(

Returns
-------
array : [n_data_points]
np.ndarray
"""
pass

Expand All @@ -97,7 +107,7 @@ def get_models_with_weights(

Returns
-------
array : [(weight_1, model_1), ..., (weight_n, model_n)]
List[Tuple[float, BasePipeline]]
"""

@abstractmethod
Expand All @@ -115,7 +125,7 @@ def get_identifiers_with_weights(

Returns
-------
array : [(identifier_1, weight_1), ..., (identifier_n, weight_n)]
List[Tuple[Tuple[int, int, float], float]]
"""

@abstractmethod
Expand All @@ -133,12 +143,25 @@ def get_selected_model_identifiers(self) -> List[Tuple[int, int, float]]:
def get_validation_performance(self) -> float:
"""Return validation performance of ensemble.

Return
------
Returns
-------
float
"""


class AbstractMultiObjectiveEnsemble(AbstractEnsemble):
def get_pareto_set(self) -> Sequence[AbstractEnsemble]:
pass
@property
@abstractmethod
def pareto_set(self) -> Sequence[AbstractEnsemble]:
"""Get a sequence of ensembles that are on the pareto front

Raises
------
SklearnNotFittedError
If ``fit`` has not been called and the pareto set does not exist yet

Returns
-------
Sequence[AbstractEnsemble]
"""
...
47 changes: 20 additions & 27 deletions autosklearn/ensembles/ensemble_selection.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from __future__ import annotations

from typing import Any, Dict, List, Optional, Sequence, Tuple, Union
from typing import Dict, List, Sequence, Tuple, Union

import random
import warnings
Expand All @@ -23,11 +23,11 @@ def __init__(
self,
task_type: int,
metrics: Sequence[Scorer] | Scorer,
random_state: Optional[Union[int, np.random.RandomState]],
backend: Backend,
ensemble_size: int = 50,
bagging: bool = False,
mode: str = "fast",
random_state: int | np.random.RandomState | None = None,
) -> None:
"""An ensemble of selected algorithms

Expand All @@ -43,14 +43,6 @@ def __init__(
The metric used to evaluate the models. If multiple metrics are passed,
ensemble selection only optimizes for the first

random_state: Optional[int | RandomState] = None
The random_state used for ensemble selection.

* None - Uses numpy's default RandomState object
* int - Successive calls to fit will produce the same results
* RandomState - Truly random, each call to fit will produce
different results, even with the same object.

backend : Backend
Gives access to the backend of Auto-sklearn. Not used by Ensemble Selection.

Expand All @@ -62,6 +54,14 @@ def __init__(
* 'slow' - The original method used in Rich Caruana's ensemble selection.
* 'fast' - A faster version of Rich Caruana's ensemble selection.

random_state: int | RandomState | None = None
The random_state used for ensemble selection.

* None - Uses numpy's default RandomState object
* int - Successive calls to fit will produce the same results
* RandomState - Truly random, each call to fit will produce
different results, even with the same object.

References
----------
| Ensemble selection from libraries of models
Expand Down Expand Up @@ -92,23 +92,13 @@ def __init__(
# https://scikit-learn.org/stable/common_pitfalls.html#controlling-randomness
self.random_state = random_state

def __getstate__(self) -> Dict[str, Any]:
# Cannot serialize a metric if
# it is user defined.
# That is, if doing pickle dump
# the metric won't be the same as the
# one in __main__. we don't use the metric
# in the EnsembleSelection so this should
# be fine
return {key: value for key, value in self.__dict__.items() if key != "metrics"}

def fit(
self,
base_models_predictions: List[np.ndarray],
X_data: SUPPORTED_FEAT_TYPES,
true_targets: np.ndarray,
model_identifiers: List[Tuple[int, int, float]],
runs: Sequence[Run],
X_data: SUPPORTED_FEAT_TYPES | None = None,
) -> EnsembleSelection:
self.ensemble_size = int(self.ensemble_size)
if self.ensemble_size < 1:
Expand Down Expand Up @@ -141,20 +131,22 @@ def fit(
def _fit(
self,
predictions: List[np.ndarray],
X_data: SUPPORTED_FEAT_TYPES,
labels: np.ndarray,
*,
X_data: SUPPORTED_FEAT_TYPES | None = None,
) -> EnsembleSelection:
if self.mode == "fast":
self._fast(predictions, X_data, labels)
self._fast(predictions=predictions, X_data=X_data, labels=labels)
else:
self._slow(predictions, X_data, labels)
self._slow(predictions=predictions, X_data=X_data, labels=labels)
return self

def _fast(
self,
predictions: List[np.ndarray],
X_data: SUPPORTED_FEAT_TYPES,
labels: np.ndarray,
*,
X_data: SUPPORTED_FEAT_TYPES | None = None,
) -> None:
"""Fast version of Rich Caruana's ensemble selection method."""
self.num_input_models_ = len(predictions)
Expand Down Expand Up @@ -231,8 +223,9 @@ def _fast(
def _slow(
self,
predictions: List[np.ndarray],
X_data: SUPPORTED_FEAT_TYPES,
labels: np.ndarray,
*,
X_data: SUPPORTED_FEAT_TYPES | None = None,
) -> None:
"""Rich Caruana's ensemble selection method."""
self.num_input_models_ = len(predictions)
Expand Down Expand Up @@ -311,7 +304,7 @@ def _bagging(
# Bagging a set of models
indices = sorted(random.sample(range(0, n_models), bag_size))
bag = predictions[indices, :, :]
order, _ = self._fit(bag, labels)
order, _ = self._fit(predictions=bag, labels=labels)
order_of_each_bag.append(order)

return np.array(
Expand Down
Loading