Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions autosklearn/data/target_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -386,3 +386,20 @@ def _check_data(
self.type_of_target,
supported_output_types
))

@property
def classes_(self) -> np.ndarray:
    """
    Class labels recorded by the target encoder.

    Mirrors the scikit-learn ``classes_`` attribute: an ndarray of
    shape (n_classes,), where n_classes is the number of classes seen
    while fitting an encoder to the targets.

    Returns
    -------
    classes_: np.ndarray
        The first entry of the encoder's ``categories_``, or an empty
        array when no encoder has been set.
    """
    encoder = self.encoder
    # Without an encoder there are no recorded categories to report.
    if encoder is None:
        return np.array([])
    return encoder.categories_[0]
30 changes: 18 additions & 12 deletions autosklearn/estimators.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import dask.distributed
import joblib
import numpy as np
from sklearn.base import BaseEstimator
from sklearn.base import BaseEstimator, ClassifierMixin, RegressorMixin
from sklearn.utils.multiclass import type_of_target

from autosklearn.automl import AutoMLClassifier, AutoMLRegressor, AutoML
Expand Down Expand Up @@ -219,11 +219,11 @@ def __init__(
:meth:`autosklearn.metrics.make_scorer`. These are the `Built-in
Metrics`_.
If None is provided, a default metric is selected depending on the task.

scoring_functions : List[Scorer], optional (None)
List of scorers which will be calculated for each pipeline and results will be
List of scorers which will be calculated for each pipeline and results will be
available via ``cv_results``

load_models : bool, optional (True)
Whether to load the models after fitting Auto-sklearn.

Expand Down Expand Up @@ -266,13 +266,14 @@ def __init__(
self.smac_scenario_args = smac_scenario_args
self.logging_config = logging_config
self.metadata_directory = metadata_directory
self._metric = metric
self._scoring_functions = scoring_functions
self._load_models = load_models
self.metric = metric
self.scoring_functions = scoring_functions
self.load_models = load_models

self.automl_ = None # type: Optional[AutoML]
# n_jobs after conversion to a number (b/c default is None)
self._n_jobs = None

super().__init__()

def __getstate__(self):
Expand Down Expand Up @@ -323,8 +324,8 @@ def build_automl(
smac_scenario_args=smac_scenario_args,
logging_config=self.logging_config,
metadata_directory=self.metadata_directory,
metric=self._metric,
scoring_functions=self._scoring_functions
metric=self.metric,
scoring_functions=self.scoring_functions
)

return automl
Expand Down Expand Up @@ -353,7 +354,7 @@ def fit(self, **kwargs):
tmp_folder=self.tmp_folder,
output_folder=self.output_folder,
)
self.automl_.fit(load_models=self._load_models, **kwargs)
self.automl_.fit(load_models=self.load_models, **kwargs)

return self

Expand Down Expand Up @@ -516,7 +517,7 @@ def get_configuration_space(self, X, y):
return self.automl_.configuration_space


class AutoSklearnClassifier(AutoSklearnEstimator):
class AutoSklearnClassifier(AutoSklearnEstimator, ClassifierMixin):
"""
This class implements the classification task.

Expand Down Expand Up @@ -597,6 +598,11 @@ def fit(self, X, y,
dataset_name=dataset_name,
)

# After fit, a classifier is expected to define classes_:
# a list of class labels known to the classifier, mapping each label
# to a numerical index used in the model representation or output.
self.classes_ = self.automl_.InputValidator.target_validator.classes_

return self

def predict(self, X, batch_size=None, n_jobs=1):
Expand Down Expand Up @@ -656,7 +662,7 @@ def _get_automl_class(self):
return AutoMLClassifier


class AutoSklearnRegressor(AutoSklearnEstimator):
class AutoSklearnRegressor(AutoSklearnEstimator, RegressorMixin):
"""
This class implements the regression task.

Expand Down
46 changes: 46 additions & 0 deletions test/test_automl/test_estimators.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import copy
import glob
import os
import inspect
import pickle
import re
import sys
Expand All @@ -15,6 +17,10 @@
import sklearn
import sklearn.dummy
import sklearn.datasets
from sklearn.base import clone
from sklearn.base import ClassifierMixin, RegressorMixin
from sklearn.base import is_classifier


from autosklearn.data.validation import InputValidator
import autosklearn.pipeline.util as putil
Expand Down Expand Up @@ -160,6 +166,10 @@ def test_type_of_target(mock_estimator):
])

cls = AutoSklearnClassifier(ensemble_size=0)
cls.automl_ = unittest.mock.Mock()
cls.automl_.InputValidator = unittest.mock.Mock()
cls.automl_.InputValidator.target_validator = unittest.mock.Mock()

# Illegal target types for classification: continuous,
# multiclass-multioutput, continuous-multioutput.
expected_msg = r".*Classification with data of type"
Expand Down Expand Up @@ -253,6 +263,10 @@ def test_cv_results(tmp_dir, output_dir):
ensemble_size=0,
scoring_functions=[autosklearn.metrics.precision,
autosklearn.metrics.roc_auc])

params = cls.get_params()
original_params = copy.deepcopy(params)

cls.fit(X_train, Y_train)
cv_results = cls.cv_results_
assert isinstance(cv_results, dict), type(cv_results)
Expand All @@ -275,6 +289,27 @@ def test_cv_results(tmp_dir, output_dir):
cv_results.items() if key.startswith('param_')]
assert all(cv_result_items), cv_results.items()

# Compare the state of the model parameters with the original parameters
new_params = clone(cls).get_params()
for param_name, original_value in original_params.items():
new_value = new_params[param_name]

# Taken from Sklearn code:
# We should never change or mutate the internal state of input
# parameters by default. To check this we use the joblib.hash function
# that introspects recursively any subobjects to compute a checksum.
# The only exception to this rule of immutable constructor parameters
# is possible RandomState instance but in this check we explicitly
# fixed the random_state params recursively to be integer seeds.
assert joblib.hash(new_value) == joblib.hash(original_value), (
"Estimator %s should not change or mutate "
" the parameter %s from %s to %s during fit."
% (cls, param_name, original_value, new_value))

# Comply with https://scikit-learn.org/dev/glossary.html#term-classes
is_classifier(cls)
assert hasattr(cls, 'classes_')


@unittest.mock.patch('autosklearn.estimators.AutoSklearnEstimator.build_automl')
def test_fit_n_jobs_negative(build_automl_patch):
Expand Down Expand Up @@ -614,3 +649,14 @@ def test_autosklearn2_classification_methods_returns_self(dask_client):
) >= 2 / 3, print_debug_information(automl)

pickle.dumps(automl_fitted)


@pytest.mark.parametrize("class_", [AutoSklearnClassifier, AutoSklearnRegressor,
                                    AutoSklearn2Classifier])
def test_check_estimator_signature(class_):
    """Check the mixin and the stored constructor arguments of an estimator.

    Parameters
    ----------
    class_ : type
        The estimator class under test, supplied by ``parametrize``.
    """
    # Decide on the bare class name: str(class_) also contains the module
    # path, so a substring match against it is more fragile than needed.
    if 'Classifier' in class_.__name__:
        expected_subclass = ClassifierMixin
    else:
        expected_subclass = RegressorMixin
    assert issubclass(class_, expected_subclass), (
        "%s should be a subclass of %s"
        % (class_.__name__, expected_subclass.__name__))

    # Make sure every argument of the signature is stored in self
    # under the same name it has in the constructor.
    estimator = class_()
    for expected in inspect.signature(class_).parameters:
        assert hasattr(estimator, expected), (
            "%s does not store the constructor argument %s as an attribute"
            % (class_.__name__, expected))
2 changes: 1 addition & 1 deletion test/test_data/test_feature_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def input_data_featuretest(request):
['a', 'b', 'd', 'r', 'b', 'c'],
])
elif request.param == 'numpy_categoricalonly_nan':
array = np.random.randint(10, size=(100, 10))
array = np.random.randint(10, size=(100, 10)).astype('float')
array[50, 0:5] = np.nan
return array
elif request.param == 'numpy_numericalonly_nan':
Expand Down
6 changes: 6 additions & 0 deletions test/test_data/test_target_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -450,6 +450,8 @@ def test_targetvalidator_inversetransform():
y_decoded = validator.inverse_transform(y)
assert ['a', 'a', 'b', 'c', 'a'] == y_decoded.tolist()

assert validator.classes_.tolist() == ['a', 'b', 'c']

validator = TargetValidator(is_classification=True)
multi_label = pd.DataFrame(
np.array([[1, 0, 0, 1], [0, 0, 1, 1], [0, 0, 0, 0]]),
Expand All @@ -461,6 +463,10 @@ def test_targetvalidator_inversetransform():
y_decoded = validator.inverse_transform(y)
np.testing.assert_array_almost_equal(y, y_decoded)

# Multilabel classification targets are not encoded.
# For this reason, the classes_ attribute is an empty array.
np.testing.assert_array_almost_equal(validator.classes_, np.array([]))


# Actual checks for the targets
@pytest.mark.parametrize(
Expand Down