Skip to content

Auto-Sklearn estimators cannot be cloned #876

@alexitkes

Description

@alexitkes

Describe the bug

If I take a model from a fitted auto-sklearn classifier and clone it with `sklearn.base.clone`, calling `fit` on the clone crashes with a `TypeError`.

To Reproduce

Steps to reproduce the behavior:

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from autosklearn.classification import AutoSklearnClassifier
from sklearn.base import clone

bc = load_breast_cancer()
(X, y) = (bc['data'], bc['target'])
(X_train, X_test, y_train, y_test) = train_test_split(X, y, random_state=0, stratify=y)

clf = AutoSklearnClassifier(time_left_for_this_task=45, per_run_time_limit=15, n_jobs=-1)
clf.fit(X_train, y_train)
print(clf.score(X_train, y_train))
print(clf.score(X_test, y_test))

m = clf.get_models_with_weights()[0][1]

m.fit(X_train, y_train)
print(clf.score(X_train, y_train))
print(clf.score(X_test, y_test))
# Output:
# 0.9906103286384976
# 0.9440559440559441

# Can a model be fitted again? OK.
m.fit(X_train, y_train)
print(clf.score(X_train, y_train))
print(clf.score(X_test, y_test))
# Output:
# 0.9906103286384976
# 0.9440559440559441

# Clone it
m = clone(m)

# And can a clone be fitted?
m.fit(X_train, y_train)
print(clf.score(X_train, y_train))
print(clf.score(X_test, y_test))
# Crash!
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-7-6f7f5e57c192> in <module>
----> 1 m.fit(X_train, y_train)
      2 print(clf.score(X_train, y_train))
      3 print(clf.score(X_test, y_test))

~/venv-auto/lib/python3.6/site-packages/autosklearn/pipeline/base.py in fit(self, X, y, **fit_params)
     89             a classification algorithm first.
     90         """
---> 91         X, fit_params = self.fit_transformer(X, y, **fit_params)
     92         self.fit_estimator(X, y, **fit_params)
     93         return self

~/venv-auto/lib/python3.6/site-packages/autosklearn/pipeline/classification.py in fit_transformer(self, X, y, fit_params)
     96 
     97         X, fit_params = super().fit_transformer(
---> 98             X, y, fit_params=fit_params)
     99 
    100         return X, fit_params

~/venv-auto/lib/python3.6/site-packages/autosklearn/pipeline/base.py in fit_transformer(self, X, y, fit_params)
     99         fit_params = {key.replace(":", "__"): value for key, value in
    100                       fit_params.items()}
--> 101         Xt, fit_params = self._fit(X, y, **fit_params)
    102         if fit_params is None:
    103             fit_params = {}

~/venv-auto/lib/python3.6/site-packages/sklearn/pipeline.py in _fit(self, X, y, **fit_params)
    313                 message_clsname='Pipeline',
    314                 message=self._log_message(step_idx),
--> 315                 **fit_params_steps[name])
    316             # Replace the transformer of the step with the fitted
    317             # transformer. This is necessary when loading the transformer

~/venv-auto/lib/python3.6/site-packages/joblib/memory.py in __call__(self, *args, **kwargs)
    350 
    351     def __call__(self, *args, **kwargs):
--> 352         return self.func(*args, **kwargs)
    353 
    354     def call_and_shelve(self, *args, **kwargs):

~/venv-auto/lib/python3.6/site-packages/sklearn/pipeline.py in _fit_transform_one(transformer, X, y, weight, message_clsname, message, **fit_params)
    726     with _print_elapsed_time(message_clsname, message):
    727         if hasattr(transformer, 'fit_transform'):
--> 728             res = transformer.fit_transform(X, y, **fit_params)
    729         else:
    730             res = transformer.fit(X, y, **fit_params).transform(X)

~/venv-auto/lib/python3.6/site-packages/autosklearn/pipeline/components/data_preprocessing/data_preprocessing.py in fit_transform(self, X, y)
     88 
     89     def fit_transform(self, X, y=None):
---> 90         return self.fit(X, y).transform(X)
     91 
     92     @staticmethod

~/venv-auto/lib/python3.6/site-packages/autosklearn/pipeline/components/data_preprocessing/data_preprocessing.py in fit(self, X, y)
     77         self.column_transformer = sklearn.compose.ColumnTransformer(
     78             transformers=sklearn_transf_spec,
---> 79             sparse_threshold=float(self.sparse_),
     80             )
     81         self.column_transformer.fit(X)

TypeError: float() argument must be a string or a number, not 'NoneType'

Expected behavior

The cloned model should fit and work just like the model it was cloned from.

Environment and installation:

Ubuntu 18.04.4
Python 3.6.9
Auto-Sklearn 0.7.0

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions