Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion autosklearn/automl.py
Original file line number Diff line number Diff line change
Expand Up @@ -1187,7 +1187,7 @@ def refit(self, X, y):
budget_type=self._budget_type,
logger=self._logger,
model=model,
train_indices=np.arange(len(X), dtype=int),
train_indices=np.arange(X.shape[0], dtype=int),
task_type=self._task,
)
break
Expand Down
43 changes: 37 additions & 6 deletions autosklearn/experimental/askl2.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from typing import Any, Dict, List, Optional, Union

import dask.distributed
import scipy.sparse

from ConfigSpace import Configuration
import numpy as np
Expand Down Expand Up @@ -92,9 +93,14 @@ def __call__(

scenario = Scenario(scenario_dict)

initial_configurations = [
Configuration(configuration_space=scenario.cs, values=member)
for member in self.portfolio.values()]
initial_configurations = []
for member in self.portfolio.values():
try:
initial_configurations.append(
Configuration(configuration_space=scenario.cs, values=member)
)
except ValueError:
pass

rh2EPM = RunHistory2EPM4LogCost
return SMAC4AC(
Expand Down Expand Up @@ -134,9 +140,15 @@ def __call__(
from smac.scenario.scenario import Scenario

scenario = Scenario(scenario_dict)
initial_configurations = [
Configuration(configuration_space=scenario.cs, values=member)
for member in self.portfolio.values()]

initial_configurations = []
for member in self.portfolio.values():
try:
initial_configurations.append(
Configuration(configuration_space=scenario.cs, values=member)
)
except ValueError:
pass

rh2EPM = RunHistory2EPM4LogCost
ta_kwargs['budget_type'] = self.budget_type
Expand Down Expand Up @@ -341,6 +353,25 @@ def fit(self, X, y,
feat_type=None,
dataset_name=None):

# TODO
# regularly check https://github.com/scikit-learn/scikit-learn/issues/15336 to see
# whether histogram gradient boosting in scikit-learn finally supports sparse data
is_sparse = scipy.sparse.issparse(X)
if is_sparse:
include_estimators = [
'extra_trees', 'passive_aggressive', 'random_forest', 'sgd', 'mlp',
]
else:
include_estimators = [
'extra_trees',
'passive_aggressive',
'random_forest',
'sgd',
'gradient_boosting',
'mlp',
]
self.include['classifier'] = include_estimators

if self.metric is None:
if len(y.shape) == 1 or y.shape[1] == 1:
self.metric = accuracy
Expand Down
25 changes: 25 additions & 0 deletions test/test_automl/test_estimators.py
Original file line number Diff line number Diff line change
Expand Up @@ -816,6 +816,31 @@ def test_autosklearn2_classification_methods_returns_self(dask_client):
pickle.dumps(automl_fitted)


def test_autosklearn2_classification_methods_returns_self_sparse(dask_client):
    """Exercise AutoSklearn2Classifier on a dataset loaded with ``make_sparse=True``.

    Verifies that ``fit``, ``fit_ensemble`` and ``refit`` each hand back the
    very same estimator object, that the predictions reach an accuracy of at
    least 2/3 on the test split, that the string form of the configuration
    space does not mention "boosting", and that the fitted estimator can be
    pickled.
    """
    X_train, y_train, X_test, y_test = putil.get_dataset(
        'breast_cancer',
        make_sparse=True,
    )
    estimator = AutoSklearn2Classifier(
        time_left_for_this_task=60,
        ensemble_size=0,
        delete_tmp_folder_after_terminate=False,
        dask_client=dask_client,
    )

    # Each of the three training entry points must return the estimator itself.
    fitted = estimator.fit(X_train, y_train)
    assert fitted is estimator

    with_ensemble = estimator.fit_ensemble(y_train, ensemble_size=5)
    assert with_ensemble is estimator

    refitted = estimator.refit(X_train.copy(), y_train.copy())
    assert refitted is estimator

    predictions = fitted.predict(X_test)
    accuracy = sklearn.metrics.accuracy_score(y_test, predictions)
    # The debug dump is only evaluated when the accuracy check fails.
    assert accuracy >= 2 / 3, print_debug_information(estimator)

    # With sparse input no boosting component may show up in the search space.
    search_space = str(estimator.get_configuration_space(X=X_train, y=y_train))
    assert "boosting" not in search_space

    pickle.dumps(fitted)


@pytest.mark.parametrize("class_", [AutoSklearnClassifier, AutoSklearnRegressor,
AutoSklearn2Classifier])
def test_check_estimator_signature(class_):
Expand Down