diff --git a/autosklearn/pipeline/components/classification/__init__.py b/autosklearn/pipeline/components/classification/__init__.py index 27ba3c94ac..e4acee07a1 100644 --- a/autosklearn/pipeline/components/classification/__init__.py +++ b/autosklearn/pipeline/components/classification/__init__.py @@ -1,6 +1,7 @@ __author__ = 'feurerm' from collections import OrderedDict +from typing import Type import os from ..base import AutoSklearnClassificationAlgorithm, find_components, \ @@ -15,7 +16,7 @@ _addons = ThirdPartyComponents(AutoSklearnClassificationAlgorithm) -def add_classifier(classifier): +def add_classifier(classifier: Type[AutoSklearnClassificationAlgorithm]) -> None: _addons.add_component(classifier) diff --git a/autosklearn/pipeline/components/data_preprocessing/__init__.py b/autosklearn/pipeline/components/data_preprocessing/__init__.py index cef83fa19f..3ba5981965 100644 --- a/autosklearn/pipeline/components/data_preprocessing/__init__.py +++ b/autosklearn/pipeline/components/data_preprocessing/__init__.py @@ -1,6 +1,6 @@ import os from collections import OrderedDict -from typing import Dict, Optional +from typing import Dict, Optional, Type from ConfigSpace.configuration_space import ConfigurationSpace from ConfigSpace.hyperparameters import CategoricalHyperparameter @@ -16,7 +16,7 @@ _addons = ThirdPartyComponents(AutoSklearnPreprocessingAlgorithm) -def add_preprocessor(preprocessor: AutoSklearnPreprocessingAlgorithm) -> None: +def add_preprocessor(preprocessor: Type[AutoSklearnPreprocessingAlgorithm]) -> None: _addons.add_component(preprocessor) diff --git a/autosklearn/pipeline/components/feature_preprocessing/__init__.py b/autosklearn/pipeline/components/feature_preprocessing/__init__.py index 81f1020a38..8c42d30298 100644 --- a/autosklearn/pipeline/components/feature_preprocessing/__init__.py +++ b/autosklearn/pipeline/components/feature_preprocessing/__init__.py @@ -1,5 +1,6 @@ -from collections import OrderedDict import os +from collections 
import OrderedDict +from typing import Type from ..base import AutoSklearnPreprocessingAlgorithm, find_components, \ ThirdPartyComponents, AutoSklearnChoice @@ -13,7 +14,7 @@ _addons = ThirdPartyComponents(AutoSklearnPreprocessingAlgorithm) -def add_preprocessor(preprocessor): +def add_preprocessor(preprocessor: Type[AutoSklearnPreprocessingAlgorithm]) -> None: _addons.add_component(preprocessor) diff --git a/autosklearn/pipeline/components/regression/__init__.py b/autosklearn/pipeline/components/regression/__init__.py index 28def8822f..d346f532c4 100644 --- a/autosklearn/pipeline/components/regression/__init__.py +++ b/autosklearn/pipeline/components/regression/__init__.py @@ -1,4 +1,5 @@ from collections import OrderedDict +from typing import Type import os from ..base import AutoSklearnRegressionAlgorithm, find_components, \ @@ -13,7 +14,7 @@ _addons = ThirdPartyComponents(AutoSklearnRegressionAlgorithm) -def add_regressor(regressor): +def add_regressor(regressor: Type[AutoSklearnRegressionAlgorithm]) -> None: _addons.add_component(regressor) diff --git a/doc/manual.rst b/doc/manual.rst index 7eccfdb7b2..252626666d 100644 --- a/doc/manual.rst +++ b/doc/manual.rst @@ -70,24 +70,28 @@ For a full list please have a look at the source code (in `autosklearn/pipeline/ We do also provide an example on how to restrict the classifiers to search over :ref:`sphx_glr_examples_40_advanced_example_interpretable_models.py`. -Turning off preprocessing -~~~~~~~~~~~~~~~~~~~~~~~~~ - -Preprocessing in *auto-sklearn* is divided into data preprocessing and -feature preprocessing. Data preprocessing includes One-Hot encoding of -categorical features, imputation of missing values and the normalization of -features or samples. Dataprerocessing steps cannot be turned off as this ensures -autosklearn can actually pass the data to sklearn models without error. 
- +Data preprocessing +~~~~~~~~~~~~~~~~~~ +Data preprocessing includes One-Hot encoding of categorical features, imputation +of missing values and the normalization of features or samples. These ensure that +the data that gets to the sklearn models is well formed and can be used for +training models. + +While this is necessary in general, if you'd like to disable this step, please +refer to this :ref:`example <sphx_glr_examples_80_extending_example_extending_data_preprocessor.py>`. + +Feature preprocessing +~~~~~~~~~~~~~~~~~~~~~ Feature preprocessing is a single transformer which implements for example feature selection or transformation of features into a different space (i.e. PCA). + This can be turned off by setting ``include={'feature_preprocessor'=["no_preprocessing"]}`` as shown in the example above. Resampling strategies ===================== -Examples for using holdout and cross-validation can be found in :ref:`auto-sklearn/examples/ ` +Examples for using holdout and cross-validation can be found in :ref:`auto-sklearn/examples/ `. Supported Inputs ================ diff --git a/examples/80_extending/example_extending_data_preprocessor.py b/examples/80_extending/example_extending_data_preprocessor.py index f5d27c15e5..6a92fa2bc9 100644 --- a/examples/80_extending/example_extending_data_preprocessor.py +++ b/examples/80_extending/example_extending_data_preprocessor.py @@ -23,22 +23,20 @@ class NoPreprocessing(AutoSklearnPreprocessingAlgorithm): def __init__(self, **kwargs): """ This preprocessors does not change the data """ - self.preprocessor = None + # Some internal checks make sure parameters are set + for key, val in kwargs.items(): + setattr(self, key, val) def fit(self, X, Y=None): - self.preprocessor = 0 - self.fitted_ = True return self def transform(self, X): - if self.preprocessor is None: - raise NotImplementedError() return X @staticmethod def get_properties(dataset_properties=None): return { - 'shortname': 'no', + 'shortname': 'NoPreprocessing', 'name': 'NoPreprocessing', 'handles_regression': True, 
'handles_classification': True, @@ -52,8 +50,7 @@ def get_properties(dataset_properties=None): @staticmethod def get_hyperparameter_search_space(dataset_properties=None): - cs = ConfigurationSpace() - return cs + return ConfigurationSpace() # Return an empty configuration space as there are no hyperparameters # Add NoPreprocessing component to auto-sklearn. @@ -82,6 +79,10 @@ def get_hyperparameter_search_space(dataset_properties=None): ) clf.fit(X_train, y_train) +# To check that models were found without issue when running examples +assert len(clf.get_models_with_weights()) > 0 +print(clf.sprint_statistics()) + ############################################################################ # Print prediction score and statistics # =====================================