automl · eddiebergman · Nov 3, 2021 · Oct 16, 2021 · Oct 16, 2021 · Nov 3, 2021
diff --git a/autosklearn/pipeline/components/classification/__init__.py b/autosklearn/pipeline/components/classification/__init__.py
@@ -1,6 +1,7 @@
 __author__ = 'feurerm'
 
 from collections import OrderedDict
+from typing import Type
 import os
 
 from ..base import AutoSklearnClassificationAlgorithm, find_components, \
@@ -15,7 +16,7 @@
 _addons = ThirdPartyComponents(AutoSklearnClassificationAlgorithm)
 
 
-def add_classifier(classifier):
+def add_classifier(classifier: Type[AutoSklearnClassificationAlgorithm]) -> None:
     _addons.add_component(classifier)
 
 

diff --git a/autosklearn/pipeline/components/data_preprocessing/__init__.py b/autosklearn/pipeline/components/data_preprocessing/__init__.py
@@ -1,6 +1,6 @@
 import os
 from collections import OrderedDict
-from typing import Dict, Optional
+from typing import Dict, Optional, Type
 
 from ConfigSpace.configuration_space import ConfigurationSpace
 from ConfigSpace.hyperparameters import CategoricalHyperparameter
@@ -16,7 +16,7 @@
 _addons = ThirdPartyComponents(AutoSklearnPreprocessingAlgorithm)
 
 
-def add_preprocessor(preprocessor: AutoSklearnPreprocessingAlgorithm) -> None:
+def add_preprocessor(preprocessor: Type[AutoSklearnPreprocessingAlgorithm]) -> None:
     _addons.add_component(preprocessor)
 
 

diff --git a/autosklearn/pipeline/components/feature_preprocessing/__init__.py b/autosklearn/pipeline/components/feature_preprocessing/__init__.py
@@ -1,5 +1,6 @@
-from collections import OrderedDict
 import os
+from collections import OrderedDict
+from typing import Type
 
 from ..base import AutoSklearnPreprocessingAlgorithm, find_components, \
     ThirdPartyComponents, AutoSklearnChoice
@@ -13,7 +14,7 @@
 _addons = ThirdPartyComponents(AutoSklearnPreprocessingAlgorithm)
 
 
-def add_preprocessor(preprocessor):
+def add_preprocessor(preprocessor: Type[AutoSklearnPreprocessingAlgorithm]) -> None:
     _addons.add_component(preprocessor)
 
 

diff --git a/autosklearn/pipeline/components/regression/__init__.py b/autosklearn/pipeline/components/regression/__init__.py
@@ -1,4 +1,5 @@
 from collections import OrderedDict
+from typing import Type
 import os
 
 from ..base import AutoSklearnRegressionAlgorithm, find_components, \
@@ -13,7 +14,7 @@
 _addons = ThirdPartyComponents(AutoSklearnRegressionAlgorithm)
 
 
-def add_regressor(regressor):
+def add_regressor(regressor: Type[AutoSklearnRegressionAlgorithm]) -> None:
     _addons.add_component(regressor)
 
 

diff --git a/doc/manual.rst b/doc/manual.rst
@@ -70,24 +70,28 @@ For a full list please have a look at the source code (in `autosklearn/pipeline/
 We do also provide an example on how to restrict the classifiers to search over
 :ref:`sphx_glr_examples_40_advanced_example_interpretable_models.py`.
 
-Turning off preprocessing
-~~~~~~~~~~~~~~~~~~~~~~~~~
-
-Preprocessing in *auto-sklearn* is divided into data preprocessing and
-feature preprocessing. Data preprocessing includes One-Hot encoding of
-categorical features, imputation of missing values and the normalization of
-features or samples. Dataprerocessing steps cannot be turned off as this ensures
-autosklearn can actually pass the data to sklearn models without error.
-
+Data preprocessing
+~~~~~~~~~~~~~~~~~~
+Data preprocessing includes One-Hot encoding of categorical features, imputation
+of missing values and the normalization of features or samples. These ensure that
+the data that gets to the sklearn models is well formed and can be used for
+training models.
+
+While this is necessary in general, if you'd like to disable this step, please
+refer to this :ref:`example <sphx_glr_examples_80_extending_example_extending_data_preprocessor.py>`.
+
+Feature preprocessing
+~~~~~~~~~~~~~~~~~~~~~
 Feature preprocessing is a single transformer which implements for example feature
 selection or transformation of features into a different space (i.e. PCA).
+
 This can be turned off by setting
 ``include={'feature_preprocessor'=["no_preprocessing"]}`` as shown in the example above.
 
 Resampling strategies
 =====================
 
-Examples for using holdout and cross-validation can be found in :ref:`auto-sklearn/examples/ <examples>`
+Examples for using holdout and cross-validation can be found in :ref:`auto-sklearn/examples/ <examples>`.
 
 Supported Inputs
 ================

diff --git a/examples/80_extending/example_extending_data_preprocessor.py b/examples/80_extending/example_extending_data_preprocessor.py
@@ -23,22 +23,20 @@ class NoPreprocessing(AutoSklearnPreprocessingAlgorithm):
 
     def __init__(self, **kwargs):
         """ This preprocessors does not change the data """
-        self.preprocessor = None
+        # Some internal checks make sure parameters are set
+        for key, val in kwargs.items():
+            setattr(self, key, val)
 
     def fit(self, X, Y=None):
-        self.preprocessor = 0
-        self.fitted_ = True
         return self
 
     def transform(self, X):
-        if self.preprocessor is None:
-            raise NotImplementedError()
         return X
 
     @staticmethod
     def get_properties(dataset_properties=None):
         return {
-            'shortname': 'no',
+            'shortname': 'NoPreprocessing',
             'name': 'NoPreprocessing',
             'handles_regression': True,
             'handles_classification': True,
@@ -52,8 +50,7 @@ def get_properties(dataset_properties=None):
 
     @staticmethod
     def get_hyperparameter_search_space(dataset_properties=None):
-        cs = ConfigurationSpace()
-        return cs
+        return ConfigurationSpace()  # Return an empty configuration space as there are no hyperparameters
 
 
 # Add NoPreprocessing component to auto-sklearn.
@@ -82,6 +79,10 @@ def get_hyperparameter_search_space(dataset_properties=None):
 )
 clf.fit(X_train, y_train)
 
+# To check that models were found without issue when running examples
+assert len(clf.get_models_with_weights()) > 0
+print(clf.sprint_statistics())
+
 ############################################################################
 # Print prediction score and statistics
 # =====================================