Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion autosklearn/pipeline/components/classification/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
__author__ = 'feurerm'

from collections import OrderedDict
from typing import Type
import os

from ..base import AutoSklearnClassificationAlgorithm, find_components, \
Expand All @@ -15,7 +16,7 @@
_addons = ThirdPartyComponents(AutoSklearnClassificationAlgorithm)


def add_classifier(classifier):
def add_classifier(classifier: Type[AutoSklearnClassificationAlgorithm]) -> None:
_addons.add_component(classifier)


Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import os
from collections import OrderedDict
from typing import Dict, Optional
from typing import Dict, Optional, Type

from ConfigSpace.configuration_space import ConfigurationSpace
from ConfigSpace.hyperparameters import CategoricalHyperparameter
Expand All @@ -16,7 +16,7 @@
_addons = ThirdPartyComponents(AutoSklearnPreprocessingAlgorithm)


def add_preprocessor(preprocessor: AutoSklearnPreprocessingAlgorithm) -> None:
def add_preprocessor(preprocessor: Type[AutoSklearnPreprocessingAlgorithm]) -> None:
_addons.add_component(preprocessor)


Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from collections import OrderedDict
import os
from collections import OrderedDict
from typing import Type

from ..base import AutoSklearnPreprocessingAlgorithm, find_components, \
ThirdPartyComponents, AutoSklearnChoice
Expand All @@ -13,7 +14,7 @@
_addons = ThirdPartyComponents(AutoSklearnPreprocessingAlgorithm)


def add_preprocessor(preprocessor):
def add_preprocessor(preprocessor: Type[AutoSklearnPreprocessingAlgorithm]) -> None:
_addons.add_component(preprocessor)


Expand Down
3 changes: 2 additions & 1 deletion autosklearn/pipeline/components/regression/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from collections import OrderedDict
from typing import Type
import os

from ..base import AutoSklearnRegressionAlgorithm, find_components, \
Expand All @@ -13,7 +14,7 @@
_addons = ThirdPartyComponents(AutoSklearnRegressionAlgorithm)


def add_regressor(regressor):
def add_regressor(regressor: Type[AutoSklearnRegressionAlgorithm]) -> None:
_addons.add_component(regressor)


Expand Down
24 changes: 14 additions & 10 deletions doc/manual.rst
Original file line number Diff line number Diff line change
Expand Up @@ -70,24 +70,28 @@ For a full list please have a look at the source code (in `autosklearn/pipeline/
We do also provide an example on how to restrict the classifiers to search over
:ref:`sphx_glr_examples_40_advanced_example_interpretable_models.py`.

Turning off preprocessing
~~~~~~~~~~~~~~~~~~~~~~~~~

Preprocessing in *auto-sklearn* is divided into data preprocessing and
feature preprocessing. Data preprocessing includes One-Hot encoding of
categorical features, imputation of missing values and the normalization of
features or samples. Data preprocessing steps cannot be turned off as this ensures
autosklearn can actually pass the data to sklearn models without error.

Data preprocessing
~~~~~~~~~~~~~~~~~~
Data preprocessing includes One-Hot encoding of categorical features, imputation
of missing values and the normalization of features or samples. These ensure that
the data that gets to the sklearn models is well formed and can be used for
training models.

While this is necessary in general, if you'd like to disable this step, please
refer to this :ref:`example <sphx_glr_examples_80_extending_example_extending_data_preprocessor.py>`.

Feature preprocessing
~~~~~~~~~~~~~~~~~~~~~
Feature preprocessing is a single transformer which implements, for example, feature
selection or transformation of features into a different space (e.g. PCA).

This can be turned off by setting
``include={'feature_preprocessor': ["no_preprocessing"]}`` as shown in the example above.

Resampling strategies
=====================

Examples for using holdout and cross-validation can be found in :ref:`auto-sklearn/examples/ <examples>`
Examples for using holdout and cross-validation can be found in :ref:`auto-sklearn/examples/ <examples>`.

Supported Inputs
================
Expand Down
17 changes: 9 additions & 8 deletions examples/80_extending/example_extending_data_preprocessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,22 +23,20 @@ class NoPreprocessing(AutoSklearnPreprocessingAlgorithm):

def __init__(self, **kwargs):
""" This preprocessor does not change the data """
self.preprocessor = None
# Some internal checks make sure parameters are set
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What do you mean by some internal checks?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not entirely sure where it was, but there is a check somewhere in the pipeline that verifies certain attributes from the **kwargs are set on the object, hence the snippet right below this:

for key, val in kwargs.items():
    setattr(self, key, val)

for key, val in kwargs.items():
setattr(self, key, val)

def fit(self, X, Y=None):
self.preprocessor = 0
self.fitted_ = True
return self

def transform(self, X):
if self.preprocessor is None:
raise NotImplementedError()
return X

@staticmethod
def get_properties(dataset_properties=None):
return {
'shortname': 'no',
'shortname': 'NoPreprocessing',
'name': 'NoPreprocessing',
'handles_regression': True,
'handles_classification': True,
Expand All @@ -52,8 +50,7 @@ def get_properties(dataset_properties=None):

@staticmethod
def get_hyperparameter_search_space(dataset_properties=None):
cs = ConfigurationSpace()
return cs
return ConfigurationSpace()  # Return an empty configuration space as there are no hyperparameters


# Add NoPreprocessing component to auto-sklearn.
Expand Down Expand Up @@ -82,6 +79,10 @@ def get_hyperparameter_search_space(dataset_properties=None):
)
clf.fit(X_train, y_train)

# To check that models were found without issue when running examples
assert len(clf.get_models_with_weights()) > 0
print(clf.sprint_statistics())

############################################################################
# Print prediction score and statistics
# =====================================
Expand Down