automl · mfeurer · Jan 29, 2020 · Sep 17, 2019 · Oct 2, 2019 · Oct 9, 2019
diff --git a/.gitignore b/.gitignore
@@ -53,3 +53,4 @@ number_submission
 .pypirc
 dmypy.json
 *.log
+.noseids
diff --git a/autosklearn/automl.py b/autosklearn/automl.py
@@ -148,7 +148,7 @@ def fit(
         metric: Scorer,
         X_test: Optional[np.ndarray] = None,
         y_test: Optional[np.ndarray] = None,
-        feat_type: Optional[List[bool]] = None,
+        feat_type: Optional[List[str]] = None,
         dataset_name: Optional[str] = None,
         only_return_configuration_space: Optional[bool] = False,
         load_models: bool = True,

diff --git a/autosklearn/data/abstract_data_manager.py b/autosklearn/data/abstract_data_manager.py
@@ -1,9 +1,9 @@
-# -*- encoding: utf-8 -*-
 import abc
 import numpy as np
 import scipy.sparse
 
-from autosklearn.pipeline.implementations.OneHotEncoder import OneHotEncoder
+from autosklearn.pipeline.components.data_preprocessing.data_preprocessing \
+    import DataPreprocessor
 from autosklearn.util import predict_RAM_usage
 
 
@@ -16,9 +16,8 @@ def perform_one_hot_encoding(sparse, categorical, data):
 
     rvals = []
     if any(categorical):
-        encoder = OneHotEncoder(categorical_features=categorical,
-                                dtype=np.float32,
-                                sparse=sparse)
+        encoder = DataPreprocessor(
+            categorical_features=categorical, force_sparse_output=sparse)
         rvals.append(encoder.fit_transform(data[0]))
         for d in data[1:]:
             rvals.append(encoder.transform(d))

diff --git a/autosklearn/data/xy_data_manager.py b/autosklearn/data/xy_data_manager.py
@@ -61,6 +61,6 @@ def __init__(self, X, y, X_test, y_test, task, feat_type, dataset_name):
         if self.feat_type is None:
             self.feat_type = ['Numerical'] * X.shape[1]
         if X.shape[1] != len(self.feat_type):
-            raise ValueError('X and feat type must have the same dimensions, '
+            raise ValueError('X and feat_type must have the same number of columns, '
                              'but are %d and %d.' %
                              (X.shape[1], len(self.feat_type)))
diff --git a/autosklearn/evaluation/abstract_evaluator.py b/autosklearn/evaluation/abstract_evaluator.py
@@ -156,7 +156,7 @@ def __init__(self, backend, queue, metric,
                 raise ValueError(feat)
         if np.sum(categorical_mask) > 0:
             self._init_params = {
-                'categorical_encoding:one_hot_encoding:categorical_features':
+                'data_preprocessing:categorical_features':
                     categorical_mask
             }
         else:

diff --git a/autosklearn/metalearning/files/accuracy_binary.classification_dense/configurations.csv b/autosklearn/metalearning/files/accuracy_binary.classification_dense/configurations.csv
diff --git a/autosklearn/metalearning/files/accuracy_binary.classification_sparse/configurations.csv b/autosklearn/metalearning/files/accuracy_binary.classification_sparse/configurations.csv
diff --git a/autosklearn/metalearning/files/accuracy_multiclass.classification_dense/configurations.csv b/autosklearn/metalearning/files/accuracy_multiclass.classification_dense/configurations.csv
diff --git a/autosklearn/metalearning/files/accuracy_multiclass.classification_sparse/configurations.csv b/autosklearn/metalearning/files/accuracy_multiclass.classification_sparse/configurations.csv
diff --git a/...learn/metalearning/files/average_precision_binary.classification_dense/configurations.csv b/...learn/metalearning/files/average_precision_binary.classification_dense/configurations.csv
diff --git a/...earn/metalearning/files/average_precision_binary.classification_sparse/configurations.csv b/...earn/metalearning/files/average_precision_binary.classification_sparse/configurations.csv
diff --git a/...n/metalearning/files/average_precision_multiclass.classification_dense/configurations.csv b/...n/metalearning/files/average_precision_multiclass.classification_dense/configurations.csv
diff --git a/.../metalearning/files/average_precision_multiclass.classification_sparse/configurations.csv b/.../metalearning/files/average_precision_multiclass.classification_sparse/configurations.csv
diff --git a/...learn/metalearning/files/balanced_accuracy_binary.classification_dense/configurations.csv b/...learn/metalearning/files/balanced_accuracy_binary.classification_dense/configurations.csv
diff --git a/...earn/metalearning/files/balanced_accuracy_binary.classification_sparse/configurations.csv b/...earn/metalearning/files/balanced_accuracy_binary.classification_sparse/configurations.csv
diff --git a/...n/metalearning/files/balanced_accuracy_multiclass.classification_dense/configurations.csv b/...n/metalearning/files/balanced_accuracy_multiclass.classification_dense/configurations.csv
diff --git a/.../metalearning/files/balanced_accuracy_multiclass.classification_sparse/configurations.csv b/.../metalearning/files/balanced_accuracy_multiclass.classification_sparse/configurations.csv
diff --git a/autosklearn/metalearning/files/f1_binary.classification_dense/configurations.csv b/autosklearn/metalearning/files/f1_binary.classification_dense/configurations.csv
diff --git a/autosklearn/metalearning/files/f1_binary.classification_sparse/configurations.csv b/autosklearn/metalearning/files/f1_binary.classification_sparse/configurations.csv
diff --git a/autosklearn/metalearning/files/f1_macro_binary.classification_dense/configurations.csv b/autosklearn/metalearning/files/f1_macro_binary.classification_dense/configurations.csv
diff --git a/autosklearn/metalearning/files/f1_macro_binary.classification_sparse/configurations.csv b/autosklearn/metalearning/files/f1_macro_binary.classification_sparse/configurations.csv
diff --git a/autosklearn/metalearning/files/f1_macro_multiclass.classification_dense/configurations.csv b/autosklearn/metalearning/files/f1_macro_multiclass.classification_dense/configurations.csv
diff --git a/autosklearn/metalearning/files/f1_macro_multiclass.classification_sparse/configurations.csv b/autosklearn/metalearning/files/f1_macro_multiclass.classification_sparse/configurations.csv
diff --git a/autosklearn/metalearning/files/f1_micro_binary.classification_dense/configurations.csv b/autosklearn/metalearning/files/f1_micro_binary.classification_dense/configurations.csv
diff --git a/autosklearn/metalearning/files/f1_micro_binary.classification_sparse/configurations.csv b/autosklearn/metalearning/files/f1_micro_binary.classification_sparse/configurations.csv
diff --git a/autosklearn/metalearning/files/f1_micro_multiclass.classification_dense/configurations.csv b/autosklearn/metalearning/files/f1_micro_multiclass.classification_dense/configurations.csv
diff --git a/autosklearn/metalearning/files/f1_micro_multiclass.classification_sparse/configurations.csv b/autosklearn/metalearning/files/f1_micro_multiclass.classification_sparse/configurations.csv
diff --git a/autosklearn/metalearning/files/f1_multiclass.classification_dense/configurations.csv b/autosklearn/metalearning/files/f1_multiclass.classification_dense/configurations.csv
diff --git a/autosklearn/metalearning/files/f1_multiclass.classification_sparse/configurations.csv b/autosklearn/metalearning/files/f1_multiclass.classification_sparse/configurations.csv
diff --git a/autosklearn/metalearning/files/f1_weighted_binary.classification_dense/configurations.csv b/autosklearn/metalearning/files/f1_weighted_binary.classification_dense/configurations.csv
diff --git a/autosklearn/metalearning/files/f1_weighted_binary.classification_sparse/configurations.csv b/autosklearn/metalearning/files/f1_weighted_binary.classification_sparse/configurations.csv
diff --git a/...sklearn/metalearning/files/f1_weighted_multiclass.classification_dense/configurations.csv b/...sklearn/metalearning/files/f1_weighted_multiclass.classification_dense/configurations.csv
diff --git a/...klearn/metalearning/files/f1_weighted_multiclass.classification_sparse/configurations.csv b/...klearn/metalearning/files/f1_weighted_multiclass.classification_sparse/configurations.csv
diff --git a/autosklearn/metalearning/files/log_loss_binary.classification_dense/configurations.csv b/autosklearn/metalearning/files/log_loss_binary.classification_dense/configurations.csv
diff --git a/autosklearn/metalearning/files/log_loss_binary.classification_sparse/configurations.csv b/autosklearn/metalearning/files/log_loss_binary.classification_sparse/configurations.csv
diff --git a/autosklearn/metalearning/files/log_loss_multiclass.classification_dense/configurations.csv b/autosklearn/metalearning/files/log_loss_multiclass.classification_dense/configurations.csv
diff --git a/autosklearn/metalearning/files/log_loss_multiclass.classification_sparse/configurations.csv b/autosklearn/metalearning/files/log_loss_multiclass.classification_sparse/configurations.csv
diff --git a/autosklearn/metalearning/files/pac_score_binary.classification_dense/configurations.csv b/autosklearn/metalearning/files/pac_score_binary.classification_dense/configurations.csv
diff --git a/autosklearn/metalearning/files/pac_score_binary.classification_sparse/configurations.csv b/autosklearn/metalearning/files/pac_score_binary.classification_sparse/configurations.csv
diff --git a/autosklearn/metalearning/files/pac_score_multiclass.classification_dense/configurations.csv b/autosklearn/metalearning/files/pac_score_multiclass.classification_dense/configurations.csv
diff --git a/autosklearn/metalearning/files/pac_score_multiclass.classification_sparse/configurations.csv b/autosklearn/metalearning/files/pac_score_multiclass.classification_sparse/configurations.csv
diff --git a/autosklearn/metalearning/files/precision_binary.classification_dense/configurations.csv b/autosklearn/metalearning/files/precision_binary.classification_dense/configurations.csv
diff --git a/autosklearn/metalearning/files/precision_binary.classification_sparse/configurations.csv b/autosklearn/metalearning/files/precision_binary.classification_sparse/configurations.csv
diff --git a/...sklearn/metalearning/files/precision_macro_binary.classification_dense/configurations.csv b/...sklearn/metalearning/files/precision_macro_binary.classification_dense/configurations.csv
diff --git a/...klearn/metalearning/files/precision_macro_binary.classification_sparse/configurations.csv b/...klearn/metalearning/files/precision_macro_binary.classification_sparse/configurations.csv
diff --git a/...arn/metalearning/files/precision_macro_multiclass.classification_dense/configurations.csv b/...arn/metalearning/files/precision_macro_multiclass.classification_dense/configurations.csv
diff --git a/...rn/metalearning/files/precision_macro_multiclass.classification_sparse/configurations.csv b/...rn/metalearning/files/precision_macro_multiclass.classification_sparse/configurations.csv
diff --git a/...sklearn/metalearning/files/precision_micro_binary.classification_dense/configurations.csv b/...sklearn/metalearning/files/precision_micro_binary.classification_dense/configurations.csv
diff --git a/...klearn/metalearning/files/precision_micro_binary.classification_sparse/configurations.csv b/...klearn/metalearning/files/precision_micro_binary.classification_sparse/configurations.csv
diff --git a/...arn/metalearning/files/precision_micro_multiclass.classification_dense/configurations.csv b/...arn/metalearning/files/precision_micro_multiclass.classification_dense/configurations.csv
diff --git a/...rn/metalearning/files/precision_micro_multiclass.classification_sparse/configurations.csv b/...rn/metalearning/files/precision_micro_multiclass.classification_sparse/configurations.csv
diff --git a/autosklearn/metalearning/files/precision_multiclass.classification_dense/configurations.csv b/autosklearn/metalearning/files/precision_multiclass.classification_dense/configurations.csv
diff --git a/autosklearn/metalearning/files/precision_multiclass.classification_sparse/configurations.csv b/autosklearn/metalearning/files/precision_multiclass.classification_sparse/configurations.csv
diff --git a/...earn/metalearning/files/precision_weighted_binary.classification_dense/configurations.csv b/...earn/metalearning/files/precision_weighted_binary.classification_dense/configurations.csv
diff --git a/...arn/metalearning/files/precision_weighted_binary.classification_sparse/configurations.csv b/...arn/metalearning/files/precision_weighted_binary.classification_sparse/configurations.csv
diff --git a/.../metalearning/files/precision_weighted_multiclass.classification_dense/configurations.csv b/.../metalearning/files/precision_weighted_multiclass.classification_dense/configurations.csv
diff --git a/...metalearning/files/precision_weighted_multiclass.classification_sparse/configurations.csv b/...metalearning/files/precision_weighted_multiclass.classification_sparse/configurations.csv
diff --git a/autosklearn/metalearning/files/recall_binary.classification_dense/configurations.csv b/autosklearn/metalearning/files/recall_binary.classification_dense/configurations.csv
diff --git a/autosklearn/metalearning/files/recall_binary.classification_sparse/configurations.csv b/autosklearn/metalearning/files/recall_binary.classification_sparse/configurations.csv
diff --git a/autosklearn/metalearning/files/recall_macro_binary.classification_dense/configurations.csv b/autosklearn/metalearning/files/recall_macro_binary.classification_dense/configurations.csv
diff --git a/autosklearn/metalearning/files/recall_macro_binary.classification_sparse/configurations.csv b/autosklearn/metalearning/files/recall_macro_binary.classification_sparse/configurations.csv
diff --git a/...klearn/metalearning/files/recall_macro_multiclass.classification_dense/configurations.csv b/...klearn/metalearning/files/recall_macro_multiclass.classification_dense/configurations.csv
diff --git a/...learn/metalearning/files/recall_macro_multiclass.classification_sparse/configurations.csv b/...learn/metalearning/files/recall_macro_multiclass.classification_sparse/configurations.csv
diff --git a/autosklearn/metalearning/files/recall_micro_binary.classification_dense/configurations.csv b/autosklearn/metalearning/files/recall_micro_binary.classification_dense/configurations.csv
diff --git a/autosklearn/metalearning/files/recall_micro_binary.classification_sparse/configurations.csv b/autosklearn/metalearning/files/recall_micro_binary.classification_sparse/configurations.csv
diff --git a/...klearn/metalearning/files/recall_micro_multiclass.classification_dense/configurations.csv b/...klearn/metalearning/files/recall_micro_multiclass.classification_dense/configurations.csv
diff --git a/...learn/metalearning/files/recall_micro_multiclass.classification_sparse/configurations.csv b/...learn/metalearning/files/recall_micro_multiclass.classification_sparse/configurations.csv
diff --git a/autosklearn/metalearning/files/recall_multiclass.classification_dense/configurations.csv b/autosklearn/metalearning/files/recall_multiclass.classification_dense/configurations.csv
diff --git a/autosklearn/metalearning/files/recall_multiclass.classification_sparse/configurations.csv b/autosklearn/metalearning/files/recall_multiclass.classification_sparse/configurations.csv
diff --git a/...sklearn/metalearning/files/recall_weighted_binary.classification_dense/configurations.csv b/...sklearn/metalearning/files/recall_weighted_binary.classification_dense/configurations.csv
diff --git a/...klearn/metalearning/files/recall_weighted_binary.classification_sparse/configurations.csv b/...klearn/metalearning/files/recall_weighted_binary.classification_sparse/configurations.csv
diff --git a/...arn/metalearning/files/recall_weighted_multiclass.classification_dense/configurations.csv b/...arn/metalearning/files/recall_weighted_multiclass.classification_dense/configurations.csv
diff --git a/...rn/metalearning/files/recall_weighted_multiclass.classification_sparse/configurations.csv b/...rn/metalearning/files/recall_weighted_multiclass.classification_sparse/configurations.csv
diff --git a/autosklearn/metalearning/files/roc_auc_binary.classification_dense/configurations.csv b/autosklearn/metalearning/files/roc_auc_binary.classification_dense/configurations.csv
diff --git a/autosklearn/metalearning/files/roc_auc_binary.classification_sparse/configurations.csv b/autosklearn/metalearning/files/roc_auc_binary.classification_sparse/configurations.csv
diff --git a/autosklearn/metalearning/files/roc_auc_multiclass.classification_dense/configurations.csv b/autosklearn/metalearning/files/roc_auc_multiclass.classification_dense/configurations.csv
diff --git a/autosklearn/metalearning/files/roc_auc_multiclass.classification_sparse/configurations.csv b/autosklearn/metalearning/files/roc_auc_multiclass.classification_sparse/configurations.csv
diff --git a/autosklearn/metalearning/metafeatures/metafeatures.py b/autosklearn/metalearning/metafeatures/metafeatures.py
@@ -12,11 +12,11 @@
 import sklearn.model_selection
 from sklearn.utils import check_array
 from sklearn.multiclass import OneVsRestClassifier
-
 from sklearn.impute import SimpleImputer
-from autosklearn.pipeline.implementations.OneHotEncoder import OneHotEncoder
 from sklearn.preprocessing import StandardScaler
 
+from autosklearn.pipeline.components.data_preprocessing.data_preprocessing \
+    import DataPreprocessor
 from autosklearn.util.logging_ import get_logger
 from .metafeature import MetaFeature, HelperFunction, DatasetMetafeatures, \
     MetaFeatureValue
@@ -947,16 +947,9 @@ def calculate_all_metafeatures(X, y, categorical, dataset_name,
                 # TODO make sure this is done as efficient as possible (no copy for
                 # sparse matrices because of wrong sparse format)
                 sparse = scipy.sparse.issparse(X)
-                if any(categorical):
-                    ohe = OneHotEncoder(categorical_features=categorical, sparse=True)
-                    X_transformed = ohe.fit_transform(X)
-                else:
-                    X_transformed = X
-                imputer = SimpleImputer(strategy='mean', copy=False)
-                X_transformed = imputer.fit_transform(X_transformed)
-                center = not scipy.sparse.isspmatrix(X_transformed)
-                standard_scaler = StandardScaler(copy=False, with_mean=center)
-                X_transformed = standard_scaler.fit_transform(X_transformed)
+                DPP = DataPreprocessor(
+                    categorical_features=categorical, force_sparse_output=True)
+                X_transformed = DPP.fit_transform(X)
                 categorical_transformed = [False] * X_transformed.shape[1]
 
                 # Densify the transformed matrix

diff --git a/autosklearn/pipeline/base.py b/autosklearn/pipeline/base.py
@@ -193,15 +193,15 @@ def set_hyperparameters(self, configuration, init_params=None):
             else:
                 sub_init_params_dict = None
 
-            if isinstance(node, (AutoSklearnChoice, AutoSklearnComponent)):
+            if isinstance(node, (AutoSklearnChoice, AutoSklearnComponent, BasePipeline)):
                 node.set_hyperparameters(configuration=sub_configuration,
                                          init_params=sub_init_params_dict)
             else:
                 raise NotImplementedError('Not supported yet!')
 
         return self
 
-    def get_hyperparameter_search_space(self):
+    def get_hyperparameter_search_space(self, dataset_properties=None):
         """Return the configuration space for the CASH problem.
 
         Returns

diff --git a/autosklearn/pipeline/classification.py b/autosklearn/pipeline/classification.py
@@ -8,20 +8,14 @@
 from ConfigSpace.configuration_space import ConfigurationSpace
 from ConfigSpace.forbidden import ForbiddenEqualsClause, ForbiddenAndConjunction
 
+from autosklearn.pipeline.components.data_preprocessing.data_preprocessing \
+    import DataPreprocessor
 from autosklearn.pipeline.components import classification as \
     classification_components
-from autosklearn.pipeline.components.data_preprocessing import rescaling as \
-    rescaling_components
 from autosklearn.pipeline.components.data_preprocessing.balancing.balancing import \
     Balancing
-from autosklearn.pipeline.components.data_preprocessing.imputation.imputation \
-    import Imputation
-from autosklearn.pipeline.components.data_preprocessing.one_hot_encoding \
-    import OHEChoice
 from autosklearn.pipeline.components import feature_preprocessing as \
     feature_preprocessing_components
-from autosklearn.pipeline.components.data_preprocessing.variance_threshold.variance_threshold \
-    import VarianceThreshold
 from autosklearn.pipeline.base import BasePipeline
 from autosklearn.pipeline.constants import SPARSE
 
@@ -41,7 +35,7 @@ class SimpleClassificationPipeline(ClassifierMixin, BasePipeline):
 
     Parameters
     ----------
-    configuration : ConfigSpace.configuration_space.Configuration
+    config : ConfigSpace.configuration_space.Configuration
         The configuration to evaluate.
 
     random_state : int, RandomState instance or None, optional (default=None)
@@ -91,7 +85,7 @@ def fit_transformer(self, X, y, fit_params=None):
             balancing = Balancing(strategy='weighting')
             _init_params, _fit_params = balancing.get_weights(
                 y, self.configuration['classifier:__choice__'],
-                self.configuration['preprocessor:__choice__'],
+                self.configuration['feature_preprocessor:__choice__'],
                 {}, {})
             _init_params.update(self._init_params)
             self.set_hyperparameters(configuration=self.configuration,
@@ -181,7 +175,7 @@ def _get_hyperparameter_search_space(self, include=None, exclude=None,
             exclude=exclude, include=include, pipeline=self.steps)
 
         classifiers = cs.get_hyperparameter('classifier:__choice__').choices
-        preprocessors = cs.get_hyperparameter('preprocessor:__choice__').choices
+        preprocessors = cs.get_hyperparameter('feature_preprocessor:__choice__').choices
         available_classifiers = self._final_estimator.get_available_components(
             dataset_properties)
 
@@ -197,23 +191,21 @@ def _get_hyperparameter_search_space(self, include=None, exclude=None,
                 if 'densifier' in preprocessors:
                     while True:
                         try:
+                            forb_cls = ForbiddenEqualsClause(
+                                cs.get_hyperparameter('classifier:__choice__'), key)
+                            forb_fpp = ForbiddenEqualsClause(cs.get_hyperparameter(
+                                'feature_preprocessor:__choice__'), 'densifier')
                             cs.add_forbidden_clause(
-                                ForbiddenAndConjunction(
-                                    ForbiddenEqualsClause(
-                                        cs.get_hyperparameter(
-                                            'classifier:__choice__'), key),
-                                    ForbiddenEqualsClause(
-                                        cs.get_hyperparameter(
-                                            'preprocessor:__choice__'), 'densifier')
-                                ))
+                                ForbiddenAndConjunction(forb_cls, forb_fpp))
                             # Success
                             break
                         except ValueError:
                             # Change the default and try again
                             try:
                                 default = possible_default_classifier.pop()
                             except IndexError:
-                                raise ValueError("Cannot find a legal default configuration.")
+                                raise ValueError(
+                                    "Cannot find a legal default configuration.")
                             cs.get_hyperparameter(
                                 'classifier:__choice__').default_value = default
 
@@ -236,7 +228,7 @@ def _get_hyperparameter_search_space(self, include=None, exclude=None,
                         ForbiddenEqualsClause(cs.get_hyperparameter(
                             "classifier:__choice__"), c),
                         ForbiddenEqualsClause(cs.get_hyperparameter(
-                            "preprocessor:__choice__"), f)))
+                            "feature_preprocessor:__choice__"), f)))
                     break
                 except KeyError:
                     break
@@ -265,7 +257,7 @@ def _get_hyperparameter_search_space(self, include=None, exclude=None,
                 try:
                     cs.add_forbidden_clause(ForbiddenAndConjunction(
                         ForbiddenEqualsClause(cs.get_hyperparameter(
-                            "preprocessor:__choice__"), f),
+                            "feature_preprocessor:__choice__"), f),
                         ForbiddenEqualsClause(cs.get_hyperparameter(
                             "classifier:__choice__"), c)))
                     break
@@ -290,27 +282,20 @@ def _get_pipeline(self):
 
         default_dataset_properties = {'target_type': 'classification'}
 
-        # Add the always active preprocessing components
-
-        steps.extend(
-            [["categorical_encoding", OHEChoice(default_dataset_properties)],
-             ["imputation", Imputation()],
-             ["variance_threshold", VarianceThreshold()],
-             ["rescaling",
-              rescaling_components.RescalingChoice(default_dataset_properties)],
-             ["balancing", Balancing()]])
-
-        # Add the preprocessing component
-        steps.append(['preprocessor',
-                      feature_preprocessing_components.FeaturePreprocessorChoice(
-                          default_dataset_properties)])
-
-        # Add the classification component
-        steps.append(['classifier',
-                      classification_components.ClassifierChoice(
-                          default_dataset_properties)])
+        steps.extend([
+            ["data_preprocessing",
+                DataPreprocessor(dataset_properties=default_dataset_properties)],
+            ["balancing",
+                Balancing()],
+            ["feature_preprocessor",
+                feature_preprocessing_components.FeaturePreprocessorChoice(
+                    default_dataset_properties)],
+            ['classifier',
+                classification_components.ClassifierChoice(
+                    default_dataset_properties)]
+        ])
+
         return steps
 
     def _get_estimator_hyperparameter_name(self):
         return "classifier"
-
diff --git a/autosklearn/pipeline/components/data_preprocessing/balancing/balancing.py b/autosklearn/pipeline/components/data_preprocessing/balancing/balancing.py
@@ -55,7 +55,7 @@ def get_weights(self, Y, classifier, preprocessor, init_params, fit_params):
             if classifier in clf_:
                 fit_params['classifier:sample_weight'] = sample_weights
             if preprocessor in pre_:
-                fit_params['preprocessor:sample_weight'] = sample_weights
+                fit_params['feature_preprocessor:sample_weight'] = sample_weights
 
         # Classifiers which can adjust sample weights themselves via the
         # argument `class_weight`
@@ -66,7 +66,7 @@ def get_weights(self, Y, classifier, preprocessor, init_params, fit_params):
         if classifier in clf_:
             init_params['classifier:class_weight'] = 'balanced'
         if preprocessor in pre_:
-            init_params['preprocessor:class_weight'] = 'balanced'
+            init_params['feature_preprocessor:class_weight'] = 'balanced'
 
         clf_ = ['ridge']
         if classifier in clf_:

diff --git a/...reprocessing/one_hot_encoding/__init__.py → ...ocessing/categorical_encoding/__init__.py b/...reprocessing/one_hot_encoding/__init__.py → ...ocessing/categorical_encoding/__init__.py
@@ -94,4 +94,4 @@ def set_hyperparameters(self, configuration, init_params=None):
         return self
 
     def transform(self, X):
-        return self.choice.transform(X)
+        return self.choice.transform(X)
diff --git a/...rocessing/one_hot_encoding/no_encoding.py → ...ssing/categorical_encoding/no_encoding.py b/...rocessing/one_hot_encoding/no_encoding.py → ...ssing/categorical_encoding/no_encoding.py
@@ -1,15 +1,7 @@
-import numpy as np
-
-import autosklearn.pipeline.implementations.OneHotEncoder
-
 from ConfigSpace.configuration_space import ConfigurationSpace
-from ConfigSpace.hyperparameters import CategoricalHyperparameter, \
-    UniformFloatHyperparameter
-from ConfigSpace.conditions import EqualsCondition
-
 from autosklearn.pipeline.components.base import \
     AutoSklearnPreprocessingAlgorithm
-from autosklearn.pipeline.constants import *
+from autosklearn.pipeline.constants import DENSE, SPARSE, UNSIGNED_DATA, INPUT
 
 
 class NoEncoding(AutoSklearnPreprocessingAlgorithm):

diff --git a/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/one_hot_encoding.py b/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/one_hot_encoding.py
@@ -0,0 +1,49 @@
+import scipy.sparse
+
+from sklearn.preprocessing import OneHotEncoder as DenseOneHotEncoder
+
+from ConfigSpace.configuration_space import ConfigurationSpace
+
+from autosklearn.pipeline.implementations.SparseOneHotEncoder import SparseOneHotEncoder
+from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm
+from autosklearn.pipeline.constants import DENSE, SPARSE, UNSIGNED_DATA, INPUT
+
+
+class OneHotEncoder(AutoSklearnPreprocessingAlgorithm):
+    """One-hot encode the input with a backend chosen at fit time.
+
+    ``SparseOneHotEncoder`` is used when ``X`` is a scipy sparse matrix;
+    otherwise scikit-learn's ``OneHotEncoder`` is used with dense output.
+    The component exposes no hyperparameters.
+    """
+
+    def __init__(self, random_state=None):
+        self.random_state = random_state
+        # Populated by ``fit``. Initialised here so that ``transform`` can
+        # detect an unfitted instance through its ``is None`` guard instead
+        # of failing with an AttributeError on a missing attribute.
+        self.preprocessor = None
+
+    def fit(self, X, y=None):
+        """Select the encoder matching the sparsity of ``X`` and fit it.
+
+        Returns
+        -------
+        self
+        """
+        if scipy.sparse.issparse(X):
+            self.preprocessor = SparseOneHotEncoder()
+        else:
+            # handle_unknown='ignore': do not raise at transform time on
+            # categories that were not seen during fit.
+            self.preprocessor = DenseOneHotEncoder(
+                sparse=False, categories='auto', handle_unknown='ignore')
+        self.preprocessor.fit(X, y)
+        return self
+
+    def transform(self, X):
+        """Encode ``X`` with the encoder created in ``fit``.
+
+        Raises
+        ------
+        NotImplementedError
+            If no encoder has been fitted yet.
+        """
+        if self.preprocessor is None:
+            raise NotImplementedError()
+        return self.preprocessor.transform(X)
+
+    def fit_transform(self, X, y=None):
+        """Fit on ``X`` and return the encoded ``X``."""
+        return self.fit(X, y).transform(X)
+
+    @staticmethod
+    def get_properties(dataset_properties=None):
+        """Return the static capability description of this component."""
+        return {'shortname': '1Hot',
+                'name': 'One Hot Encoder',
+                'handles_regression': True,
+                'handles_classification': True,
+                'handles_multiclass': True,
+                'handles_multilabel': True,
+                # TODO find out if this is right!
+                'handles_sparse': True,
+                'handles_dense': True,
+                'input': (DENSE, SPARSE, UNSIGNED_DATA),
+                'output': (INPUT,), }
+
+    @staticmethod
+    def get_hyperparameter_search_space(dataset_properties=None):
+        """Return an empty configuration space (nothing to tune)."""
+        return ConfigurationSpace()
diff --git a/autosklearn/pipeline/components/data_preprocessing/category_shift/__init__.py b/autosklearn/pipeline/components/data_preprocessing/category_shift/__init__.py
diff --git a/autosklearn/pipeline/components/data_preprocessing/category_shift/category_shift.py b/autosklearn/pipeline/components/data_preprocessing/category_shift/category_shift.py
@@ -0,0 +1,55 @@
+import autosklearn.pipeline.implementations.CategoryShift
+
+from ConfigSpace.configuration_space import ConfigurationSpace
+from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm
+from autosklearn.pipeline.constants import DENSE, SPARSE, UNSIGNED_DATA, INPUT
+
+
+class CategoryShift(AutoSklearnPreprocessingAlgorithm):
+    """ Add 3 to every category.
+    Down in the pipeline, category 2 will be attributed to missing values,
+    category 1 will be assigned to low-occurrence categories, and category 0
+    is not used, so as to provide compatibility with sparse matrices.
+    """
+
+    def __init__(self, random_state=None):
+        # Keep the constructor argument as an attribute (as the sibling
+        # OneHotEncoder component does) so that scikit-learn style
+        # parameter introspection can read it back.
+        self.random_state = random_state
+        # Populated by ``fit``. Initialised here so that ``transform`` can
+        # detect an unfitted instance through its ``is None`` guard instead
+        # of failing with an AttributeError on a missing attribute.
+        self.preprocessor = None
+
+    def fit(self, X, y=None):
+        """Create the underlying CategoryShift implementation and fit it.
+
+        Returns
+        -------
+        self
+        """
+        self.preprocessor = autosklearn.pipeline.implementations.CategoryShift\
+            .CategoryShift()
+        self.preprocessor.fit(X, y)
+        return self
+
+    def transform(self, X):
+        """Apply the fitted shift to ``X``.
+
+        Raises
+        ------
+        NotImplementedError
+            If no preprocessor has been fitted yet.
+        """
+        if self.preprocessor is None:
+            raise NotImplementedError()
+        return self.preprocessor.transform(X)
+
+    def fit_transform(self, X, y=None):
+        """Fit on ``X`` and return the shifted ``X``."""
+        return self.fit(X, y).transform(X)
+
+    @staticmethod
+    def get_properties(dataset_properties=None):
+        """Return the static capability description of this component."""
+        return {'shortname': 'CategShift',
+                'name': 'Category Shift',
+                'handles_missing_values': True,
+                'handles_nominal_values': True,
+                'handles_numerical_features': True,
+                'prefers_data_scaled': False,
+                'prefers_data_normalized': False,
+                'handles_regression': True,
+                'handles_classification': True,
+                'handles_multiclass': True,
+                'handles_multilabel': True,
+                'is_deterministic': True,
+                # TODO find out if this is right!
+                'handles_sparse': True,
+                'handles_dense': True,
+                'input': (DENSE, SPARSE, UNSIGNED_DATA),
+                'output': (INPUT,),
+                'preferred_dtype': None}
+
+    @staticmethod
+    def get_hyperparameter_search_space(dataset_properties=None):
+        """Return an empty configuration space (nothing to tune)."""
+        return ConfigurationSpace()
-Original file line number
+Diff line change
@@ Expand Up / @@ -53,3 +53,4 @@ number_submission @@
     .pypirc
     dmypy.json
     *.log
+    .noseids