-
Notifications
You must be signed in to change notification settings - Fork 1.3k
Closed
Labels
Description
As noted in openml/automlbenchmark#370, autosklearn fails to process sparse labels for regression. This extends further and also comes up in classification.
To reproduce:
# Reproduction script: fit auto-sklearn estimators on a sparse feature matrix
# together with several kinds of sparse targets, printing the traceback of any
# failure instead of aborting, so that every case is attempted in one run.
import traceback

import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix

from autosklearn.classification import AutoSklearnClassifier
from autosklearn.regression import AutoSklearnRegressor

# NOTE: the headers printed in the ``except`` branches report ``type(X)`` and
# ``type(y)``, i.e. the dense numpy arrays, not the sparse ``X_sparse`` /
# ``y_sparse`` objects that are actually passed to ``fit``. The strings are
# kept unchanged so the script still matches its recorded output.

# Create np arrays and zero out every entry below 0.9 so they are mostly empty
X = np.random.random(size=(1000, 5))
y = np.random.random(size=(1000, 1))
X[X < .9] = 0
y[y < .9] = 0

# Convert to scipy sparse matrices
X_sparse = csr_matrix(X)
y_sparse = csr_matrix(y)

# Case 1: regression with a scipy sparse target
model = AutoSklearnRegressor(time_left_for_this_task=30)
try:
    model.fit(X_sparse, y_sparse)
except Exception:
    print(f'\nRegression\n{type(X)=}\n{type(y)=}\n')
    traceback.print_exc()

# Case 2: binary classification, single target column holding only 0 and 1
y = np.random.random(size=(1000, 1))
y[y >= .9] = 1
y[y < .9] = 0
y_sparse = csr_matrix(y)

model = AutoSklearnClassifier(time_left_for_this_task=30)
try:
    model.fit(X_sparse, y_sparse)
except Exception:
    print(f'\nBinary Classification\n{type(X)=}\n{type(y)=}\n')
    traceback.print_exc()

# Case 3: regression with a pandas sparse Series as the target
y = np.random.random(size=(1000))
y[y < 0.9] = 0
y_sparse = pd.Series(y, dtype="Sparse[float]")

model = AutoSklearnRegressor(time_left_for_this_task=30)
try:
    model.fit(X_sparse, y_sparse)
except Exception:
    print(f'\nRegression\n{type(X)=}\n{type(y)=}\n')
    traceback.print_exc()

# Case 4: single target column holding three distinct values (0, 1, 2).
# NOTE(review): one column with more than two classes looks like a multiclass
# target rather than a multilabel one, although the header printed below says
# "Multilabel" -- confirm which was intended.
y = np.random.random(size=(1000, 1))
y[y >= .9] = 1
y[y < .9] = 0
y[0:10] = 2
y_sparse = csr_matrix(y)

model = AutoSklearnClassifier(time_left_for_this_task=30)
try:
    model.fit(X_sparse, y_sparse)
except Exception:
    print(f'\n Multilabel Classification\n{type(X)=}\n{type(y)=}\n')
    traceback.print_exc()

# Case 5: two target columns, each holding only 0 and 1.
# NOTE(review): a two-column 0/1 indicator matrix looks like a multilabel
# target rather than a multiclass one, although the header printed below says
# "Multiclass" -- confirm which was intended.
y = np.random.random(size=(1000, 2))
y[y >= .9] = 1
y[y < .9] = 0
y_sparse = csr_matrix(y)

model = AutoSklearnClassifier(time_left_for_this_task=30)
try:
    model.fit(X_sparse, y_sparse)
except Exception:
    print(f'\n Multiclass Classification\n{type(X)=}\n{type(y)=}\n')
    traceback.print_exc()
Regression
type(X)=<class 'numpy.ndarray'>
type(y)=<class 'numpy.ndarray'>
Traceback (most recent call last):
File "/home/skantify/code/asklearn/automlbenchmark_issue_370/auto-sklearn/test_case2.py", line 21, in <module>
model.fit(X_sparse, y_sparse)
File "/home/skantify/code/asklearn/automlbenchmark_issue_370/auto-sklearn/autosklearn/estimators.py", line 1068, in fit
target_type = type_of_target(y)
File "/home/skantify/code/asklearn/automlbenchmark_issue_370/auto-sklearn/.venv/lib/python3.9/site-packages/sklearn/utils/multiclass.py", line 288, in type_of_target
if y.ndim > 2 or (y.dtype == object and len(y) and
TypeError: len() of unsized object
Binary Classification
type(X)=<class 'numpy.ndarray'>
type(y)=<class 'numpy.ndarray'>
Traceback (most recent call last):
File "/home/skantify/code/asklearn/automlbenchmark_issue_370/auto-sklearn/test_case2.py", line 35, in <module>
model.fit(X_sparse, y_sparse)
File "/home/skantify/code/asklearn/automlbenchmark_issue_370/auto-sklearn/autosklearn/estimators.py", line 924, in fit
target_type = type_of_target(y)
File "/home/skantify/code/asklearn/automlbenchmark_issue_370/auto-sklearn/.venv/lib/python3.9/site-packages/sklearn/utils/multiclass.py", line 288, in type_of_target
if y.ndim > 2 or (y.dtype == object and len(y) and
TypeError: len() of unsized object
Multilabel Classification
type(X)=<class 'numpy.ndarray'>
type(y)=<class 'numpy.ndarray'>
Traceback (most recent call last):
File "/home/skantify/code/asklearn/automlbenchmark_issue_370/auto-sklearn/test_case2.py", line 50, in <module>
model.fit(X_sparse, y_sparse)
File "/home/skantify/code/asklearn/automlbenchmark_issue_370/auto-sklearn/autosklearn/estimators.py", line 924, in fit
target_type = type_of_target(y)
File "/home/skantify/code/asklearn/automlbenchmark_issue_370/auto-sklearn/.venv/lib/python3.9/site-packages/sklearn/utils/multiclass.py", line 288, in type_of_target
if y.ndim > 2 or (y.dtype == object and len(y) and
TypeError: len() of unsized object
Multiclass Classification
type(X)=<class 'numpy.ndarray'>
type(y)=<class 'numpy.ndarray'>
[ERROR] [2021-08-06 13:37:44,690:Client-AutoML(1):b73360ac-f6aa-11eb-af00-44850026e7d8] Dummy prediction failed with run state StatusType.CRASHED and additional output: {'traceback': 'TypeError: float() argument must be a string or a number, not \'csr_matrix\'\n\nThe above exception was the direct cause of the following exception:\n\nTraceback (most recent call last):\n File "/home/skantify/code/asklearn/automlbenchmark_issue_370/auto-sklearn/autosklearn/evaluation/__init__.py", line 40, in fit_predict_try_except_decorator\n return ta(queue=queue, **kwargs)\n File "/home/skantify/code/asklearn/automlbenchmark_issue_370/auto-sklearn/autosklearn/evaluation/train_evaluator.py", line 1166, in eval_holdout\n evaluator.fit_predict_and_loss(iterative=iterative)\n File "/home/skantify/code/asklearn/automlbenchmark_issue_370/auto-sklearn/autosklearn/evaluation/train_evaluator.py", line 503, in fit_predict_and_loss\n self._partial_fit_and_predict_standard(\n File "/home/skantify/code/asklearn/automlbenchmark_issue_370/auto-sklearn/autosklearn/evaluation/train_evaluator.py", line 881, in _partial_fit_and_predict_standard\n self.Y_train_targets[train_indices] = self.Y_train.iloc[train_indices] if hasattr(\nValueError: setting an array element with a sequence.\n', 'error': "ValueError('setting an array element with a sequence.')", 'configuration_origin': 'DUMMY'}.
Traceback (most recent call last):
File "/home/skantify/code/asklearn/automlbenchmark_issue_370/auto-sklearn/test_case2.py", line 64, in <module>
model.fit(X_sparse, y_sparse)
File "/home/skantify/code/asklearn/automlbenchmark_issue_370/auto-sklearn/autosklearn/estimators.py", line 941, in fit
super().fit(
File "/home/skantify/code/asklearn/automlbenchmark_issue_370/auto-sklearn/autosklearn/estimators.py", line 340, in fit
self.automl_.fit(load_models=self.load_models, **kwargs)
File "/home/skantify/code/asklearn/automlbenchmark_issue_370/auto-sklearn/autosklearn/automl.py", line 1655, in fit
return super().fit(
File "/home/skantify/code/asklearn/automlbenchmark_issue_370/auto-sklearn/autosklearn/automl.py", line 642, in fit
self.num_run += self._do_dummy_prediction(datamanager, num_run=1)
File "/home/skantify/code/asklearn/automlbenchmark_issue_370/auto-sklearn/autosklearn/automl.py", line 422, in _do_dummy_prediction
raise ValueError(
ValueError: Dummy prediction failed with run state StatusType.CRASHED and additional output: {'traceback': 'TypeError: float() argument must be a string or a number, not \'csr_matrix\'\n\nThe above exception was the direct cause of the following exception:\n\nTraceback (most recent call last):\n File "/home/skantify/code/asklearn/automlbenchmark_issue_370/auto-sklearn/autosklearn/evaluation/__init__.py", line 40, in fit_predict_try_except_decorator\n return ta(queue=queue, **kwargs)\n File "/home/skantify/code/asklearn/automlbenchmark_issue_370/auto-sklearn/autosklearn/evaluation/train_evaluator.py", line 1166, in eval_holdout\n evaluator.fit_predict_and_loss(iterative=iterative)\n File "/home/skantify/code/asklearn/automlbenchmark_issue_370/auto-sklearn/autosklearn/evaluation/train_evaluator.py", line 503, in fit_predict_and_loss\n self._partial_fit_and_predict_standard(\n File "/home/skantify/code/asklearn/automlbenchmark_issue_370/auto-sklearn/autosklearn/evaluation/train_evaluator.py", line 881, in _partial_fit_and_predict_standard\n self.Y_train_targets[train_indices] = self.Y_train.iloc[train_indices] if hasattr(\nValueError: setting an array element with a sequence.\n', 'error': "ValueError('setting an array element with a sequence.')", 'configuration_origin': 'DUMMY'}.
^CProcess ForkProcess-3:
Traceback (most recent call last):
File "/home/skantify/.pyenv/versions/3.9.5/lib/python3.9/multiprocessing/process.py", line 315, in _bootstrap
self.run()
File "/home/skantify/.pyenv/versions/3.9.5/lib/python3.9/multiprocessing/process.py", line 108, in run
self._target(*self._args, **self._kwargs)
File "/home/skantify/code/asklearn/automlbenchmark_issue_370/auto-sklearn/autosklearn/util/logging_.py", line 318, in start_log_server
receiver.serve_until_stopped()
File "/home/skantify/code/asklearn/automlbenchmark_issue_370/auto-sklearn/autosklearn/util/logging_.py", line 348, in serve_until_stopped
rd, wr, ex = select.select([self.socket.fileno()],
KeyboardInterrupt
^CException ignored in: <function AutoML.__del__ at 0x7fbfad362ca0>
Traceback (most recent call last):
File "/home/skantify/code/asklearn/automlbenchmark_issue_370/auto-sklearn/autosklearn/automl.py", line 1622, in __del__
File "/home/skantify/code/asklearn/automlbenchmark_issue_370/auto-sklearn/autosklearn/util/backend.py", line 112, in delete_directories
File "/home/skantify/code/asklearn/automlbenchmark_issue_370/auto-sklearn/autosklearn/util/backend.py", line 87, in temporary_directory
AttributeError: 'NoneType' object has no attribute 'path'
(.venv) ➜ auto-sklearn git:(sparse_y_fix) ✗
(.venv) ➜ auto-sklearn git:(sparse_y_fix) ✗ python test_case2.py
Regression
type(X)=<class 'numpy.ndarray'>
type(y)=<class 'numpy.ndarray'>
Traceback (most recent call last):
File "/home/skantify/code/asklearn/automlbenchmark_issue_370/auto-sklearn/test_case2.py", line 21, in <module>
model.fit(X_sparse, y_sparse)
File "/home/skantify/code/asklearn/automlbenchmark_issue_370/auto-sklearn/autosklearn/estimators.py", line 1068, in fit
target_type = type_of_target(y)
File "/home/skantify/code/asklearn/automlbenchmark_issue_370/auto-sklearn/.venv/lib/python3.9/site-packages/sklearn/utils/multiclass.py", line 288, in type_of_target
if y.ndim > 2 or (y.dtype == object and len(y) and
TypeError: len() of unsized object
Binary Classification
type(X)=<class 'numpy.ndarray'>
type(y)=<class 'numpy.ndarray'>
Traceback (most recent call last):
File "/home/skantify/code/asklearn/automlbenchmark_issue_370/auto-sklearn/test_case2.py", line 35, in <module>
model.fit(X_sparse, y_sparse)
File "/home/skantify/code/asklearn/automlbenchmark_issue_370/auto-sklearn/autosklearn/estimators.py", line 924, in fit
target_type = type_of_target(y)
File "/home/skantify/code/asklearn/automlbenchmark_issue_370/auto-sklearn/.venv/lib/python3.9/site-packages/sklearn/utils/multiclass.py", line 288, in type_of_target
if y.ndim > 2 or (y.dtype == object and len(y) and
TypeError: len() of unsized object
Multilabel Classification
type(X)=<class 'numpy.ndarray'>
type(y)=<class 'numpy.ndarray'>
Traceback (most recent call last):
File "/home/skantify/code/asklearn/automlbenchmark_issue_370/auto-sklearn/test_case2.py", line 62, in <module>
model.fit(X_sparse, y_sparse)
File "/home/skantify/code/asklearn/automlbenchmark_issue_370/auto-sklearn/autosklearn/estimators.py", line 924, in fit
target_type = type_of_target(y)
File "/home/skantify/code/asklearn/automlbenchmark_issue_370/auto-sklearn/.venv/lib/python3.9/site-packages/sklearn/utils/multiclass.py", line 288, in type_of_target
if y.ndim > 2 or (y.dtype == object and len(y) and
TypeError: len() of unsized object
Multiclass Classification
type(X)=<class 'numpy.ndarray'>
type(y)=<class 'numpy.ndarray'>
[ERROR] [2021-08-06 13:39:49,383:Client-AutoML(1):01812118-f6ab-11eb-b07c-44850026e7d8] Dummy prediction failed with run state StatusType.CRASHED and additional output: {'traceback': 'TypeError: float() argument must be a string or a number, not \'csr_matrix\'\n\nThe above exception was the direct cause of the following exception:\n\nTraceback (most recent call last):\n File "/home/skantify/code/asklearn/automlbenchmark_issue_370/auto-sklearn/autosklearn/evaluation/__init__.py", line 40, in fit_predict_try_except_decorator\n return ta(queue=queue, **kwargs)\n File "/home/skantify/code/asklearn/automlbenchmark_issue_370/auto-sklearn/autosklearn/evaluation/train_evaluator.py", line 1166, in eval_holdout\n evaluator.fit_predict_and_loss(iterative=iterative)\n File "/home/skantify/code/asklearn/automlbenchmark_issue_370/auto-sklearn/autosklearn/evaluation/train_evaluator.py", line 503, in fit_predict_and_loss\n self._partial_fit_and_predict_standard(\n File "/home/skantify/code/asklearn/automlbenchmark_issue_370/auto-sklearn/autosklearn/evaluation/train_evaluator.py", line 881, in _partial_fit_and_predict_standard\n self.Y_train_targets[train_indices] = self.Y_train.iloc[train_indices] if hasattr(\nValueError: setting an array element with a sequence.\n', 'error': "ValueError('setting an array element with a sequence.')", 'configuration_origin': 'DUMMY'}.
Traceback (most recent call last):
File "/home/skantify/code/asklearn/automlbenchmark_issue_370/auto-sklearn/test_case2.py", line 76, in <module>
model.fit(X_sparse, y_sparse)
File "/home/skantify/code/asklearn/automlbenchmark_issue_370/auto-sklearn/autosklearn/estimators.py", line 941, in fit
super().fit(
File "/home/skantify/code/asklearn/automlbenchmark_issue_370/auto-sklearn/autosklearn/estimators.py", line 340, in fit
self.automl_.fit(load_models=self.load_models, **kwargs)
File "/home/skantify/code/asklearn/automlbenchmark_issue_370/auto-sklearn/autosklearn/automl.py", line 1655, in fit
return super().fit(
File "/home/skantify/code/asklearn/automlbenchmark_issue_370/auto-sklearn/autosklearn/automl.py", line 642, in fit
self.num_run += self._do_dummy_prediction(datamanager, num_run=1)
File "/home/skantify/code/asklearn/automlbenchmark_issue_370/auto-sklearn/autosklearn/automl.py", line 422, in _do_dummy_prediction
raise ValueError(
ValueError: Dummy prediction failed with run state StatusType.CRASHED and additional output: {'traceback': 'TypeError: float() argument must be a string or a number, not \'csr_matrix\'\n\nThe above exception was the direct cause of the following exception:\n\nTraceback (most recent call last):\n File "/home/skantify/code/asklearn/automlbenchmark_issue_370/auto-sklearn/autosklearn/evaluation/__init__.py", line 40, in fit_predict_try_except_decorator\n return ta(queue=queue, **kwargs)\n File "/home/skantify/code/asklearn/automlbenchmark_issue_370/auto-sklearn/autosklearn/evaluation/train_evaluator.py", line 1166, in eval_holdout\n evaluator.fit_predict_and_loss(iterative=iterative)\n File "/home/skantify/code/asklearn/automlbenchmark_issue_370/auto-sklearn/autosklearn/evaluation/train_evaluator.py", line 503, in fit_predict_and_loss\n self._partial_fit_and_predict_standard(\n File "/home/skantify/code/asklearn/automlbenchmark_issue_370/auto-sklearn/autosklearn/evaluation/train_evaluator.py", line 881, in _partial_fit_and_predict_standard\n self.Y_train_targets[train_indices] = self.Y_train.iloc[train_indices] if hasattr(\nValueError: setting an array element with a sequence.\n', 'error': "ValueError('setting an array element with a sequence.')", 'configuration_origin': 'DUMMY'}.
^CError in atexit._run_exitfuncs:
Traceback (most recent call last):
File "/home/skantify/.pyenv/versions/3.9.5/lib/python3.9/multiprocessing/popen_fork.py", line 27, in poll
Process ForkProcess-59:
pid, sts = os.waitpid(self.pid, flag)
KeyboardInterrupt
Traceback (most recent call last):
File "/home/skantify/.pyenv/versions/3.9.5/lib/python3.9/multiprocessing/process.py", line 315, in _bootstrap
self.run()
File "/home/skantify/.pyenv/versions/3.9.5/lib/python3.9/multiprocessing/process.py", line 108, in run
self._target(*self._args, **self._kwargs)
File "/home/skantify/code/asklearn/automlbenchmark_issue_370/auto-sklearn/autosklearn/util/logging_.py", line 318, in start_log_server
receiver.serve_until_stopped()
File "/home/skantify/code/asklearn/automlbenchmark_issue_370/auto-sklearn/autosklearn/util/logging_.py", line 348, in serve_until_stopped
rd, wr, ex = select.select([self.socket.fileno()],
KeyboardInterrupt
Exception ignored in: <function AutoML.__del__ at 0x7f4368201ca0>
Traceback (most recent call last):
File "/home/skantify/code/asklearn/automlbenchmark_issue_370/auto-sklearn/autosklearn/automl.py", line 1615, in __del__
File "/home/skantify/code/asklearn/automlbenchmark_issue_370/auto-sklearn/autosklearn/automl.py", line 329, in _clean_logger
File "/home/skantify/.pyenv/versions/3.9.5/lib/python3.9/multiprocessing/process.py", line 165, in is_alive
File "/home/skantify/.pyenv/versions/3.9.5/lib/python3.9/multiprocessing/popen_fork.py", line 27, in poll
AttributeError: 'NoneType' object has no attribute 'waitpid'