
Commit a5a2ae4

Merge 3fffcd3 into a275763
2 parents a275763 + 3fffcd3 commit a5a2ae4

File tree

8 files changed: +329 -212 lines changed


autosklearn/ensemble_builder.py

Lines changed: 113 additions & 107 deletions
Large diffs are not rendered by default.

autosklearn/ensembles/ensemble_selection.py

Lines changed: 15 additions & 17 deletions
@@ -6,7 +6,7 @@
 
 from autosklearn.constants import TASK_TYPES
 from autosklearn.ensembles.abstract_ensemble import AbstractEnsemble
-from autosklearn.metrics import Scorer, calculate_score
+from autosklearn.metrics import Scorer, calculate_loss
 from autosklearn.pipeline.base import BasePipeline
 
 
@@ -100,7 +100,7 @@ def _fast(
             dtype=np.float64,
         )
         for i in range(ensemble_size):
-            scores = np.zeros(
+            losses = np.zeros(
                 (len(predictions)),
                 dtype=np.float64,
             )
@@ -129,24 +129,23 @@ def _fast(
                     out=fant_ensemble_prediction
                 )
 
-                # Calculate score is versatile and can return a dict of score
+                # calculate_loss is versatile and can return a dict of losses
                 # when scoring_functions=None, we know it will be a float
-                calculated_score = cast(
+                losses[j] = cast(
                     float,
-                    calculate_score(
+                    calculate_loss(
                         solution=labels,
                         prediction=fant_ensemble_prediction,
                         task_type=self.task_type,
                         metric=self.metric,
                         scoring_functions=None
                     )
                 )
-                scores[j] = self.metric._optimum - calculated_score
 
-            all_best = np.argwhere(scores == np.nanmin(scores)).flatten()
+            all_best = np.argwhere(losses == np.nanmin(losses)).flatten()
             best = self.random_state.choice(all_best)
             ensemble.append(predictions[best])
-            trajectory.append(scores[best])
+            trajectory.append(losses[best])
             order.append(best)
 
             # Handle special case
@@ -155,7 +154,7 @@ def _fast(
 
         self.indices_ = order
         self.trajectory_ = trajectory
-        self.train_score_ = trajectory[-1]
+        self.train_loss_ = trajectory[-1]
 
     def _slow(
         self,
@@ -172,30 +171,29 @@ def _slow(
         ensemble_size = self.ensemble_size
 
         for i in range(ensemble_size):
-            scores = np.zeros(
+            losses = np.zeros(
                 [np.shape(predictions)[0]],
                 dtype=np.float64,
             )
             for j, pred in enumerate(predictions):
                 ensemble.append(pred)
                 ensemble_prediction = np.mean(np.array(ensemble), axis=0)
-                # Calculate score is versatile and can return a dict of score
+                # calculate_loss is versatile and can return a dict of losses
                 # when scoring_functions=None, we know it will be a float
-                calculated_score = cast(
+                losses[j] = cast(
                     float,
-                    calculate_score(
+                    calculate_loss(
                         solution=labels,
                         prediction=ensemble_prediction,
                         task_type=self.task_type,
                         metric=self.metric,
                         scoring_functions=None
                     )
                 )
-                scores[j] = self.metric._optimum - calculated_score
                 ensemble.pop()
-            best = np.nanargmin(scores)
+            best = np.nanargmin(losses)
             ensemble.append(predictions[best])
-            trajectory.append(scores[best])
+            trajectory.append(losses[best])
             order.append(best)
 
             # Handle special case
@@ -210,7 +208,7 @@ def _slow(
             trajectory,
            dtype=np.float64,
         )
-        self.train_score_ = trajectory[-1]
+        self.train_loss_ = trajectory[-1]
 
     def _calculate_weights(self) -> None:
         ensemble_members = Counter(self.indices_).most_common()
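
Taken together, _fast and _slow now build the trajectory from losses rather than scores: the per-candidate self.metric._optimum - calculated_score conversion disappears because calculate_loss already returns a minimization-ready value, so np.nanmin / np.nanargmin can be applied to its output directly. A minimal standalone sketch of that selection step (toy loss values, not from a real run; rng stands in for self.random_state):

import numpy as np

rng = np.random.RandomState(0)

# hypothetical per-candidate losses returned by calculate_loss in one iteration
losses = np.array([0.21, 0.17, 0.17, 0.30])

# _fast path: gather all tied minima and break the tie at random
all_best = np.argwhere(losses == np.nanmin(losses)).flatten()  # array([1, 2])
best = rng.choice(all_best)

# _slow path: a plain nanargmin is enough, no tie-breaking
best_slow = np.nanargmin(losses)  # 1

# either way the winning candidate's loss goes into the trajectory,
# so train_loss_ (the last trajectory entry) now stores a loss, not a score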

autosklearn/evaluation/abstract_evaluator.py

Lines changed: 16 additions & 24 deletions
@@ -19,7 +19,7 @@
 from autosklearn.pipeline.implementations.util import (
     convert_multioutput_multiclass_to_multilabel
 )
-from autosklearn.metrics import calculate_score
+from autosklearn.metrics import calculate_loss, Scorer
 from autosklearn.util.logging_ import get_named_client_logger
 
 from ConfigSpace import Configuration
@@ -236,17 +236,18 @@ def _get_model(self):
                                  init_params=self._init_params)
         return model
 
-    def _loss(self, y_true, y_hat, scoring_functions=None):
-        """Auto-sklearn follows a minimization goal, so the make_scorer
-        sign is used as a guide to obtain the value to reduce.
-
-        On this regard, to optimize a metric:
-            1- score is calculared with calculate_score, with the caveat, that if
-            for the metric greater is not better, a negative score is returned.
-            2- the err (the optimization goal) is then:
-                optimum - (metric.sign * actual_score)
-                For accuracy for example: optimum(1) - (+1 * actual score)
-                For logloss for example: optimum(0) - (-1 * actual score)
+    def _loss(self, y_true: np.ndarray, y_hat: np.ndarray,
+              scoring_functions: typing.Optional[typing.List[Scorer]] = None
+              ) -> typing.Union[float, typing.Dict[str, float]]:
+        """Auto-sklearn follows a minimization goal.
+        The calculate_loss internally translate a score function to
+        a minimization problem.
+
+        For a dummy prediction, the worst result is assumed.
+
+        Parameters
+        ----------
+            y_true
         """
         scoring_functions = (
             self.scoring_functions
@@ -255,23 +256,14 @@ def _loss(self, y_true, y_hat, scoring_functions=None):
         )
         if not isinstance(self.configuration, Configuration):
             if scoring_functions:
-                return {self.metric.name: 1.0}
+                return {self.metric.name: self.metric._worst_possible_result}
             else:
-                return 1.0
+                return self.metric._worst_possible_result
 
-        score = calculate_score(
+        return calculate_loss(
             y_true, y_hat, self.task_type, self.metric,
             scoring_functions=scoring_functions)
 
-        if hasattr(score, '__len__'):
-            err = {metric.name: metric._optimum - score[metric.name]
-                   for metric in scoring_functions}
-            err[self.metric.name] = self.metric._optimum - score[self.metric.name]
-        else:
-            err = self.metric._optimum - score
-
-        return err
-
     def finish_up(self, loss, train_loss, opt_pred, valid_pred, test_pred,
                   additional_run_info, file_output, final_call, status):
         """This function does everything necessary after the fitting is done:

autosklearn/evaluation/test_evaluator.py

Lines changed: 3 additions & 14 deletions
@@ -5,8 +5,7 @@
     AbstractEvaluator,
     _fit_and_suppress_warnings,
 )
-from autosklearn.metrics import calculate_score, CLASSIFICATION_METRICS, REGRESSION_METRICS
-from autosklearn.constants import CLASSIFICATION_TASKS
+from autosklearn.metrics import calculate_loss
 
 
 __all__ = [
@@ -71,7 +70,7 @@ def predict_and_loss(self, train=False):
         if train:
             Y_pred = self.predict_function(self.X_train, self.model,
                                            self.task_type, self.Y_train)
-            score = calculate_score(
+            err = calculate_loss(
                 solution=self.Y_train,
                 prediction=Y_pred,
                 task_type=self.task_type,
@@ -80,23 +79,13 @@ def predict_and_loss(self, train=False):
         else:
             Y_pred = self.predict_function(self.X_test, self.model,
                                            self.task_type, self.Y_train)
-            score = calculate_score(
+            err = calculate_loss(
                 solution=self.Y_test,
                 prediction=Y_pred,
                 task_type=self.task_type,
                 metric=self.metric,
                 scoring_functions=self.scoring_functions)
 
-        if hasattr(score, '__len__'):
-            if self.task_type in CLASSIFICATION_TASKS:
-                err = {key: metric._optimum - score[key] for key, metric in
-                       CLASSIFICATION_METRICS.items() if key in score}
-            else:
-                err = {key: metric._optimum - score[key] for key, metric in
-                       REGRESSION_METRICS.items() if key in score}
-        else:
-            err = self.metric._optimum - score
-
         return err, Y_pred, None, None
 
 
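
The block removed above was the last caller translating scores into losses by hand; the CLASSIFICATION_METRICS / REGRESSION_METRICS comprehension now effectively lives inside calculate_loss. A toy illustration of the same conversion with made-up scorer attributes and score values (not the library's real objects); per the comment added to get_metric_score below, calculate_score now returns the raw score_func value, so the sign is re-applied here:

# per-metric attributes as they appear in this diff: _optimum and _sign
toy_metrics = {
    "accuracy": {"_optimum": 1.0, "_sign": 1.0},
    "log_loss": {"_optimum": 0.0, "_sign": -1.0},
}

# made-up raw score_func values, as calculate_score now returns them
score = {"accuracy": 0.9, "log_loss": 0.4}

# calculate_loss applies optimum - sign * score per requested metric, which is
# numerically the same conversion the removed comprehension used to perform
err = {name: m["_optimum"] - m["_sign"] * score[name] for name, m in toy_metrics.items()}
print(err)  # {'accuracy': 0.09999999999999998, 'log_loss': 0.4}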

autosklearn/metrics/__init__.py

Lines changed: 68 additions & 5 deletions
@@ -1,6 +1,6 @@
 from abc import ABCMeta, abstractmethod
 from functools import partial
-from typing import Any, Callable, Dict, List, Optional, Union
+from typing import Any, Callable, Dict, List, Optional, Union, cast
 
 import numpy as np
 
@@ -353,7 +353,7 @@ def calculate_score(
             for metric_ in scoring_functions:
 
                 try:
-                    score_dict[metric_.name] = metric_(solution, cprediction)
+                    score_dict[metric_.name] = metric_._sign * metric_(solution, cprediction)
                 except ValueError as e:
                     print(e, e.args[0])
                     if e.args[0] == "Mean Squared Logarithmic Error cannot be used when " \
@@ -369,7 +369,7 @@ def calculate_score(
                 # handle?
 
                 try:
-                    score_dict[metric_.name] = metric_(solution, prediction)
+                    score_dict[metric_.name] = metric_._sign * metric_(solution, prediction)
                 except ValueError as e:
                     if e.args[0] == 'multiclass format is not supported':
                         continue
@@ -397,10 +397,73 @@ def get_metric_score(
        solution: np.ndarray,
        task_type: int
 ) -> float:
+    # We match the behaviour of GridSearchCV
+    # In scikit learn, the exact value of the score_func
+    # is returned (not that of the 'Scorer' which might be
+    # negative in functions like mse, as scikit learn
+    # maximizes.) If an user wants to use GridSearchCV
+    # They are expected to pass neg_mean_squared_error
+    # For this reason we multiply back by metric_._sign
     if task_type in REGRESSION_TASKS:
         # TODO put this into the regression metric itself
         cprediction = sanitize_array(prediction)
-        score = metric_(solution, cprediction)
+        score = metric_._sign * metric_(solution, cprediction)
     else:
-        score = metric_(solution, prediction)
+        score = metric_._sign * metric_(solution, prediction)
     return score
+
+
+def calculate_loss(
+    solution: np.ndarray,
+    prediction: np.ndarray,
+    task_type: int,
+    metric: Scorer,
+    scoring_functions: Optional[List[Scorer]] = None
+) -> Union[float, Dict[str, float]]:
+    """
+    Returns a loss (a magnitude that allows casting the
+    optimization problem, as a minimization one) for the
+    given Auto-Sklearn Scorer object
+    Parameters
+    ----------
+    solution: np.ndarray
+        The ground truth of the targets
+    prediction: np.ndarray
+        The best estimate from the model, of the given targets
+    task_type: int
+        To understand if the problem task is classification
+        or regression
+    metric: Scorer
+        Object that host a function to calculate how good the
+        prediction is according to the solution.
+    scoring_functions: List[Scorer]
+        A list of metrics to calculate multiple losses
+    Returns
+    -------
+    float or Dict[str, float]
+        A loss function for each of the provided scorer objects
+    """
+    score = calculate_score(
+        solution=solution,
+        prediction=prediction,
+        task_type=task_type,
+        metric=metric,
+        scoring_functions=scoring_functions,
+    )
+
+    if scoring_functions:
+        score = cast(Dict, score)
+        # we expect a dict() object for which we should calculate the loss
+        loss_dict = dict()
+        for metric_ in scoring_functions + [metric]:
+            # TODO: When metrics are annotated with type_of_target support
+            # we can remove this check
+            if metric_.name not in score:
+                continue
+            # maybe metric argument is not in scoring_functions
+            # so append it to the list. Rather than check if such
+            # is the case, redefining loss_dict[metric] is less expensive
+            loss_dict[metric_.name] = metric_._optimum - metric_._sign * score[metric_.name]
+        return loss_dict
+    else:
+        return metric._optimum - metric._sign * cast(float, score)
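
A possible call to the new helper, assuming the public accuracy / balanced_accuracy scorer objects and the BINARY_CLASSIFICATION constant from the installed package (none of which are part of this diff): the single-metric form returns a float loss, while passing scoring_functions yields a dict of losses keyed by scorer name.

import numpy as np

# assumed to already exist in the package; not introduced by this commit
from autosklearn.constants import BINARY_CLASSIFICATION
from autosklearn.metrics import accuracy, balanced_accuracy, calculate_loss

y_true = np.array([0, 1, 1, 0])
y_pred = np.array([0, 1, 0, 0])  # one mistake out of four

# single metric -> float: optimum(1) - sign(+1) * score(0.75) = 0.25
loss = calculate_loss(solution=y_true, prediction=y_pred,
                      task_type=BINARY_CLASSIFICATION, metric=accuracy)

# with scoring_functions -> dict of losses keyed by scorer name
losses = calculate_loss(solution=y_true, prediction=y_pred,
                        task_type=BINARY_CLASSIFICATION, metric=accuracy,
                        scoring_functions=[balanced_accuracy])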

test/test_automl/test_automl.py

Lines changed: 3 additions & 2 deletions
@@ -308,7 +308,7 @@ def test_automl_outputs(backend, dask_client):
         'start_time_100',
         'datamanager.pkl',
         'ensemble_read_preds.pkl',
-        'ensemble_read_scores.pkl',
+        'ensemble_read_losses.pkl',
         'runs',
         'ensembles',
         'ensemble_history.json',
@@ -625,7 +625,8 @@ def test_load_best_individual_model(metric, backend, dask_client):
     if metric.name == 'balanced_accuracy':
         assert automl.score(X_test, Y_test) > 0.9
     elif metric.name == 'log_loss':
-        assert automl.score(X_test, Y_test) <= 0.2
+        # Seen values in github actions of 0.6978304740364537
+        assert automl.score(X_test, Y_test) <= 0.72
     else:
         raise ValueError(metric.name)
 