Skip to content

Commit b7abdb8

Browse files
franchuteriveracharlesfu4
authored and committed
771 worst possible result (automl#845)
* Initial Commit * Make worst result a function * worst possible result in metric * Fixing the name of the scorers
1 parent 067b382 commit b7abdb8

File tree

8 files changed

+95
-41
lines changed

8 files changed

+95
-41
lines changed

autosklearn/automl.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@ def __init__(self,
9696
get_smac_object_callback=None,
9797
smac_scenario_args=None,
9898
logging_config=None,
99+
metric=None,
99100
):
100101
super(AutoML, self).__init__()
101102
self._backend = backend
@@ -133,7 +134,7 @@ def __init__(self,
133134
self._stopwatch = StopWatch()
134135
self._logger = None
135136
self._task = None
136-
self._metric = None
137+
self._metric = metric
137138
self._label_num = None
138139
self._parser = None
139140
self.models_ = None

autosklearn/evaluation/__init__.py

Lines changed: 37 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -12,18 +12,18 @@
1212
from smac.tae.execute_ta_run import StatusType, BudgetExhaustedException, \
1313
TAEAbortException
1414
from smac.tae.execute_func import AbstractTAFunc
15+
1516
from ConfigSpace import Configuration
1617
from sklearn.model_selection._split import _RepeatedSplits, BaseShuffleSplit,\
1718
BaseCrossValidator
19+
from autosklearn.metrics import Scorer
1820

1921
import autosklearn.evaluation.train_evaluator
2022
import autosklearn.evaluation.test_evaluator
2123
import autosklearn.evaluation.util
2224

23-
WORST_POSSIBLE_RESULT = 1.0
24-
2525

26-
def fit_predict_try_except_decorator(ta, queue, **kwargs):
26+
def fit_predict_try_except_decorator(ta, queue, cost_for_crash, **kwargs):
2727

2828
try:
2929
return ta(queue=queue, **kwargs)
@@ -35,13 +35,32 @@ def fit_predict_try_except_decorator(ta, queue, **kwargs):
3535
exception_traceback = traceback.format_exc()
3636
error_message = repr(e)
3737

38-
queue.put({'loss': WORST_POSSIBLE_RESULT,
38+
queue.put({'loss': cost_for_crash,
3939
'additional_run_info': {'traceback': exception_traceback,
4040
'error': error_message},
4141
'status': StatusType.CRASHED,
4242
'final_queue_element': True})
4343

4444

45+
def get_cost_of_crash(metric):
46+
47+
# The metric must always be defined to extract optimum/worst
48+
if not isinstance(metric, Scorer):
49+
raise ValueError("The metric must be stricly be an instance of Scorer")
50+
51+
# Autosklearn optimizes the err. This function translates
52+
# worst_possible_result to be a minimization problem.
53+
# For metrics like accuracy that are bounded to [0,1]
54+
# metric.optimum==1 is the worst cost.
55+
# A simple guide is to use greater_is_better embedded as sign
56+
if metric._sign < 0:
57+
worst_possible_result = metric._worst_possible_result
58+
else:
59+
worst_possible_result = metric._optimum - metric._worst_possible_result
60+
61+
return worst_possible_result
62+
63+
4564
# TODO potentially log all inputs to this class to pickle them in order to do
4665
# easier debugging of potential crashes
4766
class ExecuteTaFuncWithQueue(AbstractTAFunc):
@@ -78,15 +97,21 @@ def __init__(self, backend, autosklearn_seed, resampling_strategy, metric,
7897
raise ValueError('Unknown resampling strategy %s' %
7998
resampling_strategy)
8099

81-
eval_function = functools.partial(fit_predict_try_except_decorator,
82-
ta=eval_function)
100+
self.worst_possible_result = get_cost_of_crash(metric)
101+
102+
eval_function = functools.partial(
103+
fit_predict_try_except_decorator,
104+
ta=eval_function,
105+
cost_for_crash=self.worst_possible_result,
106+
)
107+
83108
super().__init__(
84109
ta=eval_function,
85110
stats=stats,
86111
runhistory=runhistory,
87112
run_obj=run_obj,
88113
par_factor=par_factor,
89-
cost_for_crash=WORST_POSSIBLE_RESULT,
114+
cost_for_crash=self.worst_possible_result,
90115
)
91116

92117
self.backend = backend
@@ -250,7 +275,7 @@ def run(self, config, instance=None,
250275
if status in [StatusType.SUCCESS, StatusType.DONOTADVANCE]:
251276
cost = result
252277
else:
253-
cost = WORST_POSSIBLE_RESULT
278+
cost = self.worst_possible_result
254279

255280
except Empty:
256281
info = None
@@ -265,12 +290,12 @@ def run(self, config, instance=None,
265290
}
266291
else:
267292
raise ValueError(obj.exit_status)
268-
cost = WORST_POSSIBLE_RESULT
293+
cost = self.worst_possible_result
269294

270295
elif obj.exit_status is TAEAbortException:
271296
info = None
272297
status = StatusType.ABORT
273-
cost = WORST_POSSIBLE_RESULT
298+
cost = self.worst_possible_result
274299
additional_run_info = {'error': 'Your configuration of '
275300
'auto-sklearn does not work!'}
276301

@@ -285,7 +310,7 @@ def run(self, config, instance=None,
285310
cost = result
286311
else:
287312
status = StatusType.CRASHED
288-
cost = WORST_POSSIBLE_RESULT
313+
cost = self.worst_possible_result
289314
additional_run_info['info'] = 'Run treated as crashed ' \
290315
'because the pynisher exit ' \
291316
'status %s is unknown.' % \
@@ -294,7 +319,7 @@ def run(self, config, instance=None,
294319
info = None
295320
additional_run_info = {'error': 'Result queue is empty'}
296321
status = StatusType.CRASHED
297-
cost = WORST_POSSIBLE_RESULT
322+
cost = self.worst_possible_result
298323

299324
if (
300325
(self.budget_type is None or budget == 0)

autosklearn/evaluation/abstract_evaluator.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,17 @@ def _get_model(self):
227227
return model
228228

229229
def _loss(self, y_true, y_hat, all_scoring_functions=None):
230+
"""Auto-sklearn follows a minimization goal, so the make_scorer
231+
sign is used as a guide to obtain the value to reduce.
232+
233+
On this regard, to optimize a metric:
234+
1- score is calculared with calculate_score, with the caveat, that if
235+
for the metric greater is not better, a negative score is returned.
236+
2- the err (the optimization goal) is then:
237+
optimum - (metric.sign * actual_score)
238+
For accuracy for example: optimum(1) - (+1 * actual score)
239+
For logloss for example: optimum(0) - (-1 * actual score)
240+
"""
230241
all_scoring_functions = (
231242
self.all_scoring_functions
232243
if all_scoring_functions is None

autosklearn/metrics/__init__.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,16 @@
99
from autosklearn.constants import REGRESSION_TASKS, TASK_TYPES
1010
from .util import sanitize_array
1111

12+
from smac.utils.constants import MAXINT
13+
1214

1315
class Scorer(object, metaclass=ABCMeta):
14-
def __init__(self, name, score_func, optimum, sign, kwargs):
16+
def __init__(self, name, score_func, optimum, worst_possible_result, sign, kwargs):
1517
self.name = name
1618
self._kwargs = kwargs
1719
self._score_func = score_func
1820
self._optimum = optimum
21+
self._worst_possible_result = worst_possible_result
1922
self._sign = sign
2023

2124
@abstractmethod
@@ -136,7 +139,7 @@ def __call__(self, y_true, y_pred, sample_weight=None):
136139
return self._sign * self._score_func(y_true, y_pred, **self._kwargs)
137140

138141

139-
def make_scorer(name, score_func, optimum=1, greater_is_better=True,
142+
def make_scorer(name, score_func, optimum=1, worst_possible_result=0, greater_is_better=True,
140143
needs_proba=False, needs_threshold=False, **kwargs):
141144
"""Make a scorer from a performance metric or loss function.
142145
@@ -181,7 +184,7 @@ def make_scorer(name, score_func, optimum=1, greater_is_better=True,
181184
cls = _ThresholdScorer
182185
else:
183186
cls = _PredictScorer
184-
return cls(name, score_func, optimum, sign, kwargs)
187+
return cls(name, score_func, optimum, worst_possible_result, sign, kwargs)
185188

186189

187190
# Standard regression scores
@@ -190,14 +193,17 @@ def make_scorer(name, score_func, optimum=1, greater_is_better=True,
190193
mean_squared_error = make_scorer('mean_squared_error',
191194
sklearn.metrics.mean_squared_error,
192195
optimum=0,
196+
worst_possible_result=MAXINT,
193197
greater_is_better=False)
194198
mean_absolute_error = make_scorer('mean_absolute_error',
195199
sklearn.metrics.mean_absolute_error,
196200
optimum=0,
201+
worst_possible_result=MAXINT,
197202
greater_is_better=False)
198203
median_absolute_error = make_scorer('median_absolute_error',
199204
sklearn.metrics.median_absolute_error,
200205
optimum=0,
206+
worst_possible_result=MAXINT,
201207
greater_is_better=False)
202208

203209
# Standard Classification Scores
@@ -225,6 +231,7 @@ def make_scorer(name, score_func, optimum=1, greater_is_better=True,
225231
log_loss = make_scorer('log_loss',
226232
sklearn.metrics.log_loss,
227233
optimum=0,
234+
worst_possible_result=MAXINT,
228235
greater_is_better=False,
229236
needs_proba=True)
230237
# TODO what about mathews correlation coefficient etc?

autosklearn/smbo.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
from autosklearn.metalearning.mismbo import suggest_via_metalearning
2222
from autosklearn.data.abstract_data_manager import AbstractDataManager
2323
from autosklearn.data.competition_data_manager import CompetitionDataManager
24-
from autosklearn.evaluation import ExecuteTaFuncWithQueue, WORST_POSSIBLE_RESULT
24+
from autosklearn.evaluation import ExecuteTaFuncWithQueue, get_cost_of_crash
2525
from autosklearn.util.logging_ import get_logger
2626
from autosklearn.metalearning.metalearning.meta_base import MetaBase
2727
from autosklearn.metalearning.metafeatures.metafeatures import \
@@ -237,6 +237,7 @@ def __init__(self, config_space, dataset_name,
237237
self.resampling_strategy_args = resampling_strategy_args
238238

239239
# and a bunch of useful limits
240+
self.worst_possible_result = get_cost_of_crash(self.metric)
240241
self.total_walltime_limit = int(total_walltime_limit)
241242
self.func_eval_time_limit = int(func_eval_time_limit)
242243
self.memory_limit = memory_limit
@@ -444,7 +445,7 @@ def run_smbo(self):
444445
'run_obj': 'quality',
445446
'shared-model': self.shared_mode,
446447
'wallclock_limit': total_walltime_limit,
447-
'cost_for_crash': WORST_POSSIBLE_RESULT,
448+
'cost_for_crash': self.worst_possible_result,
448449
}
449450
if self.smac_scenario_args is not None:
450451
for arg in [

test/test_automl/test_automl.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -300,7 +300,9 @@ def test_do_dummy_prediction(self):
300300

301301
auto = autosklearn.automl.AutoML(
302302
backend_api, 20, 5,
303-
initial_configurations_via_metalearning=25)
303+
initial_configurations_via_metalearning=25,
304+
metric=accuracy,
305+
)
304306
setup_logger()
305307
auto._logger = get_logger('test_do_dummy_predictions')
306308
auto._backend._make_internals_directory()
@@ -332,6 +334,7 @@ def test_fail_if_dummy_prediction_fails(self, ta_run_mock):
332334
time_for_this_task,
333335
per_run_time,
334336
initial_configurations_via_metalearning=25,
337+
metric=accuracy,
335338
)
336339
setup_logger()
337340
auto._logger = get_logger('test_fail_if_dummy_prediction_fails')

test/test_evaluation/test_evaluation.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,10 @@
1111
import pynisher
1212
from smac.tae.execute_ta_run import StatusType, BudgetExhaustedException
1313
from smac.stats.stats import Stats
14+
from smac.utils.constants import MAXINT
1415

1516
from autosklearn.evaluation import ExecuteTaFuncWithQueue
16-
from autosklearn.metrics import accuracy
17+
from autosklearn.metrics import accuracy, log_loss
1718

1819
this_directory = os.path.dirname(__file__)
1920
sys.path.append(this_directory)
@@ -151,10 +152,13 @@ def test_eval_with_limits_holdout_fail_memory_error(self, pynisher_mock):
151152
logger=self.logger,
152153
stats=self.stats,
153154
memory_limit=3072,
154-
metric=accuracy)
155+
metric=log_loss)
155156
info = ta.start(None, instance=None, cutoff=30)
156157
self.assertEqual(info[0], StatusType.MEMOUT)
157-
self.assertEqual(info[1], 1.0)
158+
159+
# For logloss, worst possible result is MAXINT
160+
worst_possible_result = MAXINT
161+
self.assertEqual(info[1], worst_possible_result)
158162
self.assertIsInstance(info[2], float)
159163

160164
@unittest.mock.patch('pynisher.enforce_limits')

0 commit comments

Comments
 (0)