Skip to content

Commit b7abdb8

Browse files
franchuteriveracharlesfu4
authored and committed
771 worst possible result (automl#845)
* Initial Commit * Make worst result a function * worst possible result in metric * Fixing the name of the scorers
1 parent 067b382 commit b7abdb8

File tree

8 files changed

+95
-41
lines changed

8 files changed

+95
-41
lines changed

autosklearn/automl.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@ def __init__(self,
9696
get_smac_object_callback=None,
9797
smac_scenario_args=None,
9898
logging_config=None,
99+
metric=None,
99100
):
100101
super(AutoML, self).__init__()
101102
self._backend = backend
@@ -133,7 +134,7 @@ def __init__(self,
133134
self._stopwatch = StopWatch()
134135
self._logger = None
135136
self._task = None
136-
self._metric = None
137+
self._metric = metric
137138
self._label_num = None
138139
self._parser = None
139140
self.models_ = None

autosklearn/evaluation/__init__.py

Lines changed: 37 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -12,18 +12,18 @@
1212
from smac.tae.execute_ta_run import StatusType, BudgetExhaustedException, \
1313
TAEAbortException
1414
from smac.tae.execute_func import AbstractTAFunc
15+
1516
from ConfigSpace import Configuration
1617
from sklearn.model_selection._split import _RepeatedSplits, BaseShuffleSplit,\
1718
BaseCrossValidator
19+
from autosklearn.metrics import Scorer
1820

1921
import autosklearn.evaluation.train_evaluator
2022
import autosklearn.evaluation.test_evaluator
2123
import autosklearn.evaluation.util
2224

23-
WORST_POSSIBLE_RESULT = 1.0
24-
2525

26-
def fit_predict_try_except_decorator(ta, queue, **kwargs):
26+
def fit_predict_try_except_decorator(ta, queue, cost_for_crash, **kwargs):
2727

2828
try:
2929
return ta(queue=queue, **kwargs)
@@ -35,13 +35,32 @@ def fit_predict_try_except_decorator(ta, queue, **kwargs):
3535
exception_traceback = traceback.format_exc()
3636
error_message = repr(e)
3737

38-
queue.put({'loss': WORST_POSSIBLE_RESULT,
38+
queue.put({'loss': cost_for_crash,
3939
'additional_run_info': {'traceback': exception_traceback,
4040
'error': error_message},
4141
'status': StatusType.CRASHED,
4242
'final_queue_element': True})
4343

4444

45+
def get_cost_of_crash(metric):
46+
47+
# The metric must always be defined to extract optimum/worst
48+
if not isinstance(metric, Scorer):
49+
raise ValueError("The metric must be stricly be an instance of Scorer")
50+
51+
# Autosklearn optimizes the err. This function translates
52+
# worst_possible_result to be a minimization problem.
53+
# For metrics like accuracy that are bounded to [0,1]
54+
# metric.optimum==1 is the worst cost.
55+
# A simple guide is to use greater_is_better embedded as sign
56+
if metric._sign < 0:
57+
worst_possible_result = metric._worst_possible_result
58+
else:
59+
worst_possible_result = metric._optimum - metric._worst_possible_result
60+
61+
return worst_possible_result
62+
63+
4564
# TODO potentially log all inputs to this class to pickle them in order to do
4665
# easier debugging of potential crashes
4766
class ExecuteTaFuncWithQueue(AbstractTAFunc):
@@ -78,15 +97,21 @@ def __init__(self, backend, autosklearn_seed, resampling_strategy, metric,
7897
raise ValueError('Unknown resampling strategy %s' %
7998
resampling_strategy)
8099

81-
eval_function = functools.partial(fit_predict_try_except_decorator,
82-
ta=eval_function)
100+
self.worst_possible_result = get_cost_of_crash(metric)
101+
102+
eval_function = functools.partial(
103+
fit_predict_try_except_decorator,
104+
ta=eval_function,
105+
cost_for_crash=self.worst_possible_result,
106+
)
107+
83108
super().__init__(
84109
ta=eval_function,
85110
stats=stats,
86111
runhistory=runhistory,
87112
run_obj=run_obj,
88113
par_factor=par_factor,
89-
cost_for_crash=WORST_POSSIBLE_RESULT,
114+
cost_for_crash=self.worst_possible_result,
90115
)
91116

92117
self.backend = backend
@@ -250,7 +275,7 @@ def run(self, config, instance=None,
250275
if status in [StatusType.SUCCESS, StatusType.DONOTADVANCE]:
251276
cost = result
252277
else:
253-
cost = WORST_POSSIBLE_RESULT
278+
cost = self.worst_possible_result
254279

255280
except Empty:
256281
info = None
@@ -265,12 +290,12 @@ def run(self, config, instance=None,
265290
}
266291
else:
267292
raise ValueError(obj.exit_status)
268-
cost = WORST_POSSIBLE_RESULT
293+
cost = self.worst_possible_result
269294

270295
elif obj.exit_status is TAEAbortException:
271296
info = None
272297
status = StatusType.ABORT
273-
cost = WORST_POSSIBLE_RESULT
298+
cost = self.worst_possible_result
274299
additional_run_info = {'error': 'Your configuration of '
275300
'auto-sklearn does not work!'}
276301

@@ -285,7 +310,7 @@ def run(self, config, instance=None,
285310
cost = result
286311
else:
287312
status = StatusType.CRASHED
288-
cost = WORST_POSSIBLE_RESULT
313+
cost = self.worst_possible_result
289314
additional_run_info['info'] = 'Run treated as crashed ' \
290315
'because the pynisher exit ' \
291316
'status %s is unknown.' % \
@@ -294,7 +319,7 @@ def run(self, config, instance=None,
294319
info = None
295320
additional_run_info = {'error': 'Result queue is empty'}
296321
status = StatusType.CRASHED
297-
cost = WORST_POSSIBLE_RESULT
322+
cost = self.worst_possible_result
298323

299324
if (
300325
(self.budget_type is None or budget == 0)

autosklearn/evaluation/abstract_evaluator.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,17 @@ def _get_model(self):
227227
return model
228228

229229
def _loss(self, y_true, y_hat, all_scoring_functions=None):
230+
"""Auto-sklearn follows a minimization goal, so the make_scorer
231+
sign is used as a guide to obtain the value to reduce.
232+
233+
On this regard, to optimize a metric:
234+
1- score is calculared with calculate_score, with the caveat, that if
235+
for the metric greater is not better, a negative score is returned.
236+
2- the err (the optimization goal) is then:
237+
optimum - (metric.sign * actual_score)
238+
For accuracy for example: optimum(1) - (+1 * actual score)
239+
For logloss for example: optimum(0) - (-1 * actual score)
240+
"""
230241
all_scoring_functions = (
231242
self.all_scoring_functions
232243
if all_scoring_functions is None

autosklearn/metrics/__init__.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,16 @@
99
from autosklearn.constants import REGRESSION_TASKS, TASK_TYPES
1010
from .util import sanitize_array
1111

12+
from smac.utils.constants import MAXINT
13+
1214

1315
class Scorer(object, metaclass=ABCMeta):
14-
def __init__(self, name, score_func, optimum, sign, kwargs):
16+
def __init__(self, name, score_func, optimum, worst_possible_result, sign, kwargs):
1517
self.name = name
1618
self._kwargs = kwargs
1719
self._score_func = score_func
1820
self._optimum = optimum
21+
self._worst_possible_result = worst_possible_result
1922
self._sign = sign
2023

2124
@abstractmethod
@@ -136,7 +139,7 @@ def __call__(self, y_true, y_pred, sample_weight=None):
136139
return self._sign * self._score_func(y_true, y_pred, **self._kwargs)
137140

138141

139-
def make_scorer(name, score_func, optimum=1, greater_is_better=True,
142+
def make_scorer(name, score_func, optimum=1, worst_possible_result=0, greater_is_better=True,
140143
needs_proba=False, needs_threshold=False, **kwargs):
141144
"""Make a scorer from a performance metric or loss function.
142145
@@ -181,7 +184,7 @@ def make_scorer(name, score_func, optimum=1, greater_is_better=True,
181184
cls = _ThresholdScorer
182185
else:
183186
cls = _PredictScorer
184-
return cls(name, score_func, optimum, sign, kwargs)
187+
return cls(name, score_func, optimum, worst_possible_result, sign, kwargs)
185188

186189

187190
# Standard regression scores
@@ -190,14 +193,17 @@ def make_scorer(name, score_func, optimum=1, greater_is_better=True,
190193
mean_squared_error = make_scorer('mean_squared_error',
191194
sklearn.metrics.mean_squared_error,
192195
optimum=0,
196+
worst_possible_result=MAXINT,
193197
greater_is_better=False)
194198
mean_absolute_error = make_scorer('mean_absolute_error',
195199
sklearn.metrics.mean_absolute_error,
196200
optimum=0,
201+
worst_possible_result=MAXINT,
197202
greater_is_better=False)
198203
median_absolute_error = make_scorer('median_absolute_error',
199204
sklearn.metrics.median_absolute_error,
200205
optimum=0,
206+
worst_possible_result=MAXINT,
201207
greater_is_better=False)
202208

203209
# Standard Classification Scores
@@ -225,6 +231,7 @@ def make_scorer(name, score_func, optimum=1, greater_is_better=True,
225231
log_loss = make_scorer('log_loss',
226232
sklearn.metrics.log_loss,
227233
optimum=0,
234+
worst_possible_result=MAXINT,
228235
greater_is_better=False,
229236
needs_proba=True)
230237
# TODO what about mathews correlation coefficient etc?

autosklearn/smbo.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
from autosklearn.metalearning.mismbo import suggest_via_metalearning
2222
from autosklearn.data.abstract_data_manager import AbstractDataManager
2323
from autosklearn.data.competition_data_manager import CompetitionDataManager
24-
from autosklearn.evaluation import ExecuteTaFuncWithQueue, WORST_POSSIBLE_RESULT
24+
from autosklearn.evaluation import ExecuteTaFuncWithQueue, get_cost_of_crash
2525
from autosklearn.util.logging_ import get_logger
2626
from autosklearn.metalearning.metalearning.meta_base import MetaBase
2727
from autosklearn.metalearning.metafeatures.metafeatures import \
@@ -237,6 +237,7 @@ def __init__(self, config_space, dataset_name,
237237
self.resampling_strategy_args = resampling_strategy_args
238238

239239
# and a bunch of useful limits
240+
self.worst_possible_result = get_cost_of_crash(self.metric)
240241
self.total_walltime_limit = int(total_walltime_limit)
241242
self.func_eval_time_limit = int(func_eval_time_limit)
242243
self.memory_limit = memory_limit
@@ -444,7 +445,7 @@ def run_smbo(self):
444445
'run_obj': 'quality',
445446
'shared-model': self.shared_mode,
446447
'wallclock_limit': total_walltime_limit,
447-
'cost_for_crash': WORST_POSSIBLE_RESULT,
448+
'cost_for_crash': self.worst_possible_result,
448449
}
449450
if self.smac_scenario_args is not None:
450451
for arg in [

test/test_automl/test_automl.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -300,7 +300,9 @@ def test_do_dummy_prediction(self):
300300

301301
auto = autosklearn.automl.AutoML(
302302
backend_api, 20, 5,
303-
initial_configurations_via_metalearning=25)
303+
initial_configurations_via_metalearning=25,
304+
metric=accuracy,
305+
)
304306
setup_logger()
305307
auto._logger = get_logger('test_do_dummy_predictions')
306308
auto._backend._make_internals_directory()
@@ -332,6 +334,7 @@ def test_fail_if_dummy_prediction_fails(self, ta_run_mock):
332334
time_for_this_task,
333335
per_run_time,
334336
initial_configurations_via_metalearning=25,
337+
metric=accuracy,
335338
)
336339
setup_logger()
337340
auto._logger = get_logger('test_fail_if_dummy_prediction_fails')

test/test_evaluation/test_evaluation.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,10 @@
1111
import pynisher
1212
from smac.tae.execute_ta_run import StatusType, BudgetExhaustedException
1313
from smac.stats.stats import Stats
14+
from smac.utils.constants import MAXINT
1415

1516
from autosklearn.evaluation import ExecuteTaFuncWithQueue
16-
from autosklearn.metrics import accuracy
17+
from autosklearn.metrics import accuracy, log_loss
1718

1819
this_directory = os.path.dirname(__file__)
1920
sys.path.append(this_directory)
@@ -151,10 +152,13 @@ def test_eval_with_limits_holdout_fail_memory_error(self, pynisher_mock):
151152
logger=self.logger,
152153
stats=self.stats,
153154
memory_limit=3072,
154-
metric=accuracy)
155+
metric=log_loss)
155156
info = ta.start(None, instance=None, cutoff=30)
156157
self.assertEqual(info[0], StatusType.MEMOUT)
157-
self.assertEqual(info[1], 1.0)
158+
159+
# For logloss, worst possible result is MAXINT
160+
worst_possible_result = MAXINT
161+
self.assertEqual(info[1], worst_possible_result)
158162
self.assertIsInstance(info[2], float)
159163

160164
@unittest.mock.patch('pynisher.enforce_limits')

0 commit comments

Comments
 (0)