Example multiple metric (#1045)

rabsr · ra-amex · web-flow · commit 4d19551a7824 · 2021-01-05T10:30:58.000+01:00
* Add an example showing how to calculate multiple scores per run

* Update abstract_evaluator.py

Co-authored-by: Rohit Agarwal <rohit.agarwal4@aexp.com>
diff --git a/autosklearn/evaluation/abstract_evaluator.py b/autosklearn/evaluation/abstract_evaluator.py
@@ -19,7 +19,7 @@
 from autosklearn.pipeline.implementations.util import (
     convert_multioutput_multiclass_to_multilabel
 )
-from autosklearn.metrics import calculate_score, CLASSIFICATION_METRICS, REGRESSION_METRICS
+from autosklearn.metrics import calculate_score
 from autosklearn.util.logging_ import get_named_client_logger
 
 from ConfigSpace import Configuration
@@ -264,14 +264,9 @@ def _loss(self, y_true, y_hat, scoring_functions=None):
             scoring_functions=scoring_functions)
 
         if hasattr(score, '__len__'):
-            # TODO: instead of using self.metric, it should use all metrics given by key.
-            # But now this throws error...
-            if self.task_type in CLASSIFICATION_TASKS:
-                err = {key: metric._optimum - score[key] for key, metric in
-                       CLASSIFICATION_METRICS.items() if key in score}
-            else:
-                err = {key: metric._optimum - score[key] for key, metric in
-                       REGRESSION_METRICS.items() if key in score}
+            err = {metric.name: metric._optimum - score[metric.name]
+                   for metric in scoring_functions}
+            err[self.metric.name] = self.metric._optimum - score[self.metric.name]
         else:
             err = self.metric._optimum - score
 
diff --git a/examples/40_advanced/example_calc_multiple_metrics.py b/examples/40_advanced/example_calc_multiple_metrics.py
@@ -0,0 +1,66 @@
+# -*- encoding: utf-8 -*-
+"""
+=======
+Metrics
+=======
+
+In *Auto-sklearn*, a model is optimized over a metric, either a built-in or
+a custom one. Moreover, it is also possible to calculate multiple metrics
+per run. The following example shows how to calculate both built-in and
+self-defined metrics for a classification problem.
+"""
+
+import autosklearn.classification
+import custom_metrics
+import pandas as pd
+import sklearn.datasets
+import sklearn.metrics
+from autosklearn.metrics import balanced_accuracy, precision, recall, f1
+
+
+def get_metric_result(cv_results):
+    """Summarise the successful runs recorded in ``cv_results``.

+    Builds a DataFrame from ``cv_results``, keeps only the rows whose
+    ``status`` equals ``"Success"`` and returns the rank, classifier choice
+    and mean test score columns followed by every ``metric_*`` column.
+    """
+    frame = pd.DataFrame.from_dict(cv_results)
+    is_success = frame['status'] == "Success"
+    # Per-metric columns are recognised purely by their ``metric_`` key prefix.
+    metric_columns = [name for name in cv_results if name.startswith('metric_')]
+    selected = ['rank_test_scores', 'param_classifier:__choice__', 'mean_test_score']
+    return frame[is_success][selected + metric_columns]
+
+
+if __name__ == "__main__":
+    # Import the submodule explicitly: ``import sklearn.datasets`` and
+    # ``import sklearn.metrics`` do not guarantee that
+    # ``sklearn.model_selection`` is loaded, so relying on it being available
+    # only works if some other import happens to pull it in.
+    import sklearn.model_selection

+    ############################################################################
+    # Data Loading
+    # ============

+    X, y = sklearn.datasets.load_breast_cancer(return_X_y=True)
+    X_train, X_test, y_train, y_test = \
+        sklearn.model_selection.train_test_split(X, y, random_state=1)

+    ############################################################################
+    # Build and fit a classifier
+    # ==========================

+    # Wrap the self-defined error function as a scorer. It is declared as a
+    # "lower is better" metric (``greater_is_better=False``) whose best
+    # possible value is 0, computed from plain label predictions.
+    error_rate = autosklearn.metrics.make_scorer(
+        name='custom_error',
+        score_func=custom_metrics.error,
+        optimum=0,
+        greater_is_better=False,
+        needs_proba=False,
+        needs_threshold=False
+    )
+    # ``scoring_functions`` lists the metrics to calculate for every run,
+    # mixing built-in scorers with the custom one defined above.
+    cls = autosklearn.classification.AutoSklearnClassifier(
+        time_left_for_this_task=120,
+        per_run_time_limit=30,
+        scoring_functions=[balanced_accuracy, precision, recall, f1, error_rate]
+    )
+    cls.fit(X_train, y_train, X_test, y_test)

+    ###########################################################################
+    # Get the Score of the final ensemble
+    # ===================================

+    predictions = cls.predict(X_test)
+    print("Accuracy score", sklearn.metrics.accuracy_score(y_test, predictions))

+    # Print one row per successful run, including every ``metric_*`` column
+    # found in ``cv_results_``.
+    print("#" * 80)
+    print("Metric results")
+    print(get_metric_result(cls.cv_results_).to_string(index=False))