|
15 | 15 | import autosklearn
|
16 | 16 | from autosklearn.classification import AutoSklearnClassifier
|
17 | 17 | import autosklearn.experimental.selector
|
18 |
| -from autosklearn.metrics import Scorer |
| 18 | +from autosklearn.metrics import Scorer, balanced_accuracy, roc_auc, log_loss, accuracy |
19 | 19 |
|
| 20 | +metrics = (balanced_accuracy, roc_auc, log_loss) |
| 21 | +selector_files = {} |
20 | 22 | this_directory = pathlib.Path(__file__).resolve().parent
|
21 |
| -training_data_file = this_directory / 'askl2_training_data.json' |
22 |
| -with open(training_data_file) as fh: |
23 |
| - training_data = json.load(fh) |
24 |
| - fh.seek(0) |
25 |
| - m = hashlib.md5() |
26 |
| - m.update(fh.read().encode('utf8')) |
27 |
| -training_data_hash = m.hexdigest()[:10] |
28 |
| -selector_filename = "askl2_selector_%s_%s_%s.pkl" % ( |
29 |
| - autosklearn.__version__, |
30 |
| - sklearn.__version__, |
31 |
| - training_data_hash |
32 |
| -) |
33 |
| -selector_directory = os.environ.get('XDG_CACHE_HOME') |
34 |
| -if selector_directory is None: |
35 |
| - selector_directory = pathlib.Path.home() |
36 |
| -selector_directory = pathlib.Path(selector_directory).joinpath('auto-sklearn').expanduser() |
37 |
| -selector_file = selector_directory / selector_filename |
38 |
| -metafeatures = pd.DataFrame(training_data['metafeatures']) |
39 |
| -y_values = np.array(training_data['y_values']) |
40 |
| -strategies = training_data['strategies'] |
41 |
| -minima_for_methods = training_data['minima_for_methods'] |
42 |
| -maxima_for_methods = training_data['maxima_for_methods'] |
43 |
| -if not selector_file.exists(): |
44 |
| - selector = autosklearn.experimental.selector.OneVSOneSelector( |
45 |
| - configuration=training_data['configuration'], |
46 |
| - default_strategy_idx=strategies.index('RF_SH-eta4-i_holdout_iterative_es_if'), |
47 |
| - rng=1, |
| 23 | +for metric in metrics: |
| 24 | + training_data_file = this_directory / metric.name / 'askl2_training_data.json' |
| 25 | + with open(training_data_file) as fh: |
| 26 | + training_data = json.load(fh) |
| 27 | + fh.seek(0) |
| 28 | + m = hashlib.md5() |
| 29 | + m.update(fh.read().encode('utf8')) |
| 30 | + training_data_hash = m.hexdigest()[:10] |
| 31 | + selector_filename = "askl2_selector_%s_%s_%s_%s.pkl" % ( |
| 32 | + autosklearn.__version__, |
| 33 | + sklearn.__version__, |
| 34 | + metric.name, |
| 35 | + training_data_hash |
48 | 36 | )
|
49 |
| - selector.fit( |
50 |
| - X=metafeatures, |
51 |
| - y=y_values, |
52 |
| - methods=strategies, |
53 |
| - minima=minima_for_methods, |
54 |
| - maxima=maxima_for_methods, |
55 |
| - ) |
56 |
| - selector_file.parent.mkdir(exist_ok=True, parents=True) |
57 |
| - try: |
58 |
| - with open(selector_file, 'wb') as fh: |
59 |
| - pickle.dump(selector, fh) |
60 |
| - except Exception as e: |
61 |
| - print("AutoSklearn2Classifier needs to create a selector file under " |
62 |
| - "the user's home directory or XDG_CACHE_HOME. Nevertheless " |
63 |
| - "the path {} is not writable.".format(selector_file)) |
64 |
| - raise e |
| 37 | + selector_directory = os.environ.get('XDG_CACHE_HOME') |
| 38 | + if selector_directory is None: |
| 39 | + selector_directory = pathlib.Path.home() |
| 40 | + selector_directory = pathlib.Path(selector_directory).joinpath('auto-sklearn').expanduser() |
| 41 | + selector_files[metric.name] = selector_directory / selector_filename |
| 42 | + metafeatures = pd.DataFrame(training_data['metafeatures']) |
| 43 | + strategies = training_data['strategies'] |
| 44 | + y_values = pd.DataFrame(training_data['y_values'], columns=strategies, index=metafeatures.index) |
| 45 | + minima_for_methods = training_data['minima_for_methods'] |
| 46 | + maxima_for_methods = training_data['maxima_for_methods'] |
| 47 | + default_strategies = training_data['tie_break_order'] |
| 48 | + if not selector_files[metric.name].exists(): |
| 49 | + selector = autosklearn.experimental.selector.OVORF( |
| 50 | + configuration=training_data['configuration'], |
| 51 | + random_state=np.random.RandomState(1), |
| 52 | + n_estimators=500, |
| 53 | + tie_break_order=default_strategies, |
| 54 | + ) |
| 55 | + selector = autosklearn.experimental.selector.FallbackWrapper(selector, default_strategies) |
| 56 | + selector.fit( |
| 57 | + X=metafeatures, |
| 58 | + y=y_values, |
| 59 | + minima=minima_for_methods, |
| 60 | + maxima=maxima_for_methods, |
| 61 | + ) |
| 62 | + selector_files[metric.name].parent.mkdir(exist_ok=True, parents=True) |
| 63 | + |
| 64 | + try: |
| 65 | + with open(selector_files[metric.name], 'wb') as fh: |
| 66 | + pickle.dump(selector, fh) |
| 67 | + except Exception as e: |
| 68 | + print("AutoSklearn2Classifier needs to create a selector file under " |
| 69 | + "the user's home directory or XDG_CACHE_HOME. Nevertheless " |
| 70 | + "the path {} is not writable.".format(selector_files[metric.name])) |
| 71 | + raise e |
65 | 72 |
|
66 | 73 |
|
67 | 74 | class SmacObjectCallback:
|
@@ -296,7 +303,7 @@ def __init__(
|
296 | 303 | Attributes
|
297 | 304 | ----------
|
298 | 305 |
|
299 |
| - cv_results\_ : dict of numpy (masked) ndarrays |
| 306 | + cv_results_ : dict of numpy (masked) ndarrays |
300 | 307 | A dict with keys as column headers and values as columns, that can be
|
301 | 308 | imported into a pandas ``DataFrame``.
|
302 | 309 |
|
@@ -346,10 +353,22 @@ def fit(self, X, y,
|
346 | 353 | feat_type=None,
|
347 | 354 | dataset_name=None):
|
348 | 355 |
|
| 356 | + if self.metric is None: |
| 357 | + if len(y.shape) == 1 or y.shape[1] == 1: |
| 358 | + self.metric = accuracy |
| 359 | + else: |
| 360 | + self.metric = log_loss |
| 361 | + |
| 362 | + if self.metric in metrics: |
| 363 | + metric_name = self.metric.name |
| 364 | + selector_file = selector_files[metric_name] |
| 365 | + else: |
| 366 | + metric_name = 'balanced_accuracy' |
| 367 | + selector_file = selector_files[metric_name] |
349 | 368 | with open(selector_file, 'rb') as fh:
|
350 | 369 | selector = pickle.load(fh)
|
351 | 370 |
|
352 |
| - metafeatures = np.array([len(np.unique(y)), X.shape[1], X.shape[0]]) |
| 371 | + metafeatures = pd.DataFrame({dataset_name: [X.shape[1], X.shape[0]]}).transpose() |
353 | 372 | selection = np.argmax(selector.predict(metafeatures))
|
354 | 373 | automl_policy = strategies[selection]
|
355 | 374 |
|
@@ -400,7 +419,9 @@ def fit(self, X, y,
|
400 | 419 | else:
|
401 | 420 | resampling_strategy_kwargs = None
|
402 | 421 |
|
403 |
| - portfolio_file = this_directory / 'askl2_portfolios' / ('%s.json' % automl_policy) |
| 422 | + portfolio_file = ( |
| 423 | + this_directory / metric_name / 'askl2_portfolios' / ('%s.json' % automl_policy) |
| 424 | + ) |
404 | 425 | with open(portfolio_file) as fh:
|
405 | 426 | portfolio_json = json.load(fh)
|
406 | 427 | portfolio = portfolio_json['portfolio']
|
|
0 commit comments