Skip to content

Commit 73ab8c3

Browse files
authored
Askl2 more metadata (#1179)
* update askl2 metadata
* updates
* update Auto-sklearn 2.0 data and selector
* fix rebase error
1 parent 674eee4 commit 73ab8c3

36 files changed

+44041
-12931
lines changed

MANIFEST.in

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,6 @@ recursive-include autosklearn/metalearning/files *.txt
44
include autosklearn/util/logging.yaml
55
include requirements.txt
66
include autosklearn/requirements.txt
7-
recursive-include autosklearn/experimental/askl2_portfolios *.json
7+
recursive-include autosklearn/experimental/ *.json
88
include autosklearn/experimental/askl2_training_data.json
99
include LICENSE.txt

autosklearn/experimental/askl2.py

Lines changed: 68 additions & 47 deletions
Original file line number | Diff line number | Diff line change
@@ -15,53 +15,60 @@
1515
import autosklearn
1616
from autosklearn.classification import AutoSklearnClassifier
1717
import autosklearn.experimental.selector
18-
from autosklearn.metrics import Scorer
18+
from autosklearn.metrics import Scorer, balanced_accuracy, roc_auc, log_loss, accuracy
1919

20+
metrics = (balanced_accuracy, roc_auc, log_loss)
21+
selector_files = {}
2022
this_directory = pathlib.Path(__file__).resolve().parent
21-
training_data_file = this_directory / 'askl2_training_data.json'
22-
with open(training_data_file) as fh:
23-
training_data = json.load(fh)
24-
fh.seek(0)
25-
m = hashlib.md5()
26-
m.update(fh.read().encode('utf8'))
27-
training_data_hash = m.hexdigest()[:10]
28-
selector_filename = "askl2_selector_%s_%s_%s.pkl" % (
29-
autosklearn.__version__,
30-
sklearn.__version__,
31-
training_data_hash
32-
)
33-
selector_directory = os.environ.get('XDG_CACHE_HOME')
34-
if selector_directory is None:
35-
selector_directory = pathlib.Path.home()
36-
selector_directory = pathlib.Path(selector_directory).joinpath('auto-sklearn').expanduser()
37-
selector_file = selector_directory / selector_filename
38-
metafeatures = pd.DataFrame(training_data['metafeatures'])
39-
y_values = np.array(training_data['y_values'])
40-
strategies = training_data['strategies']
41-
minima_for_methods = training_data['minima_for_methods']
42-
maxima_for_methods = training_data['maxima_for_methods']
43-
if not selector_file.exists():
44-
selector = autosklearn.experimental.selector.OneVSOneSelector(
45-
configuration=training_data['configuration'],
46-
default_strategy_idx=strategies.index('RF_SH-eta4-i_holdout_iterative_es_if'),
47-
rng=1,
23+
for metric in metrics:
24+
training_data_file = this_directory / metric.name / 'askl2_training_data.json'
25+
with open(training_data_file) as fh:
26+
training_data = json.load(fh)
27+
fh.seek(0)
28+
m = hashlib.md5()
29+
m.update(fh.read().encode('utf8'))
30+
training_data_hash = m.hexdigest()[:10]
31+
selector_filename = "askl2_selector_%s_%s_%s_%s.pkl" % (
32+
autosklearn.__version__,
33+
sklearn.__version__,
34+
metric.name,
35+
training_data_hash
4836
)
49-
selector.fit(
50-
X=metafeatures,
51-
y=y_values,
52-
methods=strategies,
53-
minima=minima_for_methods,
54-
maxima=maxima_for_methods,
55-
)
56-
selector_file.parent.mkdir(exist_ok=True, parents=True)
57-
try:
58-
with open(selector_file, 'wb') as fh:
59-
pickle.dump(selector, fh)
60-
except Exception as e:
61-
print("AutoSklearn2Classifier needs to create a selector file under "
62-
"the user's home directory or XDG_CACHE_HOME. Nevertheless "
63-
"the path {} is not writable.".format(selector_file))
64-
raise e
37+
selector_directory = os.environ.get('XDG_CACHE_HOME')
38+
if selector_directory is None:
39+
selector_directory = pathlib.Path.home()
40+
selector_directory = pathlib.Path(selector_directory).joinpath('auto-sklearn').expanduser()
41+
selector_files[metric.name] = selector_directory / selector_filename
42+
metafeatures = pd.DataFrame(training_data['metafeatures'])
43+
strategies = training_data['strategies']
44+
y_values = pd.DataFrame(training_data['y_values'], columns=strategies, index=metafeatures.index)
45+
minima_for_methods = training_data['minima_for_methods']
46+
maxima_for_methods = training_data['maxima_for_methods']
47+
default_strategies = training_data['tie_break_order']
48+
if not selector_files[metric.name].exists():
49+
selector = autosklearn.experimental.selector.OVORF(
50+
configuration=training_data['configuration'],
51+
random_state=np.random.RandomState(1),
52+
n_estimators=500,
53+
tie_break_order=default_strategies,
54+
)
55+
selector = autosklearn.experimental.selector.FallbackWrapper(selector, default_strategies)
56+
selector.fit(
57+
X=metafeatures,
58+
y=y_values,
59+
minima=minima_for_methods,
60+
maxima=maxima_for_methods,
61+
)
62+
selector_files[metric.name].parent.mkdir(exist_ok=True, parents=True)
63+
64+
try:
65+
with open(selector_files[metric.name], 'wb') as fh:
66+
pickle.dump(selector, fh)
67+
except Exception as e:
68+
print("AutoSklearn2Classifier needs to create a selector file under "
69+
"the user's home directory or XDG_CACHE_HOME. Nevertheless "
70+
"the path {} is not writable.".format(selector_files[metric.name]))
71+
raise e
6572

6673

6774
class SmacObjectCallback:
@@ -296,7 +303,7 @@ def __init__(
296303
Attributes
297304
----------
298305
299-
cv_results\_ : dict of numpy (masked) ndarrays
306+
cv_results_ : dict of numpy (masked) ndarrays
300307
A dict with keys as column headers and values as columns, that can be
301308
imported into a pandas ``DataFrame``.
302309
@@ -346,10 +353,22 @@ def fit(self, X, y,
346353
feat_type=None,
347354
dataset_name=None):
348355

356+
if self.metric is None:
357+
if len(y.shape) == 1 or y.shape[1] == 1:
358+
self.metric = accuracy
359+
else:
360+
self.metric = log_loss
361+
362+
if self.metric in metrics:
363+
metric_name = self.metric.name
364+
selector_file = selector_files[metric_name]
365+
else:
366+
metric_name = 'balanced_accuracy'
367+
selector_file = selector_files[metric_name]
349368
with open(selector_file, 'rb') as fh:
350369
selector = pickle.load(fh)
351370

352-
metafeatures = np.array([len(np.unique(y)), X.shape[1], X.shape[0]])
371+
metafeatures = pd.DataFrame({dataset_name: [X.shape[1], X.shape[0]]}).transpose()
353372
selection = np.argmax(selector.predict(metafeatures))
354373
automl_policy = strategies[selection]
355374

@@ -400,7 +419,9 @@ def fit(self, X, y,
400419
else:
401420
resampling_strategy_kwargs = None
402421

403-
portfolio_file = this_directory / 'askl2_portfolios' / ('%s.json' % automl_policy)
422+
portfolio_file = (
423+
this_directory / metric_name / 'askl2_portfolios' / ('%s.json' % automl_policy)
424+
)
404425
with open(portfolio_file) as fh:
405426
portfolio_json = json.load(fh)
406427
portfolio = portfolio_json['portfolio']

0 commit comments

Comments
 (0)