Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion MANIFEST.in
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,6 @@ recursive-include autosklearn/metalearning/files *.txt
include autosklearn/util/logging.yaml
include requirements.txt
include autosklearn/requirements.txt
recursive-include autosklearn/experimental/askl2_portfolios *.json
recursive-include autosklearn/experimental/ *.json
include autosklearn/experimental/askl2_training_data.json
include LICENSE.txt
115 changes: 68 additions & 47 deletions autosklearn/experimental/askl2.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,53 +15,60 @@
import autosklearn
from autosklearn.classification import AutoSklearnClassifier
import autosklearn.experimental.selector
from autosklearn.metrics import Scorer
from autosklearn.metrics import Scorer, balanced_accuracy, roc_auc, log_loss, accuracy

metrics = (balanced_accuracy, roc_auc, log_loss)
selector_files = {}
this_directory = pathlib.Path(__file__).resolve().parent
training_data_file = this_directory / 'askl2_training_data.json'
with open(training_data_file) as fh:
training_data = json.load(fh)
fh.seek(0)
m = hashlib.md5()
m.update(fh.read().encode('utf8'))
training_data_hash = m.hexdigest()[:10]
selector_filename = "askl2_selector_%s_%s_%s.pkl" % (
autosklearn.__version__,
sklearn.__version__,
training_data_hash
)
selector_directory = os.environ.get('XDG_CACHE_HOME')
if selector_directory is None:
selector_directory = pathlib.Path.home()
selector_directory = pathlib.Path(selector_directory).joinpath('auto-sklearn').expanduser()
selector_file = selector_directory / selector_filename
metafeatures = pd.DataFrame(training_data['metafeatures'])
y_values = np.array(training_data['y_values'])
strategies = training_data['strategies']
minima_for_methods = training_data['minima_for_methods']
maxima_for_methods = training_data['maxima_for_methods']
if not selector_file.exists():
selector = autosklearn.experimental.selector.OneVSOneSelector(
configuration=training_data['configuration'],
default_strategy_idx=strategies.index('RF_SH-eta4-i_holdout_iterative_es_if'),
rng=1,
for metric in metrics:
training_data_file = this_directory / metric.name / 'askl2_training_data.json'
with open(training_data_file) as fh:
training_data = json.load(fh)
fh.seek(0)
m = hashlib.md5()
m.update(fh.read().encode('utf8'))
training_data_hash = m.hexdigest()[:10]
selector_filename = "askl2_selector_%s_%s_%s_%s.pkl" % (
autosklearn.__version__,
sklearn.__version__,
metric.name,
training_data_hash
)
selector.fit(
X=metafeatures,
y=y_values,
methods=strategies,
minima=minima_for_methods,
maxima=maxima_for_methods,
)
selector_file.parent.mkdir(exist_ok=True, parents=True)
try:
with open(selector_file, 'wb') as fh:
pickle.dump(selector, fh)
except Exception as e:
print("AutoSklearn2Classifier needs to create a selector file under "
"the user's home directory or XDG_CACHE_HOME. Nevertheless "
"the path {} is not writable.".format(selector_file))
raise e
selector_directory = os.environ.get('XDG_CACHE_HOME')
if selector_directory is None:
selector_directory = pathlib.Path.home()
selector_directory = pathlib.Path(selector_directory).joinpath('auto-sklearn').expanduser()
selector_files[metric.name] = selector_directory / selector_filename
metafeatures = pd.DataFrame(training_data['metafeatures'])
strategies = training_data['strategies']
y_values = pd.DataFrame(training_data['y_values'], columns=strategies, index=metafeatures.index)
minima_for_methods = training_data['minima_for_methods']
maxima_for_methods = training_data['maxima_for_methods']
default_strategies = training_data['tie_break_order']
if not selector_files[metric.name].exists():
selector = autosklearn.experimental.selector.OVORF(
configuration=training_data['configuration'],
random_state=np.random.RandomState(1),
n_estimators=500,
tie_break_order=default_strategies,
)
selector = autosklearn.experimental.selector.FallbackWrapper(selector, default_strategies)
selector.fit(
X=metafeatures,
y=y_values,
minima=minima_for_methods,
maxima=maxima_for_methods,
)
selector_files[metric.name].parent.mkdir(exist_ok=True, parents=True)

try:
with open(selector_files[metric.name], 'wb') as fh:
pickle.dump(selector, fh)
except Exception as e:
print("AutoSklearn2Classifier needs to create a selector file under "
"the user's home directory or XDG_CACHE_HOME. Nevertheless "
"the path {} is not writable.".format(selector_files[metric.name]))
raise e


class SmacObjectCallback:
Expand Down Expand Up @@ -296,7 +303,7 @@ def __init__(
Attributes
----------

cv_results\_ : dict of numpy (masked) ndarrays
cv_results_ : dict of numpy (masked) ndarrays
A dict with keys as column headers and values as columns, that can be
imported into a pandas ``DataFrame``.

Expand Down Expand Up @@ -346,10 +353,22 @@ def fit(self, X, y,
feat_type=None,
dataset_name=None):

if self.metric is None:
if len(y.shape) == 1 or y.shape[1] == 1:
self.metric = accuracy
else:
self.metric = log_loss

if self.metric in metrics:
metric_name = self.metric.name
selector_file = selector_files[metric_name]
else:
metric_name = 'balanced_accuracy'
selector_file = selector_files[metric_name]
with open(selector_file, 'rb') as fh:
selector = pickle.load(fh)

metafeatures = np.array([len(np.unique(y)), X.shape[1], X.shape[0]])
metafeatures = pd.DataFrame({dataset_name: [X.shape[1], X.shape[0]]}).transpose()
selection = np.argmax(selector.predict(metafeatures))
automl_policy = strategies[selection]

Expand Down Expand Up @@ -400,7 +419,9 @@ def fit(self, X, y,
else:
resampling_strategy_kwargs = None

portfolio_file = this_directory / 'askl2_portfolios' / ('%s.json' % automl_policy)
portfolio_file = (
this_directory / metric_name / 'askl2_portfolios' / ('%s.json' % automl_policy)
)
with open(portfolio_file) as fh:
portfolio_json = json.load(fh)
portfolio = portfolio_json['portfolio']
Expand Down
Loading