Skip to content

Commit 07f669e

Browse files
author
Github Actions
committed
Eddie Bergman: Update example to use predefined_split properly (#1340)
1 parent 1551e15 commit 07f669e

File tree

71 files changed

+1237
-845
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

71 files changed

+1237
-845
lines changed

development/_downloads/bb7d59d9ddc2ff29f0d6eb99747a3347/example_resampling.ipynb

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -141,7 +141,7 @@
141 141
},
142 142
"outputs": [],
143 143
"source": [
144-
"resampling_strategy = sklearn.model_selection.PredefinedSplit(\n test_fold=np.where(X_train[:, 0] < np.mean(X_train[:, 0]))[0]\n)\n\nautoml = autosklearn.classification.AutoSklearnClassifier(\n time_left_for_this_task=120,\n per_run_time_limit=30,\n tmp_folder='/tmp/autosklearn_resampling_example_tmp',\n disable_evaluator_output=False,\n resampling_strategy=resampling_strategy,\n)\nautoml.fit(X_train, y_train, dataset_name='breast_cancer')"
144+
"selected_indices = (X_train[:, 0] < np.mean(X_train[:, 0])).astype(int)\nresampling_strategy = sklearn.model_selection.PredefinedSplit(\n test_fold=selected_indices\n)\n\nautoml = autosklearn.classification.AutoSklearnClassifier(\n time_left_for_this_task=120,\n per_run_time_limit=30,\n tmp_folder='/tmp/autosklearn_resampling_example_tmp',\n disable_evaluator_output=False,\n resampling_strategy=resampling_strategy,\n)\nautoml.fit(X_train, y_train, dataset_name='breast_cancer')\n\nprint(automl.sprint_statistics())"
145 145
]
146 146
},
147 147
{
Binary file not shown.

development/_downloads/c6beb850ad22be83885d2737cca63b33/example_resampling.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,8 +98,9 @@
98 98
# data by the first feature. In practice, one would use a splitting according
99 99
# to the use case at hand.
100 100

101+
selected_indices = (X_train[:, 0] < np.mean(X_train[:, 0])).astype(int)
101 102
resampling_strategy = sklearn.model_selection.PredefinedSplit(
102-
test_fold=np.where(X_train[:, 0] < np.mean(X_train[:, 0]))[0]
103+
test_fold=selected_indices
103 104
)
104 105

105 106
automl = autosklearn.classification.AutoSklearnClassifier(
@@ -111,6 +112,8 @@
111 112
)
112 113
automl.fit(X_train, y_train, dataset_name='breast_cancer')
113 114

115+
print(automl.sprint_statistics())
116+
114 117
############################################################################
115 118
# For custom resampling strategies (i.e. resampling strategies that are not
116 119
# defined as strings by Auto-sklearn) it is necessary to perform a refit:
Binary file not shown.
-567 Bytes
Loading
39 KB
Loading
1.37 KB
Loading
-516 Bytes
Loading
699 Bytes
Loading
4.22 KB
Loading

0 commit comments

Comments
 (0)