
Commit a406274

Further feedback from comments
1 parent a1cca83 commit a406274

File tree

7 files changed, +82 -35 lines

.github/workflows/pytest.yml

Lines changed: 17 additions & 2 deletions
@@ -56,6 +56,10 @@ jobs:
         python setup.py sdist
         last_dist=$(ls -t dist/auto-sklearn-*.tar.gz | head -n 1)
         pip install $last_dist
+    - name: Store repository status
+      id: status-before
+      run: |
+        echo "::set-output name=BEFORE::$(git status --porcelain -b)"
     - name: Conda Run tests
       if: matrix.use-conda == true
       run: |
@@ -66,15 +70,26 @@
         # to change the default python
         export PATH="$CONDA/envs/testenv/bin:$PATH"
         if [ ${{ matrix.code-cov }} ]; then codecov='--cov=autosklearn --cov-report=xml'; fi
-        $CONDA/envs/testenv/bin/python3 -m pytest --durations=20 -sv $codecov test
+        $CONDA/envs/testenv/bin/python3 -m pytest --durations=20 -v $codecov test
     - name: Run tests
       if: matrix.use-conda == false
       run: |
         export OPENBLAS_NUM_THREADS=1
         export OMP_NUM_THREADS=1
         export MKL_NUM_THREADS=1
         if [ ${{ matrix.code-cov }} ]; then codecov='--cov=autosklearn --cov-report=xml'; fi
-        pytest --durations=20 -sv $codecov test
+        pytest --durations=20 -v $codecov test
+    - name: Check for files left behind by test
+      if: ${{ always() }}
+      run: |
+        before="${{ steps.status-before.outputs.BEFORE }}"
+        after="$(git status --porcelain -b)"
+        if [[ "$before" != "$after" ]]; then
+          echo "git status from before: $before"
+          echo "git status from after: $after"
+          echo "Not all generated files have been deleted!"
+          exit 1
+        fi
     - name: Upload coverage
       if: matrix.code-cov && always()
       uses: codecov/codecov-action@v1
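The new "Check for files left behind by test" step records `git status --porcelain -b` before the tests run and fails the job if the output differs afterwards. A minimal local sketch of the same check in plain Python (not part of the commit; the actual test-suite invocation is elided):

import subprocess

def repo_status() -> str:
    # Same command the workflow uses to snapshot the working tree.
    return subprocess.run(
        ["git", "status", "--porcelain", "-b"],
        capture_output=True, text=True, check=True,
    ).stdout

before = repo_status()
# ... run the test suite here ...
after = repo_status()
if before != after:
    raise SystemExit("Not all generated files have been deleted!")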

autosklearn/evaluation/train_evaluator.py

Lines changed: 2 additions & 0 deletions
@@ -1201,6 +1201,7 @@ def eval_partial_cv_iterative(
         resampling_strategy=resampling_strategy,
         resampling_strategy_args=resampling_strategy_args,
         seed=seed,
+        port=port,
         num_run=num_run,
         instance=instance,
         all_scoring_functions=all_scoring_functions,
@@ -1293,6 +1294,7 @@ def eval_iterative_cv(
         include=include,
         exclude=exclude,
         disable_file_output=disable_file_output,
+        port=port,
         init_params=init_params,
         budget=budget,
         budget_type=budget_type,

autosklearn/util/backend.py

Lines changed: 8 additions & 3 deletions
@@ -82,9 +82,11 @@ def __init__(self,
             )
         )
         self._output_directory = output_directory
-        # We create a dummy logger to start with
-        # Then, when a port is available we can create a
-        # client logger
+        # Auto-Sklearn logs through the use of a PicklableClientLogger
+        # For this reason we need a port to communicate with the server
+        # When the backend is created, this port is not available
+        # When the port is available in the main process, we
+        # call the setup_logger with this port and update self.logger
         self.logger = None  # type: Optional[PicklableClientLogger]
         self.create_directories()
@@ -165,6 +167,9 @@ class Backend(object):
     """

     def __init__(self, context: BackendContext):
+        # When the backend is created, this port is not available
+        # When the port is available in the main process, we
+        # call the setup_logger with this port and update self.logger
         self.logger = None  # type: Optional[PicklableClientLogger]
         self.context = context
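The added comments describe a two-phase logger setup: a Backend is created before any logging port exists, so self.logger starts as None and is only replaced once the main process knows the port and calls setup_logger. A rough standard-library stand-in for this pattern (illustrative only; this is not auto-sklearn's PicklableClientLogger or setup_logger):

import logging
import logging.handlers
from typing import Optional

class BackendSketch:
    def __init__(self) -> None:
        # No logging port is known yet when the backend is created.
        self.logger: Optional[logging.Logger] = None

    def setup_logger(self, port: int) -> None:
        # Once the main process knows the port, log records are forwarded
        # to the log server over TCP on that port.
        logger = logging.getLogger("BackendSketch")
        logger.addHandler(logging.handlers.SocketHandler("localhost", port))
        self.logger = logger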

scripts/run_auto-sklearn_for_metadata_generation.py

Lines changed: 2 additions & 0 deletions
@@ -3,6 +3,7 @@
 import argparse
 import json
 import logging
+import logging.handlers
 import os
 import shutil
 import sys
@@ -144,6 +145,7 @@
     include=include,
     metric=automl_arguments['metric'],
     cost_for_crash=get_cost_of_crash(automl_arguments['metric']),
+    port=logging.handlers.DEFAULT_TCP_LOGGING_PORT,
     abort_on_first_run_crash=False,)
 run_info, run_value = ta.run_wrapper(
     RunInfo(
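Rather than introducing a new constant, the script reuses the standard library's default TCP logging port as the port handed to the target-algorithm runner. For reference (plain stdlib, not specific to this commit):

import logging.handlers

# The standard library reserves 9020 as the default TCP logging port; the
# script above passes this value as `port` so worker-side log records have
# a well-known destination in the main process.
print(logging.handlers.DEFAULT_TCP_LOGGING_PORT)  # 9020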

test/conftest.py

Lines changed: 3 additions & 1 deletion
@@ -1,5 +1,6 @@
 import os
 import shutil
+import tempfile
 import time
 import unittest.mock
@@ -124,7 +125,8 @@ def dask_client(request):
     Workers are in subprocesses to not create deadlocks with the pynisher and logging.
     """

-    client = Client(n_workers=2, threads_per_worker=1, processes=False)
+    client = Client(n_workers=2, threads_per_worker=1, processes=False,
+                    local_directory=tempfile.gettempdir())
     print("Started Dask client={}\n".format(client))

     def get_finalizer(address):
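Passing local_directory=tempfile.gettempdir() presumably keeps Dask's scratch directory (dask-worker-space/) out of the repository checkout, which the new "files left behind" workflow check would otherwise flag. A standalone sketch of the same client setup (cluster sizes mirror the fixture; not part of the commit):

import tempfile
from dask.distributed import Client

# Worker scratch files land in the OS temp dir instead of the current
# working directory, so `git status` stays clean after the tests finish.
client = Client(n_workers=2, threads_per_worker=1, processes=False,
                local_directory=tempfile.gettempdir())
print(client)
client.close()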

test/test_automl/automl_utils.py

Lines changed: 21 additions & 2 deletions
@@ -66,6 +66,12 @@ def __init__(self, logfile: str):

     def parse_logfile(self) -> typing.List[str]:
         # We care about the [debug/info/...] messages
+        # At the time of writing, the message format was:
+        # [DEBUG] [2020-11-30 11:54:05,072:EnsembleBuilder] Restricting your
+        # function to 3072 mb memory.
+        #
+        # [DEBUG] [2020-11-30 11:53:55,062:pynisher] Redirecting
+        # output of the function to files.
         assert os.path.exists(self.logfile), "{} not found".format(self.logfile)

         with open(self.logfile) as fh:
@@ -85,6 +91,8 @@ def count_ensembler_iterations(self) -> int:
             # We expect the start msg to be something like:
             # [DEBUG] [2020-11-26 19:22:42,160:EnsembleBuilder] \
             # Function called with argument: (61....
+            # [DEBUG] [2020-11-30 11:53:47,069:EnsembleBuilder] Function called with argument:
+            # (28.246965646743774, 1, False), {}
             match = re.search(
                 r'EnsembleBuilder]\s+Function called with argument:\s+\(\d+\.\d+, (\d+), \w+',
                 line)
@@ -93,6 +101,8 @@ def count_ensembler_iterations(self) -> int:

             # Ensemble Builder actual call
             # Here we expect the msg:
+            # [DEBUG] [2020-11-30 11:53:14,877:EnsembleBuilder] Starting iteration 0,
+            # time left: 61.266255
             # [DEBUG] [2020-11-27 20:27:28,044:EnsembleBuilder] Starting iteration 2,
             # time left: 10.603252
             match = re.search(
@@ -110,21 +120,30 @@ def count_ensembler_iterations(self) -> int:
     def count_ensembler_success_pynisher_calls(self) -> int:

         # We expect the return msg to be something like:
-        # [DEBUG] [2020-11-26 19:22:43,018:EnsembleBuilder] return value: (([{'Times...
+        # [DEBUG] [2020-11-30 11:53:47,911:EnsembleBuilder] return value:
+        # (([{'Timestamp': Timestamp('2020-11-30 11:53:47.910727'),
+        # 'ensemble_optimization_score': 0.9787234042553191}], 50, None, None, None), 0)
+        # [DEBUG] [2020-11-30 11:54:05,984:EnsembleBuilder] return value:
+        # (([{'Timestamp': Timestamp('2020-11-30 11:54:05.983837'),
+        # 'ensemble_optimization_score': 0.9787234042553191}], 50, None, None, None), 0)
         return_msgs = len([line for line in self.lines if re.search(
             r'EnsembleBuilder]\s+return value:.*Timestamp', line)])

         return return_msgs

     def count_tae_pynisher_calls(self) -> int:
         # We expect the return msg to be something like:
-        # [DEBUG] [2020-11-26 19:22:39,558:pynisher] return value: (...
+        # [DEBUG] [2020-11-30 11:53:11,264:pynisher] return value: (None, 0)
+        # [DEBUG] [2020-11-30 11:53:13,768:pynisher] return value: (None, 0)
         return_msgs = len([line for line in self.lines if re.search(
             r'pynisher]\s+return value:\s+', line)])
         return (return_msgs)

     def get_automl_setting_from_log(self, dataset_name: str, setting: str) -> str:
         for line in self.lines:
+            # We expect messages of the form
+            # [DEBUG] [2020-11-30 11:53:10,457:AutoML(5):breast_cancer] ensemble_size: 50
+            # [DEBUG] [2020-11-30 11:53:10,457:AutoML(5):breast_cancer] ensemble_nbest: 50
             match = re.search(
                 f"{dataset_name}]\\s*{setting}\\s*:\\s*(\\w+)",
                 line)
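The example log lines added to these comments can be checked directly against the regular expressions they document. A quick illustrative check of the count_ensembler_iterations pattern against one of the documented messages (not part of the commit):

import re

line = ("[DEBUG] [2020-11-30 11:53:47,069:EnsembleBuilder] "
        "Function called with argument: (28.246965646743774, 1, False), {}")
match = re.search(
    r'EnsembleBuilder]\s+Function called with argument:\s+\(\d+\.\d+, (\d+), \w+',
    line)
# The captured group is the integer that count_ensembler_iterations collects.
print(match.group(1))  # -> 1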

test/test_metalearning/pyMetaLearn/test_meta_features_sparse.py

Lines changed: 29 additions & 27 deletions
@@ -1,4 +1,5 @@
 from io import StringIO
+import logging
 import os
 import sys
 import unittest
@@ -69,103 +70,104 @@ def setUp(self):
         self.y = y
         self.mf = meta_features.metafeatures
         self.helpers = meta_features.helper_functions
+        self.logger = logging.getLogger()

         # Precompute some helper functions
         self.helpers.set_value(
             "PCA",
-            self.helpers["PCA"](self.X_transformed, self.y),
+            self.helpers["PCA"](self.X_transformed, self.y, self.logger),
         )
         self.helpers.set_value(
             "MissingValues",
-            self.helpers["MissingValues"](self.X, self.y, self.categorical),
+            self.helpers["MissingValues"](self.X, self.y, self.logger, self.categorical),
         )
         self.mf.set_value(
             "NumberOfMissingValues",
-            self.mf["NumberOfMissingValues"](self.X, self.y, self.categorical),
+            self.mf["NumberOfMissingValues"](self.X, self.y, self.logger, self.categorical),
         )
         self.helpers.set_value(
             "NumSymbols",
-            self.helpers["NumSymbols"](self.X, self.y, self.categorical),
+            self.helpers["NumSymbols"](self.X, self.y, self.logger, self.categorical),
         )
         self.helpers.set_value(
             "ClassOccurences",
-            self.helpers["ClassOccurences"](self.X, self.y),
+            self.helpers["ClassOccurences"](self.X, self.y, self.logger),
         )
         self.helpers.set_value(
             "Skewnesses",
-            self.helpers["Skewnesses"](self.X_transformed, self.y, self.categorical_transformed),
+            self.helpers["Skewnesses"](self.X_transformed, self.y, self.logger, self.categorical_transformed),
         )
         self.helpers.set_value(
             "Kurtosisses",
-            self.helpers["Kurtosisses"](self.X_transformed, self.y, self.categorical_transformed),
+            self.helpers["Kurtosisses"](self.X_transformed, self.y, self.logger, self.categorical_transformed),
         )

     def test_missing_values(self):
-        mf = self.helpers["MissingValues"](self.X, self.y, self.categorical)
+        mf = self.helpers["MissingValues"](self.X, self.y, self.logger, self.categorical)
         self.assertTrue(sparse.issparse(mf.value))
         self.assertEqual(mf.value.shape, self.X.shape)
         self.assertEqual(mf.value.dtype, np.bool)
         self.assertEqual(0, np.sum(mf.value.data))

     def test_number_of_missing_values(self):
-        mf = self.mf["NumberOfMissingValues"](self.X, self.y, self.categorical)
+        mf = self.mf["NumberOfMissingValues"](self.X, self.y, self.logger, self.categorical)
         self.assertEqual(0, mf.value)

     def test_percentage_missing_values(self):
-        mf = self.mf["PercentageOfMissingValues"](self.X, self.y, self.categorical)
+        mf = self.mf["PercentageOfMissingValues"](self.X, self.y, self.logger, self.categorical)
         self.assertEqual(0, mf.value)

     def test_number_of_Instances_with_missing_values(self):
         mf = self.mf["NumberOfInstancesWithMissingValues"](
-            self.X, self.y, self.categorical)
+            self.X, self.y, self.logger, self.categorical)
         self.assertEqual(0, mf.value)

     def test_percentage_of_Instances_with_missing_values(self):
         self.mf.set_value("NumberOfInstancesWithMissingValues",
                           self.mf["NumberOfInstancesWithMissingValues"](
-                              self.X, self.y, self.categorical))
-        mf = self.mf["PercentageOfInstancesWithMissingValues"](self.X, self.y,
+                              self.X, self.y, self.logger, self.categorical))
+        mf = self.mf["PercentageOfInstancesWithMissingValues"](self.X, self.y, self.logger,
                                                                self.categorical)
         self.assertAlmostEqual(0, mf.value)

     def test_number_of_features_with_missing_values(self):
-        mf = self.mf["NumberOfFeaturesWithMissingValues"](self.X, self.y,
+        mf = self.mf["NumberOfFeaturesWithMissingValues"](self.X, self.y, self.logger,
                                                           self.categorical)
         self.assertEqual(0, mf.value)

     def test_percentage_of_features_with_missing_values(self):
         self.mf.set_value("NumberOfFeaturesWithMissingValues",
                           self.mf["NumberOfFeaturesWithMissingValues"](
-                              self.X, self.y, self.categorical))
-        mf = self.mf["PercentageOfFeaturesWithMissingValues"](self.X, self.y,
+                              self.X, self.y, self.logger, self.categorical))
+        mf = self.mf["PercentageOfFeaturesWithMissingValues"](self.X, self.y, self.logger,
                                                               self.categorical)
         self.assertAlmostEqual(0, mf.value)

     def test_num_symbols(self):
-        mf = self.helpers["NumSymbols"](self.X, self.y, self.categorical)
+        mf = self.helpers["NumSymbols"](self.X, self.y, self.logger, self.categorical)

         symbol_frequency = [2, 0, 6, 0, 1, 3, 0, 0, 3, 1, 0, 0, 0, 1, 0, 0,
                             0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 1, 1, 2, 2]
         self.assertEqual(mf.value, symbol_frequency)

     def test_symbols_max(self):
         # this is attribute steel
-        mf = self.mf["SymbolsMax"](self.X, self.y, self.categorical)
+        mf = self.mf["SymbolsMax"](self.X, self.y, self.logger, self.categorical)
         self.assertEqual(mf.value, 6)

     def test_symbols_mean(self):
-        mf = self.mf["SymbolsMean"](self.X, self.y, self.categorical)
+        mf = self.mf["SymbolsMean"](self.X, self.y, self.logger, self.categorical)
         # Empty looking spaces denote empty attributes
         symbol_frequency = [2, 6, 1, 3, 3, 1, 1, 2, 1, 1, 2, 2]
         self.assertAlmostEqual(mf.value, np.mean(symbol_frequency))

     def test_symbols_std(self):
-        mf = self.mf["SymbolsSTD"](self.X, self.y, self.categorical)
+        mf = self.mf["SymbolsSTD"](self.X, self.y, self.logger, self.categorical)
         symbol_frequency = [2, 6, 1, 3, 3, 1, 1, 2, 1, 1, 2, 2]
         self.assertAlmostEqual(mf.value, np.std(symbol_frequency))

     def test_symbols_sum(self):
-        mf = self.mf["SymbolsSum"](self.X, self.y, self.categorical)
+        mf = self.mf["SymbolsSum"](self.X, self.y, self.logger, self.categorical)
         self.assertEqual(mf.value, 25)

     def test_skewnesses(self):
@@ -175,7 +177,7 @@ def test_skewnesses(self):
             -0.6969708499033568, 0.626346013011263,
             0.3809987596624038, 1.4762248835141034,
             0.07687661087633726, 0.36889797830360116]
-        mf = self.helpers["Skewnesses"](self.X_transformed, self.y)
+        mf = self.helpers["Skewnesses"](self.X_transformed, self.y, self.logger)
         print(mf.value)
         print(fixture)
         np.testing.assert_allclose(mf.value, fixture)
@@ -188,25 +190,25 @@ def test_kurtosisses(self):
             -1.1786325509475712, -1.2387998382327912,
             1.393438264413704, -0.9768209837948336,
             -1.7937072296512782]
-        mf = self.helpers["Kurtosisses"](self.X_transformed, self.y)
+        mf = self.helpers["Kurtosisses"](self.X_transformed, self.y, self.logger)
         np.testing.assert_allclose(mf.value, fixture)

     def test_pca_95percent(self):
         mf = self.mf["PCAFractionOfComponentsFor95PercentVariance"](
-            self.X_transformed, self.y)
+            self.X_transformed, self.y, self.logger)
         self.assertAlmostEqual(0.7741935483870968, mf.value)

     def test_pca_kurtosis_first_pc(self):
-        mf = self.mf["PCAKurtosisFirstPC"](self.X_transformed, self.y)
+        mf = self.mf["PCAKurtosisFirstPC"](self.X_transformed, self.y, self.logger)
         self.assertAlmostEqual(-0.15444516166802469, mf.value)

     def test_pca_skewness_first_pc(self):
-        mf = self.mf["PCASkewnessFirstPC"](self.X_transformed, self.y)
+        mf = self.mf["PCASkewnessFirstPC"](self.X_transformed, self.y, self.logger)
         self.assertAlmostEqual(0.026514792083623905, mf.value)

     def test_calculate_all_metafeatures(self):
         mf = meta_features.calculate_all_metafeatures(
-            self.X, self.y, self.categorical, "2")
+            self.X, self.y, self.categorical, "2", logger=self.logger)
         self.assertEqual(52, len(mf.metafeature_values))
         sio = StringIO()
         mf.dump(sio)
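Taken together, the changes in this file exercise a new calling convention: every metafeature and helper callable now receives a logger directly after X and y, and calculate_all_metafeatures accepts it as a keyword argument. A compact before/after summary (identifiers mirror the test fixtures; this is not a standalone API example):

import logging

logger = logging.getLogger()

# before: helpers["NumSymbols"](X, y, categorical)
# after:  helpers["NumSymbols"](X, y, logger, categorical)

# before: meta_features.calculate_all_metafeatures(X, y, categorical, "2")
# after:  meta_features.calculate_all_metafeatures(X, y, categorical, "2",
#                                                  logger=logger)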
