Generalized regression monitoring for PerfPipeline. #6443
base: master
Changes from 23 commits
benchmarkMetric.py (new file)

@@ -0,0 +1,27 @@
# Fills in the "value" field of each metric in the template by matching its regex
# against the captured console log of a benchmark run.
import argparse, pathlib, json, re


def main():
    p = argparse.ArgumentParser()
    p.add_argument("--console", required=True)
    p.add_argument("--benchmarkMetricsTemplate_json", required=True)
    p.add_argument("--fname", required=True)
    p.add_argument("--testNames", required=True)
    args = p.parse_args()

    console = pathlib.Path(args.console).read_text(encoding="utf-8")

    benchmarkMetricsTemplate_json = pathlib.Path(args.benchmarkMetricsTemplate_json).read_text(encoding="utf-8")
    benchmarkMetricsTemplate = json.loads(benchmarkMetricsTemplate_json)

    tests = args.testNames.split(",")
    for test in tests:
        for metric in benchmarkMetricsTemplate[test].values():
            regex_parser = re.search(metric.get("regex"), console)
            if not regex_parser: continue
            metric.update({"value" : float(regex_parser.group(1))})

    benchmarkMetricsTemplate_json = json.dumps(benchmarkMetricsTemplate)
    pathlib.Path(f"{args.fname}").write_text(benchmarkMetricsTemplate_json, encoding="utf-8")


if __name__ == "__main__":
    main()
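For reference, here is a self-contained sketch of the extraction this script performs. The template, test name, regex, and console text are invented for illustration; the real template comes from initBenchmarkMetrics.py and ultimately from BenchmarkMetric.js. Only the shape matters: one regex per metric, with group(1) holding the numeric value.

```python
import json, re

# Hypothetical metrics template, shaped like the per-test map benchmarkMetric.py expects:
# { "<testName>": { "<metricName>": { "regex": ..., "higherbetter": ... } } }
benchmarkMetricsTemplate = {
    "dacapo-h2": {
        "h2": {"regex": r"PASSED in (\d+) msec", "higherbetter": False}
    }
}

testNames = "dacapo-h2"
# Hypothetical console log captured from a benchmark run.
console = "===== DaCapo h2 PASSED in 4321 msec ====="

# Same loop as main(): fill in "value" wherever a metric's regex matches the console.
for test in testNames.split(","):
    for metric in benchmarkMetricsTemplate[test].values():
        m = re.search(metric.get("regex"), console)
        if not m:
            continue
        metric.update({"value": float(m.group(1))})

print(json.dumps(benchmarkMetricsTemplate, indent=2))
# The "h2" metric now carries "value": 4321.0 alongside its regex and higherbetter flag.
```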
initBenchmarkMetrics.py (new file)

@@ -0,0 +1,41 @@
# Builds the per-test metrics template (runBase) and the aggregate template with empty
# per-iteration value lists (aggrBase) from the converted metric configuration.
import argparse, pathlib, json


def initBenchmarkMetrics(metricConfig, test, benchmarkMetrics):
    # "dacapo-h2" -> benchmark "dacapo", variant "h2"
    test_info = test.split("-")
    benchmarkMap = metricConfig[test_info[0]]
    metricMap = benchmarkMap["metrics"]
    if len(test_info) > 1:
        variant = test_info[1]
        if (metricMap.get(variant) != None):
            # A variant-specific entry exists: keep only that metric for this test.
            benchmarkMetrics.update({test : {variant : metricMap[variant]}})
            return
    # No variant (or no variant-specific entry): keep the whole metrics map.
    benchmarkMetrics.update({test : metricMap})


def main():
    p = argparse.ArgumentParser()
    p.add_argument("--metricConfig_json", required=True)
    p.add_argument("--testNames", required=True)
    p.add_argument("--runBase", required=True)
    p.add_argument("--aggrBase", required=True)
    args = p.parse_args()

    metricConfig_json = pathlib.Path(args.metricConfig_json).read_text(encoding="utf-8")
    metricConfig = json.loads(metricConfig_json)

    benchmarkMetrics = {}
    tests = args.testNames.split(",")
    for test in tests: initBenchmarkMetrics(metricConfig, test, benchmarkMetrics)
    benchmarkMetrics_json = json.dumps(benchmarkMetrics)
    pathlib.Path(f"{args.runBase}").write_text(benchmarkMetrics_json, encoding="utf-8")

    for test in tests:
        for metric in benchmarkMetrics[test].values():
            metric.update({"test" : {"values" : []}})
            metric.update({"baseline" : {"values" : []}})

    benchmarkMetrics_json = json.dumps(benchmarkMetrics)
    pathlib.Path(f"{args.aggrBase}").write_text(benchmarkMetrics_json, encoding="utf-8")


if __name__ == "__main__":
    main()
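To make the data flow concrete, here is a sketch of the shapes this script reads and writes. The benchmark name, variant, and regex are invented; only the nesting (benchmark, then a metrics map, with variant-specific entries such as h2 for dacapo-h2) and the runBase/aggrBase outputs mirror what the script produces.

```python
import json

# Hypothetical metricConfig.json (produced by metricConfig2JSON.py): benchmarks keyed by
# name, each with a "metrics" map whose keys may include variant names like "h2".
metricConfig = {
    "dacapo": {
        "metrics": {
            "h2": {"regex": r"PASSED in (\d+) msec", "higherbetter": False}
        }
    }
}

# For the test name "dacapo-h2", initBenchmarkMetrics keeps only the variant entry,
# so runBase.json would hold:
runBase = {"dacapo-h2": {"h2": {"regex": r"PASSED in (\d+) msec", "higherbetter": False}}}

# main() then attaches empty per-iteration value lists, so aggrBase.json would hold:
aggrBase = {
    "dacapo-h2": {
        "h2": {
            "regex": r"PASSED in (\d+) msec",
            "higherbetter": False,
            "test": {"values": []},
            "baseline": {"values": []},
        }
    }
}

print(json.dumps(aggrBase, indent=2))
```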
metricConfig2JSON.py (new file)

@@ -0,0 +1,38 @@
# Converts the BenchmarkMetricRegex JavaScript object from BenchmarkMetric.js into
# plain JSON (metricConfig.json) so the Python scripts and the pipeline can consume it.
import argparse, pathlib, json, re


RE_COMMENT = re.compile(r"""
    //.*?$ |
    /\*.*?\*/
""", re.DOTALL | re.MULTILINE | re.VERBOSE)

RE_TRAIL_COMMA = re.compile(r",(\s*[}\]])")

RE_REGEX = re.compile(r"""/((?:\\.|[^/\\])*?)/[gimsuy]*(?=\s*,)""")

RE_FUNC = re.compile(r"""(funcName:\s)(.*?)(,)""")

RE_KEYS = re.compile(r"""([,{]\s*)([A-Za-z_]\w*)(\s*:)""")


def js_to_json(metrics_js):
    benchmark_parser = re.search(r"const\s+BenchmarkMetricRegex\s*=\s*({[\s\S]*?});", metrics_js)
    if not benchmark_parser:
        raise ValueError("BenchmarkMetricRegex not found")
    obj = benchmark_parser.group(1)
    obj = obj.replace("'", '"')                                #convert units and string keys
    obj = RE_COMMENT.sub("", obj)                              #remove comments
    obj = RE_REGEX.sub(lambda m: json.dumps(m.group(1)), obj)  #convert regex
    obj = RE_FUNC.sub(r'\1"\2"\3', obj)                        #convert funcName
    obj = RE_TRAIL_COMMA.sub(r'\1', obj)                       #remove trailing commas after funcName and regex conversion
    obj = RE_KEYS.sub(r'\1"\2"\3', obj)                        #convert non-string keys after removing trailing commas
    return obj


def main():
    p = argparse.ArgumentParser()
    p.add_argument("--metricConfig_js", required=True)
    args = p.parse_args()
    metricConfig_js = pathlib.Path(args.metricConfig_js).read_text(encoding="utf-8")
    metricConfig_json = js_to_json(metricConfig_js)
    pathlib.Path("metricConfig.json").write_text(metricConfig_json, encoding="utf-8")


if __name__ == "__main__":
    main()
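A short usage sketch of js_to_json, assuming metricConfig2JSON.py is importable from the working directory. The BenchmarkMetricRegex snippet is invented (the real input is the object defined in aqa-test-tools' BenchmarkMetric.js); it simply exercises the conversions the regexes above target: single quotes, comments, /.../ literals, bare funcName values, trailing commas, and unquoted keys.

```python
import json
from metricConfig2JSON import js_to_json  # assumes the script sits next to this one

# Invented JS snippet with the constructs the converter has to handle.
metrics_js = r"""
const BenchmarkMetricRegex = {
    dacapo: {
        // wall-clock time reported by the harness
        regex: /PASSED in (\d+) msec/,
        funcName: calculateMean,
        higherbetter: false,
        units: 'msec',
    },
};
"""

converted = js_to_json(metrics_js)
print(json.dumps(json.loads(converted), indent=2))
# {
#   "dacapo": {
#     "regex": "PASSED in (\\d+) msec",
#     "funcName": "calculateMean",
#     "higherbetter": false,
#     "units": "msec"
#   }
# }
```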
Perf pipeline Groovy script (modified)

@@ -1,14 +1,21 @@
#!groovy
def testStats = [:]
def baselineStats = [:]
def testRuntimes = []
def baselineRuntimes = []
def testParams = []
def baselineParams = []
int PERF_ITERATIONS = params.PERF_ITERATIONS ? params.PERF_ITERATIONS.toInteger() : 4
boolean RUN_BASELINE = (params.RUN_BASELINE != null) ? params.RUN_BASELINE.toBoolean() : true
int PERF_ITERATIONS = params.PERF_ITERATIONS.toInteger()
boolean PROCESS_METRICS = (params.PROCESS_METRICS != null) ? params.PROCESS_METRICS.toBoolean() : false
boolean EXIT_EARLY = (params.EXIT_EARLY != null) ? params.EXIT_EARLY.toBoolean() : false
def EXIT_EARLY = (params.EXIT_EARLY) ? true : false
if (params.SETUP_LABEL) {
    SETUP_LABEL = params.SETUP_LABEL
} else {
    if (PROCESS_METRICS && EXIT_EARLY) {
        SETUP_LABEL = "test-rhibmcloud-rhel9-x64-1"
    } else {
        SETUP_LABEL = "ci.role.test&&hw.arch.x86&&sw.os.linux"
    }
}

// loop through all the params and change the parameters if needed
params.each { param ->
@@ -38,48 +45,78 @@ params.each { param ->
    }
}

node ("ci.role.test&&hw.arch.x86&&sw.os.linux") {
node (SETUP_LABEL) {
    timestamps {
        try {
            ["TARGET", "BUILD_LIST", "PLATFORM", "LABEL"].each { key ->
            def metrics = [:]
            def testList = []
            def testNames = null
            def testRun = null
            def baseRun = null
            def runBase = "runBase.json"
            def aggrBase = "aggrBase.json"

            ["BUILD_LIST", "PLATFORM", "LABEL"].each { key ->
                [testParams, baselineParams].each { list ->
                    list << string(name: key, value: params."${key}")
                }
            }

            if (PROCESS_METRICS) {
                def owner = params.ADOPTOPENJDK_REPO.tokenize('/')[2]
                getPythonDependencies(owner, params.ADOPTOPENJDK_BRANCH)
                sh "curl -Os https://raw.githubusercontent.com/adoptium/aqa-test-tools/refs/heads/master/TestResultSummaryService/parsers/BenchmarkMetric.js"
                sh "python3 metricConfig2JSON.py --metricConfig_js BenchmarkMetric.js"
                sh "python3 initBenchmarkMetrics.py --metricConfig_json metricConfig.json --testNames ${params.TARGET.split("=")[1]} --runBase ${runBase} --aggrBase ${aggrBase}"
                testList = params.TARGET.split("=")[1].tokenize(",")
                metrics = readJSON file: aggrBase
            }
            else {
                testParams << string(name: "TARGET", value: params.TARGET)
                baselineParams << string(name: "TARGET", value: params.TARGET)
            }

            echo "starting to trigger build..."
            lock(resource: params.LABEL) {
                for (int i = 0; i < PERF_ITERATIONS; i++) {
                    //clone to avoid mutation
                    def thisTestParams = testParams.collect()
                    def thisBaselineParams = baselineParams.collect()
                    if (PROCESS_METRICS) {
                        //set the target, testlist should change if some metrics regress while others do not
                        testNames = testList.join(",")
                        def TARGET = params.TARGET.replaceFirst(/(?<=TESTLIST=)[^ ]+/, testNames)
                        thisTestParams << string(name: "TARGET", value: TARGET)
                        thisBaselineParams << string(name: "TARGET", value: TARGET)
                    }

                    // test
                    testParams << string(name: "TEST_NUM", value: "TEST_NUM" + i.toString())
                    def testRun = triggerJob(params.BENCHMARK, params.PLATFORM, testParams, "test")
                    aggregateLogs(testRun, testRuntimes)
                    testRun = triggerJob(params.BENCHMARK, params.PLATFORM, thisTestParams, "test")

                    // baseline
                    if (RUN_BASELINE) {
                        baselineParams << string(name: "BASELINE_NUM", value: "BASELINE_NUM_" + i.toString())
                        def baseRun = triggerJob(params.BENCHMARK, params.PLATFORM, baselineParams, "baseline")
                        aggregateLogs(baseRun, baselineRuntimes)
                        baseRun = triggerJob(params.BENCHMARK, params.PLATFORM, thisBaselineParams, "baseline")
                    } else {
                        echo "Skipping baseline run since RUN_BASELINE is set to false"
                    }
                    if (params.TARGET && params.TARGET.contains('dacapo')) {
                        testStats = stats(testRuntimes)
                        baselineStats = stats(baselineRuntimes)
                        def score = (testStats.mean/baselineStats.mean) * 100

                        echo "testRuntimes: ${testRuntimes}"
                        echo "baselineRuntimes: ${baselineRuntimes}"
                        echo "score: ${score} %"

                        if (i == PERF_ITERATIONS || (EXIT_EARLY && i >= PERF_ITERATIONS * 0.8)) {
                            if (score <= 98) {
                                currentBuild.result = 'UNSTABLE'
                                echo "Possible regression, set build result to UNSTABLE."

                    if (PROCESS_METRICS) {
                        aggregateLogs(testRun, testNames, testList, runBase, metrics, "test")
                        aggregateLogs(baseRun, testNames, testList, runBase, metrics, "baseline")
                        writeJSON file: "metrics.json", json: metrics, pretty: 4
                        archiveArtifacts artifacts: "metrics.json"
                        if (i == PERF_ITERATIONS-1 || (EXIT_EARLY && i >= PERF_ITERATIONS * 0.8)) {
                            if (i == PERF_ITERATIONS-1) {
                                echo "All iterations completed"
                            } else {
                                echo "Perf iteration completed. EXIT_EARLY: ${EXIT_EARLY}. PERF_ITERATIONS: ${PERF_ITERATIONS}. Actual iterations: ${i}."
                                break
                                echo "Attempting early exit"
                            }
                            echo "checking for regressions"
                            checkRegressions(metrics, testList)
                            if (testList.size() == 0) break
                        }
                    }
                }
            }

Review comment (Contributor), on the metricConfig2JSON.py step: Should Python be invoking this in a virtual env?
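The replaceFirst call in the loop above uses a lookbehind so that only the value after TESTLIST= is swapped as testList shrinks between iterations. A minimal Python equivalent of that substitution (the TARGET string here is a made-up example, not necessarily the pipeline's real format):

```python
import re

# Hypothetical TARGET parameter; only the TESTLIST=<names> piece matters here.
TARGET = "dacapo-custom TESTLIST=dacapo-h2,dacapo-avrora"

# Tests that have not yet been cleared by checkRegressions.
testList = ["dacapo-h2"]

# Same idea as the Groovy replaceFirst: the lookbehind keeps "TESTLIST=" in place
# and swaps only the comma-separated names that follow it.
new_target = re.sub(r"(?<=TESTLIST=)[^ ]+", ",".join(testList), TARGET, count=1)
print(new_target)  # dacapo-custom TESTLIST=dacapo-h2
```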
@@ -120,49 +157,81 @@ def generateChildJobViaAutoGen(newJobName) {
    build job: 'Test_Job_Auto_Gen', parameters: jobParams, propagate: true
}

def aggregateLogs(run, runtimes) {
def aggregateLogs(run, testNames, testList, templateName, aggregateMetrics, testType) {
    def json
    node("ci.role.test&&hw.arch.x86&&sw.os.linux") {
        def buildId = run.getRawBuild().getNumber()
        def name = run.getProjectName()
        def result = run.getCurrentResult()
    def buildId = run.getRawBuild().getNumber()
    def name = run.getProjectName()
    def result = run.getCurrentResult()
    def fname = "${name}_${buildId}.json"

        echo "${name} #${buildId} completed with status ${result}, copying JSON logs..."
    echo "${name} #${buildId} completed with status ${result}, retrieving console log..."
    writeFile file : 'console.txt', text: run.getRawBuild().getLog()
    sh "python3 benchmarkMetric.py --benchmarkMetricsTemplate_json ${templateName} --console console.txt --fname ${fname} --testNames ${testNames}"

        try {
            timeout(time: 1, unit: 'HOURS') {
                copyArtifacts(
                    projectName: name,
                    selector: specific("${buildId}"),
                    filter: "**/${name}_${buildId}.json",
                    target: "."
                )
            }
            json = readJSON file: "${name}_${buildId}.json"
            archiveArtifacts artifacts: "${name}_${buildId}.json", fingerprint: true, allowEmptyArchive: false
            def metricList = json.metrics['dacapo-h2']
            def runtimeMap = metricList.find{ it.containsKey('value') }
            if (runtimeMap) {
                runtimes << (runtimeMap.value as double)
            } else {
                echo "No runtime in ${name}_${buildId}.json"
    try {
        archiveArtifacts artifacts: fname, fingerprint: true, allowEmptyArchive: false
    } catch (Exception e) {
        echo "Cannot copy/process ${name}_${buildId}.json from ${name}: ${e}"
    }

    def runMetrics = readJSON file: fname

    testList.each { test ->
        aggregateMetrics[test].each { metric ->
            def value = runMetrics[test][metric.key]["value"]
            if (value != null) metric.value[testType]["values"] << value
        }
    }
}
def checkRegressions(aggregateMetrics, testList) {
    testloop: for (test in testList.clone()) {
        for (metric in aggregateMetrics[test].entrySet()) {
            def testMetrics = metric.value["test"]["values"]
            def baselineMetrics = metric.value["baseline"]["values"]
            if (testMetrics.size() > 0 && baselineMetrics.size() > 0) {
                def testStats = getStats(testMetrics)
                def baselineStats = getStats(baselineMetrics)

                echo "testStats: ${testStats}"
                echo "baselineStats: ${baselineStats}"

                def score = (metric.value["higherbetter"]) ? testStats.mean/baselineStats.mean : baselineStats.mean/testStats.mean
                score *= 100

                echo "score: ${score}"

                if (score <= 98) {
                    currentBuild.result = 'UNSTABLE'
                    echo "Possible ${metric.key} regression for ${test}, set build result to UNSTABLE."
                    continue testloop
                }
            }
            else {
                currentBuild.result = 'UNSTABLE'
                echo "${metric.key} metric for ${test} not found across all iterations. Set build result to UNSTABLE."
                continue testloop
            }
        } catch (Exception e) {
            echo "Cannot copy/process ${name}_${buildId}.json from ${name}: ${e}"
        }
        echo "Perf iteration for ${test} completed."
        testList.remove(test) //no metrics had regression or errors, we can EXIT_EARLY this test
    }
}

def stats (List nums) {
    def n = nums.size()
def getStats (values) {
    def n = values.size()
    def mid = n.intdiv(2)

    def sorted = nums.sort()
    def mean = nums.sum()/n
    def sorted = values.sort()
    def mean = values.sum()/n
    def median = (n % 2 == 1) ? sorted[mid] : (sorted[mid-1]+sorted[mid])/2
    def variance = nums.collect{(it-mean)**2}.sum()/n
    def variance = values.collect{(it-mean)**2}.sum()/n
    def stdev = Math.sqrt(variance as double)
    [mean: mean, max: sorted[-1], min: sorted[0], median: median, std: stdev]
}
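As a quick check on the getStats arithmetic, the same calculation in Python for a small sample (note the population variance, dividing by n rather than n-1):

```python
import math

values = [10.0, 12.0, 11.0, 13.0]   # e.g. four per-iteration runtimes
n = len(values)
sorted_vals = sorted(values)

mean = sum(values) / n                                 # 11.5
median = sorted_vals[n // 2] if n % 2 == 1 else (sorted_vals[n // 2 - 1] + sorted_vals[n // 2]) / 2   # 11.5
variance = sum((v - mean) ** 2 for v in values) / n    # population variance: 1.25
stdev = math.sqrt(variance)                            # ~1.118

print(mean, median, variance, stdev)
```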
def getPythonDependencies (owner, branch) {
    def pythonScripts = ["benchmarkMetric.py", "initBenchmarkMetrics.py", "metricConfig2JSON.py"]
    pythonScripts.each { pythonScript ->
        sh "curl -Os https://raw.githubusercontent.com/${owner}/aqa-tests/refs/heads/${branch}/buildenv/jenkins/${pythonScript}"
    }
}
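Tying the pieces together, a sketch of the aggregated metrics structure that aggregateLogs fills in and checkRegressions consumes, with the score computed the same way. Every name and number below is hypothetical.

```python
# Hypothetical metrics.json after three iterations of test and baseline runs.
metrics = {
    "dacapo-h2": {
        "h2": {
            "regex": r"PASSED in (\d+) msec",
            "higherbetter": False,
            "test": {"values": [4450.0, 4480.0, 4500.0]},
            "baseline": {"values": [4300.0, 4310.0, 4290.0]},
        }
    }
}

def mean(values):
    return sum(values) / len(values)

for test, test_metrics in metrics.items():
    for name, metric in test_metrics.items():
        test_mean = mean(metric["test"]["values"])
        baseline_mean = mean(metric["baseline"]["values"])
        # Same orientation as checkRegressions: for lower-is-better metrics the ratio is
        # baseline/test, so a slower test run pushes the score below 100.
        score = (test_mean / baseline_mean if metric["higherbetter"]
                 else baseline_mean / test_mean) * 100
        status = "possible regression, mark build UNSTABLE" if score <= 98 else "ok"
        print(f"{test}/{name}: score = {score:.1f}% -> {status}")
```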