Commits (25)
e8b0ccc
Generalized regression monitoring for PerfPipeline.
MattyWeee123 Jul 21, 2025
065611b
iterating over shallow copy to avoid remove exception
MattyWeee123 Jul 21, 2025
828a608
if an issue is found, set build result to UNSTABLE and move on to the…
MattyWeee123 Jul 21, 2025
7715344
moved removal to correct loop, fixed spacing
MattyWeee123 Jul 21, 2025
1dd77cb
resolve perf iteration conflict
MattyWeee123 Jul 21, 2025
2f44e2d
fix syntax error
MattyWeee123 Jul 21, 2025
179e7c9
trying entrySet
MattyWeee123 Jul 21, 2025
cd8b242
else paired correctly
MattyWeee123 Jul 21, 2025
5cb87cb
continue to label testloop instead of break
MattyWeee123 Jul 21, 2025
9bb706b
integrated with params.SETUP_LABEL, params.EXIT_EARLY, and params.PRO…
MattyWeee123 Jul 28, 2025
c82a5a8
fixed logic for TARGET
MattyWeee123 Jul 28, 2025
85e17ea
fixed logic for test and baseline params
MattyWeee123 Jul 28, 2025
052d692
using on def
MattyWeee123 Jul 28, 2025
141f4f1
params.PROCESS_METRICS separate from params.EXIT_EARLY
MattyWeee123 Jul 28, 2025
39eef20
support for SETUP_LABEL, PROCESS_METRICS, and EXIT_EARLY via L1 perf …
MattyWeee123 Jul 28, 2025
355a4f0
using params.PERF_ITERATIONS directly
MattyWeee123 Jul 28, 2025
7f7a537
some scope issues
MattyWeee123 Jul 28, 2025
6abff68
switching back to hard coded machine name python as default option, o…
MattyWeee123 Jul 28, 2025
3e5e634
more scoping issue
MattyWeee123 Jul 28, 2025
fe2a0d5
fixed node
MattyWeee123 Jul 28, 2025
4735960
labeling not working, using explicit name
MattyWeee123 Jul 28, 2025
8b99b62
cast PERF_ITERATIONS
MattyWeee123 Jul 29, 2025
8c999b7
handling PROCESS METRICS, EXIT EARLY, and SETUP LABEL via Jenkins job…
MattyWeee123 Aug 1, 2025
f0c6043
added comments
MattyWeee123 Aug 10, 2025
f9ea027
Merge branch 'master' into issue6418
MattyWeee123 Aug 10, 2025
27 changes: 27 additions & 0 deletions buildenv/jenkins/benchmarkMetric.py
@@ -0,0 +1,27 @@
import argparse, pathlib, json, re

def main():
    p = argparse.ArgumentParser()
    p.add_argument("--console", required=True)
    p.add_argument("--benchmarkMetricsTemplate_json", required=True)
    p.add_argument("--fname", required=True)
    p.add_argument("--testNames", required=True)
    args = p.parse_args()

    console = pathlib.Path(args.console).read_text(encoding="utf-8")

    benchmarkMetricsTemplate_json = pathlib.Path(args.benchmarkMetricsTemplate_json).read_text(encoding="utf-8")
    benchmarkMetricsTemplate = json.loads(benchmarkMetricsTemplate_json)

    # Run every metric's regex against the console log and record the first capture group as the metric value.
    tests = args.testNames.split(",")
    for test in tests:
        for metric in benchmarkMetricsTemplate[test].values():
            regex_parser = re.search(metric.get("regex"), console)
            if not regex_parser: continue
            metric.update({"value" : float(regex_parser.group(1))})

    benchmarkMetricsTemplate_json = json.dumps(benchmarkMetricsTemplate)
    pathlib.Path(f"{args.fname}").write_text(benchmarkMetricsTemplate_json, encoding="utf-8")

if __name__ == "__main__":
    main()
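
A minimal usage sketch (not part of the change), assuming benchmarkMetric.py is in the working directory; the one-metric template, regex, and console line below are hypothetical — real templates are generated from BenchmarkMetric.js by the other two scripts in this PR.

# Illustrative only: build a tiny template and console log, then run the script
# the same way perfPipeline.groovy invokes it via sh.
import json, pathlib, subprocess

template = {"dacapo-h2": {"h2": {"regex": r"PASSED in (\d+) msec", "higherbetter": False}}}
pathlib.Path("runBase.json").write_text(json.dumps(template), encoding="utf-8")
pathlib.Path("console.txt").write_text("===== DaCapo h2 PASSED in 4310 msec =====", encoding="utf-8")

subprocess.run(["python3", "benchmarkMetric.py",
                "--console", "console.txt",
                "--benchmarkMetricsTemplate_json", "runBase.json",
                "--fname", "out.json",
                "--testNames", "dacapo-h2"], check=True)

# out.json now carries "value": 4310.0 on the h2 metric
print(json.loads(pathlib.Path("out.json").read_text())["dacapo-h2"]["h2"]["value"])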
41 changes: 41 additions & 0 deletions buildenv/jenkins/initBenchmarkMetrics.py
@@ -0,0 +1,41 @@
import argparse, pathlib, json

def initBenchmarkMetrics(metricConfig, test, benchmarkMetrics):
    # Test names follow "<suite name>-<optional variant>", e.g. "dacapo-h2".
    test_info = test.split("-")
    benchmarkMap = metricConfig[test_info[0]]
    metricMap = benchmarkMap["metrics"]
    if len(test_info) > 1:
Contributor

Is only one test not valid?

Contributor Author
@MattyWeee123 MattyWeee123 Aug 10, 2025

@karianna It is; in that case it would execute benchmarkMetrics.update({test : metricMap}) instead. The perf tests follow the convention "suite name-optional variant information", so if the length is one, the suite name alone is enough to distinguish the unique benchmark information. This matches the convention of BenchmarkMetric.js in aqa-test-tools for the perf tests.

test_info is derived from a test name in the test list.

        variant = test_info[1]
        if (metricMap.get(variant) != None):
            benchmarkMetrics.update({test : {variant : metricMap[variant]}})
            return

    benchmarkMetrics.update({test : metricMap})

def main():
    p = argparse.ArgumentParser()
    p.add_argument("--metricConfig_json", required=True)
    p.add_argument("--testNames", required=True)
    p.add_argument("--runBase", required=True)
    p.add_argument("--aggrBase", required=True)
    args = p.parse_args()

    metricConfig_json = pathlib.Path(args.metricConfig_json).read_text(encoding="utf-8")
    metricConfig = json.loads(metricConfig_json)

    # Per-test metric template used to parse each run's console log (runBase).
    benchmarkMetrics = {}
    tests = args.testNames.split(",")
    for test in tests: initBenchmarkMetrics(metricConfig, test, benchmarkMetrics)
    benchmarkMetrics_json = json.dumps(benchmarkMetrics)
    pathlib.Path(f"{args.runBase}").write_text(benchmarkMetrics_json, encoding="utf-8")

    # Same template plus empty "test"/"baseline" value lists for cross-iteration aggregation (aggrBase).
    for test in tests:
        for metric in benchmarkMetrics[test].values():
            metric.update({"test" : {"values" : []}})
            metric.update({"baseline" : {"values" : []}})

    benchmarkMetrics_json = json.dumps(benchmarkMetrics)
    pathlib.Path(f"{args.aggrBase}").write_text(benchmarkMetrics_json, encoding="utf-8")

if __name__ == "__main__":
    main()
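
A hedged sketch of what this script emits for a suite-plus-variant test name like "dacapo-h2", assuming a hypothetical metricConfig.json of the shape the converter below produces; the metric names and regex are illustrative only.

# Illustrative only: one suite ("dacapo") with one variant metric ("h2").
import json, pathlib, subprocess

metric_config = {
    "dacapo": {
        "metrics": {
            "h2": {"regex": r"PASSED in (\d+) msec", "higherbetter": False}
        }
    }
}
pathlib.Path("metricConfig.json").write_text(json.dumps(metric_config), encoding="utf-8")

subprocess.run(["python3", "initBenchmarkMetrics.py",
                "--metricConfig_json", "metricConfig.json",
                "--testNames", "dacapo-h2",
                "--runBase", "runBase.json",
                "--aggrBase", "aggrBase.json"], check=True)

# runBase.json  -> {"dacapo-h2": {"h2": {"regex": ..., "higherbetter": false}}}
# aggrBase.json -> same, plus "test": {"values": []} and "baseline": {"values": []} per metric
print(json.loads(pathlib.Path("aggrBase.json").read_text())["dacapo-h2"]["h2"]["baseline"])  # {'values': []}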
38 changes: 38 additions & 0 deletions buildenv/jenkins/metricConfig2JSON.py
@@ -0,0 +1,38 @@
import argparse, pathlib, json, re

RE_COMMENT = re.compile(r"""
//.*?$ |
/\*.*?\*/
""", re.DOTALL | re.MULTILINE | re.VERBOSE)

RE_TRAIL_COMMA = re.compile(r",(\s*[}\]])")

RE_REGEX = re.compile(r"""/((?:\\.|[^/\\])*?)/[gimsuy]*(?=\s*,)""")

RE_FUNC = re.compile(r"""(funcName:\s)(.*?)(,)""")

RE_KEYS = re.compile(r"""([,{]\s*)([A-Za-z_]\w*)(\s*:)""")

def js_to_json(metrics_js):
    benchmark_parser = re.search(r"const\s+BenchmarkMetricRegex\s*=\s*({[\s\S]*?});", metrics_js)
    if not benchmark_parser:
        raise ValueError("BenchmarkMetricRegex not found")
    obj = benchmark_parser.group(1)
    obj = obj.replace("'", '"')                               # convert units and string keys
    obj = RE_COMMENT.sub("", obj)                             # remove comments
    obj = RE_REGEX.sub(lambda m: json.dumps(m.group(1)), obj) # convert regex literals
    obj = RE_FUNC.sub(r'\1"\2"\3', obj)                       # convert funcName
    obj = RE_TRAIL_COMMA.sub(r'\1', obj)                      # remove trailing commas after funcName and regex conversion
    obj = RE_KEYS.sub(r'\1"\2"\3', obj)                       # convert non-string keys after removing trailing commas
    return obj

def main():
    p = argparse.ArgumentParser()
    p.add_argument("--metricConfig_js", required=True)
    args = p.parse_args()
    metricConfig_js = pathlib.Path(args.metricConfig_js).read_text(encoding="utf-8")
    metricConfig_json = js_to_json(metricConfig_js)
    pathlib.Path("metricConfig.json").write_text(metricConfig_json, encoding="utf-8")

if __name__ == "__main__":
    main()
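
For context, a hedged example of the JS-to-JSON conversion, assuming js_to_json is importable from the same directory; the BenchmarkMetricRegex fragment below is made up and only mimics the style of the real BenchmarkMetric.js (unquoted keys, /.../ regex literals, trailing commas, // comments).

# Illustrative only.
import json
from metricConfig2JSON import js_to_json  # assumes the script sits alongside this snippet

metrics_js = r"""
const BenchmarkMetricRegex = {
    dacapo: {
        // one metric per variant
        metrics: {
            h2: {
                regex: /PASSED in (\d+) msec/,
                higherbetter: false,
            },
        },
    },
};
"""

config = json.loads(js_to_json(metrics_js))
print(config["dacapo"]["metrics"]["h2"]["regex"])  # PASSED in (\d+) msec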
187 changes: 128 additions & 59 deletions buildenv/jenkins/perfPipeline.groovy
@@ -1,14 +1,21 @@
#!groovy
def testStats = [:]
def baselineStats = [:]
def testRuntimes = []
def baselineRuntimes = []

def testParams = []
def baselineParams = []
int PERF_ITERATIONS = params.PERF_ITERATIONS ? params.PERF_ITERATIONS.toInteger() : 4
boolean RUN_BASELINE = (params.RUN_BASELINE != null) ? params.RUN_BASELINE.toBoolean() : true
int PERF_ITERATIONS = params.PERF_ITERATIONS.toInteger()
boolean PROCESS_METRICS = (params.PROCESS_METRICS != null) ? params.PROCESS_METRICS.toBoolean() : false
boolean EXIT_EARLY = (params.EXIT_EARLY != null) ? params.EXIT_EARLY.toBoolean() : false

def EXIT_EARLY = (params.EXIT_EARLY) ? true : false
if (params.SETUP_LABEL) {
SETUP_LABEL = params.SETUP_LABEL
} else {
if (PROCESS_METRICS && EXIT_EARLY) {
SETUP_LABEL = "test-rhibmcloud-rhel9-x64-1"
Contributor


That seems very tightly coupled / brittle to be pinned to one machine only

Contributor Author


@llxia @karianna I wasn't sure what other machines had python3. The other rhel machine didn't work, so I stuck to rhel9.

} else {
SETUP_LABEL = "ci.role.test&&hw.arch.x86&&sw.os.linux"
}
}

// loop through all the params and change the parameters if needed
params.each { param ->
@@ -38,48 +45,78 @@ params.each { param ->
}
}

node ("ci.role.test&&hw.arch.x86&&sw.os.linux") {
node (SETUP_LABEL) {
timestamps {
try {
["TARGET", "BUILD_LIST", "PLATFORM", "LABEL"].each { key ->
def metrics = [:]
def testList = []
def testNames = null
def testRun = null
def baseRun = null
def runBase = "runBase.json"
def aggrBase = "aggrBase.json"

["BUILD_LIST", "PLATFORM", "LABEL"].each { key ->
[testParams, baselineParams].each { list ->
list << string(name: key, value: params."${key}")
}
}

if (PROCESS_METRICS) {
def owner = params.ADOPTOPENJDK_REPO.tokenize('/')[2]
getPythonDependencies(owner, params.ADOPTOPENJDK_BRANCH)
sh "curl -Os https://raw.githubusercontent.com/adoptium/aqa-test-tools/refs/heads/master/TestResultSummaryService/parsers/BenchmarkMetric.js"
sh "python3 metricConfig2JSON.py --metricConfig_js BenchmarkMetric.js"
Contributor


Should Python be invoking this in a virtual env?


sh "python3 initBenchmarkMetrics.py --metricConfig_json metricConfig.json --testNames ${params.TARGET.split("=")[1]} --runBase ${runBase} --aggrBase ${aggrBase}"
testList = params.TARGET.split("=")[1].tokenize(",")
metrics = readJSON file: aggrBase
}
else {
testParams << string(name: "TARGET", value: params.TARGET)
baselineParams << string(name: "TARGET", value: params.TARGET)
}

echo "starting to trigger build..."
lock(resource: params.LABEL) {
for (int i = 0; i < PERF_ITERATIONS; i++) {
//clone to avoid mutation
def thisTestParams = testParams.collect()
def thisBaselineParams = baselineParams.collect()
if (PROCESS_METRICS) {
//set the target, testlist should change if some metrics regress while others do not
testNames = testList.join(",")
def TARGET = params.TARGET.replaceFirst(/(?<=TESTLIST=)[^ ]+/, testNames)
thisTestParams << string(name: "TARGET", value: TARGET)
thisBaselineParams << string(name: "TARGET", value: TARGET)
}

// test
testParams << string(name: "TEST_NUM", value: "TEST_NUM" + i.toString())
def testRun = triggerJob(params.BENCHMARK, params.PLATFORM, testParams, "test")
aggregateLogs(testRun, testRuntimes)
testRun = triggerJob(params.BENCHMARK, params.PLATFORM, thisTestParams, "test")

// baseline
if (RUN_BASELINE) {
baselineParams << string(name: "BASELINE_NUM", value: "BASELINE_NUM_" + i.toString())
def baseRun = triggerJob(params.BENCHMARK, params.PLATFORM, baselineParams, "baseline")
aggregateLogs(baseRun, baselineRuntimes)
baseRun = triggerJob(params.BENCHMARK, params.PLATFORM, thisBaselineParams, "baseline")

} else {
echo "Skipping baseline run since RUN_BASELINE is set to false"
}
if (params.TARGET && params.TARGET.contains('dacapo')) {
testStats = stats(testRuntimes)
baselineStats = stats(baselineRuntimes)
def score = (testStats.mean/baselineStats.mean) * 100

echo "testRuntimes: ${testRuntimes}"
echo "baselineRuntimes: ${baselineRuntimes}"
echo "score: ${score} %"

if (i == PERF_ITERATIONS || (EXIT_EARLY && i >= PERF_ITERATIONS * 0.8)) {
if (score <= 98) {
currentBuild.result = 'UNSTABLE'
echo "Possible regression, set build result to UNSTABLE."

if (PROCESS_METRICS) {
aggregateLogs(testRun, testNames, testList, runBase, metrics, "test")
aggregateLogs(baseRun, testNames, testList, runBase, metrics, "baseline")
writeJSON file: "metrics.json", json: metrics, pretty: 4
archiveArtifacts artifacts: "metrics.json"
if (i == PERF_ITERATIONS-1 || (EXIT_EARLY && i >= PERF_ITERATIONS * 0.8)) {
if (i == PERF_ITERATIONS-1) {
echo "All iterations completed"
} else {
echo "Perf iteration completed. EXIT_EARLY: ${EXIT_EARLY}. PERF_ITERATIONS: ${PERF_ITERATIONS}. Actual iterations: ${i}."
break
echo "Attempting early exit"
}
echo "checking for regressions"
checkRegressions(metrics, testList)
if (testList.size() == 0) break
}
}
}
@@ -120,49 +157,81 @@ def generateChildJobViaAutoGen(newJobName) {
build job: 'Test_Job_Auto_Gen', parameters: jobParams, propagate: true
}

def aggregateLogs(run, runtimes) {
def aggregateLogs(run, testNames, testList, templateName, aggregateMetrics, testType) {
def json
node("ci.role.test&&hw.arch.x86&&sw.os.linux") {
def buildId = run.getRawBuild().getNumber()
def name = run.getProjectName()
def result = run.getCurrentResult()
def buildId = run.getRawBuild().getNumber()
def name = run.getProjectName()
def result = run.getCurrentResult()
def fname = "${name}_${buildId}.json"

echo "${name} #${buildId} completed with status ${result}, copying JSON logs..."
echo "${name} #${buildId} completed with status ${result}, retrieving console log..."
writeFile file : 'console.txt', text: run.getRawBuild().getLog()
sh "python3 benchmarkMetric.py --benchmarkMetricsTemplate_json ${templateName} --console console.txt --fname ${fname} --testNames ${testNames}"

try {
timeout(time: 1, unit: 'HOURS') {
copyArtifacts(
projectName: name,
selector: specific("${buildId}"),
filter: "**/${name}_${buildId}.json",
target: "."
)

}
json = readJSON file: "${name}_${buildId}.json"
archiveArtifacts artifacts: "${name}_${buildId}.json", fingerprint: true, allowEmptyArchive: false
def metricList = json.metrics['dacapo-h2']
def runtimeMap = metricList.find{ it.containsKey('value') }
if (runtimeMap) {
runtimes << (runtimeMap.value as double)
} else {
echo "No runtime in ${name}_${buildId}.json"
try {
archiveArtifacts artifacts: fname, fingerprint: true, allowEmptyArchive: false
} catch (Exception e) {
echo "Cannot copy/process ${name}_${buildId}.json from ${name}: ${e}"
}

def runMetrics = readJSON file: fname

testList.each { test ->
aggregateMetrics[test].each { metric ->
def value = runMetrics[test][metric.key]["value"]
if (value != null) metric.value[testType]["values"] << value
}
}
}

def checkRegressions(aggregateMetrics, testList) {
testloop: for (test in testList.clone()) {
for (metric in aggregateMetrics[test].entrySet()) {
def testMetrics = metric.value["test"]["values"]
def baselineMetrics = metric.value["baseline"]["values"]
if (testMetrics.size() > 0 && baselineMetrics.size() > 0) {
def testStats = getStats(testMetrics)
def baselineStats = getStats(baselineMetrics)

echo "testStats: ${testStats}"
echo "baselineStats: ${baselineStats}"

def score = (metric.value["higherbetter"]) ? testStats.mean/baselineStats.mean : baselineStats.mean/testStats.mean
score *= 100

echo "score: ${score}"

if (score <= 98) {
currentBuild.result = 'UNSTABLE'
echo "Possible ${metric.key} regression for ${test}, set build result to UNSTABLE."
continue testloop
}
}
else {
currentBuild.result = 'UNSTABLE'
echo "${metric.key} metric for ${test} not found across all iterations. Set build result to UNSTABLE."
continue testloop
}
} catch (Exception e) {
echo "Cannot copy/process ${name}_${buildId}.json from ${name}: ${e}"
}
echo "Perf iteration for ${test} completed."
testList.remove(test) //no metrics had regression or errors, we can EXIT_EARLY this test
}
}

def stats (List nums) {
def n = nums.size()
def getStats (values) {
def n = values.size()
def mid = n.intdiv(2)

def sorted = nums.sort()
def mean = nums.sum()/n
def sorted = values.sort()
def mean = values.sum()/n
def median = (n % 2 == 1) ? sorted[mid] : (sorted[mid-1]+sorted[mid])/2
def variance = nums.collect{(it-mean)**2}.sum()/n
def variance = values.collect{(it-mean)**2}.sum()/n
def stdev = Math.sqrt(variance as double)
[mean: mean, max: sorted[-1], min: sorted[0], median: median, std: stdev]
}

def getPythonDependencies (owner, branch) {
def pythonScripts = ["benchmarkMetric.py", "initBenchmarkMetrics.py", "metricConfig2JSON.py"]
pythonScripts.each { pythonScript ->
sh "curl -Os https://raw.githubusercontent.com/${owner}/aqa-tests/refs/heads/${branch}/buildenv/jenkins/${pythonScript}"
}
}
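
To make the regression gate concrete, a hedged Python sketch of the score rule checkRegressions applies per metric (mean ratio scaled to a percentage, flagged at 98 or below); the sample runtimes are made up.

# Illustrative only: same rule as checkRegressions, outside Jenkins.
# For a lower-is-better metric (higherbetter == false) the ratio is inverted,
# so score <= 98 always means the test side is roughly 2% or more worse than baseline.
def score(test_values, baseline_values, higherbetter):
    test_mean = sum(test_values) / len(test_values)
    baseline_mean = sum(baseline_values) / len(baseline_values)
    ratio = test_mean / baseline_mean if higherbetter else baseline_mean / test_mean
    return ratio * 100

# Hypothetical dacapo-h2 runtimes in msec (lower is better):
print(round(score([4400, 4450, 4380], [4300, 4310, 4295], higherbetter=False), 2))  # 97.54 -> UNSTABLE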
5 changes: 1 addition & 4 deletions buildenv/jenkins/perfPipeline_root.groovy
@@ -55,14 +55,11 @@ params.each { param ->
}
node("worker || (ci.role.test&&hw.arch.x86&&sw.os.linux)") {
perfConfigJson.each { item ->
def BENCHMARK = item.BENCHMARK
def TARGET = item.TARGET
def BUILD_LIST = item.BUILD_LIST
def PLATMACHINE_MAP = item.PLAT_MACHINE_MAP
def baseParams = childParams.collect()
baseParams << string(name: "BENCHMARK", value: item.BENCHMARK)
baseParams << string(name: "TARGET", value: item.TARGET)
baseParams << string(name: "BUILD_LIST", value: item.BUILD_LIST)
baseParams << string(name: "PERF_ITERATIONS", value: item.PERF_ITERATIONS ? item.PERF_ITERATIONS.toString() : "4")

item.PLAT_MACHINE_MAP.each { kv ->
kv.each {p, m ->
Expand Down