Changes from all commits (25 commits)
e8b0ccc  Generalized regression monitoring for PerfPipeline.  (MattyWeee123, Jul 21, 2025)
065611b  iterating over shallow copy to avoid remove exception  (MattyWeee123, Jul 21, 2025)
828a608  if an issue is found, set build result to UNSTABLE and move on to the…  (MattyWeee123, Jul 21, 2025)
7715344  moved removal to correct loop, fixed spacing  (MattyWeee123, Jul 21, 2025)
1dd77cb  resolve perf iteration conflict  (MattyWeee123, Jul 21, 2025)
2f44e2d  fix syntax error  (MattyWeee123, Jul 21, 2025)
179e7c9  trying entrySet  (MattyWeee123, Jul 21, 2025)
cd8b242  else paired correctly  (MattyWeee123, Jul 21, 2025)
5cb87cb  continue to label testloop instead of break  (MattyWeee123, Jul 21, 2025)
9bb706b  integrated with params.SETUP_LABEL, params.EXIT_EARLY, and params.PRO…  (MattyWeee123, Jul 28, 2025)
c82a5a8  fixed logic for TARGET  (MattyWeee123, Jul 28, 2025)
85e17ea  fixed logic for test and baseline params  (MattyWeee123, Jul 28, 2025)
052d692  using on def  (MattyWeee123, Jul 28, 2025)
141f4f1  params.PROCESS_METRICS separate from params.EXIT_EARLY  (MattyWeee123, Jul 28, 2025)
39eef20  support for SETUP_LABEL, PROCESS_METRICS, and EXIT_EARLY via L1 perf …  (MattyWeee123, Jul 28, 2025)
355a4f0  using params.PERF_ITERATIONS directly  (MattyWeee123, Jul 28, 2025)
7f7a537  some scope issues  (MattyWeee123, Jul 28, 2025)
6abff68  switching back to hard coded machine name python as default option, o…  (MattyWeee123, Jul 28, 2025)
3e5e634  more scoping issue  (MattyWeee123, Jul 28, 2025)
fe2a0d5  fixed node  (MattyWeee123, Jul 28, 2025)
4735960  labeling not working, using explicit name  (MattyWeee123, Jul 28, 2025)
8b99b62  cast PERF_ITERATIONS  (MattyWeee123, Jul 29, 2025)
8c999b7  handling PROCESS METRICS, EXIT EARLY, and SETUP LABEL via Jenkins job…  (MattyWeee123, Aug 1, 2025)
f0c6043  added comments  (MattyWeee123, Aug 10, 2025)
f9ea027  Merge branch 'master' into issue6418  (MattyWeee123, Aug 10, 2025)
29 changes: 29 additions & 0 deletions buildenv/jenkins/benchmarkMetric.py
@@ -0,0 +1,29 @@
import argparse, pathlib, json, re

def main():
    p = argparse.ArgumentParser()
    p.add_argument("--console", required=True)
    p.add_argument("--benchmarkMetricsTemplate_json", required=True)
    p.add_argument("--fname", required=True)
    p.add_argument("--testNames", required=True)
    args = p.parse_args()

    console = pathlib.Path(args.console).read_text(encoding="utf-8")

    benchmarkMetricsTemplate_json = pathlib.Path(args.benchmarkMetricsTemplate_json).read_text(encoding="utf-8")
    benchmarkMetricsTemplate = json.loads(benchmarkMetricsTemplate_json)

    tests = args.testNames.split(",")

    #populate the template file with corresponding metrics extracted from console log
    for test in tests:
        for metric in benchmarkMetricsTemplate[test].values():
            regex_parser = re.search(metric.get("regex"), console)
            if not regex_parser: continue
            metric.update({"value" : float(regex_parser.group(1))})

    benchmarkMetricsTemplate_json = json.dumps(benchmarkMetricsTemplate)
    pathlib.Path(f"{args.fname}").write_text(benchmarkMetricsTemplate_json, encoding="utf-8")

if __name__ == "__main__":
    main()
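A minimal sketch of how this script behaves, using a made-up metric entry and console line (the metric name, regex, and values below are illustrative, not taken from BenchmarkMetric.js; real templates come from initBenchmarkMetrics.py):

import json, re

# hypothetical template entry for one test; the regex captures the metric value in group 1
template = {
    "dacapo-h2": {
        "h2": {"regex": r"PASSED in (\d+) msec", "higherbetter": False}
    }
}
console = "some earlier output\nh2 PASSED in 4523 msec\n"

# same population loop as above: search the console log and record the first capture group
for test, metrics in template.items():
    for metric in metrics.values():
        match = re.search(metric.get("regex"), console)
        if match:
            metric.update({"value": float(match.group(1))})

print(json.dumps(template))
# {"dacapo-h2": {"h2": {"regex": "PASSED in (\\d+) msec", "higherbetter": false, "value": 4523.0}}}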
44 changes: 44 additions & 0 deletions buildenv/jenkins/initBenchmarkMetrics.py
@@ -0,0 +1,44 @@
import argparse, pathlib, json

#extract necessary benchmark information from metricConfig based on test, and
#update benchmarkMetrics such that it is optimal for later processing
def initBenchmarkMetrics(metricConfig, test, benchmarkMetrics):
    test_info = test.split("-")
    benchmarkMap = metricConfig[test_info[0]] #index by general test category
    metricMap = benchmarkMap["metrics"]
    if len(test_info) > 1: #if there is a variant, grab it directly
        variant = test_info[1]
        if (metricMap.get(variant) != None):
            benchmarkMetrics.update({test : {variant : metricMap[variant]}})
            return

    #if there is no variant, we take the metricMap to already contain the unique information needed for test
    benchmarkMetrics.update({test : metricMap})

def main():
    p = argparse.ArgumentParser()
    p.add_argument("--metricConfig_json", required=True)
    p.add_argument("--testNames", required=True)
    p.add_argument("--runBase", required=True)
    p.add_argument("--aggrBase", required=True)
    args = p.parse_args()

    metricConfig_json = pathlib.Path(args.metricConfig_json).read_text(encoding="utf-8")
    metricConfig = json.loads(metricConfig_json)

    benchmarkMetrics = {}
    tests = args.testNames.split(",")
    for test in tests: initBenchmarkMetrics(metricConfig, test, benchmarkMetrics)
    benchmarkMetrics_json = json.dumps(benchmarkMetrics)
    pathlib.Path(f"{args.runBase}").write_text(benchmarkMetrics_json, encoding="utf-8") #serves as template populated by a single run

    for test in tests:
        for metric in benchmarkMetrics[test].values():
            metric.update({"test" : {"values" : []}})
            metric.update({"baseline" : {"values" : []}})

    benchmarkMetrics_json = json.dumps(benchmarkMetrics)
    pathlib.Path(f"{args.aggrBase}").write_text(benchmarkMetrics_json, encoding="utf-8") #serves as aggregate file populated by all runs

if __name__ == "__main__":
    main()
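For illustration, a sketch of the two output shapes under a hypothetical metricConfig (the field names mirror what these scripts read, but the actual content converted from BenchmarkMetric.js may differ):

import json

# hypothetical input, in the shape produced by metricConfig2JSON.py
metricConfig = {
    "dacapo": {
        "metrics": {
            "h2": {"regex": "PASSED in (\\d+) msec", "higherbetter": False}
        }
    }
}

# "dacapo-h2" is split into the category "dacapo" and the variant "h2"
benchmarkMetrics = {"dacapo-h2": {"h2": metricConfig["dacapo"]["metrics"]["h2"]}}

# runBase.json: per-run template that benchmarkMetric.py fills with a single "value" per metric
print(json.dumps(benchmarkMetrics))

# aggrBase.json: same metrics plus empty lists that collect values across all iterations
for metric in benchmarkMetrics["dacapo-h2"].values():
    metric.update({"test": {"values": []}, "baseline": {"values": []}})
print(json.dumps(benchmarkMetrics))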
41 changes: 41 additions & 0 deletions buildenv/jenkins/metricConfig2JSON.py
@@ -0,0 +1,41 @@
import argparse, pathlib, json, re

#regexes used to convert BenchmarkMetric.js into a valid JSON file
RE_COMMENT = re.compile(r"""
    //.*?$ |
    /\*.*?\*/
""", re.DOTALL | re.MULTILINE | re.VERBOSE)

RE_TRAIL_COMMA = re.compile(r",(\s*[}\]])")

RE_REGEX = re.compile(r"""/((?:\\.|[^/\\])*?)/[gimsuy]*(?=\s*,)""")

RE_FUNC = re.compile(r"""(funcName:\s)(.*?)(,)""")

RE_KEYS = re.compile(r"""([,{]\s*)([A-Za-z_]\w*)(\s*:)""")

#parses the BenchmarkMetric.js file by grabbing the BenchmarkMetricRegex element,
#removing comments, and converting to proper JSON syntax
def js_to_json(metrics_js):
    benchmark_parser = re.search(r"const\s+BenchmarkMetricRegex\s*=\s*({[\s\S]*?});", metrics_js)
    if not benchmark_parser:
        raise ValueError("BenchmarkMetricRegex not found")
    obj = benchmark_parser.group(1)
    obj = obj.replace("'", '"') #convert units and string keys
    obj = RE_COMMENT.sub("", obj) #remove comments
    obj = RE_REGEX.sub(lambda m: json.dumps(m.group(1)), obj) #convert regex literals
    obj = RE_FUNC.sub(r'\1"\2"\3', obj) #convert funcName
    obj = RE_TRAIL_COMMA.sub(r'\1', obj) #remove trailing commas after funcName and regex conversion
    obj = RE_KEYS.sub(r'\1"\2"\3', obj) #convert non string keys after removing trailing commas
    return obj

def main():
    p = argparse.ArgumentParser()
    p.add_argument("--metricConfig_js", required=True)
    args = p.parse_args()
    metricConfig_js = pathlib.Path(args.metricConfig_js).read_text(encoding="utf-8")
    metricConfig_json = js_to_json(metricConfig_js)
    pathlib.Path("metricConfig.json").write_text(metricConfig_json, encoding="utf-8")

if __name__ == "__main__":
    main()
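A sketch of the conversion on a small, made-up fragment (the shape mimics BenchmarkMetricRegex, but the names and regex are illustrative); this assumes metricConfig2JSON.py is importable from the current directory:

import json
from metricConfig2JSON import js_to_json

sample_js = """
const BenchmarkMetricRegex = {
    dacapo: {
        metrics: {
            h2: { regex: /PASSED in (\\d+) msec/, higherbetter: false, }, // trailing comma and comment
        },
    },
};
"""

# comments are stripped, the regex literal becomes a JSON string,
# trailing commas are dropped, and bare keys are quoted
converted = js_to_json(sample_js)
print(json.loads(converted))
# {'dacapo': {'metrics': {'h2': {'regex': 'PASSED in (\\d+) msec', 'higherbetter': False}}}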
190 changes: 131 additions & 59 deletions buildenv/jenkins/perfPipeline.groovy
@@ -1,14 +1,21 @@
#!groovy
def testStats = [:]
def baselineStats = [:]
def testRuntimes = []
def baselineRuntimes = []

def testParams = []
def baselineParams = []
int PERF_ITERATIONS = params.PERF_ITERATIONS ? params.PERF_ITERATIONS.toInteger() : 4
boolean RUN_BASELINE = (params.RUN_BASELINE != null) ? params.RUN_BASELINE.toBoolean() : true
int PERF_ITERATIONS = params.PERF_ITERATIONS.toInteger()
boolean PROCESS_METRICS = (params.PROCESS_METRICS != null) ? params.PROCESS_METRICS.toBoolean() : false
boolean EXIT_EARLY = (params.EXIT_EARLY != null) ? params.EXIT_EARLY.toBoolean() : false

def EXIT_EARLY = (params.EXIT_EARLY) ? true : false
if (params.SETUP_LABEL) {
SETUP_LABEL = params.SETUP_LABEL
} else {
if (PROCESS_METRICS && EXIT_EARLY) {
SETUP_LABEL = "test-rhibmcloud-rhel9-x64-1" //machine needs python
} else {
SETUP_LABEL = "ci.role.test&&hw.arch.x86&&sw.os.linux"
}
}

// loop through all the params and change the parameters if needed
params.each { param ->
@@ -38,48 +45,81 @@ params.each { param ->
}
}

node ("ci.role.test&&hw.arch.x86&&sw.os.linux") {
node (SETUP_LABEL) {
timestamps {
try {
["TARGET", "BUILD_LIST", "PLATFORM", "LABEL"].each { key ->
def metrics = [:]
def testList = []
def testNames = null
def testRun = null
def baseRun = null
def runBase = "runBase.json"
def aggrBase = "aggrBase.json"

["BUILD_LIST", "PLATFORM", "LABEL"].each { key ->
[testParams, baselineParams].each { list ->
list << string(name: key, value: params."${key}")
}
}

if (PROCESS_METRICS) { //convert BenchmarkMetric.js to a JSON file optimized for metric processing
def owner = params.ADOPTOPENJDK_REPO.tokenize('/')[2]
getPythonDependencies(owner, params.ADOPTOPENJDK_BRANCH)
sh "curl -Os https://raw.githubusercontent.com/adoptium/aqa-test-tools/refs/heads/master/TestResultSummaryService/parsers/BenchmarkMetric.js"
sh "python3 metricConfig2JSON.py --metricConfig_js BenchmarkMetric.js"
Review comment (Contributor): Should Python be invoking this in a virtual env?
sh "python3 initBenchmarkMetrics.py --metricConfig_json metricConfig.json --testNames ${params.TARGET.split("=")[1]} --runBase ${runBase} --aggrBase ${aggrBase}"
testList = params.TARGET.split("=")[1].tokenize(",")
metrics = readJSON file: aggrBase
}

if (!EXIT_EARLY) {
testParams << string(name: "TARGET", value: params.TARGET)
baselineParams << string(name: "TARGET", value: params.TARGET)
}

echo "starting to trigger build..."
lock(resource: params.LABEL) {
for (int i = 0; i < PERF_ITERATIONS; i++) {
//clone to avoid mutation
def thisTestParams = testParams.collect()
def thisBaselineParams = baselineParams.collect()
if (EXIT_EARLY) {
//update TARGET; testList holds only the tests that have not been exited early
testNames = testList.join(",")
def TARGET = params.TARGET.replaceFirst(/(?<=TESTLIST=)[^ ]+/, testNames)
thisTestParams << string(name: "TARGET", value: TARGET)
thisBaselineParams << string(name: "TARGET", value: TARGET)
}

// test
testParams << string(name: "TEST_NUM", value: "TEST_NUM" + i.toString())
def testRun = triggerJob(params.BENCHMARK, params.PLATFORM, testParams, "test")
aggregateLogs(testRun, testRuntimes)
testRun = triggerJob(params.BENCHMARK, params.PLATFORM, thisTestParams, "test")

// baseline
if (RUN_BASELINE) {
baselineParams << string(name: "BASELINE_NUM", value: "BASELINE_NUM_" + i.toString())
def baseRun = triggerJob(params.BENCHMARK, params.PLATFORM, baselineParams, "baseline")
aggregateLogs(baseRun, baselineRuntimes)
baseRun = triggerJob(params.BENCHMARK, params.PLATFORM, thisBaselineParams, "baseline")

} else {
echo "Skipping baseline run since RUN_BASELINE is set to false"
}
if (params.TARGET && params.TARGET.contains('dacapo')) {
testStats = stats(testRuntimes)
baselineStats = stats(baselineRuntimes)
def score = (testStats.mean/baselineStats.mean) * 100

echo "testRuntimes: ${testRuntimes}"
echo "baselineRuntimes: ${baselineRuntimes}"
echo "score: ${score} %"

if (i == PERF_ITERATIONS || (EXIT_EARLY && i >= PERF_ITERATIONS * 0.8)) {
if (score <= 98) {
currentBuild.result = 'UNSTABLE'
echo "Possible regression, set build result to UNSTABLE."

if (PROCESS_METRICS) {
aggregateLogs(testRun, testNames, testList, runBase, metrics, "test")
aggregateLogs(baseRun, testNames, testList, runBase, metrics, "baseline")
writeJSON file: "metrics.json", json: metrics, pretty: 4
archiveArtifacts artifacts: "metrics.json"

//if we are on the final iteration, or we have executed enough iterations to decide likelihood of regression and have permission to exit early
if (i == PERF_ITERATIONS-1 || (EXIT_EARLY && i >= PERF_ITERATIONS * 0.8)) {
if (i == PERF_ITERATIONS-1) {
echo "All iterations completed"
} else {
echo "Perf iteration completed. EXIT_EARLY: ${EXIT_EARLY}. PERF_ITERATIONS: ${PERF_ITERATIONS}. Actual iterations: ${i}."
break
echo "Attempting early exit"
}
echo "checking for regressions"
checkRegressions(metrics, testList) //compute relevant performance stats, check for regression
if (testList.size() == 0) break //if all tests have been exited early we can end testing
}
}
}
@@ -120,49 +160,81 @@ def generateChildJobViaAutoGen(newJobName) {
build job: 'Test_Job_Auto_Gen', parameters: jobParams, propagate: true
}

def aggregateLogs(run, runtimes) {
def aggregateLogs(run, testNames, testList, templateName, aggregateMetrics, testType) {
def json
node("ci.role.test&&hw.arch.x86&&sw.os.linux") {
def buildId = run.getRawBuild().getNumber()
def name = run.getProjectName()
def result = run.getCurrentResult()
def buildId = run.getRawBuild().getNumber()
def name = run.getProjectName()
def result = run.getCurrentResult()
def fname = "${name}_${buildId}.json"

echo "${name} #${buildId} completed with status ${result}, copying JSON logs..."
echo "${name} #${buildId} completed with status ${result}, retrieving console log..."
writeFile file : 'console.txt', text: run.getRawBuild().getLog()
sh "python3 benchmarkMetric.py --benchmarkMetricsTemplate_json ${templateName} --console console.txt --fname ${fname} --testNames ${testNames}"

try {
timeout(time: 1, unit: 'HOURS') {
copyArtifacts(
projectName: name,
selector: specific("${buildId}"),
filter: "**/${name}_${buildId}.json",
target: "."
)

}
json = readJSON file: "${name}_${buildId}.json"
archiveArtifacts artifacts: "${name}_${buildId}.json", fingerprint: true, allowEmptyArchive: false
def metricList = json.metrics['dacapo-h2']
def runtimeMap = metricList.find{ it.containsKey('value') }
if (runtimeMap) {
runtimes << (runtimeMap.value as double)
} else {
echo "No runtime in ${name}_${buildId}.json"
try {
archiveArtifacts artifacts: fname, fingerprint: true, allowEmptyArchive: false
} catch (Exception e) {
echo "Cannot copy/process ${name}_${buildId}.json from ${name}: ${e}"
}

def runMetrics = readJSON file: fname

testList.each { test ->
aggregateMetrics[test].each { metric ->
def value = runMetrics[test][metric.key]["value"]
if (value != null) metric.value[testType]["values"] << value
}
}
}

def checkRegressions(aggregateMetrics, testList) {
testloop: for (test in testList.clone()) {
for (metric in aggregateMetrics[test].entrySet()) {
def testMetrics = metric.value["test"]["values"]
def baselineMetrics = metric.value["baseline"]["values"]
if (testMetrics.size() > 0 && baselineMetrics.size() > 0) {
def testStats = getStats(testMetrics)
def baselineStats = getStats(baselineMetrics)

echo "testStats: ${testStats}"
echo "baselineStats: ${baselineStats}"

def score = (metric.value["higherbetter"]) ? testStats.mean/baselineStats.mean : baselineStats.mean/testStats.mean
score *= 100

echo "score: ${score}"

if (score <= 98) {
currentBuild.result = 'UNSTABLE'
echo "Possible ${metric.key} regression for ${test}, set build result to UNSTABLE."
continue testloop
}
}
else {
currentBuild.result = 'UNSTABLE'
echo "${metric.key} metric for ${test} not found across all iterations. Set build result to UNSTABLE."
continue testloop
}
} catch (Exception e) {
echo "Cannot copy/process ${name}_${buildId}.json from ${name}: ${e}"
}
echo "Perf iteration for ${test} completed."
testList.remove(test) //no metrics had regression or errors, we can EXIT_EARLY this test
}
}

def stats (List nums) {
def n = nums.size()
def getStats (values) {
def n = values.size()
def mid = n.intdiv(2)

def sorted = nums.sort()
def mean = nums.sum()/n
def sorted = values.sort()
def mean = values.sum()/n
def median = (n % 2 == 1) ? sorted[mid] : (sorted[mid-1]+sorted[mid])/2
def variance = nums.collect{(it-mean)**2}.sum()/n
def variance = values.collect{(it-mean)**2}.sum()/n
def stdev = Math.sqrt(variance as double)
[mean: mean, max: sorted[-1], min: sorted[0], median: median, std: stdev]
}
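For clarity, a small worked sketch of the scoring rule used in checkRegressions, with made-up means: the ratio is oriented by higherbetter so that a score at or below 98 always means the test side is roughly 2% or more worse than the baseline.

# mirrors the Groovy scoring above; the numbers are illustrative only
def score(test_mean, baseline_mean, higherbetter):
    ratio = test_mean / baseline_mean if higherbetter else baseline_mean / test_mean
    return ratio * 100

print(score(970.0, 1000.0, True))   # throughput-style metric: 97.0 -> flagged as possible regression
print(score(4.2, 4.0, False))       # runtime-style metric: ~95.2 -> flagged
print(score(4.0, 4.0, False))       # 100.0 -> within threshold, not flagged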

def getPythonDependencies (owner, branch) {
def pythonScripts = ["benchmarkMetric.py", "initBenchmarkMetrics.py", "metricConfig2JSON.py"]
pythonScripts.each { pythonScript ->
sh "curl -Os https://raw.githubusercontent.com/${owner}/aqa-tests/refs/heads/${branch}/buildenv/jenkins/${pythonScript}"
}
}