Changes from all commits (25 commits)
e8b0ccc  Generalized regression monitoring for PerfPipeline.  (MattyWeee123, Jul 21, 2025)
065611b  iterating over shallow copy to avoid remove exception  (MattyWeee123, Jul 21, 2025)
828a608  if an issue is found, set build result to UNSTABLE and move on to the…  (MattyWeee123, Jul 21, 2025)
7715344  moved removal to correct loop, fixed spacing  (MattyWeee123, Jul 21, 2025)
1dd77cb  resolve perf iteration conflict  (MattyWeee123, Jul 21, 2025)
2f44e2d  fix syntax error  (MattyWeee123, Jul 21, 2025)
179e7c9  trying entrySet  (MattyWeee123, Jul 21, 2025)
cd8b242  else paired correctly  (MattyWeee123, Jul 21, 2025)
5cb87cb  continue to label testloop instead of break  (MattyWeee123, Jul 21, 2025)
9bb706b  integrated with params.SETUP_LABEL, params.EXIT_EARLY, and params.PRO…  (MattyWeee123, Jul 28, 2025)
c82a5a8  fixed logic for TARGET  (MattyWeee123, Jul 28, 2025)
85e17ea  fixed logic for test and baseline params  (MattyWeee123, Jul 28, 2025)
052d692  using on def  (MattyWeee123, Jul 28, 2025)
141f4f1  params.PROCESS_METRICS separate from params.EXIT_EARLY  (MattyWeee123, Jul 28, 2025)
39eef20  support for SETUP_LABEL, PROCESS_METRICS, and EXIT_EARLY via L1 perf …  (MattyWeee123, Jul 28, 2025)
355a4f0  using params.PERF_ITERATIONS directly  (MattyWeee123, Jul 28, 2025)
7f7a537  some scope issues  (MattyWeee123, Jul 28, 2025)
6abff68  switching back to hard coded machine name python as default option, o…  (MattyWeee123, Jul 28, 2025)
3e5e634  more scoping issue  (MattyWeee123, Jul 28, 2025)
fe2a0d5  fixed node  (MattyWeee123, Jul 28, 2025)
4735960  labeling not working, using explicit name  (MattyWeee123, Jul 28, 2025)
8b99b62  cast PERF_ITERATIONS  (MattyWeee123, Jul 29, 2025)
8c999b7  handling PROCESS METRICS, EXIT EARLY, and SETUP LABEL via Jenkins job…  (MattyWeee123, Aug 1, 2025)
f0c6043  added comments  (MattyWeee123, Aug 10, 2025)
f9ea027  Merge branch 'master' into issue6418  (MattyWeee123, Aug 10, 2025)
29 changes: 29 additions & 0 deletions buildenv/jenkins/benchmarkMetric.py
@@ -0,0 +1,29 @@
import argparse, pathlib, json, re

def main():
    p = argparse.ArgumentParser()
    p.add_argument("--console", required=True)
    p.add_argument("--benchmarkMetricsTemplate_json", required=True)
    p.add_argument("--fname", required=True)
    p.add_argument("--testNames", required=True)
    args = p.parse_args()

    console = pathlib.Path(args.console).read_text(encoding="utf-8")

    benchmarkMetricsTemplate_json = pathlib.Path(args.benchmarkMetricsTemplate_json).read_text(encoding="utf-8")
    benchmarkMetricsTemplate = json.loads(benchmarkMetricsTemplate_json)

    tests = args.testNames.split(",")

    #populate the template file with corresponding metrics extracted from console log
    for test in tests:
        for metric in benchmarkMetricsTemplate[test].values():
            regex_parser = re.search(metric.get("regex"), console)
            if not regex_parser: continue
            metric.update({"value" : float(regex_parser.group(1))})

    benchmarkMetricsTemplate_json = json.dumps(benchmarkMetricsTemplate)
    pathlib.Path(f"{args.fname}").write_text(benchmarkMetricsTemplate_json, encoding="utf-8")

if __name__ == "__main__":
    main()
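A minimal sketch of how this script behaves, using a made-up metric entry and console line (the metric name, regex, and values below are illustrative, not taken from BenchmarkMetric.js; real templates come from initBenchmarkMetrics.py):

import json, re

# hypothetical template entry for one test; the regex captures the metric value in group 1
template = {
    "dacapo-h2": {
        "h2": {"regex": r"PASSED in (\d+) msec", "higherbetter": False}
    }
}
console = "some earlier output\nh2 PASSED in 4523 msec\n"

# same population loop as above: search the console log and record the first capture group
for test, metrics in template.items():
    for metric in metrics.values():
        match = re.search(metric.get("regex"), console)
        if match:
            metric.update({"value": float(match.group(1))})

print(json.dumps(template))
# {"dacapo-h2": {"h2": {"regex": "PASSED in (\\d+) msec", "higherbetter": false, "value": 4523.0}}}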
44 changes: 44 additions & 0 deletions buildenv/jenkins/initBenchmarkMetrics.py
@@ -0,0 +1,44 @@
import argparse, pathlib, json

#extract necessary benchmark information from metricConfig based on test, and
#update benchmarkMetrics such that it is optimal for later processing
def initBenchmarkMetrics(metricConfig, test, benchmarkMetrics):
    test_info = test.split("-")
    benchmarkMap = metricConfig[test_info[0]] #index by general test category
    metricMap = benchmarkMap["metrics"]
    if len(test_info) > 1: #if there is a variant, grab it directly
        variant = test_info[1]
        if (metricMap.get(variant) != None):
            benchmarkMetrics.update({test : {variant : metricMap[variant]}})
            return

    #if there is no variant, we take the metricMap to already contain the unique information needed for test
    benchmarkMetrics.update({test : metricMap})

def main():
    p = argparse.ArgumentParser()
    p.add_argument("--metricConfig_json", required=True)
    p.add_argument("--testNames", required=True)
    p.add_argument("--runBase", required=True)
    p.add_argument("--aggrBase", required=True)
    args = p.parse_args()

    metricConfig_json = pathlib.Path(args.metricConfig_json).read_text(encoding="utf-8")
    metricConfig = json.loads(metricConfig_json)

    benchmarkMetrics = {}
    tests = args.testNames.split(",")
    for test in tests: initBenchmarkMetrics(metricConfig, test, benchmarkMetrics)
    benchmarkMetrics_json = json.dumps(benchmarkMetrics)
    pathlib.Path(f"{args.runBase}").write_text(benchmarkMetrics_json, encoding="utf-8") #serves as template populated by a single run

    for test in tests:
        for metric in benchmarkMetrics[test].values():
            metric.update({"test" : {"values" : []}})
            metric.update({"baseline" : {"values" : []}})

    benchmarkMetrics_json = json.dumps(benchmarkMetrics)
    pathlib.Path(f"{args.aggrBase}").write_text(benchmarkMetrics_json, encoding="utf-8") #serves as aggregate file populated by all runs

if __name__ == "__main__":
    main()
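For illustration, a sketch of the two output shapes under a hypothetical metricConfig (the field names mirror what these scripts read, but the actual content converted from BenchmarkMetric.js may differ):

import json

# hypothetical input, in the shape produced by metricConfig2JSON.py
metricConfig = {
    "dacapo": {
        "metrics": {
            "h2": {"regex": "PASSED in (\\d+) msec", "higherbetter": False}
        }
    }
}

# "dacapo-h2" is split into the category "dacapo" and the variant "h2"
benchmarkMetrics = {"dacapo-h2": {"h2": metricConfig["dacapo"]["metrics"]["h2"]}}

# runBase.json: per-run template that benchmarkMetric.py fills with a single "value" per metric
print(json.dumps(benchmarkMetrics))

# aggrBase.json: same metrics plus empty lists that collect values across all iterations
for metric in benchmarkMetrics["dacapo-h2"].values():
    metric.update({"test": {"values": []}, "baseline": {"values": []}})
print(json.dumps(benchmarkMetrics))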
41 changes: 41 additions & 0 deletions buildenv/jenkins/metricConfig2JSON.py
@@ -0,0 +1,41 @@
import argparse, pathlib, json, re

#regexes used to convert BenchmarkMetric.js into a valid JSON file
RE_COMMENT = re.compile(r"""
    //.*?$ |
    /\*.*?\*/
""", re.DOTALL | re.MULTILINE | re.VERBOSE)

RE_TRAIL_COMMA = re.compile(r",(\s*[}\]])")

RE_REGEX = re.compile(r"""/((?:\\.|[^/\\])*?)/[gimsuy]*(?=\s*,)""")

RE_FUNC = re.compile(r"""(funcName:\s)(.*?)(,)""")

RE_KEYS = re.compile(r"""([,{]\s*)([A-Za-z_]\w*)(\s*:)""")

#parses the BenchmarkMetric.js file by grabbing the BenchmarkMetricRegex element,
#removing comments, and converting to proper JSON syntax
def js_to_json(metrics_js):
    benchmark_parser = re.search(r"const\s+BenchmarkMetricRegex\s*=\s*({[\s\S]*?});", metrics_js)
    if not benchmark_parser:
        raise ValueError("BenchmarkMetricRegex not found")
    obj = benchmark_parser.group(1)
    obj = obj.replace("'", '"') #convert units and string keys
    obj = RE_COMMENT.sub("", obj) #remove comments
    obj = RE_REGEX.sub(lambda m: json.dumps(m.group(1)), obj) #convert regex literals
    obj = RE_FUNC.sub(r'\1"\2"\3', obj) #convert funcName
    obj = RE_TRAIL_COMMA.sub(r'\1', obj) #remove trailing commas after funcName and regex conversion
    obj = RE_KEYS.sub(r'\1"\2"\3', obj) #convert non string keys after removing trailing commas
    return obj

def main():
    p = argparse.ArgumentParser()
    p.add_argument("--metricConfig_js", required=True)
    args = p.parse_args()
    metricConfig_js = pathlib.Path(args.metricConfig_js).read_text(encoding="utf-8")
    metricConfig_json = js_to_json(metricConfig_js)
    pathlib.Path("metricConfig.json").write_text(metricConfig_json, encoding="utf-8")

if __name__ == "__main__":
    main()
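A sketch of the conversion on a small, made-up fragment (the shape mimics BenchmarkMetricRegex, but the names and regex are illustrative); this assumes metricConfig2JSON.py is importable from the current directory:

import json
from metricConfig2JSON import js_to_json

sample_js = """
const BenchmarkMetricRegex = {
    dacapo: {
        metrics: {
            h2: { regex: /PASSED in (\\d+) msec/, higherbetter: false, }, // trailing comma and comment
        },
    },
};
"""

# comments are stripped, the regex literal becomes a JSON string,
# trailing commas are dropped, and bare keys are quoted
converted = js_to_json(sample_js)
print(json.loads(converted))
# {'dacapo': {'metrics': {'h2': {'regex': 'PASSED in (\\d+) msec', 'higherbetter': False}}}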
190 changes: 131 additions & 59 deletions buildenv/jenkins/perfPipeline.groovy
@@ -1,14 +1,21 @@
#!groovy
def testStats = [:]
def baselineStats = [:]
def testRuntimes = []
def baselineRuntimes = []

def testParams = []
def baselineParams = []
int PERF_ITERATIONS = params.PERF_ITERATIONS ? params.PERF_ITERATIONS.toInteger() : 4
boolean RUN_BASELINE = (params.RUN_BASELINE != null) ? params.RUN_BASELINE.toBoolean() : true
int PERF_ITERATIONS = params.PERF_ITERATIONS.toInteger()
boolean PROCESS_METRICS = (params.PROCESS_METRICS != null) ? params.PROCESS_METRICS.toBoolean() : false
boolean EXIT_EARLY = (params.EXIT_EARLY != null) ? params.EXIT_EARLY.toBoolean() : false

def EXIT_EARLY = (params.EXIT_EARLY) ? true : false
if (params.SETUP_LABEL) {
SETUP_LABEL = params.SETUP_LABEL
} else {
if (PROCESS_METRICS && EXIT_EARLY) {
SETUP_LABEL = "test-rhibmcloud-rhel9-x64-1" //machine needs python
} else {
SETUP_LABEL = "ci.role.test&&hw.arch.x86&&sw.os.linux"
}
}

// loop through all the params and change the parameters if needed
params.each { param ->
@@ -38,48 +45,81 @@ params.each { param ->
}
}

node ("ci.role.test&&hw.arch.x86&&sw.os.linux") {
node (SETUP_LABEL) {
timestamps {
try {
["TARGET", "BUILD_LIST", "PLATFORM", "LABEL"].each { key ->
def metrics = [:]
def testList = []
def testNames = null
def testRun = null
def baseRun = null
def runBase = "runBase.json"
def aggrBase = "aggrBase.json"

["BUILD_LIST", "PLATFORM", "LABEL"].each { key ->
[testParams, baselineParams].each { list ->
list << string(name: key, value: params."${key}")
}
}

if (PROCESS_METRICS) { //convert BenchmarkMetric.js to a JSON file optimized for metric processing
def owner = params.ADOPTOPENJDK_REPO.tokenize('/')[2]
getPythonDependencies(owner, params.ADOPTOPENJDK_BRANCH)
sh "curl -Os https://raw.githubusercontent.com/adoptium/aqa-test-tools/refs/heads/master/TestResultSummaryService/parsers/BenchmarkMetric.js"
sh "python3 metricConfig2JSON.py --metricConfig_js BenchmarkMetric.js"
Review comment (Contributor): Should Python be invoking this in a virtual env?
sh "python3 initBenchmarkMetrics.py --metricConfig_json metricConfig.json --testNames ${params.TARGET.split("=")[1]} --runBase ${runBase} --aggrBase ${aggrBase}"
testList = params.TARGET.split("=")[1].tokenize(",")
metrics = readJSON file: aggrBase
}

if (!EXIT_EARLY) {
testParams << string(name: "TARGET", value: params.TARGET)
baselineParams << string(name: "TARGET", value: params.TARGET)
}

echo "starting to trigger build..."
lock(resource: params.LABEL) {
for (int i = 0; i < PERF_ITERATIONS; i++) {
//clone to avoid mutation
def thisTestParams = testParams.collect()
def thisBaselineParams = baselineParams.collect()
if (EXIT_EARLY) {
//update TARGET; testList holds only the tests that have not been exited early
testNames = testList.join(",")
def TARGET = params.TARGET.replaceFirst(/(?<=TESTLIST=)[^ ]+/, testNames)
thisTestParams << string(name: "TARGET", value: TARGET)
thisBaselineParams << string(name: "TARGET", value: TARGET)
}

// test
testParams << string(name: "TEST_NUM", value: "TEST_NUM" + i.toString())
def testRun = triggerJob(params.BENCHMARK, params.PLATFORM, testParams, "test")
aggregateLogs(testRun, testRuntimes)
testRun = triggerJob(params.BENCHMARK, params.PLATFORM, thisTestParams, "test")

// baseline
if (RUN_BASELINE) {
baselineParams << string(name: "BASELINE_NUM", value: "BASELINE_NUM_" + i.toString())
def baseRun = triggerJob(params.BENCHMARK, params.PLATFORM, baselineParams, "baseline")
aggregateLogs(baseRun, baselineRuntimes)
baseRun = triggerJob(params.BENCHMARK, params.PLATFORM, thisBaselineParams, "baseline")

} else {
echo "Skipping baseline run since RUN_BASELINE is set to false"
}
if (params.TARGET && params.TARGET.contains('dacapo')) {
testStats = stats(testRuntimes)
baselineStats = stats(baselineRuntimes)
def score = (testStats.mean/baselineStats.mean) * 100

echo "testRuntimes: ${testRuntimes}"
echo "baselineRuntimes: ${baselineRuntimes}"
echo "score: ${score} %"

if (i == PERF_ITERATIONS || (EXIT_EARLY && i >= PERF_ITERATIONS * 0.8)) {
if (score <= 98) {
currentBuild.result = 'UNSTABLE'
echo "Possible regression, set build result to UNSTABLE."

if (PROCESS_METRICS) {
aggregateLogs(testRun, testNames, testList, runBase, metrics, "test")
aggregateLogs(baseRun, testNames, testList, runBase, metrics, "baseline")
writeJSON file: "metrics.json", json: metrics, pretty: 4
archiveArtifacts artifacts: "metrics.json"

//if we are on the final iteration, or we have executed enough iterations to decide likelihood of regression and have permission to exit early
if (i == PERF_ITERATIONS-1 || (EXIT_EARLY && i >= PERF_ITERATIONS * 0.8)) {
if (i == PERF_ITERATIONS-1) {
echo "All iterations completed"
} else {
echo "Perf iteration completed. EXIT_EARLY: ${EXIT_EARLY}. PERF_ITERATIONS: ${PERF_ITERATIONS}. Actual iterations: ${i}."
break
echo "Attempting early exit"
}
echo "checking for regressions"
checkRegressions(metrics, testList) //compute relevant performance stats, check for regression
if (testList.size() == 0) break //if all tests have been exited early we can end testing
}
}
}
@@ -120,49 +160,81 @@ def generateChildJobViaAutoGen(newJobName) {
build job: 'Test_Job_Auto_Gen', parameters: jobParams, propagate: true
}

def aggregateLogs(run, runtimes) {
def aggregateLogs(run, testNames, testList, templateName, aggregateMetrics, testType) {
def json
node("ci.role.test&&hw.arch.x86&&sw.os.linux") {
def buildId = run.getRawBuild().getNumber()
def name = run.getProjectName()
def result = run.getCurrentResult()
def buildId = run.getRawBuild().getNumber()
def name = run.getProjectName()
def result = run.getCurrentResult()
def fname = "${name}_${buildId}.json"

echo "${name} #${buildId} completed with status ${result}, copying JSON logs..."
echo "${name} #${buildId} completed with status ${result}, retrieving console log..."
writeFile file : 'console.txt', text: run.getRawBuild().getLog()
sh "python3 benchmarkMetric.py --benchmarkMetricsTemplate_json ${templateName} --console console.txt --fname ${fname} --testNames ${testNames}"

try {
timeout(time: 1, unit: 'HOURS') {
copyArtifacts(
projectName: name,
selector: specific("${buildId}"),
filter: "**/${name}_${buildId}.json",
target: "."
)

}
json = readJSON file: "${name}_${buildId}.json"
archiveArtifacts artifacts: "${name}_${buildId}.json", fingerprint: true, allowEmptyArchive: false
def metricList = json.metrics['dacapo-h2']
def runtimeMap = metricList.find{ it.containsKey('value') }
if (runtimeMap) {
runtimes << (runtimeMap.value as double)
} else {
echo "No runtime in ${name}_${buildId}.json"
try {
archiveArtifacts artifacts: fname, fingerprint: true, allowEmptyArchive: false
} catch (Exception e) {
echo "Cannot copy/process ${name}_${buildId}.json from ${name}: ${e}"
}

def runMetrics = readJSON file: fname

testList.each { test ->
aggregateMetrics[test].each { metric ->
def value = runMetrics[test][metric.key]["value"]
if (value != null) metric.value[testType]["values"] << value
}
}
}

def checkRegressions(aggregateMetrics, testList) {
testloop: for (test in testList.clone()) {
for (metric in aggregateMetrics[test].entrySet()) {
def testMetrics = metric.value["test"]["values"]
def baselineMetrics = metric.value["baseline"]["values"]
if (testMetrics.size() > 0 && baselineMetrics.size() > 0) {
def testStats = getStats(testMetrics)
def baselineStats = getStats(baselineMetrics)

echo "testStats: ${testStats}"
echo "baselineStats: ${baselineStats}"

def score = (metric.value["higherbetter"]) ? testStats.mean/baselineStats.mean : baselineStats.mean/testStats.mean
score *= 100

echo "score: ${score}"

if (score <= 98) {
currentBuild.result = 'UNSTABLE'
echo "Possible ${metric.key} regression for ${test}, set build result to UNSTABLE."
continue testloop
}
}
else {
currentBuild.result = 'UNSTABLE'
echo "${metric.key} metric for ${test} not found across all iterations. Set build result to UNSTABLE."
continue testloop
}
} catch (Exception e) {
echo "Cannot copy/process ${name}_${buildId}.json from ${name}: ${e}"
}
echo "Perf iteration for ${test} completed."
testList.remove(test) //no metrics had regression or errors, we can EXIT_EARLY this test
}
}

def stats (List nums) {
def n = nums.size()
def getStats (values) {
def n = values.size()
def mid = n.intdiv(2)

def sorted = nums.sort()
def mean = nums.sum()/n
def sorted = values.sort()
def mean = values.sum()/n
def median = (n % 2 == 1) ? sorted[mid] : (sorted[mid-1]+sorted[mid])/2
def variance = nums.collect{(it-mean)**2}.sum()/n
def variance = values.collect{(it-mean)**2}.sum()/n
def stdev = Math.sqrt(variance as double)
[mean: mean, max: sorted[-1], min: sorted[0], median: median, std: stdev]
}
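For clarity, a small worked sketch of the scoring rule used in checkRegressions, with made-up means: the ratio is oriented by higherbetter so that a score at or below 98 always means the test side is roughly 2% or more worse than the baseline.

# mirrors the Groovy scoring above; the numbers are illustrative only
def score(test_mean, baseline_mean, higherbetter):
    ratio = test_mean / baseline_mean if higherbetter else baseline_mean / test_mean
    return ratio * 100

print(score(970.0, 1000.0, True))   # throughput-style metric: 97.0 -> flagged as possible regression
print(score(4.2, 4.0, False))       # runtime-style metric: ~95.2 -> flagged
print(score(4.0, 4.0, False))       # 100.0 -> within threshold, not flagged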

def getPythonDependencies (owner, branch) {
def pythonScripts = ["benchmarkMetric.py", "initBenchmarkMetrics.py", "metricConfig2JSON.py"]
pythonScripts.each { pythonScript ->
sh "curl -Os https://raw.githubusercontent.com/${owner}/aqa-tests/refs/heads/${branch}/buildenv/jenkins/${pythonScript}"
}
}