diff --git a/configuration/builders/sequences/sast.py b/configuration/builders/sequences/sast.py new file mode 100644 index 000000000..135b9fd4c --- /dev/null +++ b/configuration/builders/sequences/sast.py @@ -0,0 +1,55 @@ +import os + +from configuration.builders.infra.runtime import ( + BuildSequence, + DockerConfig, + InContainer, +) +from configuration.steps.base import StepOptions +from configuration.steps.commands.base import URL +from configuration.steps.commands.packages import SavePackages +from configuration.steps.commands.util import InferScript, PrintEnvironmentDetails +from configuration.steps.remote import ShellStep + + +def infer(config: DockerConfig): + sequence = BuildSequence() + + sequence.add_step(ShellStep(command=PrintEnvironmentDetails())) + + sequence.add_step( + InContainer( + docker_environment=config, + step=ShellStep( + command=InferScript(), + options=StepOptions( + description="running infer analysis", + descriptionDone="infer analysis complete", + ), + env_vars=[("JOBS", str("%(prop:jobs)s"))], + timeout=7200, + ), + ) + ) + + sequence.add_step( + InContainer( + docker_environment=config, + step=ShellStep( + command=SavePackages( + packages=["infer_results"], + destination="/packages/%(prop:tarbuildnum)s/logs/%(prop:buildername)s", + ), + url=URL( + url=f"{os.environ['ARTIFACTS_URL']}/%(prop:tarbuildnum)s/logs/%(prop:buildername)s", + url_text="Infer artifacts/logs", + ), + options=StepOptions( + alwaysRun=True, + description="saving infer analysis results", + descriptionDone="infer analysis results saved", + ), + ), + ) + ) + return sequence diff --git a/configuration/steps/commands/base.py b/configuration/steps/commands/base.py index 18988b81b..7e279a59c 100644 --- a/configuration/steps/commands/base.py +++ b/configuration/steps/commands/base.py @@ -62,8 +62,14 @@ def as_cmd_arg(self) -> list[str]: class BashCommand(Command): - def __init__(self, cmd: str, name: str = "Run command", user: str = "buildbot"): - 
super().__init__(name=name, workdir=PurePath("."), user=user) + def __init__( + self, + cmd: str, + name: str = "Run command", + user: str = "buildbot", + workdir: PurePath = PurePath("."), + ): + super().__init__(name=name, workdir=workdir, user=user) self.cmd = cmd def as_cmd_arg(self) -> list[str]: diff --git a/configuration/steps/commands/packages.py b/configuration/steps/commands/packages.py index f32169b2c..48a48dca3 100644 --- a/configuration/steps/commands/packages.py +++ b/configuration/steps/commands/packages.py @@ -130,7 +130,13 @@ def as_cmd_arg(self) -> list[str]: util.Interpolate( f""" mkdir -p {self.destination} && - cp -r {package_list} {self.destination} + for package in {package_list}; do + if [ ! -e "$package" ]; then + echo "Warning: package '$package' does not exist and will be skipped." + continue + fi + cp -r $package {self.destination} + done """ ), ] diff --git a/configuration/steps/commands/scripts/infer.sh b/configuration/steps/commands/scripts/infer.sh new file mode 100755 index 000000000..a2bb4327a --- /dev/null +++ b/configuration/steps/commands/scripts/infer.sh @@ -0,0 +1,362 @@ +#!/bin/bash + +# Infer script for performing +# static analysis on the MariaDB codebase + +set -x -e + +infer --version + +if [ $# -lt 1 ]; then + echo insufficient args >&2 + exit 1 +fi + +# Testing this version +branch=$1 +repository=${2:-"https://github.com/MariaDB/server.git"} +environment=${3:-"PROD"} + +if [ -z "$branch" ]; then + echo "usage $0 {branch/commit}" >&2 + exit 1 +fi + +trap "cleanup_for_CI" EXIT + +################################################################################ +## CONFIGURATION ## +################################################################################ + +base=$PWD +result_dir=$PWD/infer_results +infer="/mnt/infer/$environment" +sources="/mnt/src/$environment/server" +# less than zabbix (80) warning. 
+max_usage=75 # maximum disk usage (in percent) +limit=50 # number of commits away to consider for a differential build/analysis +: "${JOBS:=4}" + +################################################################################ +## FUNCTIONS ## +################################################################################ + +create_dirs() +{ + mkdir -p "$infer" + mkdir -p "$sources" +} + +# Inputs: $branch +# Postconditions: +# * $sources is checked out to $branch +# * $commit set to the reference +get_source() +{ + pushd "$sources" + trap 'popd' RETURN + if [ ! -d .git ]; then + git clone "$repository" . + else + git clean -df + fi + git config --global advice.detachedHead false + git fetch origin "$branch" + git checkout -f FETCH_HEAD + git submodule update --init --recursive --jobs "${JOBS}" + git clean -df + commit=$(git rev-parse FETCH_HEAD) +} + + +cleanup_for_CI() +{ + rm -rf "${result_dir}"/*.db "${result_dir}"/tmp +} + +# Function to get current disk usage (integer percent) +get_usage() { + df -P "$infer" | awk 'NR==2 {gsub(/%/,""); print $5}' +} + +host_cleanup() +{ + rm -rf "${result_dir}" index.txt report.json + echo "Checking disk usage on $(df -h "$infer" | tail -n -1)" + usage=$(get_usage) + echo "Current usage: ${usage}%" + + # Find directories sorted by oldest modification time (oldest first) + mapfile -t dirs < <( + find "$infer" -mindepth 1 -maxdepth 1 -type d -printf '%T@ %p\n' \ + | sort -n | awk '{print $2}' + ) + + # Loop through and delete until below threshold + for dir in "${dirs[@]}"; do + if (( usage < max_usage )); then + echo "Disk usage is ${usage}%, below ${max_usage}%. Done." + break + fi + + echo "Deleting oldest directory: $dir" + rm -rf -- "$dir" + + usage=$(get_usage) + echo "New usage: ${usage}%" + done + + if (( usage >= max_usage )); then + echo "Warning: disk still above ${max_usage}% after deleting all directories!" + else + echo "Done. Disk usage now ${usage}%." 
+    fi
+}
+
+# Precondition: get_sources
+# Returns:
+# 0 - incremental scan possible
+# 1 - full scan needed
+# Postcondition for return 1
+#
+# Postcondition for return 0
+# * $base/index.txt - list of file differences
+# * $merge_base - the reference commit
+# * $result_dir - is copy of the results from the $merge_base
+# * $infer/$merge_base - is touched - (recently used marker)
+populate_differences()
+{
+    pushd "$sources"
+    trap 'popd' RETURN
+
+    # Just assume we diverged from main at some point
+    # Using $commit because merge-base didn't process
+    # pull request references.
+    merge_base=$(git merge-base "$commit" origin/main)
+
+    # Find something closer - e.g. we've appended to a branch
+    # we've already tested
+    mapfile -t commits < <(git rev-list "${merge_base}..FETCH_HEAD")
+    for common_commit in "${commits[@]}"; do
+        if [ -d "${infer}/$common_commit" ]; then
+            break;
+        fi
+    done
+    if [ ! -d "${infer}/$common_commit" ]; then
+        return 1
+    fi
+    merge_base=$common_commit
+    # The file changes from the last results
+    git diff --name-only FETCH_HEAD.."${merge_base}" | tee "$base"/index.txt
+
+    if [ !
-s "$base"/index.txt ]; then
+        echo "Empty changes - nothing necessary"
+        rm "$base"/index.txt
+        exit 0
+    fi
+
+    if [ "$(wc -l < "${base}"/index.txt)" -gt $limit ]; then
+        echo "More than $limit changes, just do a full generation"
+        rm "$base/index.txt"
+        return 1
+    fi
+
+    # use previous results as a base
+    cp -a "$infer/$merge_base" "$result_dir"
+
+    # Using as a recently used marker
+    # Eventually we can remove/clear based on not being looked at
+    touch "$infer/$merge_base"
+    return 0
+}
+
+# Builds compiler commands database (compile_commands.json) for infer
+# and generated source file that infer will need to scan
+build()
+{
+    cmake -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \
+        -DCMAKE_C_COMPILER=clang \
+        -DCMAKE_CXX_COMPILER=clang++ \
+        -S "${sources}" -B "${base}"/bld
+    cmake --build "${base}"/bld \
+        --target GenError GenServerSource GenUnicodeDataSource GenFixPrivs \
+        --parallel "$JOBS"
+}
+
+infer_cmd()
+{
+    if [ -f "${sources}"/.infer/config ]; then
+        infer --inferconfig-path "${sources}"/.infer/config "${@}"
+    else
+        infer "${@}"
+    fi
+}
+
+capture()
+{
+    infer_cmd capture --compilation-database compile_commands.json --project-root "${sources}" --results-dir "${result_dir}" "$@"
+}
+
+analyze()
+{
+    analyze_cmd=(analyze --project-root "${sources}" --results-dir "${result_dir}" --max-jobs "${JOBS}" "$@")
+    if [ -f "$sources"/.infer/report-block-list.spec.json ]; then
+        # fp reports
+        analyze_cmd+=( --report-block-list-spec="${sources}"/.infer/report-block-list.spec.json )
+    fi
+    infer_cmd "${analyze_cmd[@]}"
+}
+
+
+full_analysis()
+{
+    pushd "$base"/bld
+    trap 'popd' RETURN
+    echo "full run, this could take a while"
+    capture
+    analyze
+    cp -a "$result_dir" "$infer/$commit"
+}
+
+incremental_analysis()
+{
+    pushd "$base"/bld
+    trap 'popd' RETURN
+
+    echo "incremental run"
+    # We've copied over a result dir, so we're continuing
+    # https://fbinfer.com/docs/infer-workflow/#differential-workflow
+    # using 'infer capture' instead of 'infer run'
+    capture --reactive
+
+    # some form of
incremental + analyze --changed-files-index "$base"/index.txt + + # Preserve result + cp "${result_dir}"/report.json "$base"/report.json + + # just in case these have changed, including generated files + build + + # Can we use the previous captured $infer/$merge_base + capture --merge-capture "$infer/$merge_base" --reactive --mark-unchanged-procs + + analyze --incremental-analysis --changed-files-index ../index.txt + + # It may be merged next, or a commit pushed on top of it. + infer_cmd reportdiff --report-current "$base"/report.json --report-previous "${result_dir}"/report.json --project-root "${sources}" --results-dir "${result_dir}" + ## At this point we have infer_results/differential/{fixed,introduced}.json + #!? Change the name as we're going to use differential as a main branch difference + #!!mv "${result_dir}"/differential "${result_dir}"/diff_prev_commit + + rm -rf "$base"/bld "$base"/index.txt + + # Useful enough to save as $infer/ + # Its unknown if this is on main branch or now, but just save. + # If its merged next, then a commit exists, if a user appends + # a commit, we've got a smaller delta. + cp -a "${result_dir}" "$infer/${commit}" +} + +check() +{ + file=$1 + msg=$2 + if [ -f "${file}" ]; then + filesize=$(stat -c%s "$file") + # 2 is the size of an empty json array '[]' + if [ "$filesize" -gt 2 ]; then + echo "$msg" + echo + echo "Here are the changes:" + jq . 
"${file}" + return 1 + fi + fi + return 0 +} + +differential_to_main_branch() +{ + # Look at the changes from the main branch + # + # Take the main branch report.json + # remove fixed, add introduced, and then walk + # though other commits, if they exist, and apply the + # same again up until, and including the last commit + source "$sources"/VERSION + branch=${MYSQL_VERSION_MAJOR}.${MYSQL_VERSION_MINOR} + + pushd "$sources" + merge_base=$(git merge-base "origin/$branch" "$commit") + #mapfile -t commits < <(git rev-list "${merge_base}..${commit}") + popd + + ref_base=$infer/$merge_base/report.json + #for common_commit in "${commits[@]}"; do + # diff_dir="${infer}/$common_commit"/differential/ + # if [ -d "$diff_dir" ]; then + # # removed fixed issues and append introduced. + # jq --slurpfile to_remove "${diff_dir}"/fixed.json ' + # ($to_remove[0] | map(.hash)) as $hashes_to_remove + # | map(select(.hash as $h | $hashes_to_remove | index($h) | not))' \ + # "${ref_base}"/report.json > "${base}"/filtered.json + # ref_base=/tmp/report.json + # jq -s 'add | unique_by(.hash)' "${base}"/filtered.json "${diff_dir}"/introduced.json > "${ref_base}" + # fi + #done + #rm -f "${base}"/filtered.json + + infer_cmd reportdiff --report-current "${base}/report.json" --report-previous "${ref_base}" --project-root "${sources}" --results-dir "${result_dir}_diff" + + result_dir_main_diff=${result_dir}/main_diff + mv "${result_dir}_diff"/differential/ "${result_dir_main_diff}" + # cp here is debugging aid + #cp -a "${result_dir_main_diff}" "$infer/${commit}" + + check "${result_dir}"/differential/fixed.json "Good human! Thanks for fixing the bad things in the last commit" + check "${result_dir}"/differential/introduced.json "Bad human! Don't introduce bad things in the last commit" >&2 + check "${result_dir_main_diff}"/fixed.json "Good human! Thanks for fixing the bad things" + if ! check "${result_dir_main_diff}"//introduced.json "Bad human! 
Don't introduce bad things" >&2; then
+        exit 1
+    fi
+}
+
+################################################################################
+##                                 MAIN SCRIPT                                ##
+################################################################################
+
+create_dirs
+host_cleanup
+
+get_source
+
+if [ -d "${infer}/$commit" ]; then
+    echo "Already scanned $commit"
+    exit 0
+fi
+
+if ! populate_differences; then
+    echo "No common commit ancestor with analysis or over depth limit($limit)" >&2
+    echo "This is going to take a while for a full scan"
+fi
+
+if [ ! -f "$base"/index.txt ]; then
+    RUN_MODE="full"
+else
+    RUN_MODE="incremental"
+fi
+
+build
+
+if [ "$RUN_MODE" = "full" ]; then
+    full_analysis
+    cp "$result_dir"/report.json "${base}"
+fi
+if [ "$RUN_MODE" = "incremental" ]; then
+    incremental_analysis
+fi
+
+rm -rf "$base"/bld
+
+differential_to_main_branch
diff --git a/configuration/steps/commands/util.py b/configuration/steps/commands/util.py
index 2420ba57d..e1b31c9f9 100644
--- a/configuration/steps/commands/util.py
+++ b/configuration/steps/commands/util.py
@@ -1,3 +1,4 @@
+import os
 from pathlib import PurePath
 
 from buildbot.plugins import util
@@ -173,3 +174,17 @@ def __init__(
     ):
         args = [f"{binary}:{','.join(libs)}" for binary, libs in binary_checks.items()]
         super().__init__(script_name="ldd_check.sh", args=args)
+
+
+class InferScript(BashScriptCommand):
+    """
+    A command to run the Infer analysis on the MariaDB codebase.
+
"""
+
+    def __init__(self):
+        branch = util.Interpolate("%(prop:branch)s")
+        repository = util.Interpolate("%(prop:repository)s")
+        environment = os.environ.get("ENVIRON", "PROD")
+        args = [branch, repository, environment]
+        super().__init__(script_name="infer.sh", args=args)
+        self.name = "Run Infer"
diff --git a/constants.py b/constants.py
index 14dbb98f3..dc9663e20 100644
--- a/constants.py
+++ b/constants.py
@@ -177,6 +177,7 @@
     "amd64-debian-12-debug-embedded",
     "amd64-fedora-41",
     "amd64-fedora-42",
+    "amd64-infer-clang-20",
     "amd64-msan-clang-20-debug",
     "amd64-opensuse-1506",
     "amd64-rhel-10",
diff --git a/master-migration/master.cfg b/master-migration/master.cfg
index 2ac467163..b0899a2da 100644
--- a/master-migration/master.cfg
+++ b/master-migration/master.cfg
@@ -18,6 +18,7 @@ from configuration.builders.sequences.compile_only import (
 )
 from configuration.builders.sequences.debug import openssl_fips
 from configuration.builders.sequences.sanitizers import asan_ubsan, msan
+from configuration.builders.sequences.sast import infer
 from configuration.reporters import github_summary
 from configuration.workers import worker
 from master_common import IS_CHECKCONFIG, base_master_config
@@ -40,6 +41,18 @@ WORKER_POOL.add(
 WORKER_POOL.add(
     arch="amd64", worker=worker.NonLatent(name="hz-bbw9", config=config, total_jobs=110)
 )
+WORKER_POOL.add(
+    arch="amd64",
+    worker=worker.NonLatent(name="bg-bbw5-x64", config=config, total_jobs=8),
+)
+
+DEFAULT_AMD64_WORKER_POOL = WORKER_POOL.get_workers_for_arch(
+    arch="amd64", filter_fn=lambda name: name in ["hz-bbw8", "hz-bbw9"]
+)
+INFER_WORKER_POOL = WORKER_POOL.get_workers_for_arch(
+    arch="amd64", filter_fn=lambda name: name == "bg-bbw5-x64"
+)
+
 c["workers"] = WORKER_POOL.get_instances()
 
 ## ------------------------------------------------------------------- ##
@@ -50,7 +63,7 @@ c["builders"] = [
     rpm_release_builder(
         name="amd64-rhel-9-rpm-autobake-migration",
         image="rhel9",
-
worker_pool=WORKER_POOL.get_workers_for_arch(arch="amd64"), + worker_pool=DEFAULT_AMD64_WORKER_POOL, arch="amd64", has_compat=False, rpm_type="rhel9", @@ -58,7 +71,7 @@ c["builders"] = [ deb_release_builder( name="amd64-debian-12-deb-autobake-migration", image="debian12", - worker_pool=WORKER_POOL.get_workers_for_arch(arch="amd64"), + worker_pool=DEFAULT_AMD64_WORKER_POOL, ), ] @@ -81,7 +94,7 @@ c["builders"].extend( f_seq(jobs=compile_only_jobs, config=docker_config(image="debian13")) ], ).get_config( - workers=WORKER_POOL.get_workers_for_arch(arch="amd64"), + workers=DEFAULT_AMD64_WORKER_POOL, next_build=nextBuild, can_start_build=canStartBuild, tags=["compile-only", "protected"], @@ -106,7 +119,7 @@ c["builders"].append( ) ], ).get_config( - workers=WORKER_POOL.get_workers_for_arch(arch="amd64"), + workers=DEFAULT_AMD64_WORKER_POOL, next_build=nextBuild, can_start_build=canStartBuild, tags=[ @@ -137,7 +150,7 @@ def ubasan_builder(name: str, debug: bool) -> GenericBuilder: ) ], ).get_config( - workers=WORKER_POOL.get_workers_for_arch(arch="amd64"), + workers=DEFAULT_AMD64_WORKER_POOL, next_build=nextBuild, can_start_build=canStartBuild, tags=list(tags_ubasan), @@ -166,7 +179,7 @@ def msan_builder(name: str, debug: bool) -> GenericBuilder: ) ], ).get_config( - workers=WORKER_POOL.get_workers_for_arch(arch="amd64"), + workers=DEFAULT_AMD64_WORKER_POOL, next_build=nextBuild, can_start_build=canStartBuild, tags=list(tags_msan), @@ -177,6 +190,34 @@ def msan_builder(name: str, debug: bool) -> GenericBuilder: builder = "amd64-msan-clang-20-debug" c["builders"].append(msan_builder(name=builder, debug=builder.endswith("debug"))) +## ------------------------------------------------------------------- ## +## STATIC ANALYZERS BUILDERS ## +## ------------------------------------------------------------------- ## + +c["builders"].append( + GenericBuilder( + name="amd64-infer-clang-20", + sequences=[ + infer( + config=docker_config( + image="debian13-infer-clang-20", + 
shm_size="16g", + additional_bind_mounts=[ + ("/srv/buildbot/src", "/mnt/src"), + ("/srv/buildbot/infer", "/mnt/infer"), + ], + ) + ), + ], + ).get_config( + workers=INFER_WORKER_POOL, + next_build=nextBuild, + can_start_build=canStartBuild, + tags=["clang", "infer", "sast"], + jobs=8, + ) +) + ## ------------------------------------------------------------------- ## ## REPORTERS ## ## ------------------------------------------------------------------- ## diff --git a/master-private.cfg-sample b/master-private.cfg-sample index 2a48b7c2a..f98b6eeb3 100644 --- a/master-private.cfg-sample +++ b/master-private.cfg-sample @@ -59,6 +59,7 @@ private["worker_pass"]= { "libvirt": "1234", "hz-bbw8": "1234", "hz-bbw9": "1234", + "bg-bbw5-x64": "1234", } private["docker_workers"]= { "amd-bbw1-docker":"tcp://IP_address:port",