Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 54 additions & 0 deletions configuration/builders/sequences/sast.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import os

from configuration.builders.infra.runtime import (
BuildSequence,
DockerConfig,
InContainer,
)
from configuration.steps.base import StepOptions
from configuration.steps.commands.base import URL
from configuration.steps.commands.packages import SavePackages
from configuration.steps.commands.util import InferScript, PrintEnvironmentDetails
from configuration.steps.remote import ShellStep


def infer(config: DockerConfig):
    """Assemble the build sequence that runs the Infer static analysis.

    The sequence contains three steps, in order:

    1. print the environment details;
    2. run the Infer script for ``%(prop:branch)s`` inside the container
       described by *config*, passing ``%(prop:jobs)s`` as ``JOBS``;
    3. save the ``infer_results`` package inside the same container (this
       step is flagged ``alwaysRun``).

    ``ARTIFACTS_URL`` is read from the process environment when this function
    is called; a missing variable raises ``KeyError``.
    """
    steps = BuildSequence()

    steps.add_step(ShellStep(command=PrintEnvironmentDetails()))

    analysis_step = ShellStep(
        command=InferScript("%(prop:branch)s"),
        options=StepOptions(
            description="running infer analysis",
            descriptionDone="infer analysis complete",
        ),
        env_vars=[("JOBS", "%(prop:jobs)s")],
    )
    steps.add_step(InContainer(docker_environment=config, step=analysis_step))

    save_command = SavePackages(
        packages=["infer_results"],
        destination="/packages/%(prop:tarbuildnum)s/logs/%(prop:buildername)s",
    )
    artifacts_link = URL(
        url=f"{os.environ['ARTIFACTS_URL']}/%(prop:tarbuildnum)s/logs/%(prop:buildername)s",
        url_text="Infer artifacts/logs",
    )
    save_step = ShellStep(
        command=save_command,
        url=artifacts_link,
        options=StepOptions(
            alwaysRun=True,
            description="saving infer analysis results",
            descriptionDone="infer analysis results saved",
        ),
    )
    steps.add_step(InContainer(docker_environment=config, step=save_step))

    return steps
10 changes: 8 additions & 2 deletions configuration/steps/commands/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,14 @@ def as_cmd_arg(self) -> list[str]:


class BashCommand(Command):
def __init__(self, cmd: str, name: str = "Run command", user: str = "buildbot"):
super().__init__(name=name, workdir=PurePath("."), user=user)
def __init__(
self,
cmd: str,
name: str = "Run command",
user: str = "buildbot",
workdir: PurePath = PurePath("."),
):
super().__init__(name=name, workdir=workdir, user=user)
self.cmd = cmd

def as_cmd_arg(self) -> list[str]:
Expand Down
286 changes: 286 additions & 0 deletions configuration/steps/commands/scripts/infer.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,286 @@
#!/bin/bash

# Infer script for performing
# static analysis on the MariaDB codebase

set -x -e

infer --version

if [ $# -lt 1 ]; then
echo insufficient args >&2
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can't you use functions from the bash library to make this a bit more homogeneous with all BB output messages (bb_log_info|warn|error)?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@fauust the script is replaced in a built-in buildbot command.
It is not a script that is downloaded at runtime but rather embedded in the config-time command, hence the lack of source support.

I will, in the future, add support for downloading multiple scripts.

exit 1
fi

# Branch name or commit id to analyse (first positional argument).
branch="$1"

if [[ -z "${branch}" ]]; then
    echo "usage $0 {branch/commit}" >&2
    exit 1
fi

# Fall back to 4 parallel jobs when the caller did not provide JOBS.
JOBS="${JOBS:-4}"

# Paths used throughout the script.
base="${PWD}"
result_dir="${PWD}/infer_results"
infer="/mnt/infer"

# Remove artefacts left behind by a previous run.
rm -rf "${result_dir}" index.txt report.json

## Fetch

pushd /mnt/src
if [ ! -d .git ]; then
git clone https://github.com/MariaDB/server.git
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

missing final dot:
git clone https://github.com/MariaDB/server.git .

else
git clean -df
fi
git fetch origin "$branch"
git checkout -f FETCH_HEAD
git submodule update --init --recursive --jobs "${JOBS}"
git clean -df
commit=$(git rev-parse FETCH_HEAD)

if [ -d "${infer}/$commit" ]; then
echo "Already scanned $commit"
exit 0
fi

# Directory to clean
# Target maximum usage (in percent)
max_usage=90
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

to be synced with zabbix alerting (warning = 80, critical = 90). We can change it on zabbix ofc.

Copy link
Collaborator

@RazvanLiviuVarzaru RazvanLiviuVarzaru Oct 28, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

bg-bbw5 has 1.6 TB, 80 % is reasonably enough.


# Print the usage of the filesystem holding $infer as a bare integer
# (the percent sign reported by df is stripped).
get_usage() {
    df -P "$infer" | tr -d '%' | awk 'NR == 2 { print $5 }'
}

# Free space under $infer: while the filesystem usage is at or above
# $max_usage percent, delete the first-level sub-directories of $infer one
# by one, starting with the one that has the oldest modification time.
echo "Checking disk usage on $(df -h "$infer" | tail -n -1)"
usage=$(get_usage)
echo "Current usage: ${usage}%"

# Find directories sorted by oldest modification time (oldest first)
# (find prints "<mtime> <path>"; awk keeps the path column only)
mapfile -t dirs < <(
find "$infer" -mindepth 1 -maxdepth 1 -type d -printf '%T@ %p\n' \
| sort -n | awk '{print $2}'
)

# Loop through and delete until below threshold
for dir in "${dirs[@]}"; do
if (( usage < max_usage )); then
echo "Disk usage is ${usage}%, below ${max_usage}%. Done."
break
fi

echo "Deleting oldest directory: $dir"
rm -rf -- "$dir"

# Re-measure after every deletion so the loop stops as soon as possible.
usage=$(get_usage)
echo "New usage: ${usage}%"
done

# Only a warning is printed when the target could not be reached; the script
# carries on either way.
if (( usage >= max_usage )); then
echo "Warning: disk still above ${max_usage}% after deleting all directories!"
else
echo "Done. Disk usage now ${usage}%."
fi


# What can we use as a reference

# Prepare an incremental run by looking for an already analysed commit
# between $merge_base and FETCH_HEAD (the first match in `git rev-list`
# output is used).
#
# Globals read:    merge_base, infer, base, result_dir
# Globals written: commits, common_commit, merge_base, limit
# Side effects:    writes $base/index.txt, copies the reference results to
#                  $result_dir and touches the reference directory.
# Returns: 0 when an incremental run has been prepared, 1 when a full scan
#          is required. Exits the script with status 0 when the reference
#          and FETCH_HEAD do not differ in any file.
populate_differences()
# input $merge_base
{
    # Defined up front so the caller can always print it.
    limit=50

    # Find something closer - e.g. we've appended to a branch
    # we've already tested
    local reference=""
    mapfile -t commits < <(git rev-list "${merge_base}..FETCH_HEAD")
    for common_commit in "${commits[@]}"; do
        if [ -d "${infer}/${common_commit}" ]; then
            reference=$common_commit
            break
        fi
    done
    # An explicit marker is needed: with an empty commit list $common_commit
    # is unset and a test on "${infer}/" itself would wrongly succeed.
    if [ -z "$reference" ]; then
        return 1
    fi
    merge_base=$reference

    # The files changed since the reference results
    git diff --name-only FETCH_HEAD.."${merge_base}" | tee "$base"/index.txt

    if [ ! -s "$base"/index.txt ]; then
        echo "Empty changes - nothing necessary"
        rm "$base"/index.txt
        exit 0
    fi

    if [ "$(wc -l < "${base}"/index.txt)" -gt "$limit" ]; then
        echo "More than $limit changes, just do a full generation"
        rm "$base/index.txt"
        return 1
    fi

    # use previous results as a base
    cp -a "$infer/$merge_base" "$result_dir"

    # Using as a recently used marker
    # Eventually we can remove/clear based on not being looked at
    touch "$infer/$merge_base"
    return 0
}

# Just assume we diverged from main at some point
# Using $commit because merge-base didn't process
# pull request references.
merge_base=$(git merge-base "$commit" origin/main)

# populate_differences returns 0 when an incremental run was prepared and
# non-zero otherwise, so the full-scan notice belongs to the failure branch.
if ! populate_differences; then
    echo "No common commit ancestor with analysis or over depth limit($limit)" >&2

    echo "This is going to take a while for a full scan"
fi

# back from /mnt/src
popd

# Build

# Configure the tree in ./bld with clang (exporting compile_commands.json)
# and build the source-generating targets only.
build()
{
    local configure_args=(
        -DCMAKE_EXPORT_COMPILE_COMMANDS=ON
        -DCMAKE_C_COMPILER=clang
        -DCMAKE_CXX_COMPILER=clang++
        -S /mnt/src
        -B bld
    )
    local generated_targets=(
        GenError GenServerSource GenUnicodeDataSource GenFixPrivs
    )

    cmake "${configure_args[@]}"
    cmake --build bld --target "${generated_targets[@]}" --parallel "$JOBS"
}

if [ ! -d bld ]; then
mkdir bld
build
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

?

fi

#
# Run `infer capture` on compile_commands.json from the current directory,
# storing into $result_dir; extra arguments are passed through.
capture()
{
    local capture_opts=(
        --compilation-database compile_commands.json
        --project-root /mnt/src
        --results-dir "${result_dir}"
    )
    infer capture "${capture_opts[@]}" "$@"
}

# Run `infer analyze` on $result_dir with $JOBS parallel jobs; extra
# arguments are passed through.
analyze()
{
    local analyze_opts=(
        --project-root /mnt/src
        --results-dir "${result_dir}"
        --max-jobs "${JOBS}"
    )
    infer analyze "${analyze_opts[@]}" "$@"
}
# Capture and analyze the files listed as changed in the index
#
cd bld
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

popd/pushd for consistency


if [ ! -f ../index.txt ]; then
echo "full run, this could take a while"
capture
analyze
cp -a "$result_dir" "$infer/$commit"
cd ..
else
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On a fresh run

+ infer reportdiff --report-current report.json --report-previous /mnt/infer/68432a0bc365d783cde21487c257b7e865221f41/report.json --project-root /mnt/src --results-dir /home/buildbot/infer_results_diff
Uncaught Internal Error: (Sys_error "/home/buildbot/report.json: No such file or directory")

Guessing your initial intention was to put everything under this line under the else block and keep the ${result_dir} cleanup for CI outside the IF...ELSE.

Because I remember you had an exit 0 initially under the full run block.
For clarity you can define a variable to decide when to perform a full analysis vs. an incremental then put the full / incremental blocks under functions.

Copy link
Collaborator

@RazvanLiviuVarzaru RazvanLiviuVarzaru Oct 28, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe this? Haven't tested it yet, I just arranged everything for clarity.

#!/bin/bash

# Infer script for performing
# static analysis on the MariaDB codebase

# Trace every command and abort on the first failing one.
set -x -e

infer --version

# cleanup_for_CI is defined further down in this script.
# NOTE(review): an exit taken before that definition has been read (e.g. the
# argument checks just below) fires this trap while the function is still
# undefined - confirm this is acceptable.
trap 'cleanup_for_CI' EXIT

if [ $# -lt 1 ]; then
	echo insufficient args >&2
	exit 1
fi

# Testing this version
branch=$1

if [ -z "$branch" ]; then
  echo "usage $0 {branch/commit}" >&2
  exit 1
fi

################################################################################
##                               CONFIGURATION                                ##
################################################################################

# base:       directory the script was started from
# result_dir: infer results of the current run
# infer:      store of previous results, one sub-directory per commit
# sources:    checkout of the source tree
base=$PWD
result_dir=$PWD/infer_results
infer="/mnt/infer"
sources="/mnt/src"
max_usage=80 # maximum disk usage (in percent)
# Default to 4 parallel jobs when JOBS is unset or empty.
: "${JOBS:=4}"

################################################################################
##                               FUNCTIONS                                   ##
################################################################################

# Bring $sources to the revision named by $branch and record the resolved
# commit id in the global $commit. The previous directory is restored by a
# RETURN trap.
get_source()
{
  pushd "${sources}"
  trap 'popd' RETURN

  # Re-use an existing checkout when there is one, otherwise clone in place.
  if [ -d .git ]; then
    git clean -df
  else
    git clone https://github.com/MariaDB/server.git .
  fi

  git fetch origin "${branch}"
  git checkout -f FETCH_HEAD
  git submodule update --init --recursive --jobs "${JOBS}"
  git clean -df

  commit="$(git rev-parse FETCH_HEAD)"
}

# Shrink the results kept for CI by removing the *.db files and the tmp
# directory found directly under $result_dir.
#
# ${result_dir:?} makes the shell stop with an error instead of expanding to
# "/*.db" and "/tmp" should the variable ever be unset or empty.
cleanup_for_CI()
{
  rm -rf "${result_dir:?}"/*.db "${result_dir:?}"/tmp
}

# Print the usage of the filesystem holding $infer as a bare integer
# (the percent sign reported by df is stripped).
get_usage() {
    df -P "$infer" | tr -d '%' | awk 'NR == 2 { print $5 }'
}

# Remove the artefacts of a previous run, then free space under $infer:
# while the filesystem usage is at or above $max_usage percent, delete the
# first-level sub-directories of $infer one by one, starting with the one
# that has the oldest modification time.
host_cleanup()
{
  rm -rf "${result_dir}" index.txt report.json
  echo "Checking disk usage on $(df -h "$infer" | tail -n -1)"
  usage=$(get_usage)
  echo "Current usage: ${usage}%"

  # Find directories sorted by oldest modification time (oldest first)
  # (find prints "<mtime> <path>"; awk keeps the path column only)
  mapfile -t dirs < <(
    find "$infer" -mindepth 1 -maxdepth 1 -type d -printf '%T@ %p\n' \
    | sort -n | awk '{print $2}'
  )

  # Loop through and delete until below threshold
  for dir in "${dirs[@]}"; do
      if (( usage < max_usage )); then
          echo "Disk usage is ${usage}%, below ${max_usage}%. Done."
          break
      fi

      echo "Deleting oldest directory: $dir"
      rm -rf -- "$dir"

      # Re-measure after every deletion so the loop stops as soon as possible.
      usage=$(get_usage)
      echo "New usage: ${usage}%"
  done

  # Only a warning is printed when the target could not be reached.
  if (( usage >= max_usage )); then
      echo "Warning: disk still above ${max_usage}% after deleting all directories!"
  else
      echo "Done. Disk usage now ${usage}%."
  fi
}

# Prepare an incremental run by looking for an already analysed commit
# between the merge base with origin/main and FETCH_HEAD (the first match in
# `git rev-list` output is used).
#
# Globals read:    commit, sources, infer, base, result_dir
# Globals written: commits, common_commit, merge_base, limit
# Side effects:    writes $base/index.txt, copies the reference results to
#                  $result_dir and touches the reference directory.
# Returns: 0 when an incremental run has been prepared, 1 when a full scan
#          is required. Exits the script with status 0 when the reference
#          and FETCH_HEAD do not differ in any file.
populate_differences()
{
  pushd "$sources"
  trap 'popd' RETURN

  # Defined up front so the caller can always print it.
  limit=50

  # Just assume we diverged from main at some point
  # Using $commit because merge-base didn't process
  # pull request references.
  merge_base=$(git merge-base "$commit" origin/main)

  # Find something closer - e.g. we've appended to a branch
  # we've already tested
  local reference=""
  mapfile -t commits < <(git rev-list "${merge_base}..FETCH_HEAD")
  for common_commit in "${commits[@]}"; do
    if [ -d "${infer}/${common_commit}" ]; then
      reference=$common_commit
      break
    fi
  done
  # An explicit marker is needed: with an empty commit list $common_commit
  # is unset and a test on "${infer}/" itself would wrongly succeed.
  if [ -z "$reference" ]; then
    return 1
  fi
  merge_base=$reference

  # The files changed since the reference results
  git diff --name-only FETCH_HEAD.."${merge_base}" | tee "$base"/index.txt

  if [ ! -s "$base"/index.txt ]; then
    echo "Empty changes - nothing necessary"
    rm "$base"/index.txt
    exit 0
  fi

  if [ "$(wc -l < "${base}"/index.txt)" -gt "$limit" ]; then
    echo "More than $limit changes, just do a full generation"
    rm "$base/index.txt"
    return 1
  fi

  # use previous results as a base
  cp -a "$infer/$merge_base" "$result_dir"

  # Using as a recently used marker
  # Eventually we can remove/clear based on not being looked at
  touch "$infer/$merge_base"
  return 0
}

# From $base: make sure ./bld exists, configure the tree there with clang
# (exporting compile_commands.json) and build the source-generating targets
# only. The previous directory is restored by a RETURN trap.
build()
{
  pushd "${base}"
  trap 'popd' RETURN

  [ -d bld ] || mkdir bld

  local configure_args=(
    -DCMAKE_EXPORT_COMPILE_COMMANDS=ON
    -DCMAKE_C_COMPILER=clang
    -DCMAKE_CXX_COMPILER=clang++
    -S /mnt/src
    -B bld
  )
  local generated_targets=(
    GenError GenServerSource GenUnicodeDataSource GenFixPrivs
  )

  cmake "${configure_args[@]}"
  cmake --build bld --target "${generated_targets[@]}" --parallel "$JOBS"
}

# Run `infer capture` on compile_commands.json from the current directory,
# storing into $result_dir; extra arguments are passed through.
capture()
{
  local capture_opts=(
    --compilation-database compile_commands.json
    --project-root /mnt/src
    --results-dir "${result_dir}"
  )
  infer capture "${capture_opts[@]}" "$@"
}

# Run `infer analyze` on $result_dir with $JOBS parallel jobs; extra
# arguments are passed through.
analyze()
{
  local analyze_opts=(
    --project-root /mnt/src
    --results-dir "${result_dir}"
    --max-jobs "${JOBS}"
  )
  infer analyze "${analyze_opts[@]}" "$@"
}

# Report the content of a JSON report file.
#   $1  path of the report
#   $2  message printed before the report when it has entries
# Returns 1 (after printing the message and the pretty-printed file) when
# the file exists and is larger than 2 bytes, 0 otherwise.
check()
{
  local report=$1
  local message=$2

  # A missing report means there is nothing to show.
  [ -f "${report}" ] || return 0

  # 2 is the size of an empty json array '[]'
  local size
  size=$(stat -c%s "$report")
  if [ "$size" -le 2 ]; then
    return 0
  fi

  echo "$message"
  echo
  echo "Here are the changes:"
  jq . "${report}"
  return 1
}
  
# Full run: capture and analyse everything from $base/bld, then store a copy
# of $result_dir under $infer/$commit. The previous directory is restored by
# a RETURN trap.
full_analysis()
{
  pushd "${base}/bld"
  trap 'popd' RETURN

  echo "full run, this could take a while"

  capture
  analyze

  local stored_results="${infer}/${commit}"
  cp -a "${result_dir}" "${stored_results}"
}

# Incremental run on top of the results copied from $infer/$merge_base.
#
# 1. capture/analyse the files listed in $base/index.txt and diff the outcome
#    against the previous report (-> $result_dir/differential);
# 2. store the results under $infer/$commit;
# 3. rebuild a report relative to the release branch named in
#    $sources/VERSION by replaying the stored per-commit differentials, and
#    diff it against that branch point (-> $result_dir/main_diff);
# 4. print the fixed/introduced issues and exit 1 when issues were
#    introduced relative to the release branch.
#
# Globals read:    base, result_dir, infer, sources, commit, merge_base
# Globals written: branch, merge_base, commits, common_commit,
#                  result_dir_main_diff
incremental_analysis()
{
  pushd "$base/bld"
  trap 'popd' RETURN

  echo "incremental run"
  # We've copied over a result dir, so we're continuing
  # https://fbinfer.com/docs/infer-workflow/#differential-workflow
  # using 'infer capture' instead of 'infer run'
  capture --reactive

  # some form of incremental
  analyze --changed-files-index "$base/index.txt"

  # Preserve result
  cp "${result_dir}/report.json" "$base/report.json"

  # just in case these have changed, including generated files
  build

  # Can we use the previous captured $infer/$merge_base
  capture --merge-capture "$infer/$merge_base" --reactive --mark-unchanged-procs

  analyze --incremental-analysis --changed-files-index "$base/index.txt"

  # It may be merged next, or a commit pushed on top of it.
  infer reportdiff --report-current "$base/report.json" \
    --report-previous "${result_dir}/report.json" \
    --project-root /mnt/src --results-dir "${result_dir}"
  ## At this point we have infer_results/differential/{fixed,introduced}.json

  # Leave the build directory before deleting it: the files written below
  # use relative paths and cannot be created in a removed directory.
  cd "$base"
  rm -rf "$base/bld" "$base/index.txt"

  # Useful enough to save as $infer/
  # It's unknown if this is on the main branch or not, but just save.
  # If it's merged next, then a commit exists, if a user appends
  # a commit, we've got a smaller delta.
  cp -a "${result_dir}" "$infer/${commit}"

  # Look at the changes from the main branch
  #
  # Take the main branch report.json
  # remove fixed, add introduced, and then walk
  # through other commits, if they exist, and apply the
  # same again up until, and including the last commit
  #
  # VERSION is read in a subshell with the RETURN trap cleared: bash also
  # runs RETURN traps when a sourced file finishes, which would otherwise
  # trigger the 'popd' above too early.
  branch=$(
    trap - RETURN
    source "$sources/VERSION"
    echo "${MYSQL_VERSION_MAJOR}.${MYSQL_VERSION_MINOR}"
  )

  pushd "$sources"
  merge_base=$(git merge-base "origin/$branch" "$commit")
  # --reverse: the differentials have to be replayed oldest commit first.
  mapfile -t commits < <(git rev-list --reverse "${merge_base}..${commit}")
  popd

  # Kept in locals so the global $base keeps pointing at the work directory.
  local main_ref="$infer/$merge_base"
  local last_ref="$main_ref"
  local diff_dir
  for common_commit in "${commits[@]}"; do
    diff_dir="${infer}/${common_commit}/differential/"
    if [ -d "$diff_dir" ]; then
      # remove fixed issues and append introduced.
      jq --slurpfile to_remove "${diff_dir}"/fixed.json '
        ($to_remove[0] | map(.hash)) as $hashes_to_remove
        | map(select(.hash as $h | $hashes_to_remove | index($h) | not))' \
        "${last_ref}"/report.json > filtered.json
      jq -s 'add | unique_by(.hash)' filtered.json "${diff_dir}"/introduced.json > report.json
      # From now on continue from the report just written.
      last_ref=$PWD
    fi
  done

  infer reportdiff --report-current report.json \
    --report-previous "${main_ref}/report.json" \
    --project-root /mnt/src --results-dir "${result_dir}_diff"

  result_dir_main_diff=${result_dir}/main_diff
  mv "${result_dir}_diff"/differential/ "${result_dir_main_diff}"
  cp -a "${result_dir_main_diff}" "$infer/${commit}"

  # check returns non-zero when a report has entries. Under `set -e` a bare
  # call would abort the script, so informational reports are followed by
  # `|| true`; only issues introduced relative to the release branch fail
  # the run.
  check "${result_dir}/differential/fixed.json" "Good human! Thanks for fixing the bad things in the last commit" || true
  check "${result_dir}/differential/introduced.json" "Bad human! Don't introduce bad things in the last commit" >&2 || true
  check "${result_dir_main_diff}/fixed.json" "Good human! Thanks for fixing the bad things" || true
  if ! check "${result_dir_main_diff}/introduced.json" "Bad human! Don't introduce bad things" >&2; then
    exit 1
  fi
}

################################################################################
##                               MAIN SCRIPT                                  ##
################################################################################

host_cleanup

# $commit is only set by get_source, so the sources have to be fetched
# before testing whether this commit already has stored results.
get_source

if [ -d "${infer}/${commit}" ]; then
  echo "Already scanned $commit"
  exit 0
fi

# populate_differences returns 0 when an incremental run was prepared and
# non-zero otherwise, so the full-scan notice belongs to the failure branch.
if ! populate_differences; then
  echo "No common commit ancestor with analysis or over depth limit($limit)" >&2
  echo "This is going to take a while for a full scan"
fi

# index.txt is only left behind when an incremental run has been prepared.
if [ -f index.txt ]; then
  RUN_MODE="incremental"
else
  RUN_MODE="full"
fi

build

case "$RUN_MODE" in
  full)
    full_analysis
    ;;
  incremental)
    incremental_analysis
    ;;
esac

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

much nicer

echo "incremental run"
# We've copied over a result dir, so we're continuing
# https://fbinfer.com/docs/infer-workflow/#differential-workflow
# using 'infer capture" instead infer run
capture --reactive

# some form of incremental
analyze --changed-files-index ../index.txt

# Preserve result
cp "${result_dir}"/report.json ../report.json

# just in case these have changed, including generated files
cd ..
build
cd bld

# Can we use the previous captured $infer/$merge_base
capture --merge-capture "$infer/$merge_base" --reactive --mark-unchanged-procs

analyze --incremental-analysis --changed-files-index ../index.txt

# It may be merged next, or a commit pushed on top of it.
infer reportdiff --report-current ../report.json --report-previous "${result_dir}"/report.json --project-root /mnt/src --results-dir "${result_dir}"
cd ..
## At this point we have infer_results/differential/{fixed,introduced}.json
#!? Change the name as we're going to use differential as a main branch difference
#!!mv "${result_dir}"/differential "${result_dir}"/diff_prev_commit
fi
rm -rf bld index.txt

# Useful enough to save as $infer/
# It's unknown if this is on the main branch or not, but just save.
# If it's merged next, then a commit exists, if a user appends
# a commit, we've got a smaller delta.
cp -a "${result_dir}" "$infer/${commit}"

# Look at the changes from the main branch
#
# Take the main branch report.json
# remove fixed, add introduced, and then walk
# through other commits, if they exist, and apply the
# same again up until, and including the last commit
source /mnt/src/VERSION
branch=${MYSQL_VERSION_MAJOR}.${MYSQL_VERSION_MINOR}

pushd /mnt/src
merge_base=$(git merge-base "origin/$branch" "$commit")
# --reverse: git rev-list prints the newest commit first, but the
# differentials have to be replayed oldest commit first.
mapfile -t commits < <(git rev-list --reverse "${merge_base}..${commit}")
popd

# NOTE: $base is re-purposed here; from now on it names the stored results
# of the branch point instead of the work directory.
base=/mnt/infer/$merge_base
last_ref=$base
for common_commit in "${commits[@]}"; do
    diff_dir="${infer}/$common_commit"/differential/
    if [ -d "$diff_dir" ]; then
        # remove fixed issues and append introduced.
        jq --slurpfile to_remove "${diff_dir}"/fixed.json '
        ($to_remove[0] | map(.hash)) as $hashes_to_remove
        | map(select(.hash as $h | $hashes_to_remove | index($h) | not))' \
            "${last_ref}"/report.json > filtered.json
        jq -s 'add | unique_by(.hash)' filtered.json "${diff_dir}"/introduced.json > report.json
        # From now on continue from the report just written.
        last_ref=$PWD
    fi
done

# NOTE(review): ./report.json is only written by the loop above; when no
# commit in the range has a stored differential directory it does not exist
# at this point - confirm how that case should be handled.
infer reportdiff --report-current report.json --report-previous "${base}"/report.json --project-root /mnt/src --results-dir "${result_dir}_diff"

result_dir_main_diff=${result_dir}/main_diff
mv "${result_dir}_diff"/differential/ "${result_dir_main_diff}"
cp -a "${result_dir_main_diff}" "$infer/${commit}"

# cleanup for smaller CI
rm -rf "${result_dir}"/*.db "${result_dir}"/tmp

check()
{
file=$1
msg=$2
if [ -f "${file}" ]; then
filesize=$(stat -c%s "$file")
# 2 is the size of an empty json array '[]'
if [ "$filesize" -gt 2 ]; then
echo "$msg"
echo
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

echo -e "$msg\n"

but see my comment on bash lib.

echo "Here are the changes:"
jq . "${file}"
return 1
fi
fi
return 0
}

# check returns non-zero when a report has entries. Under `set -e` a bare
# call would abort the script, so the informational reports are followed by
# `|| true`; only issues introduced relative to the main branch fail the run.
check "${result_dir}"/differential/fixed.json "Good human! Thanks for fixing the bad things in the last commit" || true

check "${result_dir}"/differential/introduced.json "Bad human! Don't introduce bad things in the last commit" >&2 || true

check "${result_dir_main_diff}"/fixed.json "Good human! Thanks for fixing the bad things" || true

if ! check "${result_dir_main_diff}"/introduced.json "Bad human! Don't introduce bad things" >&2; then
    exit 1
fi
9 changes: 9 additions & 0 deletions configuration/steps/commands/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,3 +173,12 @@ def __init__(
):
args = [f"{binary}:{','.join(libs)}" for binary, libs in binary_checks.items()]
super().__init__(script_name="ldd_check.sh", args=args)


class InferScript(BashScriptCommand):
    """Command that runs ``infer.sh``, the Infer analysis of the MariaDB codebase.

    The *branch* given to the constructor is passed to the script as its
    only argument.
    """

    def __init__(self, branch: str):
        script_args = [branch]
        super().__init__(script_name="infer.sh", args=script_args)
1 change: 1 addition & 0 deletions constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,7 @@
"amd64-debian-12-debug-embedded",
"amd64-fedora-41",
"amd64-fedora-42",
"amd64-infer-clang-20",
"amd64-msan-clang-20-debug",
"amd64-opensuse-1506",
"amd64-rhel-10",
Expand Down
Loading