Skip to content

Make compute-sanitizer not report API errors as errors #687

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jun 7, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions ci/tools/setup-sanitizer
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,10 @@ set -euo pipefail
# Assemble SANITIZER_CMD, the compute-sanitizer command line for this CI job.
# When SETUP_SANITIZER is anything other than 1, SANITIZER_CMD is set to the
# empty string.
if [[ "${SETUP_SANITIZER}" == 1 ]]; then
  COMPUTE_SANITIZER="${CUDA_HOME}/bin/compute-sanitizer"
  # Collapse the dotted version (e.g. 2021.1.1) into one integer (202111) so it
  # can be compared numerically below. The binary path is quoted so a CUDA_HOME
  # containing spaces does not word-split.
  COMPUTE_SANITIZER_VERSION=$("${COMPUTE_SANITIZER}" --version | grep -Eo "[0-9]{4}\.[0-9]\.[0-9]" | sed -e 's/\.//g')
  # --report-api-errors=no: CUDA API calls that return an error code are not
  # reported as sanitizer errors, so they do not trip --error-exitcode=1.
  SANITIZER_CMD="${COMPUTE_SANITIZER} --target-processes=all --launch-timeout=0 --tool=memcheck --error-exitcode=1 --report-api-errors=no"
  # --padding is only added for versions that compare >= 202111.
  if [[ "$COMPUTE_SANITIZER_VERSION" -ge 202111 ]]; then
    SANITIZER_CMD="${SANITIZER_CMD} --padding=32"
  fi
  # Export the flag to later workflow steps through the GitHub Actions env file.
  echo "CUDA_PYTHON_TESTING_WITH_COMPUTE_SANITIZER=1" >> "$GITHUB_ENV"
else
  SANITIZER_CMD=""
fi
Expand Down
5 changes: 0 additions & 5 deletions cuda_bindings/docs/source/environment_variables.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,3 @@
## Runtime Environment Variables

- `CUDA_PYTHON_CUDA_PER_THREAD_DEFAULT_STREAM` : When set to 1, the default stream is the per-thread default stream. When set to 0, the default stream is the legacy default stream. This defaults to 0, for the legacy default stream. See [Stream Synchronization Behavior](https://docs.nvidia.com/cuda/cuda-runtime-api/stream-sync-behavior.html) for an explanation of the legacy and per-thread default streams.


## Test-Time Environment Variables

- `CUDA_PYTHON_TESTING_WITH_COMPUTE_SANITIZER` : When set to 1, tests that would cause [compute-sanitizer](https://docs.nvidia.com/compute-sanitizer/ComputeSanitizer/index.html) to raise an error are skipped.
6 changes: 0 additions & 6 deletions cuda_bindings/tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,9 @@
# Copyright 2025 NVIDIA Corporation. All rights reserved.
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE

import os

import pytest

# Skip marker for tests that cause a CUDA API error.
# The condition is evaluated once, when this module is imported: the marker
# skips only when CUDA_PYTHON_TESTING_WITH_COMPUTE_SANITIZER is exactly "1"
# (an unset variable is treated as "0").
skipif_testing_with_compute_sanitizer = pytest.mark.skipif(
os.environ.get("CUDA_PYTHON_TESTING_WITH_COMPUTE_SANITIZER", "0") == "1",
reason="The compute-sanitizer is running, and this test causes an API error.",
)


def pytest_configure(config):
config.custom_info = []
Expand Down
8 changes: 0 additions & 8 deletions cuda_bindings/tests/test_cuda.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@

import numpy as np
import pytest
from conftest import skipif_testing_with_compute_sanitizer

import cuda.cuda as cuda
import cuda.cudart as cudart
Expand Down Expand Up @@ -80,7 +79,6 @@ def test_cuda_memcpy():
assert err == cuda.CUresult.CUDA_SUCCESS


@skipif_testing_with_compute_sanitizer
def test_cuda_array():
(err,) = cuda.cuInit(0)
assert err == cuda.CUresult.CUDA_SUCCESS
Expand Down Expand Up @@ -234,7 +232,6 @@ def test_cuda_uuid_list_access():
assert err == cuda.CUresult.CUDA_SUCCESS


@skipif_testing_with_compute_sanitizer
def test_cuda_cuModuleLoadDataEx():
(err,) = cuda.cuInit(0)
assert err == cuda.CUresult.CUDA_SUCCESS
Expand Down Expand Up @@ -622,7 +619,6 @@ def test_cuda_coredump_attr():
assert err == cuda.CUresult.CUDA_SUCCESS


@skipif_testing_with_compute_sanitizer
def test_get_error_name_and_string():
(err,) = cuda.cuInit(0)
assert err == cuda.CUresult.CUDA_SUCCESS
Expand Down Expand Up @@ -952,7 +948,6 @@ def test_CUmemDecompressParams_st():
assert int(desc.dstActBytes) == 0


@skipif_testing_with_compute_sanitizer
def test_all_CUresult_codes():
max_code = int(max(cuda.CUresult))
# Smoke test. CUDA_ERROR_UNKNOWN = 999, but intentionally using literal value.
Expand Down Expand Up @@ -985,21 +980,18 @@ def test_all_CUresult_codes():
assert num_good >= 76 # CTK 11.0.3_450.51.06


@skipif_testing_with_compute_sanitizer
def test_cuKernelGetName_failure():
    """Check that cuKernelGetName(0) returns CUDA_ERROR_INVALID_VALUE and no name."""
    status, kernel_name = cuda.cuKernelGetName(0)
    expected_status = cuda.CUresult.CUDA_ERROR_INVALID_VALUE
    # The error code is checked first, then the absence of a name.
    assert status == expected_status
    assert kernel_name is None


@skipif_testing_with_compute_sanitizer
def test_cuFuncGetName_failure():
    """Check that cuFuncGetName(0) returns CUDA_ERROR_INVALID_VALUE and no name."""
    status, func_name = cuda.cuFuncGetName(0)
    expected_status = cuda.CUresult.CUDA_ERROR_INVALID_VALUE
    # The error code is checked first, then the absence of a name.
    assert status == expected_status
    assert func_name is None


@skipif_testing_with_compute_sanitizer
@pytest.mark.skipif(
driverVersionLessThan(12080) or not supportsCudaAPI("cuCheckpointProcessGetState"),
reason="When API was introduced",
Expand Down
2 changes: 0 additions & 2 deletions cuda_bindings/tests/test_cudart.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@

import numpy as np
import pytest
from conftest import skipif_testing_with_compute_sanitizer

import cuda.cuda as cuda
import cuda.cudart as cudart
Expand Down Expand Up @@ -67,7 +66,6 @@ def test_cudart_memcpy():
assertSuccess(err)


@skipif_testing_with_compute_sanitizer
def test_cudart_hostRegister():
# Use hostRegister API to check for correct enum return values
page_size = 80
Expand Down
6 changes: 0 additions & 6 deletions cuda_core/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,11 +65,5 @@ def pop_all_contexts():
return pop_all_contexts


# Skip marker for tests that cause a CUDA API error.
# The condition is evaluated once, when this module is imported: the marker
# skips only when CUDA_PYTHON_TESTING_WITH_COMPUTE_SANITIZER is exactly "1"
# (an unset variable is treated as "0").
skipif_testing_with_compute_sanitizer = pytest.mark.skipif(
os.environ.get("CUDA_PYTHON_TESTING_WITH_COMPUTE_SANITIZER", "0") == "1",
reason="The compute-sanitizer is running, and this test causes an API error.",
)


# Skip marker for tests that need the CUDA headers: it skips whenever the
# CUDA_PATH environment variable is not set at import time.
# TODO: make the fixture more sophisticated using path finder
skipif_need_cuda_headers = pytest.mark.skipif(os.environ.get("CUDA_PATH") is None, reason="need CUDA header")
3 changes: 0 additions & 3 deletions cuda_core/tests/test_cuda_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE

import pytest
from conftest import skipif_testing_with_compute_sanitizer

from cuda.bindings import driver, runtime
from cuda.core.experimental._utils import cuda_utils
Expand Down Expand Up @@ -41,8 +40,6 @@ def test_runtime_cuda_error_explanations_health():
assert not extra_expl


# this test causes an API error when the driver is too old to know about all of the error codes
@skipif_testing_with_compute_sanitizer
def test_check_driver_error():
num_unexpected = 0
for error in driver.CUresult:
Expand Down
5 changes: 1 addition & 4 deletions cuda_core/tests/test_event.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

import numpy as np
import pytest
from conftest import skipif_need_cuda_headers, skipif_testing_with_compute_sanitizer
from conftest import skipif_need_cuda_headers

import cuda.core.experimental
from cuda.core.experimental import Device, EventOptions, LaunchConfig, Program, ProgramOptions, launch
Expand Down Expand Up @@ -71,7 +71,6 @@ def test_is_done(init_cuda):
assert event.is_done in (True, False)


@skipif_testing_with_compute_sanitizer
def test_error_timing_disabled():
device = Device()
device.set_current()
Expand All @@ -94,7 +93,6 @@ def test_error_timing_disabled():
event2 - event1


@skipif_testing_with_compute_sanitizer
def test_error_timing_recorded():
device = Device()
device.set_current()
Expand All @@ -114,7 +112,6 @@ def test_error_timing_recorded():
event3 - event2


@skipif_testing_with_compute_sanitizer
@skipif_need_cuda_headers # libcu++
@pytest.mark.skipif(tuple(int(i) for i in np.__version__.split(".")[:2]) < (2, 1), reason="need numpy 2.1.0+")
def test_error_timing_incomplete():
Expand Down
3 changes: 0 additions & 3 deletions cuda_core/tests/test_linker.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE

import pytest
from conftest import skipif_testing_with_compute_sanitizer

from cuda.core.experimental import Device, Linker, LinkerOptions, Program, ProgramOptions, _linker
from cuda.core.experimental._module import ObjectCode
Expand Down Expand Up @@ -145,8 +144,6 @@ def test_linker_link_invalid_target_type(compile_ptx_functions):
linker.link("invalid_target")


# this test causes an API error when using the culink API
@skipif_testing_with_compute_sanitizer
def test_linker_get_error_log(compile_ptx_functions):
options = LinkerOptions(name="ABC", arch=ARCH)

Expand Down
4 changes: 0 additions & 4 deletions cuda_core/tests/test_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
import warnings

import pytest
from conftest import skipif_testing_with_compute_sanitizer

import cuda.core.experimental
from cuda.core.experimental import Device, ObjectCode, Program, ProgramOptions, system
Expand Down Expand Up @@ -181,7 +180,6 @@ def test_object_code_handle(get_saxpy_object_code):
assert mod.handle is not None


@skipif_testing_with_compute_sanitizer
def test_saxpy_arguments(get_saxpy_kernel, cuda12_prerequisite_check):
if not cuda12_prerequisite_check:
pytest.skip("Test requires CUDA 12")
Expand Down Expand Up @@ -212,7 +210,6 @@ class ExpectedStruct(ctypes.Structure):
assert all(actual == expected for actual, expected in zip(sizes, expected_sizes))


@skipif_testing_with_compute_sanitizer
@pytest.mark.parametrize("nargs", [0, 1, 2, 3, 16])
@pytest.mark.parametrize("c_type_name,c_type", [("int", ctypes.c_int), ("short", ctypes.c_short)], ids=["int", "short"])
def test_num_arguments(init_cuda, nargs, c_type_name, c_type, cuda12_prerequisite_check):
Expand All @@ -238,7 +235,6 @@ class ExpectedStruct(ctypes.Structure):
assert all([actual.size == expected.size for actual, expected in zip(arg_info, members)])


@skipif_testing_with_compute_sanitizer
def test_num_args_error_handling(deinit_all_contexts_function, cuda12_prerequisite_check):
if not cuda12_prerequisite_check:
pytest.skip("Test requires CUDA 12")
Expand Down