Skip to content

Fix nvbugpro 5348750 #725

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jun 26, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/test-wheel-linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ jobs:
- { ARCH: ${ARCH}, PY_VER: '3.10', CUDA_VER: '12.9.0', LOCAL_CTK: '1', GPU: ${gpu}, DRIVER: 'latest' }
- { ARCH: ${ARCH}, PY_VER: '3.11', CUDA_VER: '11.8.0', LOCAL_CTK: '1', GPU: ${gpu}, DRIVER: 'latest' }
- { ARCH: ${ARCH}, PY_VER: '3.11', CUDA_VER: '12.9.0', LOCAL_CTK: '0', GPU: ${gpu}, DRIVER: 'latest' }
- { ARCH: ${ARCH}, PY_VER: '3.12', CUDA_VER: '12.0.1', LOCAL_CTK: '1', GPU: ${gpu}, DRIVER: 'latest' }
- { ARCH: ${ARCH}, PY_VER: '3.12', CUDA_VER: '12.0.1', LOCAL_CTK: '1', GPU: ${gpu}, DRIVER: 'earliest' }
- { ARCH: ${ARCH}, PY_VER: '3.12', CUDA_VER: '12.9.0', LOCAL_CTK: '1', GPU: ${gpu}, DRIVER: 'latest' }
- { ARCH: ${ARCH}, PY_VER: '3.13', CUDA_VER: '11.8.0', LOCAL_CTK: '0', GPU: ${gpu}, DRIVER: 'latest' }
- { ARCH: ${ARCH}, PY_VER: '3.13', CUDA_VER: '12.0.1', LOCAL_CTK: '1', GPU: ${gpu}, DRIVER: 'latest' }
Expand Down
2 changes: 2 additions & 0 deletions cuda_bindings/tests/test_cuda.py
Original file line number Diff line number Diff line change
Expand Up @@ -980,12 +980,14 @@ def test_all_CUresult_codes():
assert num_good >= 76 # CTK 11.0.3_450.51.06


@pytest.mark.skipif(driverVersionLessThan(12030), reason="Driver too old for cuKernelGetName")
def test_cuKernelGetName_failure():
    """A null kernel handle must yield CUDA_ERROR_INVALID_VALUE and no name string."""
    result, kernel_name = cuda.cuKernelGetName(0)
    assert result == cuda.CUresult.CUDA_ERROR_INVALID_VALUE
    # On failure the binding is expected to return None rather than an empty name.
    assert kernel_name is None


@pytest.mark.skipif(driverVersionLessThan(12030), reason="Driver too old for cuFuncGetName")
def test_cuFuncGetName_failure():
err, name = cuda.cuFuncGetName(0)
assert err == cuda.CUresult.CUDA_ERROR_INVALID_VALUE
Expand Down
8 changes: 7 additions & 1 deletion cuda_core/cuda/core/experimental/_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,12 +46,13 @@ def _lazy_init():
"data": driver.cuLibraryLoadData,
"kernel": driver.cuLibraryGetKernel,
"attribute": driver.cuKernelGetAttribute,
"paraminfo": driver.cuKernelGetParamInfo,
}
_kernel_ctypes = (driver.CUfunction, driver.CUkernel)
else:
_kernel_ctypes = (driver.CUfunction,)
_driver_ver = handle_return(driver.cuDriverGetVersion())
if _py_major_ver >= 12 and _driver_ver >= 12040:
_backend["new"]["paraminfo"] = driver.cuKernelGetParamInfo
_inited = True


Expand Down Expand Up @@ -391,6 +392,11 @@ def _get_arguments_info(self, param_info=False) -> tuple[int, list[ParamInfo]]:
attr_impl = self.attributes
if attr_impl._backend_version != "new":
raise NotImplementedError("New backend is required")
if "paraminfo" not in attr_impl._loader:
raise NotImplementedError(
"Driver version 12.4 or newer is required for this function. "
f"Using driver version {_driver_ver // 1000}.{(_driver_ver % 1000) // 10}"
)
arg_pos = 0
param_info_data = []
while True:
Expand Down
27 changes: 16 additions & 11 deletions cuda_core/tests/test_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,12 @@


@pytest.fixture(scope="module")
def cuda12_prerequisite_check():
def cuda12_4_prerequisite_check():
# binding availability depends on cuda-python version
# and version of underlying CUDA toolkit
_py_major_ver, _ = get_binding_version()
_driver_ver = handle_return(driver.cuDriverGetVersion())
return _py_major_ver >= 12 and _driver_ver >= 12000
return _py_major_ver >= 12 and _driver_ver >= 12040


def test_kernel_attributes_init_disabled():
Expand Down Expand Up @@ -180,12 +180,15 @@ def test_object_code_handle(get_saxpy_object_code):
assert mod.handle is not None


def test_saxpy_arguments(get_saxpy_kernel, cuda12_prerequisite_check):
if not cuda12_prerequisite_check:
pytest.skip("Test requires CUDA 12")
def test_saxpy_arguments(get_saxpy_kernel, cuda12_4_prerequisite_check):
krn, _ = get_saxpy_kernel

assert krn.num_arguments == 5
if cuda12_4_prerequisite_check:
assert krn.num_arguments == 5
else:
with pytest.raises(NotImplementedError):
_ = krn.num_arguments
return

assert "ParamInfo" in str(type(krn).arguments_info.fget.__annotations__)
arg_info = krn.arguments_info
Expand All @@ -212,8 +215,8 @@ class ExpectedStruct(ctypes.Structure):

@pytest.mark.parametrize("nargs", [0, 1, 2, 3, 16])
@pytest.mark.parametrize("c_type_name,c_type", [("int", ctypes.c_int), ("short", ctypes.c_short)], ids=["int", "short"])
def test_num_arguments(init_cuda, nargs, c_type_name, c_type, cuda12_prerequisite_check):
if not cuda12_prerequisite_check:
def test_num_arguments(init_cuda, nargs, c_type_name, c_type, cuda12_4_prerequisite_check):
if not cuda12_4_prerequisite_check:
pytest.skip("Test requires CUDA 12")
args_str = ", ".join([f"{c_type_name} p_{i}" for i in range(nargs)])
src = f"__global__ void foo{nargs}({args_str}) {{ }}"
Expand All @@ -235,8 +238,8 @@ class ExpectedStruct(ctypes.Structure):
assert all([actual.size == expected.size for actual, expected in zip(arg_info, members)])


def test_num_args_error_handling(deinit_all_contexts_function, cuda12_prerequisite_check):
if not cuda12_prerequisite_check:
def test_num_args_error_handling(deinit_all_contexts_function, cuda12_4_prerequisite_check):
if not cuda12_4_prerequisite_check:
pytest.skip("Test requires CUDA 12")
src = "__global__ void foo(int a) { }"
prog = Program(src, code_type="c++")
Expand Down Expand Up @@ -341,7 +344,7 @@ def test_occupancy_available_dynamic_shared_memory_per_block(get_saxpy_kernel, n
def test_occupancy_max_active_clusters(get_saxpy_kernel, cluster):
kernel, _ = get_saxpy_kernel
dev = Device()
if (cluster) and (dev.compute_capability < (9, 0)):
if dev.compute_capability < (9, 0):
pytest.skip("Device with compute capability 90 or higher is required for cluster support")
launch_config = cuda.core.experimental.LaunchConfig(grid=128, block=64, cluster=cluster)
query_fn = kernel.occupancy.max_active_clusters
Expand All @@ -356,6 +359,8 @@ def test_occupancy_max_active_clusters(get_saxpy_kernel, cluster):
def test_occupancy_max_potential_cluster_size(get_saxpy_kernel):
kernel, _ = get_saxpy_kernel
dev = Device()
if dev.compute_capability < (9, 0):
pytest.skip("Device with compute capability 90 or higher is required for cluster support")
launch_config = cuda.core.experimental.LaunchConfig(grid=128, block=64)
query_fn = kernel.occupancy.max_potential_cluster_size
max_potential_cluster_size = query_fn(launch_config)
Expand Down
Loading