diff --git a/.github/workflows/test-wheel-linux.yml b/.github/workflows/test-wheel-linux.yml index 05011504f..59c1ae43b 100644 --- a/.github/workflows/test-wheel-linux.yml +++ b/.github/workflows/test-wheel-linux.yml @@ -69,7 +69,7 @@ jobs: - { ARCH: ${ARCH}, PY_VER: '3.10', CUDA_VER: '12.9.0', LOCAL_CTK: '1', GPU: ${gpu}, DRIVER: 'latest' } - { ARCH: ${ARCH}, PY_VER: '3.11', CUDA_VER: '11.8.0', LOCAL_CTK: '1', GPU: ${gpu}, DRIVER: 'latest' } - { ARCH: ${ARCH}, PY_VER: '3.11', CUDA_VER: '12.9.0', LOCAL_CTK: '0', GPU: ${gpu}, DRIVER: 'latest' } - - { ARCH: ${ARCH}, PY_VER: '3.12', CUDA_VER: '12.0.1', LOCAL_CTK: '1', GPU: ${gpu}, DRIVER: 'latest' } + - { ARCH: ${ARCH}, PY_VER: '3.12', CUDA_VER: '12.0.1', LOCAL_CTK: '1', GPU: ${gpu}, DRIVER: 'earliest' } - { ARCH: ${ARCH}, PY_VER: '3.12', CUDA_VER: '12.9.0', LOCAL_CTK: '1', GPU: ${gpu}, DRIVER: 'latest' } - { ARCH: ${ARCH}, PY_VER: '3.13', CUDA_VER: '11.8.0', LOCAL_CTK: '0', GPU: ${gpu}, DRIVER: 'latest' } - { ARCH: ${ARCH}, PY_VER: '3.13', CUDA_VER: '12.0.1', LOCAL_CTK: '1', GPU: ${gpu}, DRIVER: 'latest' } diff --git a/cuda_bindings/tests/test_cuda.py b/cuda_bindings/tests/test_cuda.py index a6ba12099..3532c79b4 100644 --- a/cuda_bindings/tests/test_cuda.py +++ b/cuda_bindings/tests/test_cuda.py @@ -980,12 +980,14 @@ def test_all_CUresult_codes(): assert num_good >= 76 # CTK 11.0.3_450.51.06 +@pytest.mark.skipif(driverVersionLessThan(12030), reason="Driver too old for cuKernelGetName") def test_cuKernelGetName_failure(): err, name = cuda.cuKernelGetName(0) assert err == cuda.CUresult.CUDA_ERROR_INVALID_VALUE assert name is None +@pytest.mark.skipif(driverVersionLessThan(12030), reason="Driver too old for cuFuncGetName") def test_cuFuncGetName_failure(): err, name = cuda.cuFuncGetName(0) assert err == cuda.CUresult.CUDA_ERROR_INVALID_VALUE diff --git a/cuda_core/cuda/core/experimental/_module.py b/cuda_core/cuda/core/experimental/_module.py index 8bfc74e4a..56b2a83a2 100644 --- 
a/cuda_core/cuda/core/experimental/_module.py +++ b/cuda_core/cuda/core/experimental/_module.py @@ -46,12 +46,13 @@ def _lazy_init(): "data": driver.cuLibraryLoadData, "kernel": driver.cuLibraryGetKernel, "attribute": driver.cuKernelGetAttribute, - "paraminfo": driver.cuKernelGetParamInfo, } _kernel_ctypes = (driver.CUfunction, driver.CUkernel) else: _kernel_ctypes = (driver.CUfunction,) _driver_ver = handle_return(driver.cuDriverGetVersion()) + if _py_major_ver >= 12 and _driver_ver >= 12040: + _backend["new"]["paraminfo"] = driver.cuKernelGetParamInfo _inited = True @@ -391,6 +392,11 @@ def _get_arguments_info(self, param_info=False) -> tuple[int, list[ParamInfo]]: attr_impl = self.attributes if attr_impl._backend_version != "new": raise NotImplementedError("New backend is required") + if "paraminfo" not in attr_impl._loader: + raise NotImplementedError( + "Driver version 12.4 or newer is required for this function. " + f"Using driver version {_driver_ver // 1000}.{(_driver_ver % 1000) // 10}" + ) arg_pos = 0 param_info_data = [] while True: diff --git a/cuda_core/tests/test_module.py b/cuda_core/tests/test_module.py index 24e326bca..9fcf951b5 100644 --- a/cuda_core/tests/test_module.py +++ b/cuda_core/tests/test_module.py @@ -32,12 +32,12 @@ @pytest.fixture(scope="module") -def cuda12_prerequisite_check(): +def cuda12_4_prerequisite_check(): # binding availability depends on cuda-python version # and version of underlying CUDA toolkit _py_major_ver, _ = get_binding_version() _driver_ver = handle_return(driver.cuDriverGetVersion()) - return _py_major_ver >= 12 and _driver_ver >= 12000 + return _py_major_ver >= 12 and _driver_ver >= 12040 def test_kernel_attributes_init_disabled(): @@ -180,12 +180,15 @@ def test_object_code_handle(get_saxpy_object_code): assert mod.handle is not None -def test_saxpy_arguments(get_saxpy_kernel, cuda12_prerequisite_check): - if not cuda12_prerequisite_check: - pytest.skip("Test requires CUDA 12") +def 
test_saxpy_arguments(get_saxpy_kernel, cuda12_4_prerequisite_check): krn, _ = get_saxpy_kernel - assert krn.num_arguments == 5 + if cuda12_4_prerequisite_check: + assert krn.num_arguments == 5 + else: + with pytest.raises(NotImplementedError): + _ = krn.num_arguments + return assert "ParamInfo" in str(type(krn).arguments_info.fget.__annotations__) arg_info = krn.arguments_info @@ -212,8 +215,8 @@ class ExpectedStruct(ctypes.Structure): @pytest.mark.parametrize("nargs", [0, 1, 2, 3, 16]) @pytest.mark.parametrize("c_type_name,c_type", [("int", ctypes.c_int), ("short", ctypes.c_short)], ids=["int", "short"]) -def test_num_arguments(init_cuda, nargs, c_type_name, c_type, cuda12_prerequisite_check): -    if not cuda12_prerequisite_check: +def test_num_arguments(init_cuda, nargs, c_type_name, c_type, cuda12_4_prerequisite_check): +    if not cuda12_4_prerequisite_check: -        pytest.skip("Test requires CUDA 12") +        pytest.skip("Test requires CUDA 12.4") args_str = ", ".join([f"{c_type_name} p_{i}" for i in range(nargs)]) src = f"__global__ void foo{nargs}({args_str}) {{ }}" @@ -235,8 +238,8 @@ class ExpectedStruct(ctypes.Structure): assert all([actual.size == expected.size for actual, expected in zip(arg_info, members)]) -def test_num_args_error_handling(deinit_all_contexts_function, cuda12_prerequisite_check): -    if not cuda12_prerequisite_check: +def test_num_args_error_handling(deinit_all_contexts_function, cuda12_4_prerequisite_check): +    if not cuda12_4_prerequisite_check: -        pytest.skip("Test requires CUDA 12") +        pytest.skip("Test requires CUDA 12.4") src = "__global__ void foo(int a) { }" prog = Program(src, code_type="c++") @@ -341,7 +344,7 @@ def test_occupancy_available_dynamic_shared_memory_per_block(get_saxpy_kernel, n def test_occupancy_max_active_clusters(get_saxpy_kernel, cluster): kernel, _ = get_saxpy_kernel dev = Device() - if (cluster) and (dev.compute_capability < (9, 0)): + if dev.compute_capability < (9, 0): pytest.skip("Device with compute capability 90 or higher is required for cluster support") launch_config = 
cuda.core.experimental.LaunchConfig(grid=128, block=64, cluster=cluster) query_fn = kernel.occupancy.max_active_clusters @@ -356,6 +359,8 @@ def test_occupancy_max_active_clusters(get_saxpy_kernel, cluster): def test_occupancy_max_potential_cluster_size(get_saxpy_kernel): kernel, _ = get_saxpy_kernel dev = Device() + if dev.compute_capability < (9, 0): + pytest.skip("Device with compute capability 90 or higher is required for cluster support") launch_config = cuda.core.experimental.LaunchConfig(grid=128, block=64) query_fn = kernel.occupancy.max_potential_cluster_size max_potential_cluster_size = query_fn(launch_config)