diff --git a/.gitignore b/.gitignore index de32950bc..656dd8916 100644 --- a/.gitignore +++ b/.gitignore @@ -48,6 +48,7 @@ cuda_bindings/cuda/bindings/runtime.pxd cuda_bindings/cuda/bindings/runtime.pyx cuda_bindings/cuda/bindings/nvrtc.pxd cuda_bindings/cuda/bindings/nvrtc.pyx +cuda_bindings/cuda/bindings/utils/_get_handle.pyx # Distribution / packaging .Python @@ -181,4 +182,4 @@ dmypy.json cython_debug/ # Dont ignore -!.github/actions/build/ \ No newline at end of file +!.github/actions/build/ diff --git a/cuda_bindings/cuda/bindings/utils/__init__.pxd b/cuda_bindings/cuda/bindings/utils/__init__.pxd new file mode 100644 index 000000000..e69de29bb diff --git a/cuda_bindings/cuda/bindings/utils/__init__.py b/cuda_bindings/cuda/bindings/utils/__init__.py index 4cb128415..ab13d004c 100644 --- a/cuda_bindings/cuda/bindings/utils/__init__.py +++ b/cuda_bindings/cuda/bindings/utils/__init__.py @@ -1,4 +1,5 @@ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE +from ._get_handle import get_cuda_native_handle from ._ptx_utils import get_minimal_required_cuda_ver_from_ptx_ver, get_ptx_ver diff --git a/cuda_bindings/cuda/bindings/utils/_get_handle.pyx.in b/cuda_bindings/cuda/bindings/utils/_get_handle.pyx.in new file mode 100644 index 000000000..2d40133db --- /dev/null +++ b/cuda_bindings/cuda/bindings/utils/_get_handle.pyx.in @@ -0,0 +1,230 @@ +# SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE + +# This code was automatically generated with version 12.9.0. Do not modify it directly. + +from libc.stdint cimport uintptr_t +cimport cython + +from cuda.bindings cimport driver, runtime, cydriver, cyruntime + + +cdef dict _handle_getters = None + +@cython.embedsignature(True) +def get_cuda_native_handle(obj) -> int: + """ Returns the address of the provided CUDA Python object as Python int. + + Parameters + ---------- + obj : Any + CUDA Python object + + Returns + ------- + int : The object address. + """ + global _handle_getters + obj_type = type(obj) + if _handle_getters is None: + _handle_getters = dict() + {{if 'CUcontext' in found_types}} + def CUcontext_getter(driver.CUcontext x): return (x._pvt_ptr[0]) + _handle_getters[driver.CUcontext] = CUcontext_getter + {{endif}} + {{if 'CUmodule' in found_types}} + def CUmodule_getter(driver.CUmodule x): return (x._pvt_ptr[0]) + _handle_getters[driver.CUmodule] = CUmodule_getter + {{endif}} + {{if 'CUfunction' in found_types}} + def CUfunction_getter(driver.CUfunction x): return (x._pvt_ptr[0]) + _handle_getters[driver.CUfunction] = CUfunction_getter + {{endif}} + {{if 'CUlibrary' in found_types}} + def CUlibrary_getter(driver.CUlibrary x): return (x._pvt_ptr[0]) + _handle_getters[driver.CUlibrary] = CUlibrary_getter + {{endif}} + {{if 'CUkernel' in found_types}} + def CUkernel_getter(driver.CUkernel x): return (x._pvt_ptr[0]) + _handle_getters[driver.CUkernel] = CUkernel_getter + {{endif}} + {{if 'CUarray' in found_types}} + def CUarray_getter(driver.CUarray x): return (x._pvt_ptr[0]) + _handle_getters[driver.CUarray] = CUarray_getter + {{endif}} + {{if 'CUmipmappedArray' in found_types}} + def CUmipmappedArray_getter(driver.CUmipmappedArray x): return (x._pvt_ptr[0]) + _handle_getters[driver.CUmipmappedArray] = CUmipmappedArray_getter + {{endif}} + {{if 'CUtexref' in found_types}} + def CUtexref_getter(driver.CUtexref x): return (x._pvt_ptr[0]) + _handle_getters[driver.CUtexref] = CUtexref_getter + {{endif}} + {{if 'CUsurfref' in found_types}} + def CUsurfref_getter(driver.CUsurfref x): return (x._pvt_ptr[0]) + _handle_getters[driver.CUsurfref] = CUsurfref_getter + {{endif}} + {{if 'CUevent' in found_types}} + def CUevent_getter(driver.CUevent x): return (x._pvt_ptr[0]) + _handle_getters[driver.CUevent] = CUevent_getter + {{endif}} + {{if 'CUstream' in found_types}} + def CUstream_getter(driver.CUstream x): return (x._pvt_ptr[0]) + _handle_getters[driver.CUstream] = CUstream_getter + {{endif}} + {{if 'CUgraphicsResource' in found_types}} + def CUgraphicsResource_getter(driver.CUgraphicsResource x): return (x._pvt_ptr[0]) + _handle_getters[driver.CUgraphicsResource] = CUgraphicsResource_getter + {{endif}} + {{if 'CUexternalMemory' in found_types}} + def CUexternalMemory_getter(driver.CUexternalMemory x): return (x._pvt_ptr[0]) + _handle_getters[driver.CUexternalMemory] = CUexternalMemory_getter + {{endif}} + {{if 'CUexternalSemaphore' in found_types}} + def CUexternalSemaphore_getter(driver.CUexternalSemaphore x): return (x._pvt_ptr[0]) + _handle_getters[driver.CUexternalSemaphore] = CUexternalSemaphore_getter + {{endif}} + {{if 'CUgraph' in found_types}} + def CUgraph_getter(driver.CUgraph x): return (x._pvt_ptr[0]) + _handle_getters[driver.CUgraph] = CUgraph_getter + {{endif}} + {{if 'CUgraphNode' in found_types}} + def CUgraphNode_getter(driver.CUgraphNode x): return (x._pvt_ptr[0]) + _handle_getters[driver.CUgraphNode] = CUgraphNode_getter + {{endif}} + {{if 'CUgraphExec' in found_types}} + def CUgraphExec_getter(driver.CUgraphExec x): return (x._pvt_ptr[0]) + _handle_getters[driver.CUgraphExec] = CUgraphExec_getter + {{endif}} + {{if 'CUmemoryPool' in found_types}} + def CUmemoryPool_getter(driver.CUmemoryPool x): return (x._pvt_ptr[0]) + _handle_getters[driver.CUmemoryPool] = CUmemoryPool_getter + {{endif}} + {{if 'CUuserObject' in found_types}} + def CUuserObject_getter(driver.CUuserObject x): return (x._pvt_ptr[0]) + _handle_getters[driver.CUuserObject] = CUuserObject_getter + {{endif}} + {{if 'CUgraphDeviceNode' in found_types}} + def CUgraphDeviceNode_getter(driver.CUgraphDeviceNode x): return (x._pvt_ptr[0]) + _handle_getters[driver.CUgraphDeviceNode] = CUgraphDeviceNode_getter + {{endif}} + {{if 'CUasyncCallbackHandle' in found_types}} + def CUasyncCallbackHandle_getter(driver.CUasyncCallbackHandle x): return (x._pvt_ptr[0]) + _handle_getters[driver.CUasyncCallbackHandle] = CUasyncCallbackHandle_getter + {{endif}} + {{if 'CUgreenCtx' in found_types}} + def CUgreenCtx_getter(driver.CUgreenCtx x): return (x._pvt_ptr[0]) + _handle_getters[driver.CUgreenCtx] = CUgreenCtx_getter + {{endif}} + {{if 'CUlinkState' in found_types}} + def CUlinkState_getter(driver.CUlinkState x): return (x._pvt_ptr[0]) + _handle_getters[driver.CUlinkState] = CUlinkState_getter + {{endif}} + {{if 'CUdevResourceDesc' in found_types}} + def CUdevResourceDesc_getter(driver.CUdevResourceDesc x): return (x._pvt_ptr[0]) + _handle_getters[driver.CUdevResourceDesc] = CUdevResourceDesc_getter + {{endif}} + {{if 'CUlogsCallbackHandle' in found_types}} + def CUlogsCallbackHandle_getter(driver.CUlogsCallbackHandle x): return (x._pvt_ptr[0]) + _handle_getters[driver.CUlogsCallbackHandle] = CUlogsCallbackHandle_getter + {{endif}} + {{if True}} + def CUeglStreamConnection_getter(driver.CUeglStreamConnection x): return (x._pvt_ptr[0]) + _handle_getters[driver.CUeglStreamConnection] = CUeglStreamConnection_getter + {{endif}} + {{if True}} + def EGLImageKHR_getter(runtime.EGLImageKHR x): return (x._pvt_ptr[0]) + _handle_getters[runtime.EGLImageKHR] = EGLImageKHR_getter + {{endif}} + {{if True}} + def EGLStreamKHR_getter(runtime.EGLStreamKHR x): return (x._pvt_ptr[0]) + _handle_getters[runtime.EGLStreamKHR] = EGLStreamKHR_getter + {{endif}} + {{if True}} + def EGLSyncKHR_getter(runtime.EGLSyncKHR x): return (x._pvt_ptr[0]) + _handle_getters[runtime.EGLSyncKHR] = EGLSyncKHR_getter + {{endif}} + {{if 'cudaArray_t' in found_types}} + def cudaArray_t_getter(runtime.cudaArray_t x): return (x._pvt_ptr[0]) + _handle_getters[runtime.cudaArray_t] = cudaArray_t_getter + {{endif}} + {{if 'cudaArray_const_t' in found_types}} + def cudaArray_const_t_getter(runtime.cudaArray_const_t x): return (x._pvt_ptr[0]) + _handle_getters[runtime.cudaArray_const_t] = cudaArray_const_t_getter + {{endif}} + {{if 'cudaMipmappedArray_t' in found_types}} + def cudaMipmappedArray_t_getter(runtime.cudaMipmappedArray_t x): return (x._pvt_ptr[0]) + _handle_getters[runtime.cudaMipmappedArray_t] = cudaMipmappedArray_t_getter + {{endif}} + {{if 'cudaMipmappedArray_const_t' in found_types}} + def cudaMipmappedArray_const_t_getter(runtime.cudaMipmappedArray_const_t x): return (x._pvt_ptr[0]) + _handle_getters[runtime.cudaMipmappedArray_const_t] = cudaMipmappedArray_const_t_getter + {{endif}} + {{if 'cudaStream_t' in found_types}} + def cudaStream_t_getter(runtime.cudaStream_t x): return (x._pvt_ptr[0]) + _handle_getters[runtime.cudaStream_t] = cudaStream_t_getter + {{endif}} + {{if 'cudaEvent_t' in found_types}} + def cudaEvent_t_getter(runtime.cudaEvent_t x): return (x._pvt_ptr[0]) + _handle_getters[runtime.cudaEvent_t] = cudaEvent_t_getter + {{endif}} + {{if 'cudaGraphicsResource_t' in found_types}} + def cudaGraphicsResource_t_getter(runtime.cudaGraphicsResource_t x): return (x._pvt_ptr[0]) + _handle_getters[runtime.cudaGraphicsResource_t] = cudaGraphicsResource_t_getter + {{endif}} + {{if 'cudaExternalMemory_t' in found_types}} + def cudaExternalMemory_t_getter(runtime.cudaExternalMemory_t x): return (x._pvt_ptr[0]) + _handle_getters[runtime.cudaExternalMemory_t] = cudaExternalMemory_t_getter + {{endif}} + {{if 'cudaExternalSemaphore_t' in found_types}} + def cudaExternalSemaphore_t_getter(runtime.cudaExternalSemaphore_t x): return (x._pvt_ptr[0]) + _handle_getters[runtime.cudaExternalSemaphore_t] = cudaExternalSemaphore_t_getter + {{endif}} + {{if 'cudaGraph_t' in found_types}} + def cudaGraph_t_getter(runtime.cudaGraph_t x): return (x._pvt_ptr[0]) + _handle_getters[runtime.cudaGraph_t] = cudaGraph_t_getter + {{endif}} + {{if 'cudaGraphNode_t' in found_types}} + def cudaGraphNode_t_getter(runtime.cudaGraphNode_t x): return (x._pvt_ptr[0]) + _handle_getters[runtime.cudaGraphNode_t] = cudaGraphNode_t_getter + {{endif}} + {{if 'cudaUserObject_t' in found_types}} + def cudaUserObject_t_getter(runtime.cudaUserObject_t x): return (x._pvt_ptr[0]) + _handle_getters[runtime.cudaUserObject_t] = cudaUserObject_t_getter + {{endif}} + {{if 'cudaFunction_t' in found_types}} + def cudaFunction_t_getter(runtime.cudaFunction_t x): return (x._pvt_ptr[0]) + _handle_getters[runtime.cudaFunction_t] = cudaFunction_t_getter + {{endif}} + {{if 'cudaKernel_t' in found_types}} + def cudaKernel_t_getter(runtime.cudaKernel_t x): return (x._pvt_ptr[0]) + _handle_getters[runtime.cudaKernel_t] = cudaKernel_t_getter + {{endif}} + {{if 'cudaLibrary_t' in found_types}} + def cudaLibrary_t_getter(runtime.cudaLibrary_t x): return (x._pvt_ptr[0]) + _handle_getters[runtime.cudaLibrary_t] = cudaLibrary_t_getter + {{endif}} + {{if 'cudaMemPool_t' in found_types}} + def cudaMemPool_t_getter(runtime.cudaMemPool_t x): return (x._pvt_ptr[0]) + _handle_getters[runtime.cudaMemPool_t] = cudaMemPool_t_getter + {{endif}} + {{if 'cudaGraphExec_t' in found_types}} + def cudaGraphExec_t_getter(runtime.cudaGraphExec_t x): return (x._pvt_ptr[0]) + _handle_getters[runtime.cudaGraphExec_t] = cudaGraphExec_t_getter + {{endif}} + {{if 'cudaGraphDeviceNode_t' in found_types}} + def cudaGraphDeviceNode_t_getter(runtime.cudaGraphDeviceNode_t x): return (x._pvt_ptr[0]) + _handle_getters[runtime.cudaGraphDeviceNode_t] = cudaGraphDeviceNode_t_getter + {{endif}} + {{if 'cudaAsyncCallbackHandle_t' in found_types}} + def cudaAsyncCallbackHandle_t_getter(runtime.cudaAsyncCallbackHandle_t x): return (x._pvt_ptr[0]) + _handle_getters[runtime.cudaAsyncCallbackHandle_t] = cudaAsyncCallbackHandle_t_getter + {{endif}} + {{if True}} + def cudaEglStreamConnection_getter(runtime.cudaEglStreamConnection x): return (x._pvt_ptr[0]) + _handle_getters[runtime.cudaEglStreamConnection] = cudaEglStreamConnection_getter + {{endif}} + try: + return _handle_getters[obj_type](obj) + except KeyError: + raise TypeError("Unknown type: " + str(obj_type)) from None \ No newline at end of file diff --git a/cuda_bindings/docs/source/module/utils.rst b/cuda_bindings/docs/source/module/utils.rst index 534437dfe..e720b0979 100644 --- a/cuda_bindings/docs/source/module/utils.rst +++ b/cuda_bindings/docs/source/module/utils.rst @@ -3,8 +3,8 @@ .. module:: cuda.bindings.utils -Utils module -============ +utils +===== Functions --------- @@ -12,5 +12,6 @@ Functions .. autosummary:: :toctree: generated/ + get_cuda_native_handle get_minimal_required_cuda_ver_from_ptx_ver get_ptx_ver diff --git a/cuda_bindings/docs/source/release/12.X.Y-notes.rst b/cuda_bindings/docs/source/release/12.X.Y-notes.rst index a6da59185..eb45c3930 100644 --- a/cuda_bindings/docs/source/release/12.X.Y-notes.rst +++ b/cuda_bindings/docs/source/release/12.X.Y-notes.rst @@ -12,6 +12,12 @@ Released on MM DD, 2025 Highlights ---------- +* A utility module :mod:`cuda.bindings.utils` is added + + * Using ``int(cuda_obj)`` to retrieve the underlying address of a CUDA object is deprecated and + subject to future removal. Please switch to use :func:`~cuda.bindings.utils.get_cuda_native_handle` + instead. + * The ``cuda.bindings.cufile`` Python module was added, wrapping the `cuFile C APIs `_. Supported on Linux only. diff --git a/cuda_bindings/docs/source/tips_and_tricks.rst b/cuda_bindings/docs/source/tips_and_tricks.rst index 86b9e5864..97f585f9b 100644 --- a/cuda_bindings/docs/source/tips_and_tricks.rst +++ b/cuda_bindings/docs/source/tips_and_tricks.rst @@ -11,6 +11,12 @@ All CUDA C types are exposed to Python as Python classes. For example, the :clas There is an important distinction between the ``getPtr()`` method and the behaviour of ``__int__()``. Since a ``CUstream`` is itself just a pointer, calling ``instance_of_CUstream.getPtr()`` returns the pointer *to* the pointer, instead of the value of the ``CUstream`` C object that is the pointer to the underlying stream handle. ``int(instance_of_CUstream)`` returns the value of the ``CUstream`` converted to a Python int and is the actual address of the underlying handle. +.. warning:: + + Using ``int(cuda_obj)`` to retrieve the underlying address of a CUDA object is deprecated and + subject to future removal. Please switch to use :func:`~cuda.bindings.utils.get_cuda_native_handle` + instead. + Lifetime management of the CUDA objects ======================================= diff --git a/cuda_bindings/setup.py b/cuda_bindings/setup.py index 50f573a37..409c48eda 100644 --- a/cuda_bindings/setup.py +++ b/cuda_bindings/setup.py @@ -221,6 +221,7 @@ def generate_output(infile, local): os.path.join("cuda", "bindings", "_lib"), os.path.join("cuda", "bindings", "_lib", "cyruntime"), os.path.join("cuda", "bindings", "_internal"), + os.path.join("cuda", "bindings", "utils"), ] input_files = [] for path in path_list: @@ -287,6 +288,7 @@ def prep_extensions(sources, libraries): # new path for the bindings from cybind def rename_architecture_specific_files(): + path = os.path.join("cuda", "bindings", "_internal") if sys.platform == "linux": src_files = glob.glob(os.path.join(path, "*_linux.pyx")) elif sys.platform == "win32": @@ -341,6 +343,7 @@ def do_cythonize(extensions): (["cuda/bindings/_lib/utils.pyx", "cuda/bindings/_lib/param_packer.cpp"], None), (["cuda/bindings/_lib/cyruntime/cyruntime.pyx"], None), (["cuda/bindings/_lib/cyruntime/utils.pyx"], None), + (["cuda/bindings/utils/*.pyx"], None), # public *(([f], None) for f in cuda_bindings_files), # public (deprecated, to be removed) diff --git a/cuda_bindings/tests/test_utils.py b/cuda_bindings/tests/test_utils.py index b0c228f44..3da7272ed 100644 --- a/cuda_bindings/tests/test_utils.py +++ b/cuda_bindings/tests/test_utils.py @@ -1,9 +1,12 @@ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE +import random + import pytest -from cuda.bindings.utils import get_minimal_required_cuda_ver_from_ptx_ver, get_ptx_ver +from cuda.bindings import driver, runtime +from cuda.bindings.utils import get_cuda_native_handle, get_minimal_required_cuda_ver_from_ptx_ver, get_ptx_ver ptx_88_kernel = r""" .version 8.8 @@ -41,3 +44,46 @@ def test_ptx_utils(kernel, actual_ptx_ver, min_cuda_ver): assert ptx_ver == actual_ptx_ver cuda_ver = get_minimal_required_cuda_ver_from_ptx_ver(ptx_ver) assert cuda_ver == min_cuda_ver + + +@pytest.mark.parametrize( + "target", + ( + driver.CUcontext, + driver.CUstream, + driver.CUevent, + driver.CUmodule, + driver.CUlibrary, + driver.CUfunction, + driver.CUkernel, + driver.CUgraph, + driver.CUgraphNode, + driver.CUgraphExec, + driver.CUmemoryPool, + runtime.cudaStream_t, + runtime.cudaEvent_t, + runtime.cudaGraph_t, + runtime.cudaGraphNode_t, + runtime.cudaGraphExec_t, + runtime.cudaMemPool_t, + ), +) +def test_get_handle(target): + ptr = random.randint(1, 1024) + obj = target(ptr) + handle = get_cuda_native_handle(obj) + assert handle == ptr + + +@pytest.mark.parametrize( + "target", + ( + (1, 2, 3, 4), + [5, 6], + {}, + None, + ), +) +def test_get_handle_error(target): + with pytest.raises(TypeError) as e: + handle = get_cuda_native_handle(target)