Skip to content
Merged
Show file tree
Hide file tree
Changes from 44 commits
Commits
Show all changes
46 commits
Select commit Hold shift + click to select a range
dd8b461
[MNT] Always use cuda.core and fallback to cuda.bindings
rparolin Sep 19, 2025
137edeb
whitespace
rparolin Sep 19, 2025
5388c11
removing import flagged by ruff
rparolin Sep 19, 2025
76e85d1
Checking if nvjitlink is available.
rparolin Sep 19, 2025
025005a
Removing a conditional I missed
rparolin Sep 19, 2025
0d53835
Disabling test_conda_ctypes_binding.sh in CI
rparolin Sep 19, 2025
b4edda0
Removing file ci/test_conda_ctypes_binding.sh
rparolin Sep 19, 2025
474a1a2
Removing ci/test_wheel_ctypes_binding.sh
rparolin Sep 19, 2025
c1e11ce
Removing USE_NV_BINDING conditional
rparolin Sep 20, 2025
3f0ceb3
Restoring USE_NV_BINDING so we don't break customer code.
rparolin Sep 22, 2025
f8076f5
debugging test_binary_generation.py
rparolin Sep 22, 2025
6782a6f
wip
rparolin Sep 22, 2025
dedfc5b
merge main
rparolin Oct 1, 2025
84ee39b
remove unneeded import
rparolin Oct 1, 2025
eae0303
ci fix
rparolin Oct 1, 2025
adf5fc1
whitespace
rparolin Oct 1, 2025
c2b20c6
Update numba_cuda/numba/cuda/cudadrv/nvrtc.py
rparolin Oct 8, 2025
62b8543
Update numba_cuda/numba/cuda/cudadrv/nvrtc.py
rparolin Oct 8, 2025
cd8a058
Update numba_cuda/numba/cuda/cudadrv/nvrtc.py
rparolin Oct 8, 2025
028ec59
merge main
rparolin Oct 8, 2025
8dc1292
removing import guards
rparolin Oct 9, 2025
d88aaa6
adding cuda_bindings and cuda_core as explicit dependencies
rparolin Oct 9, 2025
80ae4b2
updating bindings.rst
rparolin Oct 9, 2025
871763e
updating installation.rst
rparolin Oct 9, 2025
90a1601
removing backwards compat branch
rparolin Oct 9, 2025
c736ba8
updating cuda-pathfinder to official docs url
rparolin Oct 9, 2025
a4e77fd
Removing CUDA_ENABLE_PYNVJITLINK
rparolin Oct 9, 2025
babca28
fixing linter errors
rparolin Oct 9, 2025
cf97828
plumbing through debug and lineinfo parameters
rparolin Oct 9, 2025
83f2807
reformatting
rparolin Oct 9, 2025
c845b58
Revert "reformatting"
rparolin Oct 9, 2025
27a3110
Revert "plumbing through debug and lineinfo parameters"
rparolin Oct 9, 2025
1d56c03
Fixing CI
rparolin Oct 9, 2025
2d7d302
wip
rparolin Oct 9, 2025
0613c32
wip
rparolin Oct 9, 2025
d690461
Revert "wip"
rparolin Oct 9, 2025
d93321e
Reapply "plumbing through debug and lineinfo parameters"
rparolin Oct 10, 2025
49a7911
removing unnecessary unit test
rparolin Oct 10, 2025
dbcbbaa
Update numba_cuda/numba/cuda/cudadrv/driver.py
rparolin Oct 10, 2025
2b102ed
Revert "Reapply "plumbing through debug and lineinfo parameters""
rparolin Oct 10, 2025
f8b4d8c
whitespace for formatter
rparolin Oct 10, 2025
7f98a05
Plumb through debug & lineinfo settings
rparolin Oct 13, 2025
006f7c5
merge main
rparolin Oct 13, 2025
ee2c048
adding , to silence linter
rparolin Oct 13, 2025
6d886d6
merge main
rparolin Oct 13, 2025
92b9be2
Merge branch 'main' into rparolin/always_use_cuda_core
gmarkall Oct 14, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 0 additions & 25 deletions .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,11 @@ jobs:
- compute-matrix
- build-conda
- test-conda
- test-conda-ctypes-binding
- test-simulator
- build-wheels
- build-wheels-windows
- test-wheels-windows
- test-wheels
- test-wheels-ctypes-binding
- test-wheels-deps-wheels
- test-thirdparty
- build-docs
Expand Down Expand Up @@ -80,18 +78,6 @@ jobs:
script: "ci/test_conda.sh"
run_codecov: false
matrix: ${{ needs.compute-matrix.outputs.TEST_MATRIX }}
test-conda-ctypes-binding:
needs:
- build-conda
- compute-matrix
uses: ./.github/workflows/conda-python-tests.yaml
with:
build_type: pull-request
script: "ci/test_conda_ctypes_binding.sh"
run_codecov: false
# This selects "ARCH=amd64 and CUDA >=12, with the latest supported Python for each CUDA major version".
matrix: ${{ needs.compute-matrix.outputs.TEST_MATRIX }}
matrix_filter: map(select(.ARCH == "amd64" and (.CUDA_VER | split(".") | .[0] | tonumber >= 12))) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
test-simulator:
needs:
- build-conda
Expand Down Expand Up @@ -124,17 +110,6 @@ jobs:
build_type: pull-request
script: "ci/test_wheel.sh false"
matrix: ${{ needs.compute-matrix.outputs.TEST_MATRIX }}
test-wheels-ctypes-binding:
needs:
- build-wheels
- compute-matrix
uses: ./.github/workflows/wheels-test.yaml
with:
build_type: pull-request
script: "ci/test_wheel_ctypes_binding.sh"
# This selects "ARCH=amd64 and CUDA >=12, with the latest supported Python for each CUDA major version".
matrix: ${{ needs.compute-matrix.outputs.TEST_MATRIX }}
matrix_filter: map(select(.ARCH == "amd64" and (.CUDA_VER | split(".") | .[0] | tonumber >= 12))) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
test-wheels-deps-wheels:
needs:
- build-wheels
Expand Down
70 changes: 0 additions & 70 deletions ci/test_conda_ctypes_binding.sh

This file was deleted.

37 changes: 0 additions & 37 deletions ci/test_wheel_ctypes_binding.sh

This file was deleted.

9 changes: 4 additions & 5 deletions docs/source/reference/envvars.rst
Original file line number Diff line number Diff line change
Expand Up @@ -103,12 +103,11 @@ target.
Enable warnings if a kernel is launched with host memory which forces a copy to and
from the device. This option is on by default (default value is 1).

.. envvar:: NUMBA_CUDA_USE_NVIDIA_BINDING
.. note::

When set to 1, Numba will attempt to use the `NVIDIA CUDA Python binding
<https://nvidia.github.io/cuda-python/>`_ to make calls to the driver API
instead of using its own ctypes binding. This defaults to 1 (on). Set to
0 to use the ctypes bindings.
Numba-CUDA always uses the NVIDIA CUDA Python bindings. The legacy ctypes
bindings and the ``NUMBA_CUDA_USE_NVIDIA_BINDING`` environment variable have
been removed.

.. envvar:: NUMBA_CUDA_INCLUDE_PATH

Expand Down
35 changes: 12 additions & 23 deletions docs/source/user/bindings.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,25 +5,22 @@
CUDA Bindings
=============

Numba supports two bindings to the CUDA Driver APIs: its own internal bindings
based on ctypes, and the official `NVIDIA CUDA Python bindings
<https://nvidia.github.io/cuda-python/>`_. Functionality is equivalent between
the two bindings.

The internal bindings are used by default. If the NVIDIA bindings are installed,
then they can be used by setting the environment variable
``NUMBA_CUDA_USE_NVIDIA_BINDING`` to ``1`` prior to the import of Numba. Once
Numba has been imported, the selected binding cannot be changed.
Numba-CUDA uses the official `NVIDIA CUDA Python bindings
<https://nvidia.github.io/cuda-python/>`_ for all CUDA Driver interactions.
Numba-CUDA previously provided its own internal ctypes-based bindings; the
public APIs exposing those bindings are kept for compatibility, but if you
need to interact directly with the CUDA Driver or other CUDA libraries we
recommend using the `cuda-python <https://nvidia.github.io/cuda-python/>`_
package directly.


Per-Thread Default Streams
--------------------------

Responsibility for handling Per-Thread Default Streams (PTDS) is delegated to
the NVIDIA bindings when they are in use. To use PTDS with the NVIDIA bindings,
set the environment variable ``CUDA_PYTHON_CUDA_PER_THREAD_DEFAULT_STREAM`` to
``1`` instead of Numba's environmnent variable
:envvar:`NUMBA_CUDA_PER_THREAD_DEFAULT_STREAM`.
the NVIDIA bindings. To use PTDS, set the environment variable
``CUDA_PYTHON_CUDA_PER_THREAD_DEFAULT_STREAM`` to ``1`` instead of Numba's
environment variable :envvar:`NUMBA_CUDA_PER_THREAD_DEFAULT_STREAM`.

.. seealso::

Expand All @@ -35,13 +32,5 @@ set the environment variable ``CUDA_PYTHON_CUDA_PER_THREAD_DEFAULT_STREAM`` to
Roadmap
-------

In Numba 0.56, the NVIDIA Bindings will be used by default, if they are
installed.

In future versions of Numba:

- The internal bindings will be deprecated.
- The internal bindings will be removed.

At present, no specific release is planned for the deprecation or removal of
the internal bindings.
The ctypes-based internal bindings have been removed in favor of the NVIDIA
bindings. Future work focuses on expanding usage of ``cuda.core`` APIs.
32 changes: 8 additions & 24 deletions docs/source/user/installation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -61,14 +61,12 @@ Configuration
CUDA Bindings
-------------

Numba supports interacting with the CUDA Driver API via either the `NVIDIA CUDA
Python bindings <https://nvidia.github.io/cuda-python/>`_ or its own ctypes-based
bindings. Functionality is equivalent between the two binding choices. The
NVIDIA bindings are the default, and the ctypes bindings are now deprecated.

If you do not want to use the NVIDIA bindings, the (deprecated) ctypes bindings
can be enabled by setting the environment variable
:envvar:`NUMBA_CUDA_USE_NVIDIA_BINDING` to ``"0"``.
Numba-CUDA uses the `NVIDIA CUDA Python bindings <https://nvidia.github.io/cuda-python/>`_
for interacting with the CUDA Driver API. Numba-CUDA previously provided its own
internal ctypes-based bindings; the public APIs exposing those bindings are kept
for compatibility, but if you need to interact directly with the CUDA Driver or
other CUDA libraries we recommend using the `cuda-python <https://nvidia.github.io/cuda-python/>`_
package directly.


.. _cudatoolkit-lookup:
Expand All @@ -79,22 +77,8 @@ CUDA Driver and Toolkit search paths
Default behavior
~~~~~~~~~~~~~~~~

When using the NVIDIA bindings, searches for the CUDA driver and toolkit
libraries use its `built-in path-finding logic <https://github.com/NVIDIA/cuda-python/tree/main/cuda_bindings/cuda/bindings/_path_finder>`_.

Ctypes bindings (deprecated) behavior
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

When using the ctypes bindings, Numba searches for a CUDA toolkit installation
in the following order:

1. Conda-installed CUDA Toolkit packages
2. Pip-installed CUDA Toolkit packages
3. The environment variable ``CUDA_HOME``, which points to the directory of the
installed CUDA toolkit (i.e. ``/home/user/cuda-12``)
4. System-wide installation at exactly ``/usr/local/cuda`` on Linux platforms.
Versioned installation paths (i.e. ``/usr/local/cuda-12.0``) are intentionally
ignored. Users can use ``CUDA_HOME`` to select specific versions.
Searches for the CUDA driver and toolkit libraries use the NVIDIA bindings'
`built-in path-finding logic <https://nvidia.github.io/cuda-python/cuda-pathfinder/latest/generated/cuda.pathfinder.load_nvidia_dynamic_lib.html>`_.

In addition to the CUDA toolkit libraries, which can be installed by conda into
an environment or installed system-wide by the `CUDA SDK installer
Expand Down
68 changes: 9 additions & 59 deletions numba_cuda/numba/cuda/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,65 +8,15 @@
import sys


# Enable pynvjitlink based on the following precedence:
# 1. Config setting "CUDA_ENABLE_PYNVJITLINK" (highest priority)
# 2. Environment variable "NUMBA_CUDA_ENABLE_PYNVJITLINK"
# 3. Auto-detection of pynvjitlink module (lowest priority)

pynvjitlink_auto_enabled = False

if getattr(config, "CUDA_ENABLE_PYNVJITLINK", None) is None:
if (
_pynvjitlink_enabled_in_env := _readenv(
"NUMBA_CUDA_ENABLE_PYNVJITLINK", bool, None
)
) is not None:
config.CUDA_ENABLE_PYNVJITLINK = _pynvjitlink_enabled_in_env
else:
pynvjitlink_auto_enabled = (
importlib.util.find_spec("pynvjitlink") is not None
)
config.CUDA_ENABLE_PYNVJITLINK = pynvjitlink_auto_enabled

# Upstream numba sets CUDA_USE_NVIDIA_BINDING to 0 by default, so it always
# exists. Override, but not if explicitly set to 0 in the envioronment.
_nvidia_binding_enabled_in_env = _readenv(
"NUMBA_CUDA_USE_NVIDIA_BINDING", bool, None
)
if _nvidia_binding_enabled_in_env is False:
USE_NV_BINDING = False
else:
USE_NV_BINDING = True
config.CUDA_USE_NVIDIA_BINDING = USE_NV_BINDING
if config.CUDA_USE_NVIDIA_BINDING:
if not (
importlib.util.find_spec("cuda")
and importlib.util.find_spec("cuda.bindings")
):
raise ImportError(
"CUDA bindings not found. Please pip install the "
"cuda-bindings package. Alternatively, install "
"numba-cuda[cuXY], where XY is the required CUDA "
"version, to install the binding automatically. "
"If no CUDA bindings are desired, set the env var "
"NUMBA_CUDA_USE_NVIDIA_BINDING=0 to enable ctypes "
"bindings."
)

if config.CUDA_ENABLE_PYNVJITLINK:
if USE_NV_BINDING and not pynvjitlink_auto_enabled:
warnings.warn(
"Explicitly enabling pynvjitlink is no longer necessary. "
"NVIDIA bindings are enabled. cuda.core will be used "
"in place of pynvjitlink."
)
elif pynvjitlink_auto_enabled:
# Ignore the fact that pynvjitlink is enabled, because that was an
# automatic decision based on discovering pynvjitlink was present; the
# user didn't ask for it
pass
else:
raise RuntimeError("nvJitLink requires the NVIDIA CUDA bindings. ")
# Require NVIDIA CUDA bindings at import time
if not (
importlib.util.find_spec("cuda")
and importlib.util.find_spec("cuda.bindings")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should be an impossible code path without actively trying to create a broken installation, so we can remove these checks.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will change this in a follow-up.

):
raise ImportError(
"NVIDIA CUDA Python bindings not found. Install the 'cuda' package "
"(e.g. pip install nvidia-cuda-python or numba-cuda[cuXY])."
)

if config.ENABLE_CUDASIM:
from .simulator_init import *
Expand Down
6 changes: 1 addition & 5 deletions numba_cuda/numba/cuda/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
import contextlib
import os

from numba.cuda.cudadrv import drvapi
import numpy as np

from .cudadrv import devicearray, devices, driver
Expand Down Expand Up @@ -48,10 +47,7 @@ def from_cuda_array_interface(desc, owner=None, sync=True):
)
size = driver.memory_size_from_info(shape, strides, dtype.itemsize)

if config.CUDA_USE_NVIDIA_BINDING:
cudevptr_class = driver.binding.CUdeviceptr
else:
cudevptr_class = drvapi.cu_device_ptr
cudevptr_class = driver.binding.CUdeviceptr
devptr = cudevptr_class(desc["data"][0])
data = driver.MemoryPointer(
current_context(), devptr, size=size, owner=owner
Expand Down
5 changes: 1 addition & 4 deletions numba_cuda/numba/cuda/codegen.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,7 @@ def run_nvdisasm(cubin, flags):
try:
fd, fname = tempfile.mkstemp()
with open(fname, "wb") as f:
if config.CUDA_USE_NVIDIA_BINDING:
f.write(cubin.code)
else:
f.write(cubin)
f.write(cubin.code)

try:
cp = subprocess.run(
Expand Down
Loading