diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index b2b5a2a6..b0434017 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -19,13 +19,11 @@ jobs: - compute-matrix - build-conda - test-conda - - test-conda-ctypes-binding - test-simulator - build-wheels - build-wheels-windows - test-wheels-windows - test-wheels - - test-wheels-ctypes-binding - test-wheels-deps-wheels - test-thirdparty - build-docs @@ -80,18 +78,6 @@ jobs: script: "ci/test_conda.sh" run_codecov: false matrix: ${{ needs.compute-matrix.outputs.TEST_MATRIX }} - test-conda-ctypes-binding: - needs: - - build-conda - - compute-matrix - uses: ./.github/workflows/conda-python-tests.yaml - with: - build_type: pull-request - script: "ci/test_conda_ctypes_binding.sh" - run_codecov: false - # This selects "ARCH=amd64 and CUDA >=12, with the latest supported Python for each CUDA major version". - matrix: ${{ needs.compute-matrix.outputs.TEST_MATRIX }} - matrix_filter: map(select(.ARCH == "amd64" and (.CUDA_VER | split(".") | .[0] | tonumber >= 12))) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) test-simulator: needs: - build-conda @@ -124,17 +110,6 @@ jobs: build_type: pull-request script: "ci/test_wheel.sh false" matrix: ${{ needs.compute-matrix.outputs.TEST_MATRIX }} - test-wheels-ctypes-binding: - needs: - - build-wheels - - compute-matrix - uses: ./.github/workflows/wheels-test.yaml - with: - build_type: pull-request - script: "ci/test_wheel_ctypes_binding.sh" - # This selects "ARCH=amd64 and CUDA >=12, with the latest supported Python for each CUDA major version". - matrix: ${{ needs.compute-matrix.outputs.TEST_MATRIX }} - matrix_filter: map(select(.ARCH == "amd64" and (.CUDA_VER | split(".") | .[0] | tonumber >= 12))) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) test-wheels-deps-wheels: needs: - build-wheels diff --git a/ci/test_conda_ctypes_binding.sh b/ci/test_conda_ctypes_binding.sh deleted file mode 100755 index eaedde07..00000000 --- a/ci/test_conda_ctypes_binding.sh +++ /dev/null @@ -1,70 +0,0 @@ -#!/bin/bash -# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: BSD-2-Clause - -set -euo pipefail - -. /opt/conda/etc/profile.d/conda.sh - -CTK_PACKAGE_DEPENDENCIES=( - "cuda-nvcc-impl" - "cuda-nvrtc" - "cuda-cuobjdump" - "libcurand-dev" -) - -rapids-logger "Install testing dependencies" -# TODO: Replace with rapids-dependency-file-generator -DEPENDENCIES=( - "c-compiler" - "cxx-compiler" - "${CTK_PACKAGE_DEPENDENCIES[@]}" - "cuda-python" - "cuda-version=${CUDA_VER%.*}" - "make" - "psutil" - "pytest" - "pytest-xdist" - "cffi" - "ml_dtypes" - "python=${RAPIDS_PY_VERSION}" - "numba-cuda" -) -rapids-mamba-retry create \ - -n test \ - --strict-channel-priority \ - --channel "`pwd`/conda-repo" \ - --channel conda-forge \ - "${DEPENDENCIES[@]}" - -# Temporarily allow unbound variables for conda activation. 
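Note on the `matrix_filter` expression removed from both jobs above: the jq program keeps only amd64 entries with CUDA 12 or newer, then picks, for each CUDA major version, the entry with the newest supported Python. A rough Python equivalent, for illustration only — the `ARCH`/`CUDA_VER`/`PY_VER` entry shape is inferred from the filter itself, and this sketch is not part of the change:

```python
# Illustrative re-implementation of the removed jq matrix_filter.
from itertools import groupby


def _ver(s):
    # "12.8.0" -> (12, 8, 0)
    return tuple(int(p) for p in s.split("."))


def filter_matrix(entries):
    # map(select(.ARCH == "amd64" and CUDA major >= 12))
    kept = [
        e for e in entries
        if e["ARCH"] == "amd64" and _ver(e["CUDA_VER"])[0] >= 12
    ]
    # group_by(CUDA major); jq's group_by sorts by the grouping key
    kept.sort(key=lambda e: _ver(e["CUDA_VER"])[0])
    # max_by([PY_VER, CUDA_VER]) within each CUDA major group
    return [
        max(grp, key=lambda e: (_ver(e["PY_VER"]), _ver(e["CUDA_VER"])))
        for _, grp in groupby(kept, key=lambda e: _ver(e["CUDA_VER"])[0])
    ]
```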
-set +u -conda activate test -set -u - -pip install filecheck - -rapids-print-env - -rapids-logger "Check GPU usage" -nvidia-smi - -rapids-logger "Build test binaries" -export NUMBA_CUDA_TEST_BIN_DIR=`pwd`/testing -pushd $NUMBA_CUDA_TEST_BIN_DIR -make -j $(nproc) - -rapids-logger "Show Numba system info" -python -m numba --sysinfo - -EXITCODE=0 -trap "EXITCODE=1" ERR -set +e - -rapids-logger "Run Tests" -NUMBA_CUDA_USE_NVIDIA_BINDING=0 NUMBA_CUDA_TEST_BIN_DIR=$NUMBA_CUDA_TEST_BIN_DIR pytest -v - -popd - -rapids-logger "Test script exiting with value: $EXITCODE" -exit ${EXITCODE} diff --git a/ci/test_wheel_ctypes_binding.sh b/ci/test_wheel_ctypes_binding.sh deleted file mode 100755 index e6795c8a..00000000 --- a/ci/test_wheel_ctypes_binding.sh +++ /dev/null @@ -1,37 +0,0 @@ -#!/bin/bash -# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: BSD-2-Clause - -set -euo pipefail - -CUDA_VER_MAJOR=${CUDA_VER%.*.*} - -rapids-logger "Install wheel with testing dependencies" -package=$(realpath wheel/numba_cuda*.whl) -echo "Package path: $package" -python -m pip install \ - "${package}" \ - cuda-python \ - --group test - -# FIXME: Find a way to build the tests that does not depend on the CUDA Python bindings -#rapids-logger "Build tests" -rapids-logger "Copy and cd into test binaries dir" -export NUMBA_CUDA_TEST_BIN_DIR=`pwd`/testing -pushd $NUMBA_CUDA_TEST_BIN_DIR -# make -j $(nproc) - -# Prevent the testsuite trying to use the test binaries -unset NUMBA_CUDA_TEST_BIN_DIR - -rapids-logger "Check GPU usage" -nvidia-smi - -rapids-logger "Show Numba system info" -NUMBA_CUDA_USE_NVIDIA_BINDING=0 python -m numba --sysinfo - -rapids-logger "Run Tests" -# NUMBA_CUDA_USE_NVIDIA_BINDING=0 NUMBA_CUDA_TEST_BIN_DIR=$NUMBA_CUDA_TEST_BIN_DIR python -m pytest --pyargs numba.cuda.tests -v -NUMBA_CUDA_USE_NVIDIA_BINDING=0 python -m pytest -v - -popd diff --git a/docs/source/reference/envvars.rst b/docs/source/reference/envvars.rst index 1076764e..1bcee9a7 100644 --- a/docs/source/reference/envvars.rst +++ b/docs/source/reference/envvars.rst @@ -103,12 +103,11 @@ target. Enable warnings if a kernel is launched with host memory which forces a copy to and from the device. This option is on by default (default value is 1). -.. envvar:: NUMBA_CUDA_USE_NVIDIA_BINDING +.. note:: - When set to 1, Numba will attempt to use the `NVIDIA CUDA Python binding - `_ to make calls to the driver API - instead of using its own ctypes binding. This defaults to 1 (on). Set to - 0 to use the ctypes bindings. + Numba-CUDA always uses the NVIDIA CUDA Python bindings. The legacy ctypes + bindings and the ``NUMBA_CUDA_USE_NVIDIA_BINDING`` environment variable have + been removed. .. envvar:: NUMBA_CUDA_INCLUDE_PATH diff --git a/docs/source/user/bindings.rst b/docs/source/user/bindings.rst index d675e4a2..244c0f08 100644 --- a/docs/source/user/bindings.rst +++ b/docs/source/user/bindings.rst @@ -5,25 +5,22 @@ CUDA Bindings ============= -Numba supports two bindings to the CUDA Driver APIs: its own internal bindings -based on ctypes, and the official `NVIDIA CUDA Python bindings -`_. Functionality is equivalent between -the two bindings. - -The internal bindings are used by default. If the NVIDIA bindings are installed, -then they can be used by setting the environment variable -``NUMBA_CUDA_USE_NVIDIA_BINDING`` to ``1`` prior to the import of Numba. Once -Numba has been imported, the selected binding cannot be changed. 
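Because ``NUMBA_CUDA_USE_NVIDIA_BINDING`` is now silently ignored rather than honored (see the envvars note above), downstream code that still sets it gets no feedback. A minimal, hypothetical migration guard a downstream project could add — this is not part of the change itself:

```python
import os
import warnings

# numba-cuda no longer reads this variable; the ctypes bindings are gone.
if os.environ.get("NUMBA_CUDA_USE_NVIDIA_BINDING") == "0":
    warnings.warn(
        "NUMBA_CUDA_USE_NVIDIA_BINDING is ignored: numba-cuda always uses "
        "the NVIDIA CUDA Python bindings."
    )
```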
+Numba-CUDA uses the official `NVIDIA CUDA Python bindings +`_ for all CUDA Driver interactions. +Numba-CUDA previously provided its own internal ctypes-based bindings; the +public APIs exposing those bindings are kept for compatibility, but if you +need to interact directly with the CUDA Driver or other CUDA libraries we +recommend using the `cuda-python `_ +package directly. Per-Thread Default Streams -------------------------- Responsibility for handling Per-Thread Default Streams (PTDS) is delegated to -the NVIDIA bindings when they are in use. To use PTDS with the NVIDIA bindings, -set the environment variable ``CUDA_PYTHON_CUDA_PER_THREAD_DEFAULT_STREAM`` to -``1`` instead of Numba's environmnent variable -:envvar:`NUMBA_CUDA_PER_THREAD_DEFAULT_STREAM`. +the NVIDIA bindings. To use PTDS, set the environment variable +``CUDA_PYTHON_CUDA_PER_THREAD_DEFAULT_STREAM`` to ``1`` instead of Numba's +environment variable :envvar:`NUMBA_CUDA_PER_THREAD_DEFAULT_STREAM`. .. seealso:: @@ -35,13 +32,5 @@ set the environment variable ``CUDA_PYTHON_CUDA_PER_THREAD_DEFAULT_STREAM`` to Roadmap ------- -In Numba 0.56, the NVIDIA Bindings will be used by default, if they are -installed. - -In future versions of Numba: - -- The internal bindings will be deprecated. -- The internal bindings will be removed. - -At present, no specific release is planned for the deprecation or removal of -the internal bindings. +The ctypes-based internal bindings have been removed in favor of the NVIDIA +bindings. Future work focuses on expanding usage of ``cuda.core`` APIs. diff --git a/docs/source/user/installation.rst b/docs/source/user/installation.rst index 8acfb775..82b3cd21 100644 --- a/docs/source/user/installation.rst +++ b/docs/source/user/installation.rst @@ -61,14 +61,12 @@ Configuration CUDA Bindings ------------- -Numba supports interacting with the CUDA Driver API via either the `NVIDIA CUDA -Python bindings `_ or its own ctypes-based -bindings. Functionality is equivalent between the two binding choices. The -NVIDIA bindings are the default, and the ctypes bindings are now deprecated. - -If you do not want to use the NVIDIA bindings, the (deprecated) ctypes bindings -can be enabled by setting the environment variable -:envvar:`NUMBA_CUDA_USE_NVIDIA_BINDING` to ``"0"``. +Numba-CUDA uses the `NVIDIA CUDA Python bindings `_ +for interacting with the CUDA Driver API. Numba-CUDA previously provided its own +internal ctypes-based bindings; the public APIs exposing those bindings are kept +for compatibility, but if you need to interact directly with the CUDA Driver or +other CUDA libraries we recommend using the `cuda-python `_ +package directly. .. _cudatoolkit-lookup: @@ -79,22 +77,8 @@ CUDA Driver and Toolkit search paths Default behavior ~~~~~~~~~~~~~~~~ -When using the NVIDIA bindings, searches for the CUDA driver and toolkit -libraries use its `built-in path-finding logic `_. - -Ctypes bindings (deprecated) behavior -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -When using the ctypes bindings, Numba searches for a CUDA toolkit installation -in the following order: - -1. Conda-installed CUDA Toolkit packages -2. Pip-installed CUDA Toolkit packages -3. The environment variable ``CUDA_HOME``, which points to the directory of the - installed CUDA toolkit (i.e. ``/home/user/cuda-12``) -4. System-wide installation at exactly ``/usr/local/cuda`` on Linux platforms. - Versioned installation paths (i.e. ``/usr/local/cuda-12.0``) are intentionally - ignored. Users can use ``CUDA_HOME`` to select specific versions. 
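To make the PTDS guidance above concrete, a minimal usage sketch; the only assumption beyond the documented variable is that it must be set before the bindings are first imported:

```python
import os

# Set before importing numba.cuda so the NVIDIA bindings pick it up.
os.environ["CUDA_PYTHON_CUDA_PER_THREAD_DEFAULT_STREAM"] = "1"

from numba import cuda  # noqa: E402


@cuda.jit
def axpy(r, a, x, y):
    i = cuda.grid(1)
    if i < r.size:
        r[i] = a * x[i] + y[i]
```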
+The CUDA driver and toolkit libraries are located using the NVIDIA bindings'
+`built-in path-finding logic `_.

 In addition to the CUDA toolkit libraries, which can be installed by conda
 into an environment or installed system-wide by the `CUDA SDK installer
diff --git a/numba_cuda/numba/cuda/__init__.py b/numba_cuda/numba/cuda/__init__.py
index a27c8fbb..9362985c 100644
--- a/numba_cuda/numba/cuda/__init__.py
+++ b/numba_cuda/numba/cuda/__init__.py
@@ -8,65 +8,15 @@
 import sys

-# Enable pynvjitlink based on the following precedence:
-# 1. Config setting "CUDA_ENABLE_PYNVJITLINK" (highest priority)
-# 2. Environment variable "NUMBA_CUDA_ENABLE_PYNVJITLINK"
-# 3. Auto-detection of pynvjitlink module (lowest priority)
-
-pynvjitlink_auto_enabled = False
-
-if getattr(config, "CUDA_ENABLE_PYNVJITLINK", None) is None:
-    if (
-        _pynvjitlink_enabled_in_env := _readenv(
-            "NUMBA_CUDA_ENABLE_PYNVJITLINK", bool, None
-        )
-    ) is not None:
-        config.CUDA_ENABLE_PYNVJITLINK = _pynvjitlink_enabled_in_env
-    else:
-        pynvjitlink_auto_enabled = (
-            importlib.util.find_spec("pynvjitlink") is not None
-        )
-        config.CUDA_ENABLE_PYNVJITLINK = pynvjitlink_auto_enabled
-
-# Upstream numba sets CUDA_USE_NVIDIA_BINDING to 0 by default, so it always
-# exists. Override, but not if explicitly set to 0 in the envioronment.
-_nvidia_binding_enabled_in_env = _readenv(
-    "NUMBA_CUDA_USE_NVIDIA_BINDING", bool, None
-)
-if _nvidia_binding_enabled_in_env is False:
-    USE_NV_BINDING = False
-else:
-    USE_NV_BINDING = True
-    config.CUDA_USE_NVIDIA_BINDING = USE_NV_BINDING
-if config.CUDA_USE_NVIDIA_BINDING:
-    if not (
-        importlib.util.find_spec("cuda")
-        and importlib.util.find_spec("cuda.bindings")
-    ):
-        raise ImportError(
-            "CUDA bindings not found. Please pip install the "
-            "cuda-bindings package. Alternatively, install "
-            "numba-cuda[cuXY], where XY is the required CUDA "
-            "version, to install the binding automatically. "
-            "If no CUDA bindings are desired, set the env var "
-            "NUMBA_CUDA_USE_NVIDIA_BINDING=0 to enable ctypes "
-            "bindings."
-        )
-
-if config.CUDA_ENABLE_PYNVJITLINK:
-    if USE_NV_BINDING and not pynvjitlink_auto_enabled:
-        warnings.warn(
-            "Explicitly enabling pynvjitlink is no longer necessary. "
-            "NVIDIA bindings are enabled. cuda.core will be used "
-            "in place of pynvjitlink."
-        )
-    elif pynvjitlink_auto_enabled:
-        # Ignore the fact that pynvjitlink is enabled, because that was an
-        # automatic decision based on discovering pynvjitlink was present; the
-        # user didn't ask for it
-        pass
-    else:
-        raise RuntimeError("nvJitLink requires the NVIDIA CUDA bindings. ")
+# Require NVIDIA CUDA bindings at import time
+if not (
+    importlib.util.find_spec("cuda")
+    and importlib.util.find_spec("cuda.bindings")
+):
+    raise ImportError(
+        "NVIDIA CUDA Python bindings not found. Install the cuda-bindings "
+        "package (e.g. pip install cuda-bindings or numba-cuda[cuXY])."
+ ) if config.ENABLE_CUDASIM: from .simulator_init import * diff --git a/numba_cuda/numba/cuda/api.py b/numba_cuda/numba/cuda/api.py index 9fe8cd7e..dbb2d94c 100644 --- a/numba_cuda/numba/cuda/api.py +++ b/numba_cuda/numba/cuda/api.py @@ -8,7 +8,6 @@ import contextlib import os -from numba.cuda.cudadrv import drvapi import numpy as np from .cudadrv import devicearray, devices, driver @@ -48,10 +47,7 @@ def from_cuda_array_interface(desc, owner=None, sync=True): ) size = driver.memory_size_from_info(shape, strides, dtype.itemsize) - if config.CUDA_USE_NVIDIA_BINDING: - cudevptr_class = driver.binding.CUdeviceptr - else: - cudevptr_class = drvapi.cu_device_ptr + cudevptr_class = driver.binding.CUdeviceptr devptr = cudevptr_class(desc["data"][0]) data = driver.MemoryPointer( current_context(), devptr, size=size, owner=owner diff --git a/numba_cuda/numba/cuda/codegen.py b/numba_cuda/numba/cuda/codegen.py index c40253c1..3ad26b36 100644 --- a/numba_cuda/numba/cuda/codegen.py +++ b/numba_cuda/numba/cuda/codegen.py @@ -26,10 +26,7 @@ def run_nvdisasm(cubin, flags): try: fd, fname = tempfile.mkstemp() with open(fname, "wb") as f: - if config.CUDA_USE_NVIDIA_BINDING: - f.write(cubin.code) - else: - f.write(cubin) + f.write(cubin.code) try: cp = subprocess.run( diff --git a/numba_cuda/numba/cuda/cudadrv/driver.py b/numba_cuda/numba/cuda/cudadrv/driver.py index d0e863be..1337d77e 100644 --- a/numba_cuda/numba/cuda/cudadrv/driver.py +++ b/numba_cuda/numba/cuda/cudadrv/driver.py @@ -37,8 +37,6 @@ c_char_p, addressof, c_void_p, - c_float, - c_uint, c_uint8, ) import contextlib @@ -52,26 +50,27 @@ from numba.cuda import utils, serialize from .error import CudaSupportError, CudaDriverError from .drvapi import API_PROTOTYPES -from .drvapi import cu_occupancy_b2d_size, cu_stream_callback_pyobj, cu_uuid +from .drvapi import cu_occupancy_b2d_size, cu_stream_callback_pyobj from .mappings import FILE_EXTENSION_MAP from .linkable_code import LinkableCode, LTOIR, Fatbin, Object from numba.cuda.utils import cached_file_read from numba.cuda.cudadrv import enums, drvapi, nvrtc -USE_NV_BINDING = config.CUDA_USE_NVIDIA_BINDING +from cuda.bindings import driver as binding +from cuda.core.experimental import ( + Linker, + LinkerOptions, + ObjectCode, +) -if USE_NV_BINDING: - from cuda.bindings import driver as binding - from cuda.core.experimental import ( - Linker, - LinkerOptions, - ObjectCode, - ) +# For backwards compatibility: indicate that the NVIDIA CUDA Python bindings are +# in use. Older code checks this flag to branch on binding-specific behavior. +USE_NV_BINDING = True - # There is no definition of the default stream in the Nvidia bindings (nor - # is there at the C/C++ level), so we define it here so we don't need to - # use a magic number 0 in places where we want the default stream. - CU_STREAM_DEFAULT = 0 +# There is no definition of the default stream in the Nvidia bindings (nor +# is there at the C/C++ level), so we define it here so we don't need to +# use a magic number 0 in places where we want the default stream. +CU_STREAM_DEFAULT = 0 MIN_REQUIRED_CC = (3, 5) @@ -83,16 +82,6 @@ _py_decref.argtypes = [ctypes.py_object] _py_incref.argtypes = [ctypes.py_object] -USE_NV_BINDING = config.CUDA_USE_NVIDIA_BINDING - -if USE_NV_BINDING: - from cuda.bindings import driver as binding - - # There is no definition of the default stream in the Nvidia bindings (nor - # is there at the C/C++ level), so we define it here so we don't need to - # use a magic number 0 in places where we want the default stream. 
- CU_STREAM_DEFAULT = 0 - def make_logger(): logger = logging.getLogger(__name__) @@ -120,20 +109,27 @@ def make_logger(): @functools.cache def _have_nvjitlink(): - if not USE_NV_BINDING: - return False try: from cuda.bindings._internal import nvjitlink as nvjitlink_internal from cuda.bindings._internal.utils import NotSupportedError except ImportError: return False + try: - return ( + if ( nvjitlink_internal._inspect_function_pointer("__nvJitLinkVersion") - != 0 - ) + == 0 + ): + return False + try: + from cuda.bindings import nvjitlink + + if nvjitlink.version() < (12, 3): + return False + except Exception: + return False + return True except (RuntimeError, NotSupportedError): - # no driver return False @@ -312,10 +308,7 @@ def __getattr__(self, fname): "Error at driver init: \n%s:" % self.initialization_error ) - if USE_NV_BINDING: - return self._cuda_python_wrap_fn(fname) - else: - return self._ctypes_wrap_fn(fname) + return self._cuda_python_wrap_fn(fname) def _ctypes_wrap_fn(self, fname, libfn=None): # Wrap a CUDA driver function by default @@ -375,12 +368,8 @@ def safe_cuda_api_call(*args): def _find_api(self, fname): # We use alternatively-named functions for PTDS with the Numba ctypes - # binding. For the NVidia binding, it handles linking to the correct - # variant. - if config.CUDA_PER_THREAD_DEFAULT_STREAM and not USE_NV_BINDING: - variants = ("_v2_ptds", "_v2_ptsz", "_ptds", "_ptsz", "_v2", "") - else: - variants = ("_v2", "") + # binding. It handles linking to the correct variant. + variants = ("_v2", "") if fname in ("cuCtxGetDevice", "cuCtxSynchronize"): return getattr(self.lib, fname) @@ -437,12 +426,7 @@ def get_device(self, devnum=0): return weakref.proxy(dev) def get_device_count(self): - if USE_NV_BINDING: - return self.cuDeviceGetCount() - - count = c_int() - self.cuDeviceGetCount(byref(count)) - return count.value + return self.cuDeviceGetCount() def list_devices(self): """Returns a list of active devices""" @@ -459,11 +443,7 @@ def pop_active_context(self): """ with self.get_active_context() as ac: if ac.devnum is not None: - if USE_NV_BINDING: - popped = drvapi.cu_context(int(driver.cuCtxPopCurrent())) - else: - popped = drvapi.cu_context() - driver.cuCtxPopCurrent(byref(popped)) + popped = drvapi.cu_context(int(driver.cuCtxPopCurrent())) return popped def get_active_context(self): @@ -474,14 +454,8 @@ def get_version(self): """ Returns the CUDA Driver version as a tuple (major, minor). """ - if USE_NV_BINDING: - version = driver.cuDriverGetVersion() - else: - dv = ctypes.c_int(0) - driver.cuDriverGetVersion(ctypes.byref(dv)) - version = dv.value - # The version is encoded as (1000 * major) + (10 * minor) + version = driver.cuDriverGetVersion() major = version // 1000 minor = (version - (major * 1000)) // 10 return (major, minor) @@ -504,26 +478,16 @@ def __enter__(self): hctx, devnum = self._tls_cache.ctx_devnum # Not cached. Query the driver API. 
else: - if USE_NV_BINDING: - hctx = driver.cuCtxGetCurrent() - if int(hctx) == 0: - hctx = None - else: - hctx = drvapi.cu_context(int(hctx)) + hctx = driver.cuCtxGetCurrent() + if int(hctx) == 0: + hctx = None else: - hctx = drvapi.cu_context(0) - driver.cuCtxGetCurrent(byref(hctx)) - hctx = hctx if hctx.value else None + hctx = drvapi.cu_context(int(hctx)) if hctx is None: devnum = None else: - if USE_NV_BINDING: - devnum = int(driver.cuCtxGetDevice()) - else: - hdevice = drvapi.cu_device() - driver.cuCtxGetDevice(byref(hdevice)) - devnum = hdevice.value + devnum = int(driver.cuCtxGetDevice()) self._tls_cache.ctx_devnum = (hctx, devnum) is_top = True @@ -582,15 +546,9 @@ def from_identity(self, identity): raise RuntimeError(errmsg) def __init__(self, devnum): - if USE_NV_BINDING: - result = driver.cuDeviceGet(devnum) - self.id = result - got_devnum = int(result) - else: - result = c_int() - driver.cuDeviceGet(byref(result), devnum) - got_devnum = result.value - self.id = got_devnum + result = driver.cuDeviceGet(devnum) + self.id = result + got_devnum = int(result) msg = f"Driver returned device {got_devnum} instead of {devnum}" if devnum != got_devnum: @@ -606,25 +564,14 @@ def __init__(self, devnum): # Read name bufsz = 128 - - if USE_NV_BINDING: - buf = driver.cuDeviceGetName(bufsz, self.id) - name = buf.split(b"\x00")[0] - else: - buf = (c_char * bufsz)() - driver.cuDeviceGetName(buf, bufsz, self.id) - name = buf.value + buf = driver.cuDeviceGetName(bufsz, self.id) + name = buf.split(b"\x00")[0] self.name = name # Read UUID - if USE_NV_BINDING: - uuid = driver.cuDeviceGetUuid(self.id) - uuid_vals = tuple(uuid.bytes) - else: - uuid = cu_uuid() - driver.cuDeviceGetUuid(byref(uuid), self.id) - uuid_vals = tuple(bytes(uuid)) + uuid = driver.cuDeviceGetUuid(self.id) + uuid_vals = tuple(uuid.bytes) b = "%02x" b2 = b * 2 @@ -647,20 +594,10 @@ def __repr__(self): def __getattr__(self, attr): """Read attributes lazily""" - if USE_NV_BINDING: - code = getattr( - binding.CUdevice_attribute, f"CU_DEVICE_ATTRIBUTE_{attr}" - ) - value = driver.cuDeviceGetAttribute(code, self.id) - else: - try: - code = DEVICE_ATTRIBUTES[attr] - except KeyError: - raise AttributeError(attr) - - result = c_int() - driver.cuDeviceGetAttribute(byref(result), code, self.id) - value = result.value + code = getattr( + binding.CUdevice_attribute, f"CU_DEVICE_ATTRIBUTE_{attr}" + ) + value = driver.cuDeviceGetAttribute(code, self.id) setattr(self, attr, value) return value @@ -686,12 +623,8 @@ def get_primary_context(self): met_requirement_for_device(self) # create primary context - if USE_NV_BINDING: - hctx = driver.cuDevicePrimaryCtxRetain(self.id) - hctx = drvapi.cu_context(int(hctx)) - else: - hctx = drvapi.cu_context() - driver.cuDevicePrimaryCtxRetain(byref(hctx), self.id) + hctx = driver.cuDevicePrimaryCtxRetain(self.id) + hctx = drvapi.cu_context(int(hctx)) ctx = Context(weakref.proxy(self), hctx) self.primary_context = ctx @@ -879,11 +812,7 @@ def _attempt_allocation(self, allocator): return allocator() except CudaAPIError as e: # is out-of-memory? 
- if USE_NV_BINDING: - oom_code = binding.CUresult.CUDA_ERROR_OUT_OF_MEMORY - else: - oom_code = enums.CUDA_ERROR_OUT_OF_MEMORY - + oom_code = binding.CUresult.CUDA_ERROR_OUT_OF_MEMORY if e.code == oom_code: # clear pending deallocations self.deallocations.clear() @@ -906,29 +835,15 @@ def memhostalloc(self, size, mapped=False, portable=False, wc=False): if wc: flags |= enums.CU_MEMHOSTALLOC_WRITECOMBINED - if USE_NV_BINDING: - - def allocator(): - return driver.cuMemHostAlloc(size, flags) - - if mapped: - pointer = self._attempt_allocation(allocator) - else: - pointer = allocator() + def allocator(): + return driver.cuMemHostAlloc(size, flags) - alloc_key = pointer + if mapped: + pointer = self._attempt_allocation(allocator) else: - pointer = c_void_p() + pointer = allocator() - def allocator(): - driver.cuMemHostAlloc(byref(pointer), size, flags) - - if mapped: - self._attempt_allocation(allocator) - else: - allocator() - - alloc_key = pointer.value + alloc_key = pointer finalizer = _hostalloc_finalizer(self, pointer, alloc_key, size, mapped) ctx = weakref.proxy(self.context) @@ -946,13 +861,7 @@ def mempin(self, owner, pointer, size, mapped=False): It is recommended that this method is not overridden by EMM Plugin implementations - instead, use the :class:`BaseCUDAMemoryManager`. """ - if isinstance(pointer, int) and not USE_NV_BINDING: - pointer = c_void_p(pointer) - - if USE_NV_BINDING: - alloc_key = pointer - else: - alloc_key = pointer.value + alloc_key = pointer # possible flags are "portable" (between context) # and "device-map" (map host memory to device thus no need @@ -985,37 +894,19 @@ def allocator(): ) def memallocmanaged(self, size, attach_global): - if USE_NV_BINDING: - - def allocator(): - ma_flags = binding.CUmemAttach_flags - - if attach_global: - flags = ma_flags.CU_MEM_ATTACH_GLOBAL.value - else: - flags = ma_flags.CU_MEM_ATTACH_HOST.value - - return driver.cuMemAllocManaged(size, flags) - - ptr = self._attempt_allocation(allocator) - - alloc_key = ptr - - else: - ptr = drvapi.cu_device_ptr() + def allocator(): + ma_flags = binding.CUmemAttach_flags - def allocator(): - flags = c_uint() - if attach_global: - flags = enums.CU_MEM_ATTACH_GLOBAL - else: - flags = enums.CU_MEM_ATTACH_HOST + if attach_global: + flags = ma_flags.CU_MEM_ATTACH_GLOBAL.value + else: + flags = ma_flags.CU_MEM_ATTACH_HOST.value - driver.cuMemAllocManaged(byref(ptr), size, flags) + return driver.cuMemAllocManaged(size, flags) - self._attempt_allocation(allocator) + ptr = self._attempt_allocation(allocator) - alloc_key = ptr.value + alloc_key = ptr finalizer = _alloc_finalizer(self, ptr, alloc_key, size) ctx = weakref.proxy(self.context) @@ -1055,13 +946,8 @@ def get_ipc_handle(self, memory): populated with the underlying ``ipc_mem_handle``. 
""" base, end = device_extents(memory) - if USE_NV_BINDING: - ipchandle = driver.cuIpcGetMemHandle(base) - offset = int(memory.handle) - int(base) - else: - ipchandle = drvapi.cu_ipc_mem_handle() - driver.cuIpcGetMemHandle(byref(ipchandle), base) - offset = memory.handle.value - base + ipchandle = driver.cuIpcGetMemHandle(base) + offset = int(memory.handle) - int(base) source_info = self.context.device.get_device_identity() return IpcHandle( @@ -1080,21 +966,11 @@ def initialize(self): self.deallocations.memory_capacity = self.get_memory_info().total def memalloc(self, size): - if USE_NV_BINDING: - - def allocator(): - return driver.cuMemAlloc(size) - - ptr = self._attempt_allocation(allocator) - alloc_key = ptr - else: - ptr = drvapi.cu_device_ptr() - - def allocator(): - driver.cuMemAlloc(byref(ptr), size) + def allocator(): + return driver.cuMemAlloc(size) - self._attempt_allocation(allocator) - alloc_key = ptr.value + ptr = self._attempt_allocation(allocator) + alloc_key = ptr finalizer = _alloc_finalizer(self, ptr, alloc_key, size) ctx = weakref.proxy(self.context) @@ -1103,15 +979,7 @@ def allocator(): return mem.own() def get_memory_info(self): - if USE_NV_BINDING: - free, total = driver.cuMemGetInfo() - else: - free = c_size_t() - total = c_size_t() - driver.cuMemGetInfo(byref(free), byref(total)) - free = free.value - total = total.value - + free, total = driver.cuMemGetInfo() return MemoryInfo(free=free, total=total) @property @@ -1309,10 +1177,7 @@ def get_active_blocks_per_multiprocessor( :param memsize: per-block dynamic shared memory usage intended, in bytes """ args = (func, blocksize, memsize, flags) - if USE_NV_BINDING: - return self._cuda_python_active_blocks_per_multiprocessor(*args) - else: - return self._ctypes_active_blocks_per_multiprocessor(*args) + return self._cuda_python_active_blocks_per_multiprocessor(*args) def _cuda_python_active_blocks_per_multiprocessor( self, func, blocksize, memsize, flags @@ -1352,10 +1217,7 @@ def get_max_potential_block_size( handle """ args = (func, b2d_func, memsize, blocksizelimit, flags) - if USE_NV_BINDING: - return self._cuda_python_max_potential_block_size(*args) - else: - return self._ctypes_max_potential_block_size(*args) + return self._cuda_python_max_potential_block_size(*args) def _ctypes_max_potential_block_size( self, func, b2d_func, memsize, blocksizelimit, flags @@ -1404,10 +1266,7 @@ def push(self): """ Pushes this context on the current CPU Thread. """ - if USE_NV_BINDING: - driver.cuCtxPushCurrent(self.handle.value) - else: - driver.cuCtxPushCurrent(self.handle) + driver.cuCtxPushCurrent(self.handle.value) self.prepare_for_use() def pop(self): @@ -1443,11 +1302,7 @@ def get_ipc_handle(self, memory): def open_ipc_handle(self, handle, size): # open the IPC handle to get the device pointer flags = 1 # CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS - if USE_NV_BINDING: - dptr = driver.cuIpcOpenMemHandle(handle, flags) - else: - dptr = drvapi.cu_device_ptr() - driver.cuIpcOpenMemHandle(byref(dptr), handle, flags) + dptr = driver.cuIpcOpenMemHandle(handle, flags) # wrap it return MemoryPointer( @@ -1463,28 +1318,17 @@ def can_access_peer(self, peer_device): """Returns a bool indicating whether the peer access between the current and peer device is possible. 
""" - if USE_NV_BINDING: - peer_device = binding.CUdevice(peer_device) - can_access_peer = driver.cuDeviceCanAccessPeer( - self.device.id, peer_device - ) - else: - can_access_peer = c_int() - driver.cuDeviceCanAccessPeer( - byref(can_access_peer), - self.device.id, - peer_device, - ) + peer_device = binding.CUdevice(peer_device) + can_access_peer = driver.cuDeviceCanAccessPeer( + self.device.id, peer_device + ) return bool(can_access_peer) def create_module_ptx(self, ptx): if isinstance(ptx, str): ptx = ptx.encode("utf8") - if USE_NV_BINDING: - image = ObjectCode.from_ptx(ptx) - else: - image = c_char_p(ptx) + image = ObjectCode.from_ptx(ptx) return self.create_module_image(image) def create_module_image( @@ -1493,56 +1337,37 @@ def create_module_image( module = load_module_image( self, image, setup_callbacks, teardown_callbacks ) - if USE_NV_BINDING: - key = module.handle - else: - key = module.handle.value + key = module.handle self.modules[key] = module return weakref.proxy(module) def unload_module(self, module): - if USE_NV_BINDING: - key = module.handle - else: - key = module.handle.value + key = module.handle del self.modules[key] def get_default_stream(self): - if USE_NV_BINDING: - handle = drvapi.cu_stream(int(binding.CUstream(CU_STREAM_DEFAULT))) - else: - handle = drvapi.cu_stream(drvapi.CU_STREAM_DEFAULT) + handle = drvapi.cu_stream(int(binding.CUstream(CU_STREAM_DEFAULT))) return Stream(weakref.proxy(self), handle, None) def get_legacy_default_stream(self): - if USE_NV_BINDING: - handle = drvapi.cu_stream( - int(binding.CUstream(binding.CU_STREAM_LEGACY)) - ) - else: - handle = drvapi.cu_stream(drvapi.CU_STREAM_LEGACY) + handle = drvapi.cu_stream( + int(binding.CUstream(binding.CU_STREAM_LEGACY)) + ) return Stream(weakref.proxy(self), handle, None) def get_per_thread_default_stream(self): - if USE_NV_BINDING: - handle = drvapi.cu_stream( - int(binding.CUstream(binding.CU_STREAM_PER_THREAD)) - ) - else: - handle = drvapi.cu_stream(drvapi.CU_STREAM_PER_THREAD) + handle = drvapi.cu_stream( + int(binding.CUstream(binding.CU_STREAM_PER_THREAD)) + ) return Stream(weakref.proxy(self), handle, None) def create_stream(self): - if USE_NV_BINDING: - # The default stream creation flag, specifying that the created - # stream synchronizes with stream 0 (this is different from the - # default stream, which we define also as CU_STREAM_DEFAULT when - # the NV binding is in use). - flags = binding.CUstream_flags.CU_STREAM_DEFAULT.value - handle = drvapi.cu_stream(int(driver.cuStreamCreate(flags))) - else: - handle = drvapi.cu_stream() - driver.cuStreamCreate(byref(handle), 0) + # The default stream creation flag, specifying that the created + # stream synchronizes with stream 0 (this is different from the + # default stream, which we define also as CU_STREAM_DEFAULT when + # the NV binding is in use). 
+ flags = binding.CUstream_flags.CU_STREAM_DEFAULT.value + handle = drvapi.cu_stream(int(driver.cuStreamCreate(flags))) return Stream( weakref.proxy(self), handle, @@ -1552,21 +1377,14 @@ def create_stream(self): def create_external_stream(self, ptr): if not isinstance(ptr, int): raise TypeError("ptr for external stream must be an int") - if USE_NV_BINDING: - handle = drvapi.cu_stream(int(binding.CUstream(ptr))) - else: - handle = drvapi.cu_stream(ptr) + handle = drvapi.cu_stream(int(binding.CUstream(ptr))) return Stream(weakref.proxy(self), handle, None, external=True) def create_event(self, timing=True): flags = 0 if not timing: flags |= enums.CU_EVENT_DISABLE_TIMING - if USE_NV_BINDING: - handle = drvapi.cu_event(int(driver.cuEventCreate(flags))) - else: - handle = drvapi.cu_event() - driver.cuEventCreate(byref(handle), flags) + handle = drvapi.cu_event(int(driver.cuEventCreate(flags))) return Event( weakref.proxy(self), handle, @@ -1601,14 +1419,9 @@ def load_module_image( """ image must be a pointer """ - if USE_NV_BINDING: - return load_module_image_cuda_python( - context, image, setup_callbacks, teardown_callbacks - ) - else: - return load_module_image_ctypes( - context, image, setup_callbacks, teardown_callbacks - ) + return load_module_image_cuda_python( + context, image, setup_callbacks, teardown_callbacks + ) def load_module_image_ctypes( @@ -1769,11 +1582,7 @@ def core(): def _module_finalizer(context, handle): dealloc = context.deallocations modules = context.modules - - if USE_NV_BINDING: - key = handle - else: - key = handle.value + key = handle def core(): shutting_down = utils.shutting_down # early bind @@ -1843,10 +1652,7 @@ def open(self, context): from numba import cuda srcdev = Device.from_identity(self.source_info) - if USE_NV_BINDING: - srcdev_id = int(srcdev.id) - else: - srcdev_id = srcdev.id + srcdev_id = int(srcdev.id) impl = _CudaIpcImpl(parent=self.parent) # Open context on the source device. @@ -1968,10 +1774,7 @@ def close(self): def __reduce__(self): # Preprocess the IPC handle, which is defined as a byte array. 
- if USE_NV_BINDING: - preprocessed_handle = self.handle.reserved - else: - preprocessed_handle = tuple(self.handle.reserved) + preprocessed_handle = self.handle.reserved args = ( self.__class__, preprocessed_handle, @@ -1983,10 +1786,7 @@ def __reduce__(self): @classmethod def _rebuild(cls, handle_ary, size, source_info, offset): - if USE_NV_BINDING: - handle = binding.CUipcMemHandle() - else: - handle = drvapi.cu_ipc_mem_handle() + handle = binding.CUipcMemHandle() handle.reserved = handle_ary return cls( base=None, @@ -2032,7 +1832,7 @@ class MemoryPointer(object): __cuda_memory__ = True def __init__(self, context, pointer, size, owner=None, finalizer=None): - if USE_NV_BINDING and isinstance(pointer, ctypes.c_void_p): + if isinstance(pointer, ctypes.c_void_p): pointer = binding.CUdeviceptr(pointer.value) self.context = context @@ -2067,10 +1867,7 @@ def free(self): def memset(self, byte, count=None, stream=0): count = self.size if count is None else count if stream: - if USE_NV_BINDING: - handle = stream.handle.value - else: - handle = stream.handle + handle = stream.handle.value driver.cuMemsetD8Async(self.device_pointer, byte, count, handle) else: driver.cuMemsetD8(self.device_pointer, byte, count) @@ -2091,12 +1888,9 @@ def view(self, start, stop=None): base = self.device_pointer_value + start if size < 0: raise RuntimeError("size cannot be negative") - if USE_NV_BINDING: - pointer = binding.CUdeviceptr() - ctypes_ptr = drvapi.cu_device_ptr.from_address(pointer.getPtr()) - ctypes_ptr.value = base - else: - pointer = drvapi.cu_device_ptr(base) + pointer = binding.CUdeviceptr() + ctypes_ptr = drvapi.cu_device_ptr.from_address(pointer.getPtr()) + ctypes_ptr.value = base view = MemoryPointer(self.context, pointer, size, owner=self.owner) if isinstance(self.owner, (MemoryPointer, OwnedPointer)): @@ -2108,16 +1902,11 @@ def view(self, start, stop=None): @property def device_ctypes_pointer(self): - if USE_NV_BINDING: - return drvapi.cu_device_ptr(int(self.device_pointer)) - return self.device_pointer + return drvapi.cu_device_ptr(int(self.device_pointer)) @property def device_pointer_value(self): - if USE_NV_BINDING: - return int(self.device_pointer) or None - else: - return self.device_pointer.value + return int(self.device_pointer) or None class AutoFreePointer(MemoryPointer): @@ -2162,13 +1951,8 @@ def __init__(self, context, pointer, size, owner=None, finalizer=None): self.owned = owner self.host_pointer = pointer - if USE_NV_BINDING: - devptr = driver.cuMemHostGetDevicePointer(pointer, 0) - self._bufptr_ = self.host_pointer - else: - devptr = drvapi.cu_device_ptr() - driver.cuMemHostGetDevicePointer(byref(devptr), pointer, 0) - self._bufptr_ = self.host_pointer.value + devptr = driver.cuMemHostGetDevicePointer(pointer, 0) + self._bufptr_ = self.host_pointer self.device_pointer = devptr super(MappedMemory, self).__init__( @@ -2212,10 +1996,7 @@ def __init__(self, context, pointer, size, owner=None, finalizer=None): # For buffer interface self._buflen_ = self.size - if USE_NV_BINDING: - self._bufptr_ = self.host_pointer - else: - self._bufptr_ = self.host_pointer.value + self._bufptr_ = self.host_pointer if finalizer is not None: weakref.finalize(self, finalizer) @@ -2253,10 +2034,7 @@ def __init__(self, context, pointer, size, owner=None, finalizer=None): # For buffer interface self._buflen_ = self.size - if USE_NV_BINDING: - self._bufptr_ = self.device_pointer - else: - self._bufptr_ = self.device_pointer.value + self._bufptr_ = self.device_pointer def own(self): return 
ManagedOwnedPointer(weakref.proxy(self)) @@ -2332,10 +2110,7 @@ def synchronize(self): Wait for all commands in this stream to execute. This will commit any pending memory transfers. """ - if USE_NV_BINDING: - handle = self.handle.value - else: - handle = self.handle + handle = self.handle.value driver.cuStreamSynchronize(handle) @contextlib.contextmanager @@ -2379,15 +2154,11 @@ def add_callback(self, callback, arg=None): """ data = (self, callback, arg) _py_incref(data) - if USE_NV_BINDING: - ptr = int.from_bytes(self._stream_callback, byteorder="little") - stream_callback = binding.CUstreamCallback(ptr) - # The callback needs to receive a pointer to the data PyObject - data = id(data) - handle = self.handle.value - else: - stream_callback = self._stream_callback - handle = self.handle + ptr = int.from_bytes(self._stream_callback, byteorder="little") + stream_callback = binding.CUstreamCallback(ptr) + # The callback needs to receive a pointer to the data PyObject + data = id(data) + handle = self.handle.value driver.cuStreamAddCallback(handle, stream_callback, data, 0) @staticmethod @@ -2465,34 +2236,23 @@ def record(self, stream=0): queued in the stream at the time of the call to ``record()`` has been completed. """ - if USE_NV_BINDING: - hstream = stream.handle.value if stream else binding.CUstream(0) - handle = self.handle.value - else: - hstream = stream.handle if stream else 0 - handle = self.handle + hstream = stream.handle.value if stream else binding.CUstream(0) + handle = self.handle.value driver.cuEventRecord(handle, hstream) def synchronize(self): """ Synchronize the host thread for the completion of the event. """ - if USE_NV_BINDING: - handle = self.handle.value - else: - handle = self.handle + handle = self.handle.value driver.cuEventSynchronize(handle) def wait(self, stream=0): """ All future works submitted to stream will wait util the event completes. """ - if USE_NV_BINDING: - hstream = stream.handle.value if stream else binding.CUstream(0) - handle = self.handle.value - else: - hstream = stream.handle if stream else 0 - handle = self.handle + hstream = stream.handle.value if stream else binding.CUstream(0) + handle = self.handle.value flags = 0 driver.cuStreamWaitEvent(hstream, handle, flags) @@ -2504,14 +2264,7 @@ def event_elapsed_time(evtstart, evtend): """ Compute the elapsed time between two events in milliseconds. """ - if USE_NV_BINDING: - return driver.cuEventElapsedTime( - evtstart.handle.value, evtend.handle.value - ) - else: - msec = c_float() - driver.cuEventElapsedTime(byref(msec), evtstart.handle, evtend.handle) - return msec.value + return driver.cuEventElapsedTime(evtstart.handle.value, evtend.handle.value) class Module(metaclass=ABCMeta): @@ -2729,12 +2482,8 @@ def launch_kernel( param_ptrs = [addressof(arg) for arg in args] params = (c_void_p * len(param_ptrs))(*param_ptrs) - if USE_NV_BINDING: - params_for_launch = addressof(params) - extra = 0 - else: - params_for_launch = params - extra = None + params_for_launch = addressof(params) + extra = 0 if cooperative: driver.cuLaunchCooperativeKernel( @@ -2777,10 +2526,7 @@ def new( lto=None, additional_flags=None, ): - if USE_NV_BINDING: - linker = _Linker - else: - linker = CtypesLinker + linker = _Linker params = (max_registers, lineinfo, cc) if linker is _Linker: @@ -3217,21 +2963,12 @@ def get_devptr_for_active_ctx(ptr): pointer. 
""" if ptr != 0: - if USE_NV_BINDING: - ptr_attrs = binding.CUpointer_attribute - attr = ptr_attrs.CU_POINTER_ATTRIBUTE_DEVICE_POINTER - ptrobj = binding.CUdeviceptr(ptr) - return driver.cuPointerGetAttribute(attr, ptrobj) - else: - devptr = drvapi.cu_device_ptr() - attr = enums.CU_POINTER_ATTRIBUTE_DEVICE_POINTER - driver.cuPointerGetAttribute(byref(devptr), attr, ptr) - return devptr + ptr_attrs = binding.CUpointer_attribute + attr = ptr_attrs.CU_POINTER_ATTRIBUTE_DEVICE_POINTER + ptrobj = binding.CUdeviceptr(ptr) + return driver.cuPointerGetAttribute(attr, ptrobj) else: - if USE_NV_BINDING: - return binding.CUdeviceptr() - else: - return drvapi.cu_device_ptr() + return binding.CUdeviceptr() def device_extents(devmem): @@ -3242,15 +2979,8 @@ def device_extents(devmem): of the device memory view that can be a subsection of the entire allocation. """ devptr = device_ctypes_pointer(devmem) - if USE_NV_BINDING: - s, n = driver.cuMemGetAddressRange(devptr.value) - return int(s), int(binding.CUdeviceptr(int(s) + n)) - else: - s = drvapi.cu_device_ptr() - n = c_size_t() - driver.cuMemGetAddressRange(byref(s), byref(n), devptr) - s, n = s.value, n.value - return s, s + n + s, n = driver.cuMemGetAddressRange(devptr.value) + return int(s), int(binding.CUdeviceptr(int(s) + n)) def device_memory_size(devmem): @@ -3376,10 +3106,7 @@ def host_to_device(dst, src, size, stream=0): if stream: assert isinstance(stream, Stream) fn = driver.cuMemcpyHtoDAsync - if USE_NV_BINDING: - handle = stream.handle.value - else: - handle = stream.handle + handle = stream.handle.value varargs.append(handle) else: fn = driver.cuMemcpyHtoD @@ -3398,10 +3125,7 @@ def device_to_host(dst, src, size, stream=0): if stream: assert isinstance(stream, Stream) fn = driver.cuMemcpyDtoHAsync - if USE_NV_BINDING: - handle = stream.handle.value - else: - handle = stream.handle + handle = stream.handle.value varargs.append(handle) else: fn = driver.cuMemcpyDtoH @@ -3420,10 +3144,7 @@ def device_to_device(dst, src, size, stream=0): if stream: assert isinstance(stream, Stream) fn = driver.cuMemcpyDtoDAsync - if USE_NV_BINDING: - handle = stream.handle.value - else: - handle = stream.handle + handle = stream.handle.value varargs.append(handle) else: fn = driver.cuMemcpyDtoD @@ -3447,10 +3168,7 @@ def device_memset(dst, val, size, stream=0): if stream: assert isinstance(stream, Stream) fn = driver.cuMemsetD8Async - if USE_NV_BINDING: - handle = stream.handle.value - else: - handle = stream.handle + handle = stream.handle.value varargs.append(handle) else: fn = driver.cuMemsetD8 diff --git a/numba_cuda/numba/cuda/cudadrv/mappings.py b/numba_cuda/numba/cuda/cudadrv/mappings.py index 5c45299d..d74fe6e4 100644 --- a/numba_cuda/numba/cuda/cudadrv/mappings.py +++ b/numba_cuda/numba/cuda/cudadrv/mappings.py @@ -1,28 +1,14 @@ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: BSD-2-Clause -from numba.cuda import config -from . 
import enums +from cuda.bindings.driver import CUjitInputType -if config.CUDA_USE_NVIDIA_BINDING: - from cuda.bindings.driver import CUjitInputType - - FILE_EXTENSION_MAP = { - "o": CUjitInputType.CU_JIT_INPUT_OBJECT, - "ptx": CUjitInputType.CU_JIT_INPUT_PTX, - "a": CUjitInputType.CU_JIT_INPUT_LIBRARY, - "lib": CUjitInputType.CU_JIT_INPUT_LIBRARY, - "cubin": CUjitInputType.CU_JIT_INPUT_CUBIN, - "fatbin": CUjitInputType.CU_JIT_INPUT_FATBINARY, - "ltoir": CUjitInputType.CU_JIT_INPUT_NVVM, - } -else: - FILE_EXTENSION_MAP = { - "o": enums.CU_JIT_INPUT_OBJECT, - "ptx": enums.CU_JIT_INPUT_PTX, - "a": enums.CU_JIT_INPUT_LIBRARY, - "lib": enums.CU_JIT_INPUT_LIBRARY, - "cubin": enums.CU_JIT_INPUT_CUBIN, - "fatbin": enums.CU_JIT_INPUT_FATBINARY, - "ltoir": enums.CU_JIT_INPUT_NVVM, - } +FILE_EXTENSION_MAP = { + "o": CUjitInputType.CU_JIT_INPUT_OBJECT, + "ptx": CUjitInputType.CU_JIT_INPUT_PTX, + "a": CUjitInputType.CU_JIT_INPUT_LIBRARY, + "lib": CUjitInputType.CU_JIT_INPUT_LIBRARY, + "cubin": CUjitInputType.CU_JIT_INPUT_CUBIN, + "fatbin": CUjitInputType.CU_JIT_INPUT_FATBINARY, + "ltoir": CUjitInputType.CU_JIT_INPUT_NVVM, +} diff --git a/numba_cuda/numba/cuda/cudadrv/nvrtc.py b/numba_cuda/numba/cuda/cudadrv/nvrtc.py index cef0b576..8d861fcc 100644 --- a/numba_cuda/numba/cuda/cudadrv/nvrtc.py +++ b/numba_cuda/numba/cuda/cudadrv/nvrtc.py @@ -1,23 +1,19 @@ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: BSD-2-Clause -from ctypes import byref, c_char, c_char_p, c_int, c_size_t, c_void_p, POINTER -from enum import IntEnum from numba.cuda.cudadrv.error import ( CCSupportError, - NvrtcError, - NvrtcBuiltinOperationFailure, - NvrtcCompilationError, - NvrtcSupportError, ) from numba.cuda import config from numba.cuda.cuda_paths import get_cuda_paths from numba.cuda.utils import _readenv -import functools import os -import threading import warnings +import functools + +from cuda.core.experimental import Program, ProgramOptions +from cuda.bindings import nvrtc as bindings_nvrtc NVRTC_EXTRA_SEARCH_PATHS = _readenv( "NUMBA_CUDA_NVRTC_EXTRA_SEARCH_PATHS", str, "" @@ -25,268 +21,13 @@ if not hasattr(config, "CUDA_NVRTC_EXTRA_SEARCH_PATHS"): config.CUDA_NVRTC_EXTRA_SEARCH_PATHS = NVRTC_EXTRA_SEARCH_PATHS -# Opaque handle for compilation unit -nvrtc_program = c_void_p - -# Result code -nvrtc_result = c_int - -if config.CUDA_USE_NVIDIA_BINDING: - from cuda.bindings import nvrtc as bindings_nvrtc - from cuda.core.experimental import Program, ProgramOptions - - -class NvrtcResult(IntEnum): - NVRTC_SUCCESS = 0 - NVRTC_ERROR_OUT_OF_MEMORY = 1 - NVRTC_ERROR_PROGRAM_CREATION_FAILURE = 2 - NVRTC_ERROR_INVALID_INPUT = 3 - NVRTC_ERROR_INVALID_PROGRAM = 4 - NVRTC_ERROR_INVALID_OPTION = 5 - NVRTC_ERROR_COMPILATION = 6 - NVRTC_ERROR_BUILTIN_OPERATION_FAILURE = 7 - NVRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION = 8 - NVRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION = 9 - NVRTC_ERROR_NAME_EXPRESSION_NOT_VALID = 10 - NVRTC_ERROR_INTERNAL_ERROR = 11 - - -_nvrtc_lock = threading.Lock() - -class NvrtcProgram: - """ - A class for managing the lifetime of nvrtcProgram instances. Instances of - the class own an nvrtcProgram; when an instance is deleted, the underlying - nvrtcProgram is destroyed using the appropriate NVRTC API. 
- """ - - def __init__(self, nvrtc, handle): - self._nvrtc = nvrtc - self._handle = handle - - @property - def handle(self): - return self._handle - - def __del__(self): - if self._handle: - self._nvrtc.destroy_program(self) - - -class NVRTC: - """ - Provides a Pythonic interface to the NVRTC APIs, abstracting away the C API - calls. - - The sole instance of this class is a process-wide singleton, similar to the - NVVM interface. Initialization is protected by a lock and uses the standard - (for Numba) open_cudalib function to load the NVRTC library. - """ - - _PROTOTYPES = { - # nvrtcResult nvrtcVersion(int *major, int *minor) - "nvrtcVersion": (nvrtc_result, POINTER(c_int), POINTER(c_int)), - # nvrtcResult nvrtcCreateProgram(nvrtcProgram *prog, - # const char *src, - # const char *name, - # int numHeaders, - # const char * const *headers, - # const char * const *includeNames) - "nvrtcCreateProgram": ( - nvrtc_result, - nvrtc_program, - c_char_p, - c_char_p, - c_int, - POINTER(c_char_p), - POINTER(c_char_p), - ), - # nvrtcResult nvrtcDestroyProgram(nvrtcProgram *prog); - "nvrtcDestroyProgram": (nvrtc_result, POINTER(nvrtc_program)), - # nvrtcResult nvrtcCompileProgram(nvrtcProgram prog, - # int numOptions, - # const char * const *options) - "nvrtcCompileProgram": ( - nvrtc_result, - nvrtc_program, - c_int, - POINTER(c_char_p), - ), - # nvrtcResult nvrtcGetPTXSize(nvrtcProgram prog, size_t *ptxSizeRet); - "nvrtcGetPTXSize": (nvrtc_result, nvrtc_program, POINTER(c_size_t)), - # nvrtcResult nvrtcGetPTX(nvrtcProgram prog, char *ptx); - "nvrtcGetPTX": (nvrtc_result, nvrtc_program, c_char_p), - # nvrtcResult nvrtcGetCUBINSize(nvrtcProgram prog, - # size_t *cubinSizeRet); - "nvrtcGetCUBINSize": (nvrtc_result, nvrtc_program, POINTER(c_size_t)), - # nvrtcResult nvrtcGetCUBIN(nvrtcProgram prog, char *cubin); - "nvrtcGetCUBIN": (nvrtc_result, nvrtc_program, c_char_p), - # nvrtcResult nvrtcGetProgramLogSize(nvrtcProgram prog, - # size_t *logSizeRet); - "nvrtcGetProgramLogSize": ( - nvrtc_result, - nvrtc_program, - POINTER(c_size_t), - ), - # nvrtcResult nvrtcGetProgramLog(nvrtcProgram prog, char *log); - "nvrtcGetProgramLog": (nvrtc_result, nvrtc_program, c_char_p), - # nvrtcResult nvrtcGetNumSupportedArchs(int *numArchs); - "nvrtcGetNumSupportedArchs": (nvrtc_result, POINTER(c_int)), - # nvrtcResult nvrtcGetSupportedArchs(int *supportedArchs); - "nvrtcGetSupportedArchs": (nvrtc_result, POINTER(c_int)), - # nvrtcResult nvrtcGetLTOIRSize(nvrtcProgram prog, size_t *ltoSizeRet); - "nvrtcGetLTOIRSize": (nvrtc_result, nvrtc_program, POINTER(c_size_t)), - # nvrtcResult nvrtcGetLTOIR(nvrtcProgram prog, char *lto); - "nvrtcGetLTOIR": (nvrtc_result, nvrtc_program, c_char_p), - } - - # Singleton reference - __INSTANCE = None - - def __new__(cls): - with _nvrtc_lock: - if config.CUDA_USE_NVIDIA_BINDING: - raise RuntimeError( - "NVRTC objects should not be used with cuda-python bindings" - ) - if cls.__INSTANCE is None: - from numba.cuda.cudadrv.libs import open_cudalib - - cls.__INSTANCE = inst = object.__new__(cls) - try: - lib = open_cudalib("nvrtc") - except OSError as e: - cls.__INSTANCE = None - raise NvrtcSupportError("NVRTC cannot be loaded") from e - - # Find & populate functions - for name, proto in inst._PROTOTYPES.items(): - func = getattr(lib, name) - func.restype = proto[0] - func.argtypes = proto[1:] - - @functools.wraps(func) - def checked_call(*args, func=func, name=name): - error = func(*args) - if error == NvrtcResult.NVRTC_ERROR_COMPILATION: - raise NvrtcCompilationError() - elif ( - error - 
== NvrtcResult.NVRTC_ERROR_BUILTIN_OPERATION_FAILURE - ): - raise NvrtcBuiltinOperationFailure() - elif error != NvrtcResult.NVRTC_SUCCESS: - try: - error_name = NvrtcResult(error).name - except ValueError: - error_name = ( - "Unknown nvrtc_result " - f"(error code: {error})" - ) - msg = f"Failed to call {name}: {error_name}" - raise NvrtcError(msg) - - setattr(inst, name, checked_call) - - return cls.__INSTANCE - - @functools.cache - def get_supported_archs(self): - """ - Get Supported Architectures by NVRTC as list of arch tuples. - """ - num = c_int() - self.nvrtcGetNumSupportedArchs(byref(num)) - archs = (c_int * num.value)() - self.nvrtcGetSupportedArchs(archs) - return [(archs[i] // 10, archs[i] % 10) for i in range(num.value)] - - def get_version(self): - """ - Get the NVRTC version as a tuple (major, minor). - """ - major = c_int() - minor = c_int() - self.nvrtcVersion(byref(major), byref(minor)) - return major.value, minor.value - - def create_program(self, src, name): - """ - Create an NVRTC program with managed lifetime. - """ - if isinstance(src, str): - src = src.encode() - if isinstance(name, str): - name = name.encode() - - handle = nvrtc_program() - - # The final three arguments are for passing the contents of headers - - # this is not supported, so there are 0 headers and the header names - # and contents are null. - self.nvrtcCreateProgram(byref(handle), src, name, 0, None, None) - return NvrtcProgram(self, handle) - - def compile_program(self, program, options): - """ - Compile an NVRTC program. Compilation may fail due to a user error in - the source; this function returns ``True`` if there is a compilation - error and ``False`` on success. - """ - # We hold a list of encoded options to ensure they can't be collected - # prior to the call to nvrtcCompileProgram - encoded_options = [opt.encode() for opt in options] - option_pointers = [c_char_p(opt) for opt in encoded_options] - c_options_type = c_char_p * len(options) - c_options = c_options_type(*option_pointers) - try: - self.nvrtcCompileProgram(program.handle, len(options), c_options) - return False - except (NvrtcCompilationError, NvrtcBuiltinOperationFailure): - return True - - def destroy_program(self, program): - """ - Destroy an NVRTC program. - """ - self.nvrtcDestroyProgram(byref(program.handle)) - - def get_compile_log(self, program): - """ - Get the compile log as a Python string. - """ - log_size = c_size_t() - self.nvrtcGetProgramLogSize(program.handle, byref(log_size)) - - log = (c_char * log_size.value)() - self.nvrtcGetProgramLog(program.handle, log) - - return log.value.decode() - - def get_ptx(self, program): - """ - Get the compiled PTX as a Python string. - """ - ptx_size = c_size_t() - self.nvrtcGetPTXSize(program.handle, byref(ptx_size)) - - ptx = (c_char * ptx_size.value)() - self.nvrtcGetPTX(program.handle, ptx) - - return ptx.value.decode() - - def get_lto(self, program): - """ - Get the compiled LTOIR as a Python bytes object. 
- """ - lto_size = c_size_t() - self.nvrtcGetLTOIRSize(program.handle, byref(lto_size)) - - lto = b" " * lto_size.value - self.nvrtcGetLTOIR(program.handle, lto) - - return lto +@functools.cache +def _get_nvrtc_version(): + retcode, major, minor = bindings_nvrtc.nvrtcVersion() + if retcode != bindings_nvrtc.nvrtcResult.NVRTC_SUCCESS: + raise RuntimeError(f"{retcode.name} when calling nvrtcVersion()") + return (major, minor) def compile(src, name, cc, ltoir=False, lineinfo=False, debug=False): @@ -308,17 +49,7 @@ def compile(src, name, cc, ltoir=False, lineinfo=False, debug=False): :return: The compiled PTX or LTOIR and compilation log :rtype: tuple """ - - if config.CUDA_USE_NVIDIA_BINDING: - retcode, *version = bindings_nvrtc.nvrtcVersion() - if retcode != bindings_nvrtc.nvrtcResult.NVRTC_SUCCESS: - raise RuntimeError( - f"{retcode.name} when calling nvrtcGetSupportedArchs()" - ) - version = tuple(version) - else: - nvrtc = NVRTC() - version = nvrtc.get_version() + version = _get_nvrtc_version() ver_str = lambda version: ".".join(str(v) for v in version) supported_ccs = get_supported_ccs() @@ -345,10 +76,7 @@ def compile(src, name, cc, ltoir=False, lineinfo=False, debug=False): # being optimized away. major, minor = found - if config.CUDA_USE_NVIDIA_BINDING: - arch = f"sm_{major}{minor}" - else: - arch = f"--gpu-architecture=compute_{major}{minor}" + arch = f"sm_{major}{minor}" cuda_include_dir = get_cuda_paths()["include_dir"].info cuda_includes = [f"{cuda_include_dir}"] @@ -381,76 +109,35 @@ def compile(src, name, cc, ltoir=False, lineinfo=False, debug=False): includes = [numba_include, *cuda_includes, nrt_include, *extra_includes] - if config.CUDA_USE_NVIDIA_BINDING: - options = ProgramOptions( - arch=arch, - include_path=includes, - relocatable_device_code=True, - link_time_optimization=ltoir, - name=name, - debug=debug, - lineinfo=lineinfo, - ) - - class Logger: - def __init__(self): - self.log = [] - - def write(self, msg): - self.log.append(msg) - - logger = Logger() - if isinstance(src, bytes): - src = src.decode("utf8") - - prog = Program(src, "c++", options=options) - result = prog.compile("ltoir" if ltoir else "ptx", logs=logger) - log = "" - if logger.log: - log = logger.log - joined_logs = "\n".join(log) - warnings.warn(f"NVRTC log messages: {joined_logs}") - return result, log - - else: - program = nvrtc.create_program(src, name) - includes = [f"-I{path}" for path in includes] - options = [ - arch, - *includes, - "-rdc", - "true", - ] - - if ltoir: - options.append("-dlto") - if lineinfo: - options.append("-lineinfo") - if debug: - options.append("-G") - - # Compile the program - compile_error = nvrtc.compile_program(program, options) - - # Get log from compilation - log = nvrtc.get_compile_log(program) - - # If the compile failed, provide the log in an exception - if compile_error: - msg = f"NVRTC Compilation failure whilst compiling {name}:\n\n{log}" - raise NvrtcError(msg) - - # Otherwise, if there's any content in the log, present it as a warning - if log: - msg = f"NVRTC log messages whilst compiling {name}:\n\n{log}" - warnings.warn(msg) - - if ltoir: - ltoir = nvrtc.get_lto(program) - return ltoir, log - else: - ptx = nvrtc.get_ptx(program) - return ptx, log + options = ProgramOptions( + arch=arch, + include_path=includes, + relocatable_device_code=True, + link_time_optimization=ltoir, + name=name, + debug=debug, + lineinfo=lineinfo, + ) + + class Logger: + def __init__(self): + self.log = [] + + def write(self, msg): + self.log.append(msg) + + logger = Logger() + if 
@@ -308,17 +49,7 @@ def compile(src, name, cc, ltoir=False, lineinfo=False, debug=False):
     :return: The compiled PTX or LTOIR and compilation log
     :rtype: tuple
     """
-
-    if config.CUDA_USE_NVIDIA_BINDING:
-        retcode, *version = bindings_nvrtc.nvrtcVersion()
-        if retcode != bindings_nvrtc.nvrtcResult.NVRTC_SUCCESS:
-            raise RuntimeError(
-                f"{retcode.name} when calling nvrtcGetSupportedArchs()"
-            )
-        version = tuple(version)
-    else:
-        nvrtc = NVRTC()
-        version = nvrtc.get_version()
+    version = _get_nvrtc_version()
 
     ver_str = lambda version: ".".join(str(v) for v in version)
     supported_ccs = get_supported_ccs()
@@ -345,10 +76,7 @@ def compile(src, name, cc, ltoir=False, lineinfo=False, debug=False):
     # being optimized away.
     major, minor = found
 
-    if config.CUDA_USE_NVIDIA_BINDING:
-        arch = f"sm_{major}{minor}"
-    else:
-        arch = f"--gpu-architecture=compute_{major}{minor}"
+    arch = f"sm_{major}{minor}"
 
     cuda_include_dir = get_cuda_paths()["include_dir"].info
     cuda_includes = [f"{cuda_include_dir}"]
@@ -381,76 +109,35 @@ def compile(src, name, cc, ltoir=False, lineinfo=False, debug=False):
 
     includes = [numba_include, *cuda_includes, nrt_include, *extra_includes]
 
-    if config.CUDA_USE_NVIDIA_BINDING:
-        options = ProgramOptions(
-            arch=arch,
-            include_path=includes,
-            relocatable_device_code=True,
-            link_time_optimization=ltoir,
-            name=name,
-            debug=debug,
-            lineinfo=lineinfo,
-        )
-
-        class Logger:
-            def __init__(self):
-                self.log = []
-
-            def write(self, msg):
-                self.log.append(msg)
-
-        logger = Logger()
-        if isinstance(src, bytes):
-            src = src.decode("utf8")
-
-        prog = Program(src, "c++", options=options)
-        result = prog.compile("ltoir" if ltoir else "ptx", logs=logger)
-        log = ""
-        if logger.log:
-            log = logger.log
-            joined_logs = "\n".join(log)
-            warnings.warn(f"NVRTC log messages: {joined_logs}")
-        return result, log
-
-    else:
-        program = nvrtc.create_program(src, name)
-        includes = [f"-I{path}" for path in includes]
-        options = [
-            arch,
-            *includes,
-            "-rdc",
-            "true",
-        ]
-
-        if ltoir:
-            options.append("-dlto")
-        if lineinfo:
-            options.append("-lineinfo")
-        if debug:
-            options.append("-G")
-
-        # Compile the program
-        compile_error = nvrtc.compile_program(program, options)
-
-        # Get log from compilation
-        log = nvrtc.get_compile_log(program)
-
-        # If the compile failed, provide the log in an exception
-        if compile_error:
-            msg = f"NVRTC Compilation failure whilst compiling {name}:\n\n{log}"
-            raise NvrtcError(msg)
-
-        # Otherwise, if there's any content in the log, present it as a warning
-        if log:
-            msg = f"NVRTC log messages whilst compiling {name}:\n\n{log}"
-            warnings.warn(msg)
-
-        if ltoir:
-            ltoir = nvrtc.get_lto(program)
-            return ltoir, log
-        else:
-            ptx = nvrtc.get_ptx(program)
-            return ptx, log
+    options = ProgramOptions(
+        arch=arch,
+        include_path=includes,
+        relocatable_device_code=True,
+        link_time_optimization=ltoir,
+        name=name,
+        debug=debug,
+        lineinfo=lineinfo,
+    )
+
+    # File-like sink that collects NVRTC diagnostics via the logs= argument.
+    class Logger:
+        def __init__(self):
+            self.log = []
+
+        def write(self, msg):
+            self.log.append(msg)
+
+    logger = Logger()
+    if isinstance(src, bytes):
+        src = src.decode("utf8")
+
+    prog = Program(src, "c++", options=options)
+    result = prog.compile("ltoir" if ltoir else "ptx", logs=logger)
+    log = ""
+    if logger.log:
+        log = "\n".join(logger.log)
+        warnings.warn(f"NVRTC log messages: {log}")
+    return result, log
 
 
 def find_closest_arch(mycc):
@@ -498,12 +185,9 @@ def get_lowest_supported_cc():
 
 
 def get_supported_ccs():
-    if config.CUDA_USE_NVIDIA_BINDING:
-        retcode, archs = bindings_nvrtc.nvrtcGetSupportedArchs()
-        if retcode != bindings_nvrtc.nvrtcResult.NVRTC_SUCCESS:
-            raise RuntimeError(
-                f"{retcode.name} when calling nvrtcGetSupportedArchs()"
-            )
-        return [(arch // 10, arch % 10) for arch in archs]
-    else:
-        return NVRTC().get_supported_archs()
+    retcode, archs = bindings_nvrtc.nvrtcGetSupportedArchs()
+    if retcode != bindings_nvrtc.nvrtcResult.NVRTC_SUCCESS:
+        raise RuntimeError(
+            f"{retcode.name} when calling nvrtcGetSupportedArchs()"
+        )
+    return [(arch // 10, arch % 10) for arch in archs]
diff --git a/numba_cuda/numba/cuda/cudadrv/runtime.py b/numba_cuda/numba/cuda/cudadrv/runtime.py
index ebe615e0..004d4184 100644
--- a/numba_cuda/numba/cuda/cudadrv/runtime.py
+++ b/numba_cuda/numba/cuda/cudadrv/runtime.py
@@ -8,23 +8,12 @@
 to the runtime anymore. This file is provided to maintain the existing API.
 """
 
-from numba.cuda import config
-from numba.cuda.cudadrv.nvrtc import NVRTC
+from numba.cuda.cudadrv.nvrtc import _get_nvrtc_version
 
 
 class Runtime:
     def get_version(self):
-        if config.CUDA_USE_NVIDIA_BINDING:
-            from cuda.bindings import nvrtc
-
-            retcode, *version = nvrtc.nvrtcVersion()
-            if retcode != nvrtc.nvrtcResult.NVRTC_SUCCESS:
-                raise RuntimeError(
-                    f"{retcode.name} when calling nvrtcGetVersion()"
-                )
-            return tuple(version)
-        else:
-            return NVRTC().get_version()
+        return _get_nvrtc_version()
 
 
 runtime = Runtime()
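With the ctypes branch gone, every compilation goes through cuda.core's Program object. A minimal standalone sketch of that path, using only calls that appear in this patch; the kernel source and the sm_80 arch string are made-up example inputs, and a CUDA toolkit with NVRTC is assumed to be available:

```python
# Standalone sketch of the unified cuda.core compile path (illustrative only).
import io

from cuda.core.experimental import Program, ProgramOptions

src = 'extern "C" __global__ void k(int *x) { *x = 1; }'
options = ProgramOptions(arch="sm_80", relocatable_device_code=True)
prog = Program(src, "c++", options=options)

log_sink = io.StringIO()                  # any object with write() works
mod = prog.compile("ptx", logs=log_sink)  # "ltoir" would produce LTO-IR

print(mod.code.decode()[:40])             # .code holds the PTX bytes
print(log_sink.getvalue())                # NVRTC diagnostics, if any
```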
diff --git a/numba_cuda/numba/cuda/tests/cudadrv/test_linker.py b/numba_cuda/numba/cuda/tests/cudadrv/test_linker.py
index c3671266..972c869e 100644
--- a/numba_cuda/numba/cuda/tests/cudadrv/test_linker.py
+++ b/numba_cuda/numba/cuda/tests/cudadrv/test_linker.py
@@ -3,7 +3,6 @@
 import numpy as np
 import warnings
-from numba.cuda import config
 from numba.cuda.testing import unittest
 from numba.cuda.testing import (
     skip_on_cudasim,
@@ -15,7 +14,6 @@
 from numba.cuda import require_context
 from numba.cuda.tests.support import ignore_internal_warnings
 from numba import cuda, void, float64, int64, int32, typeof, float32
-from numba.cuda.cudadrv.error import NvrtcError
 
 CONST1D = np.arange(10, dtype=np.float64)
 
@@ -179,23 +177,27 @@ def test_linking_cu_log_warning(self):
         def kernel(x):
             bar(x)
 
-        self.assertEqual(len(w), 1, "Expected warnings from NVRTC")
+        nvrtc_log_warnings = [
+            wi for wi in w if "NVRTC log messages" in str(wi.message)
+        ]
+        self.assertEqual(
+            len(nvrtc_log_warnings), 1, "Expected warnings from NVRTC"
+        )
         # Check the warning refers to the log messages
-        self.assertIn("NVRTC log messages", str(w[0].message))
+        self.assertIn("NVRTC log messages", str(nvrtc_log_warnings[0].message))
         # Check the message pertaining to the unused variable is provided
-        self.assertIn("declared but never referenced", str(w[0].message))
+        self.assertIn(
+            "declared but never referenced", str(nvrtc_log_warnings[0].message)
+        )
 
     def test_linking_cu_error(self):
         bar = cuda.declare_device("bar", "int32(int32)")
 
         link = str(test_data_dir / "error.cu")
-        if config.CUDA_USE_NVIDIA_BINDING:
-            from cuda.core.experimental._utils.cuda_utils import NVRTCError
+        from cuda.core.experimental._utils.cuda_utils import NVRTCError
 
-            errty = NVRTCError
-        else:
-            errty = NvrtcError
+        errty = NVRTCError
 
         with self.assertRaises(errty) as e:
 
             @cuda.jit("void(int32)", link=[link])
@@ -204,11 +206,7 @@ def kernel(x):
 
         msg = e.exception.args[0]
         # Check the error message refers to the NVRTC compile
-        nvrtc_err_str = (
-            "NVRTC_ERROR_COMPILATION"
-            if config.CUDA_USE_NVIDIA_BINDING
-            else "NVRTC Compilation failure"
-        )
+        nvrtc_err_str = "NVRTC_ERROR_COMPILATION"
         self.assertIn(nvrtc_err_str, msg)
         # Check the expected error in the CUDA source is reported
         self.assertIn('identifier "SYNTAX" is undefined', msg)
diff --git a/numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py b/numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py
index 8a4f5b44..e74d97e9 100644
--- a/numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py
+++ b/numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py
@@ -17,10 +17,7 @@
 if not config.ENABLE_CUDASIM:
     from cuda.bindings.driver import cuModuleGetGlobal, cuMemcpyHtoD
 
-    if config.CUDA_USE_NVIDIA_BINDING:
-        from cuda.bindings.driver import CUmodule as cu_module_type
-    else:
-        from numba.cuda.cudadrv.drvapi import cu_module as cu_module_type
+    from cuda.bindings.driver import CUmodule as cu_module_type
 
 
 def wipe_all_modules_in_context():
@@ -35,8 +32,6 @@ def wipe_all_modules_in_context():
 
 
 def get_hashable_handle_value(handle):
-    if not config.CUDA_USE_NVIDIA_BINDING:
-        handle = handle.value
     return handle
diff --git a/numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py b/numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py
index 17b54d85..d7344541 100644
--- a/numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py
+++ b/numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py
@@ -45,10 +45,8 @@
 
 
 @unittest.skipIf(
-    not config.CUDA_USE_NVIDIA_BINDING
-    or not TEST_BIN_DIR
-    or not _have_nvjitlink(),
-    "NVIDIA cuda bindings not enabled or nvJitLink not installed or new enough (>12.3)",
+    not TEST_BIN_DIR or not _have_nvjitlink(),
+    "Test binaries not available, or nvJitLink not installed or not new enough (>12.3)",
 )
 @skip_on_cudasim("Linking unsupported in the simulator")
 class TestLinker(CUDATestCase):
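The reworked assertions above filter the recorded warnings by message rather than assuming the NVRTC warning is the only one captured. A minimal sketch of that pattern, with a stand-in warning in place of a real kernel compilation:

```python
# Sketch of the warning-filtering pattern used in the updated test.
# catch_warnings(record=True) captures every warning raised in the block,
# so filter by message instead of assuming w[0] is the NVRTC one.
import warnings

with warnings.catch_warnings(record=True) as w:
    warnings.simplefilter("always")
    warnings.warn("NVRTC log messages: demo diagnostic")  # stand-in trigger
    warnings.warn("some unrelated warning")

nvrtc_log_warnings = [
    wi for wi in w if "NVRTC log messages" in str(wi.message)
]
assert len(nvrtc_log_warnings) == 1
```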
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_compiler.py b/numba_cuda/numba/cuda/tests/cudapy/test_compiler.py
index 5142c539..45f5634c 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_compiler.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_compiler.py
@@ -557,12 +557,9 @@ def f(z, x, y):
             link_obj = LinkableCode.from_path(link)
             if link_obj.kind == "cu":
                 # if link is a cu file, result contains a compiled object code
-                if cuda.config.CUDA_USE_NVIDIA_BINDING:
-                    from cuda.core.experimental import ObjectCode
+                from cuda.core.experimental import ObjectCode
 
-                    assert isinstance(code_list[1], ObjectCode)
-                else:
-                    assert isinstance(code_list[1], bytes)
+                assert isinstance(code_list[1], ObjectCode)
             else:
                 assert code_list[1].kind == link_obj.kind
 
@@ -581,13 +578,10 @@ def f(z, x, y):
         )
 
         assert len(code_list) == 2
-        if cuda.config.CUDA_USE_NVIDIA_BINDING:
-            self.assertRegex(
-                str(code_list[1].code.decode()),
-                r"\.file.*test_device_functions",
-            )
-        else:
-            self.assertRegex(code_list[1], r"\.file.*test_device_functions")
+        self.assertRegex(
+            str(code_list[1].code.decode()),
+            r"\.file.*test_device_functions",
+        )
 
     @unittest.skipIf(not TEST_BIN_DIR, "necessary binaries not generated.")
     def test_compile_all_debug(self):
@@ -604,12 +598,9 @@ def f(z, x, y):
         )
 
         assert len(code_list) == 2
-        if cuda.config.CUDA_USE_NVIDIA_BINDING:
-            self.assertRegex(
-                str(code_list[1].code.decode()), r"\.section\s+\.debug_info"
-            )
-        else:
-            self.assertRegex(code_list[1], r"\.section\s+\.debug_info")
+        self.assertRegex(
+            str(code_list[1].code.decode()), r"\.section\s+\.debug_info"
+        )
 
 
 @skip_on_cudasim("Compilation unsupported in the simulator")
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_errors.py b/numba_cuda/numba/cuda/tests/cudapy/test_errors.py
index 48669e7d..3ea51a1a 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_errors.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_errors.py
@@ -4,7 +4,7 @@
 from numba import cuda
 from numba.core.errors import TypingError
 from numba.cuda.testing import unittest, CUDATestCase, skip_on_cudasim
-from numba.cuda import config
+from numba.cuda.cudadrv import driver
 
 
 def noop(x):
@@ -95,7 +95,7 @@ def kernel_func():
 
     @skip_on_cudasim("Simulator does not use nvjitlink")
     @unittest.skipIf(
-        config.CUDA_USE_NVIDIA_BINDING, "NVIDIA cuda bindings enabled"
+        driver._have_nvjitlink(), "nvJitLink available; LTO should not error"
     )
     def test_lto_without_nvjitlink_error(self):
         with self.assertRaisesRegex(RuntimeError, "LTO requires nvjitlink"):
diff --git a/numba_cuda/numba/cuda/tests/nrt/test_nrt.py b/numba_cuda/numba/cuda/tests/nrt/test_nrt.py
index b87f4d35..2757d576 100644
--- a/numba_cuda/numba/cuda/tests/nrt/test_nrt.py
+++ b/numba_cuda/numba/cuda/tests/nrt/test_nrt.py
@@ -173,16 +173,7 @@ def test_nrt_detect_linked_ptx_file(self):
         cc = get_current_device().compute_capability
         ptx, _ = compile(src, "external_nrt.cu", cc)
 
-        @cuda.jit(
-            link=[
-                PTXSource(
-                    ptx.code
-                    if config.CUDA_USE_NVIDIA_BINDING
-                    else ptx.encode(),
-                    nrt=True,
-                )
-            ]
-        )
+        @cuda.jit(link=[PTXSource(ptx.code, nrt=True)])
         def kernel():
             allocate_deallocate_handle()
 
diff --git a/pyproject.toml b/pyproject.toml
index 54ee6511..12ed8284 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -21,7 +21,7 @@ authors = [
 license = "BSD-2-clause"
 license-files = ["LICENSE", "LICENSE.numba"]
 requires-python = ">=3.9"
-dependencies = ["numba>=0.60.0"]
+dependencies = ["numba>=0.60.0", "cuda-bindings>=12.9.1,<14.0.0", "cuda-core>=0.3.2,<0.4.0dev0"]
 
 [project.optional-dependencies]
 cu12 = [