diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index ea7e7a98b..20efc1f6d 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -19,11 +19,11 @@ jobs:
       - compute-matrix
       - build-conda
       - test-conda
-      - test-conda-pynvjitlink
+      - test-conda-ctypes-binding
       - test-simulator
       - build-wheels
       - test-wheels
-      - test-wheels-pynvjitlink
+      - test-wheels-ctypes-binding
       - test-wheels-deps-wheels
       - build-docs
     secrets: inherit
@@ -76,14 +76,14 @@ jobs:
       script: "ci/test_conda.sh"
       run_codecov: false
       matrix: ${{ needs.compute-matrix.outputs.TEST_MATRIX }}
-  test-conda-pynvjitlink:
+  test-conda-ctypes-binding:
     needs:
       - build-conda
       - compute-matrix
     uses: ./.github/workflows/conda-python-tests.yaml
     with:
       build_type: pull-request
-      script: "ci/test_conda_pynvjitlink.sh"
+      script: "ci/test_conda_ctypes_binding.sh"
       run_codecov: false
       # This selects "ARCH=amd64 and CUDA >=12, with the latest supported Python for each CUDA major version".
       matrix: ${{ needs.compute-matrix.outputs.TEST_MATRIX }}
@@ -114,14 +114,14 @@ jobs:
       build_type: pull-request
       script: "ci/test_wheel.sh false"
       matrix: ${{ needs.compute-matrix.outputs.TEST_MATRIX }}
-  test-wheels-pynvjitlink:
+  test-wheels-ctypes-binding:
     needs:
       - build-wheels
       - compute-matrix
     uses: ./.github/workflows/wheels-test.yaml
     with:
       build_type: pull-request
-      script: "ci/test_wheel_pynvjitlink.sh"
+      script: "ci/test_wheel_ctypes_binding.sh"
       # This selects "ARCH=amd64 and CUDA >=12, with the latest supported Python for each CUDA major version".
       matrix: ${{ needs.compute-matrix.outputs.TEST_MATRIX }}
       matrix_filter: map(select(.ARCH == "amd64" and (.CUDA_VER | split(".") | .[0] | tonumber >= 12))) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
diff --git a/ci/test_conda.sh b/ci/test_conda.sh
index 2fbc53b71..04958da67 100755
--- a/ci/test_conda.sh
+++ b/ci/test_conda.sh
@@ -8,7 +8,7 @@ set -euo pipefail
 if [ "${CUDA_VER%.*.*}" = "11" ]; then
   CTK_PACKAGES="cudatoolkit=11"
 else
-  CTK_PACKAGES="cuda-cccl cuda-nvcc-impl cuda-nvrtc libcurand-dev"
+  CTK_PACKAGES="cuda-cccl cuda-nvcc-impl cuda-nvrtc libcurand-dev cuda-cuobjdump"
   apt-get update
   apt remove --purge `dpkg --get-selections | grep cuda-nvvm | awk '{print $1}'` -y
   apt remove --purge `dpkg --get-selections | grep cuda-nvrtc | awk '{print $1}'` -y
diff --git a/ci/test_conda_pynvjitlink.sh b/ci/test_conda_ctypes_binding.sh
similarity index 89%
rename from ci/test_conda_pynvjitlink.sh
rename to ci/test_conda_ctypes_binding.sh
index 78c636a45..06aea95af 100755
--- a/ci/test_conda_pynvjitlink.sh
+++ b/ci/test_conda_ctypes_binding.sh
@@ -48,12 +48,6 @@ EXITCODE=0
 trap "EXITCODE=1" ERR
 set +e
 
-
-rapids-logger "Install pynvjitlink"
-set +u
-rapids-mamba-retry install -c rapidsai pynvjitlink
-set -u
-
 rapids-logger "Build tests"
 
 PY_SCRIPT="
@@ -70,7 +64,7 @@ popd
 
 
 rapids-logger "Run Tests"
-NUMBA_CUDA_ENABLE_PYNVJITLINK=1 NUMBA_CUDA_TEST_BIN_DIR=$NUMBA_CUDA_TEST_BIN_DIR python -m numba.runtests numba.cuda.tests -v
+NUMBA_CUDA_USE_NVIDIA_BINDING=0 NUMBA_CUDA_TEST_BIN_DIR=$NUMBA_CUDA_TEST_BIN_DIR python -m numba.runtests numba.cuda.tests -v
 
 popd
 
diff --git a/ci/test_wheel.sh b/ci/test_wheel.sh
index 3d75032a2..8e4687890 100755
--- a/ci/test_wheel.sh
+++ b/ci/test_wheel.sh
@@ -10,7 +10,8 @@ package=$(realpath wheel/numba_cuda*.whl)
 echo "Package path: ${package}"
 python -m pip install \
     "${package}[test]" \
-    "cuda-python==${CUDA_VER_MAJOR_MINOR%.*}.*"
+    "cuda-python==${CUDA_VER_MAJOR_MINOR%.*}.*" \
+    "cuda-core==0.3.*"
 
 GET_TEST_BINARY_DIR="
 import numba_cuda
diff --git a/ci/test_wheel_pynvjitlink.sh b/ci/test_wheel_ctypes_binding.sh
similarity index 84%
rename from ci/test_wheel_pynvjitlink.sh
rename to ci/test_wheel_ctypes_binding.sh
index 4ede2b967..67bff6126 100755
--- a/ci/test_wheel_pynvjitlink.sh
+++ b/ci/test_wheel_ctypes_binding.sh
@@ -11,7 +11,6 @@ echo "Package path: $package"
 python -m pip install \
     "${package}[test]" \
-    cuda-python \
-    "pynvjitlink-cu${CUDA_VER_MAJOR}"
+    cuda-python
 
 rapids-logger "Build tests"
 PY_SCRIPT="
@@ -23,7 +22,7 @@ print(test_dir)
 
 NUMBA_CUDA_TEST_BIN_DIR=$(python -c "$PY_SCRIPT")
 pushd $NUMBA_CUDA_TEST_BIN_DIR
-make
+NUMBA_CUDA_USE_NVIDIA_BINDING=0 make
 popd
 
 
@@ -35,9 +34,9 @@ mkdir -p "${RAPIDS_TESTS_DIR}"
 pushd "${RAPIDS_TESTS_DIR}"
 
 rapids-logger "Show Numba system info"
-python -m numba --sysinfo
+NUMBA_CUDA_USE_NVIDIA_BINDING=0 python -m numba --sysinfo
 
 rapids-logger "Run Tests"
-NUMBA_CUDA_ENABLE_PYNVJITLINK=1 NUMBA_CUDA_TEST_BIN_DIR=$NUMBA_CUDA_TEST_BIN_DIR python -m numba.runtests numba.cuda.tests -v
+NUMBA_CUDA_USE_NVIDIA_BINDING=0 NUMBA_CUDA_TEST_BIN_DIR=$NUMBA_CUDA_TEST_BIN_DIR python -m numba.runtests numba.cuda.tests -v
 
 popd
diff --git a/conda/recipes/numba-cuda/meta.yaml b/conda/recipes/numba-cuda/meta.yaml
index 86c00476e..5aecbfcb7 100644
--- a/conda/recipes/numba-cuda/meta.yaml
+++ b/conda/recipes/numba-cuda/meta.yaml
@@ -29,6 +29,7 @@ requirements:
     - python
     - numba >=0.59.1
     - cuda-bindings
+    - cuda-core ==0.3.*
 
 about:
   home: {{ project_urls["Homepage"] }}
diff --git a/numba_cuda/numba/cuda/__init__.py b/numba_cuda/numba/cuda/__init__.py
index a17b5b186..e944fe0bf 100644
--- a/numba_cuda/numba/cuda/__init__.py
+++ b/numba_cuda/numba/cuda/__init__.py
@@ -2,11 +2,16 @@
 from numba import runtests
 from numba.core import config
 from .utils import _readenv
+import warnings
+
 
 # Enable pynvjitlink based on the following precedence:
 # 1. Config setting "CUDA_ENABLE_PYNVJITLINK" (highest priority)
 # 2. Environment variable "NUMBA_CUDA_ENABLE_PYNVJITLINK"
 # 3. Auto-detection of pynvjitlink module (lowest priority)
+
+pynvjitlink_auto_enabled = False
+
 if getattr(config, "CUDA_ENABLE_PYNVJITLINK", None) is None:
     if (
         _pynvjitlink_enabled_in_env := _readenv(
@@ -15,9 +20,10 @@
     ) is not None:
         config.CUDA_ENABLE_PYNVJITLINK = _pynvjitlink_enabled_in_env
     else:
-        config.CUDA_ENABLE_PYNVJITLINK = (
+        pynvjitlink_auto_enabled = (
             importlib.util.find_spec("pynvjitlink") is not None
         )
+        config.CUDA_ENABLE_PYNVJITLINK = pynvjitlink_auto_enabled
 
 # Upstream numba sets CUDA_USE_NVIDIA_BINDING to 0 by default, so it always
 # exists. Override, but not if explicitly set to 0 in the envioronment.
@@ -44,6 +50,21 @@
             "bindings."
         )
 
+# An explicit request for pynvjitlink (via config or the environment) is
+# redundant when the NVIDIA bindings are in use, and cannot be honoured
+# without them. A value that was only auto-detected from an installed
+# pynvjitlink is ignored in both cases: the user didn't ask for it, so
+# there is nothing to warn or fail about.
+if config.CUDA_ENABLE_PYNVJITLINK and not pynvjitlink_auto_enabled:
+    if USE_NV_BINDING:
+        warnings.warn(
+            "Explicitly enabling pynvjitlink is no longer necessary. "
+            "NVIDIA bindings are enabled. cuda.core will be used "
+            "in place of pynvjitlink."
+        )
+    else:
+        raise RuntimeError("nvJitLink requires the NVIDIA CUDA bindings.")
+
 if config.ENABLE_CUDASIM:
     from .simulator_init import *
 else:
diff --git a/numba_cuda/numba/cuda/codegen.py b/numba_cuda/numba/cuda/codegen.py
index 61c62a8f1..0b269ed52 100644
--- a/numba_cuda/numba/cuda/codegen.py
+++ b/numba_cuda/numba/cuda/codegen.py
@@ -22,7 +22,10 @@ def run_nvdisasm(cubin, flags):
     try:
         fd, fname = tempfile.mkstemp()
         with open(fname, "wb") as f:
-            f.write(cubin)
+            if config.CUDA_USE_NVIDIA_BINDING:
+                f.write(cubin.code)
+            else:
+                f.write(cubin)
 
         try:
             cp = subprocess.run(
@@ -271,7 +274,7 @@ def get_cubin(self, cc=None):
             return cubin
 
         if self._lto and config.DUMP_ASSEMBLY:
-            linker = driver.Linker.new(
+            linker = driver._Linker.new(
                 max_registers=self._max_registers,
                 cc=cc,
                 additional_flags=["-ptx"],
@@ -280,14 +283,14 @@ def get_cubin(self, cc=None):
             # `-ptx` flag is meant to view the optimized PTX for LTO objects.
             # Non-LTO objects are not passed to linker.
             self._link_all(linker, cc, ignore_nonlto=True)
-
-            ptx = linker.get_linked_ptx().decode("utf-8")
+            ptx = linker.get_linked_ptx()
+            ptx = ptx.decode("utf-8")
 
             print(("ASSEMBLY (AFTER LTO) %s" % self._name).center(80, "-"))
             print(ptx)
             print("=" * 80)
 
-        linker = driver.Linker.new(
+        linker = driver._Linker.new(
             max_registers=self._max_registers, cc=cc, lto=self._lto
         )
         self._link_all(linker, cc, ignore_nonlto=False)
@@ -312,7 +315,6 @@ def get_cufunc(self):
         cufunc = self._cufunc_cache.get(device.id, None)
         if cufunc:
             return cufunc
-
         cubin = self.get_cubin(cc=device.compute_capability)
         module = ctx.create_module_image(
             cubin, self._setup_functions, self._teardown_functions
diff --git a/numba_cuda/numba/cuda/cudadrv/driver.py b/numba_cuda/numba/cuda/cudadrv/driver.py
index a04f69753..b041d2688 100644
--- a/numba_cuda/numba/cuda/cudadrv/driver.py
+++ b/numba_cuda/numba/cuda/cudadrv/driver.py
@@ -42,6 +42,7 @@
 import numpy as np
 from collections import namedtuple, deque
 
+
 from numba import mviewbuf
 from numba.core import utils, serialize, config
 from .error import CudaSupportError, CudaDriverError
@@ -58,6 +59,22 @@
     NvJitLinker, NvJitLinkError = None, None
 
 
+USE_NV_BINDING = config.CUDA_USE_NVIDIA_BINDING
+
+if USE_NV_BINDING:
+    from cuda.bindings import driver as binding
+    from cuda.core.experimental import (
+        Linker,
+        LinkerOptions,
+        ObjectCode,
+    )
+
+    # There is no definition of the default stream in the Nvidia bindings (nor
+    # is there at the C/C++ level), so we define it here so we don't need to
+    # use a magic number 0 in places where we want the default stream.
+    CU_STREAM_DEFAULT = 0
+
+
 MIN_REQUIRED_CC = (3, 5)
 SUPPORTS_IPC = sys.platform.startswith("linux")
 
@@ -108,6 +125,25 @@ def make_logger():
     return logger
 
 
+@functools.cache
+def _have_nvjitlink():
+    """Check that the NVIDIA bindings can locate the nvJitLink library."""
+    if not USE_NV_BINDING:
+        return False
+    try:
+        from cuda.bindings._internal import nvjitlink as _nvjitlink
+        from cuda.bindings._internal.utils import NotSupportedError
+    except ImportError:
+        # The internal nvjitlink bindings are not importable
+        return False
+    try:
+        ptr = _nvjitlink._inspect_function_pointer("__nvJitLinkVersion")
+    except NotSupportedError:
+        # no driver
+        return False
+    return ptr != 0
+
+
 class DeadMemoryError(RuntimeError):
     pass
 
@@ -1472,7 +1508,7 @@ def create_module_ptx(self, ptx):
         if isinstance(ptx, str):
             ptx = ptx.encode("utf8")
         if USE_NV_BINDING:
-            image = ptx
+            image = ObjectCode.from_ptx(ptx)
         else:
             image = c_char_p(ptx)
         return self.create_module_image(image)
@@ -1615,7 +1651,6 @@ def load_module_image_ctypes(
 
     option_keys = (drvapi.cu_jit_option * len(options))(*options.keys())
     option_vals = (c_void_p * len(options))(*options.values())
-
     handle = drvapi.cu_module()
     try:
         driver.cuModuleLoadDataEx(
@@ -1662,7 +1697,7 @@ def load_module_image_cuda_python(
 
     try:
         handle = driver.cuModuleLoadDataEx(
-            image, len(options), option_keys, option_vals
+            image.code, len(options), option_keys, option_vals
         )
     except CudaAPIError as e:
         err_string = jiterrors.decode("utf-8")
@@ -2722,7 +2757,7 @@ def launch_kernel(
         )
 
 
-class Linker(metaclass=ABCMeta):
+class _LinkerBase(metaclass=ABCMeta):
     """Abstract base class for linkers"""
 
     @classmethod
@@ -2735,30 +2770,27 @@ def new(
         additional_flags=None,
     ):
         driver_ver = driver.get_version()
-        if config.CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY and driver_ver >= (
-            12,
-            0,
-        ):
-            raise ValueError("Use CUDA_ENABLE_PYNVJITLINK for CUDA >= 12.0 MVC")
-        if config.CUDA_ENABLE_PYNVJITLINK and driver_ver < (12, 0):
-            raise ValueError("Enabling pynvjitlink requires CUDA 12.")
-        if config.CUDA_ENABLE_PYNVJITLINK:
-            linker = PyNvJitLinker
-
-        elif config.CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY:
-            linker = MVCLinker
+        if driver_ver < (12, 0):
+            if config.CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY:
+                linker = MVCLinker
+            elif USE_NV_BINDING:
+                linker = _Linker
+            else:
+                linker = CtypesLinker
         else:
             if USE_NV_BINDING:
-                linker = CudaPythonLinker
+                linker = _Linker
             else:
                 linker = CtypesLinker
 
-        if linker is PyNvJitLinker:
-            return linker(max_registers, lineinfo, cc, lto, additional_flags)
-        elif additional_flags or lto:
-            raise ValueError("LTO and additional flags require PyNvJitLinker")
+        params = (max_registers, lineinfo, cc)
+        if linker is _Linker:
+            params = (*params, lto, additional_flags)
         else:
-            return linker(max_registers, lineinfo, cc)
+            if lto or additional_flags:
+                raise ValueError("LTO and additional flags require nvjitlink")
+
+        return linker(*params)
 
     @abstractmethod
     def __init__(self, max_registers, lineinfo, cc):
@@ -2786,7 +2818,6 @@ def add_cu(self, cu, name):
         with driver.get_active_context() as ac:
             dev = driver.get_device(ac.devnum)
             cc = dev.compute_capability
-
         ptx, log = nvrtc.compile(cu, name, cc)
 
         if config.DUMP_ASSEMBLY:
@@ -2821,7 +2852,6 @@ def add_file_guess_ext(self, path_or_code, ignore_nonlto=False):
         LTO-ed portion of the PTX when linker is added with objects that can be
         both LTO-ed and not LTO-ed.
         """
-
         if isinstance(path_or_code, str):
             ext = pathlib.Path(path_or_code).suffix
             if ext == "":
@@ -2901,7 +2931,182 @@ def complete(self):
         """
 
 
-class MVCLinker(Linker):
+class _Linker(_LinkerBase):
+    """
+    Linker backed by the ``cuda.core`` ``Linker``, selected by
+    ``_LinkerBase.new`` when the NVIDIA bindings are enabled.
+
+    The ``add_*`` methods wrap each input in an ``ObjectCode`` and queue it;
+    the ``cuda.core`` linker is only created by :meth:`complete` or
+    :meth:`get_linked_ptx`, because it is constructed from all of its inputs.
+    """
+
+    def __init__(
+        self,
+        max_registers=None,
+        lineinfo=False,
+        cc=None,
+        lto=None,
+        additional_flags=None,
+    ):
+        if cc is None:
+            # Link for the current device if no CC is given, rather than
+            # failing with a TypeError while formatting the arch string.
+            with driver.get_active_context() as ac:
+                cc = driver.get_device(ac.devnum).compute_capability
+        arch = f"sm_{cc[0]}{cc[1]}"
+        # Falsy values (e.g. 0) are passed to LinkerOptions as None
+        self.max_registers = max_registers if max_registers else None
+        self.lineinfo = lineinfo
+        self.cc = cc
+        self.arch = arch
+        if lto is False:
+            # WAR for apparent nvjitlink issue
+            lto = None
+        self.lto = lto
+        # Kept for reference only; not forwarded to LinkerOptions
+        self.additional_flags = additional_flags
+
+        self.options = LinkerOptions(
+            max_register_count=self.max_registers,
+            lineinfo=lineinfo,
+            arch=arch,
+            link_time_optimization=lto,
+        )
+        self._complete = False
+        self._object_codes = []
+        self.linker = None  # need at least one program
+
+    @property
+    def info_log(self):
+        """Info log of the finished link; only available once complete."""
+        if not self.linker:
+            raise ValueError("Not Initialized")
+        if self._complete:
+            return self._info_log
+        raise RuntimeError("Link not yet complete.")
+
+    @property
+    def error_log(self):
+        """Error log of the finished link; only available once complete."""
+        if not self.linker:
+            raise ValueError("Not Initialized")
+        if self._complete:
+            return self._error_log
+        raise RuntimeError("Link not yet complete.")
+
+    def add_ptx(self, ptx, name="<cudapy-ptx>"):
+        obj = ObjectCode.from_ptx(ptx, name=name)
+        self._object_codes.append(obj)
+
+    def add_cu(self, cu, name="<cudapy-cu>"):
+        """
+        Compile CUDA source with NVRTC for the current device and queue the
+        resulting object code for linking.
+        """
+        with driver.get_active_context() as ac:
+            dev = driver.get_device(ac.devnum)
+            cc = dev.compute_capability
+        obj, log = nvrtc.compile(cu, name, cc, ltoir=self.lto)
+
+        if not self.lto and config.DUMP_ASSEMBLY:
+            code = obj.code
+            if isinstance(code, (bytes, bytearray)):
+                # Decode so the dump is readable rather than a bytes repr
+                code = code.decode("utf-8", errors="replace")
+            print(("ASSEMBLY %s" % name).center(80, "-"))
+            print(code)
+            print("=" * 80)
+
+        self._object_codes.append(obj)
+
+    def add_cubin(self, cubin, name="<cudapy-cubin>"):
+        obj = ObjectCode.from_cubin(cubin, name=name)
+        self._object_codes.append(obj)
+
+    def add_ltoir(self, ltoir, name="<cudapy-ltoir>"):
+        obj = ObjectCode.from_ltoir(ltoir, name=name)
+        self._object_codes.append(obj)
+
+    def add_fatbin(self, fatbin, name="<cudapy-fatbin>"):
+        obj = ObjectCode.from_fatbin(fatbin, name=name)
+        self._object_codes.append(obj)
+
+    def add_object(self, obj, name="<cudapy-object>"):
+        obj = ObjectCode.from_object(obj, name=name)
+        self._object_codes.append(obj)
+
+    def add_library(self, lib, name="<cudapy-lib>"):
+        obj = ObjectCode.from_library(lib, name=name)
+        self._object_codes.append(obj)
+
+    def add_file(self, path, kind):
+        try:
+            data = cached_file_read(path, how="rb")
+        except FileNotFoundError:
+            raise LinkerError(f"{path} not found")
+        name = pathlib.Path(path).name
+        self.add_data(data, kind, name)
+
+    def add_data(self, data, kind, name):
+        if kind == FILE_EXTENSION_MAP["ptx"]:
+            fn = self.add_ptx
+        elif kind == FILE_EXTENSION_MAP["cubin"]:
+            fn = self.add_cubin
+        elif kind == "cu":
+            fn = self.add_cu
+        elif (
+            kind == FILE_EXTENSION_MAP["lib"] or kind == FILE_EXTENSION_MAP["a"]
+        ):
+            fn = self.add_library
+        elif kind == FILE_EXTENSION_MAP["fatbin"]:
+            fn = self.add_fatbin
+        elif kind == FILE_EXTENSION_MAP["o"]:
+            fn = self.add_object
+        elif kind == FILE_EXTENSION_MAP["ltoir"]:
+            fn = self.add_ltoir
+        else:
+            raise LinkerError(f"Don't know how to link {kind}")
+
+        fn(data, name)
+
+    def get_linked_ptx(self):
+        """Link the queued inputs to PTX with LTO on; return the PTX code."""
+        options = LinkerOptions(
+            max_register_count=self.max_registers,
+            lineinfo=self.lineinfo,
+            arch=self.arch,
+            link_time_optimization=True,
+            ptx=True,
+        )
+        return self._link(options, "ptx").code
+
+    def close(self):
+        """Save the linker logs and release the underlying linker."""
+        if self.linker is None:
+            # Nothing was ever linked, so there is nothing to release
+            return
+        self._info_log = self.linker.get_info_log()
+        self._error_log = self.linker.get_error_log()
+        self.linker.close()
+
+    def complete(self):
+        """Link the queued inputs to a cubin and return the link result."""
+        return self._link(self.options, "cubin")
+
+    def _link(self, options, target_type):
+        """Create the linker, link to ``target_type`` and always close it."""
+        self.linker = Linker(*self._object_codes, options=options)
+        try:
+            result = self.linker.link(target_type)
+        finally:
+            # Release the linker even when linking fails
+            self.close()
+        self._complete = True
+        return result
+
+
+class MVCLinker(_LinkerBase):
     """
     Linker supporting Minor Version Compatibility, backed by the cubinlinker
     package.
@@ -2996,7 +3167,7 @@ def complete(self):
             raise LinkerError from e
 
 
-class CtypesLinker(Linker):
+class CtypesLinker(_LinkerBase):
     """
     Links for current device if no CC given
     """
@@ -3139,266 +3310,6 @@ def complete(self):
         return bytes(np.ctypeslib.as_array(cubin_ptr, shape=(size,)))
 
 
-class CudaPythonLinker(Linker):
-    """
-    Links for current device if no CC given
-    """
-
-    def __init__(self, max_registers=0, lineinfo=False, cc=None):
-        super().__init__(max_registers, lineinfo, cc)
-
-        logsz = config.CUDA_LOG_SIZE
-        linkerinfo = bytearray(logsz)
-        linkererrors = bytearray(logsz)
-
-        jit_option = binding.CUjit_option
-
-        options = {
-            jit_option.CU_JIT_INFO_LOG_BUFFER: linkerinfo,
-            jit_option.CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES: logsz,
-            jit_option.CU_JIT_ERROR_LOG_BUFFER: linkererrors,
-            jit_option.CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES: logsz,
-            jit_option.CU_JIT_LOG_VERBOSE: 1,
-        }
-        if max_registers:
-            options[jit_option.CU_JIT_MAX_REGISTERS] = max_registers
-        if lineinfo:
-            options[jit_option.CU_JIT_GENERATE_LINE_INFO] = 1
-
-        if cc is None:
-            # No option value is needed, but we need something as a placeholder
-            options[jit_option.CU_JIT_TARGET_FROM_CUCONTEXT] = 1
-        else:
-            cc_val = cc[0] * 10 + cc[1]
-            cc_enum = getattr(
-                binding.CUjit_target, f"CU_TARGET_COMPUTE_{cc_val}"
-            )
-            options[jit_option.CU_JIT_TARGET] = cc_enum
-
-        raw_keys = list(options.keys())
-        raw_values = list(options.values())
-        self.handle = driver.cuLinkCreate(len(raw_keys), raw_keys, raw_values)
-
-        weakref.finalize(self, driver.cuLinkDestroy, self.handle)
-
-        self.linker_info_buf = linkerinfo
-        self.linker_errors_buf = linkererrors
-
-        self._keep_alive = [linkerinfo, linkererrors, raw_keys, raw_values]
-
-    @property
-    def info_log(self):
-        return self.linker_info_buf.decode("utf8")
-
-    @property
-    def error_log(self):
-        return self.linker_errors_buf.decode("utf8")
-
-    def add_cubin(self, cubin, name="<unnamed-cubin>"):
-        input_type = binding.CUjitInputType.CU_JIT_INPUT_CUBIN
-        return self._add_data(input_type, cubin, name)
-
-    def add_ptx(self, ptx, name="<unnamed-ptx>"):
-        input_type = binding.CUjitInputType.CU_JIT_INPUT_PTX
-        return self._add_data(input_type, ptx, name)
-
-    def add_object(self, object_, name="<unnamed-object>"):
-        input_type = binding.CUjitInputType.CU_JIT_INPUT_OBJECT
-        return self._add_data(input_type, object_, name)
-
-    def add_fatbin(self, fatbin, name="<unnamed-fatbin>"):
-        input_type = binding.CUjitInputType.CU_JIT_INPUT_FATBINARY
-        return self._add_data(input_type, fatbin, name)
-
-    def add_library(self, library, name="<unnamed-library>"):
-        input_type = binding.CUjitInputType.CU_JIT_INPUT_LIBRARY
-        return self._add_data(input_type, library, name)
-
-    def _add_data(self, input_type, data, name):
-        name_buffer = name.encode("utf8")
-        self._keep_alive += [data, name_buffer]
-        try:
-            driver.cuLinkAddData(
-                self.handle, input_type, data, len(data), name_buffer, 0, [], []
-            )
-        except CudaAPIError as e:
-            raise LinkerError("%s\n%s" % (e, self.error_log))
-
-    def add_data(self, data, kind, name=None):
-        # We pass the name as **kwargs to ensure the default name for the input
-        # type is used if none is supplied
-        kws = {}
-        if name is not None:
-            kws["name"] = name
-
-        if kind == FILE_EXTENSION_MAP["cubin"]:
-            self.add_cubin(data, **kws)
-        elif kind == FILE_EXTENSION_MAP["fatbin"]:
-            self.add_fatbin(data, **kws)
-        elif kind == FILE_EXTENSION_MAP["a"]:
-            self.add_library(data, **kws)
-        elif kind == FILE_EXTENSION_MAP["ptx"]:
-            self.add_ptx(data, **kws)
-        elif kind == FILE_EXTENSION_MAP["o"]:
-            self.add_object(data, **kws)
-        elif kind == FILE_EXTENSION_MAP["ltoir"]:
-            raise LinkerError("CudaPythonLinker cannot link LTO-IR")
-        else:
-            raise LinkerError(f"Don't know how to link {kind}")
-
-    def add_file(self, path, kind):
-        pathbuf = path.encode("utf8")
-        self._keep_alive.append(pathbuf)
-
-        try:
-            driver.cuLinkAddFile(self.handle, kind, pathbuf, 0, [], [])
-        except CudaAPIError as e:
-            if e.code == binding.CUresult.CUDA_ERROR_FILE_NOT_FOUND:
-                msg = f"{path} not found"
-            else:
-                msg = "%s\n%s" % (e, self.error_log)
-            raise LinkerError(msg)
-
-    def complete(self):
-        try:
-            cubin_buf, size = driver.cuLinkComplete(self.handle)
-        except CudaAPIError as e:
-            raise LinkerError("%s\n%s" % (e, self.error_log))
-
-        assert size > 0, "linker returned a zero sized cubin"
-        del self._keep_alive[:]
-        # We return a copy of the cubin because it's owned by the linker
-        cubin_ptr = ctypes.cast(cubin_buf, ctypes.POINTER(ctypes.c_char))
-        return bytes(np.ctypeslib.as_array(cubin_ptr, shape=(size,)))
-
-
-class PyNvJitLinker(Linker):
-    def __init__(
-        self,
-        max_registers=None,
-        lineinfo=False,
-        cc=None,
-        lto=False,
-        additional_flags=None,
-    ):
-        if NvJitLinker is None:
-            raise ImportError(
-                "Using pynvjitlink requires the pynvjitlink package to be "
-                "available"
-            )
-
-        if config.CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY:
-            raise ValueError(
-                "Can't set CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY and "
-                "CUDA_ENABLE_PYNVJITLINK at the same time"
-            )
-
-        if cc is None:
-            raise RuntimeError("PyNvJitLinker requires CC to be specified")
-        if not any(isinstance(cc, t) for t in [list, tuple]):
-            raise TypeError("`cc` must be a list or tuple of length 2")
-
-        sm_ver = f"{cc[0] * 10 + cc[1]}"
-        arch = f"-arch=sm_{sm_ver}"
-        options = [arch]
-        if max_registers:
-            options.append(f"-maxrregcount={max_registers}")
-        if lineinfo:
-            options.append("-lineinfo")
-        if lto:
-            options.append("-lto")
-        if additional_flags is not None:
-            options.extend(additional_flags)
-
-        self._linker = NvJitLinker(*options)
-        self.lto = lto
-        self.options = options
-
-    @property
-    def info_log(self):
-        return self._linker.info_log
-
-    @property
-    def error_log(self):
-        return self._linker.error_log
-
-    def add_ptx(self, ptx, name="<cudapy-ptx>"):
-        self._linker.add_ptx(ptx, name)
-
-    def add_fatbin(self, fatbin, name="<external-fatbin>"):
-        self._linker.add_fatbin(fatbin, name)
-
-    def add_ltoir(self, ltoir, name="<external-ltoir>"):
-        self._linker.add_ltoir(ltoir, name)
-
-    def add_object(self, obj, name="<external-object>"):
-        self._linker.add_object(obj, name)
-
-    def add_file(self, path, kind):
-        try:
-            data = cached_file_read(path, "rb")
-        except FileNotFoundError:
-            raise LinkerError(f"{path} not found")
-
-        name = pathlib.Path(path).name
-        self.add_data(data, kind, name)
-
-    def add_cu(self, cu, name):
-        """Add CUDA source in a string to the link. The name of the source
-        file should be specified in `name`."""
-        with driver.get_active_context() as ac:
-            dev = driver.get_device(ac.devnum)
-            cc = dev.compute_capability
-
-        program, log = nvrtc.compile(cu, name, cc, ltoir=self.lto)
-
-        if not self.lto and config.DUMP_ASSEMBLY:
-            print(("ASSEMBLY %s" % name).center(80, "-"))
-            print(program)
-            print("=" * 80)
-
-        suffix = ".ltoir" if self.lto else ".ptx"
-        program_name = os.path.splitext(name)[0] + suffix
-        # Link the program's PTX or LTOIR using the normal linker mechanism
-        if self.lto:
-            self.add_ltoir(program, program_name)
-        else:
-            self.add_ptx(program.encode(), program_name)
-
-    def add_data(self, data, kind, name):
-        if kind == FILE_EXTENSION_MAP["cubin"]:
-            fn = self._linker.add_cubin
-        elif kind == FILE_EXTENSION_MAP["fatbin"]:
-            fn = self._linker.add_fatbin
-        elif kind == FILE_EXTENSION_MAP["a"]:
-            fn = self._linker.add_library
-        elif kind == FILE_EXTENSION_MAP["ptx"]:
-            return self.add_ptx(data, name)
-        elif kind == FILE_EXTENSION_MAP["o"]:
-            fn = self._linker.add_object
-        elif kind == FILE_EXTENSION_MAP["ltoir"]:
-            fn = self._linker.add_ltoir
-        else:
-            raise LinkerError(f"Don't know how to link {kind}")
-
-        try:
-            fn(data, name)
-        except NvJitLinkError as e:
-            raise LinkerError from e
-
-    def get_linked_ptx(self):
-        try:
-            return self._linker.get_linked_ptx()
-        except NvJitLinkError as e:
-            raise LinkerError from e
-
-    def complete(self):
-        try:
-            return self._linker.get_linked_cubin()
-        except NvJitLinkError as e:
-            raise LinkerError from e
-
-
 # -----------------------------------------------------------------------------
 
 
diff --git a/numba_cuda/numba/cuda/cudadrv/mappings.py b/numba_cuda/numba/cuda/cudadrv/mappings.py
index ca785e681..3ddc60724 100644
--- a/numba_cuda/numba/cuda/cudadrv/mappings.py
+++ b/numba_cuda/numba/cuda/cudadrv/mappings.py
@@ -2,17 +2,16 @@
 from . import enums
 
 if config.CUDA_USE_NVIDIA_BINDING:
-    from cuda.bindings import driver
+    from cuda.bindings.driver import CUjitInputType
 
-    jitty = driver.CUjitInputType
     FILE_EXTENSION_MAP = {
-        "o": jitty.CU_JIT_INPUT_OBJECT,
-        "ptx": jitty.CU_JIT_INPUT_PTX,
-        "a": jitty.CU_JIT_INPUT_LIBRARY,
-        "lib": jitty.CU_JIT_INPUT_LIBRARY,
-        "cubin": jitty.CU_JIT_INPUT_CUBIN,
-        "fatbin": jitty.CU_JIT_INPUT_FATBINARY,
-        "ltoir": jitty.CU_JIT_INPUT_NVVM,
+        "o": CUjitInputType.CU_JIT_INPUT_OBJECT,
+        "ptx": CUjitInputType.CU_JIT_INPUT_PTX,
+        "a": CUjitInputType.CU_JIT_INPUT_LIBRARY,
+        "lib": CUjitInputType.CU_JIT_INPUT_LIBRARY,
+        "cubin": CUjitInputType.CU_JIT_INPUT_CUBIN,
+        "fatbin": CUjitInputType.CU_JIT_INPUT_FATBINARY,
+        "ltoir": CUjitInputType.CU_JIT_INPUT_NVVM,
     }
 else:
     FILE_EXTENSION_MAP = {
diff --git a/numba_cuda/numba/cuda/cudadrv/nvrtc.py b/numba_cuda/numba/cuda/cudadrv/nvrtc.py
index b01c8c60c..2513e2a48 100644
--- a/numba_cuda/numba/cuda/cudadrv/nvrtc.py
+++ b/numba_cuda/numba/cuda/cudadrv/nvrtc.py
@@ -27,6 +27,9 @@
 # Result code
 nvrtc_result = c_int
 
+if config.CUDA_USE_NVIDIA_BINDING:
+    from cuda.core.experimental import Program, ProgramOptions
+
 
 class NvrtcResult(IntEnum):
     NVRTC_SUCCESS = 0
@@ -374,10 +377,14 @@ def compile(src, name, cc, ltoir=False):
     # - Relocatable Device Code (rdc) is needed to prevent device functions
     #   being optimized away.
     major, minor = found
-    arch = f"--gpu-architecture=compute_{major}{minor}"
+
+    if config.CUDA_USE_NVIDIA_BINDING:
+        arch = f"sm_{major}{minor}"
+    else:
+        arch = f"--gpu-architecture=compute_{major}{minor}"
 
     cuda_include = [
-        f"-I{get_cuda_paths()['include_dir'].info}",
+        f"{get_cuda_paths()['include_dir'].info}",
     ]
 
     nvrtc_version = nvrtc.get_version()
@@ -387,54 +394,83 @@ def compile(src, name, cc, ltoir=False):
     numba_cuda_path = os.path.dirname(cudadrv_path)
 
     if nvrtc_ver_major == 11:
-        numba_include = f"-I{os.path.join(numba_cuda_path, 'include', '11')}"
+        numba_include = f"{os.path.join(numba_cuda_path, 'include', '11')}"
     else:
-        numba_include = f"-I{os.path.join(numba_cuda_path, 'include', '12')}"
+        numba_include = f"{os.path.join(numba_cuda_path, 'include', '12')}"
 
     if config.CUDA_NVRTC_EXTRA_SEARCH_PATHS:
-        extra_search_paths = config.CUDA_NVRTC_EXTRA_SEARCH_PATHS.split(":")
-        extra_includes = [f"-I{p}" for p in extra_search_paths]
+        extra_includes = config.CUDA_NVRTC_EXTRA_SEARCH_PATHS.split(":")
     else:
         extra_includes = []
 
-    nrt_path = os.path.join(numba_cuda_path, "memory_management")
-    nrt_include = f"-I{nrt_path}"
-
-    options = [
-        arch,
-        numba_include,
-        *cuda_include,
-        nrt_include,
-        *extra_includes,
-        "-rdc",
-        "true",
-    ]
+    nrt_include = os.path.join(numba_cuda_path, "memory_management")
 
-    if ltoir:
-        options.append("-dlto")
+    includes = [numba_include, *cuda_include, nrt_include, *extra_includes]
 
-    if nvrtc_version < (12, 0):
-        options += ["-std=c++17"]
+    if config.CUDA_USE_NVIDIA_BINDING:
+        options = ProgramOptions(
+            arch=arch,
+            include_path=includes,
+            relocatable_device_code=True,
+            std="c++17" if nvrtc_version < (12, 0) else None,
+            link_time_optimization=ltoir,
+            name=name,
+        )
 
-    # Compile the program
-    compile_error = nvrtc.compile_program(program, options)
+        class Logger:
+            def __init__(self):
+                self.log = []
 
-    # Get log from compilation
-    log = nvrtc.get_compile_log(program)
+            def write(self, msg):
+                self.log.append(msg)
 
-    # If the compile failed, provide the log in an exception
-    if compile_error:
-        msg = f"NVRTC Compilation failure whilst compiling {name}:\n\n{log}"
-        raise NvrtcError(msg)
+        logger = Logger()
+        if isinstance(src, bytes):
+            src = src.decode("utf8")
 
-    # Otherwise, if there's any content in the log, present it as a warning
-    if log:
-        msg = f"NVRTC log messages whilst compiling {name}:\n\n{log}"
-        warnings.warn(msg)
+        prog = Program(src, "c++", options=options)
+        result = prog.compile("ltoir" if ltoir else "ptx", logs=logger)
+        # Return the log as one str, like the non-binding branch below
+        log = "\n".join(logger.log)
+        if log:
+            msg = f"NVRTC log messages whilst compiling {name}:\n\n{log}"
+            warnings.warn(msg)
+        return result, log
 
-    if ltoir:
-        ltoir = nvrtc.get_lto(program)
-        return ltoir, log
     else:
-        ptx = nvrtc.get_ptx(program)
-        return ptx, log
+        includes = [f"-I{path}" for path in includes]
+        options = [
+            arch,
+            *includes,
+            "-rdc",
+            "true",
+        ]
+
+        if ltoir:
+            options.append("-dlto")
+
+        if nvrtc_version < (12, 0):
+            options.append("-std=c++17")
+
+        # Compile the program
+        compile_error = nvrtc.compile_program(program, options)
+
+        # Get log from compilation
+        log = nvrtc.get_compile_log(program)
+
+        # If the compile failed, provide the log in an exception
+        if compile_error:
+            msg = f"NVRTC Compilation failure whilst compiling {name}:\n\n{log}"
+            raise NvrtcError(msg)
+
+        # Otherwise, if there's any content in the log, present it as a warning
+        if log:
+            msg = f"NVRTC log messages whilst compiling {name}:\n\n{log}"
+            warnings.warn(msg)
+
+        if ltoir:
+            ltoir = nvrtc.get_lto(program)
+            return ltoir, log
+        else:
+            ptx = nvrtc.get_ptx(program)
+            return ptx, log
diff --git a/numba_cuda/numba/cuda/decorators.py b/numba_cuda/numba/cuda/decorators.py
index 704d7c9f1..d1b096a05 100644
--- a/numba_cuda/numba/cuda/decorators.py
+++ b/numba_cuda/numba/cuda/decorators.py
@@ -4,6 +4,7 @@
 from numba.cuda.compiler import declare_device_function
 from numba.cuda.dispatcher import CUDADispatcher
 from numba.cuda.simulator.kernel import FakeCUDAKernel
+from numba.cuda.cudadrv.driver import _have_nvjitlink
 
 
 _msg_deprecated_signature_arg = (
@@ -143,10 +144,13 @@ def jit(
 
     if lto is None:
-        # Default to using LTO if pynvjitlink is available and we're not debugging
-        lto = config.CUDA_ENABLE_PYNVJITLINK and not debug
+        # Default to using LTO if nvjitlink is available and we're not debugging
+        lto = _have_nvjitlink() and not debug
     else:
-        if lto and not config.CUDA_ENABLE_PYNVJITLINK:
-            raise RuntimeError("LTO requires pynvjitlink, which is not enabled")
+        if lto and not _have_nvjitlink():
+            raise RuntimeError(
+                "LTO requires nvjitlink, which is not available "
+                "or not sufficiently recent (>=12.3)"
+            )
 
     if sigutils.is_signature(func_or_sig):
         signatures = [func_or_sig]
diff --git a/numba_cuda/numba/cuda/dispatcher.py b/numba_cuda/numba/cuda/dispatcher.py
index f7f7f8948..08bd96098 100644
--- a/numba_cuda/numba/cuda/dispatcher.py
+++ b/numba_cuda/numba/cuda/dispatcher.py
@@ -129,6 +129,7 @@ def __init__(
             nvvm_options["g"] = None
 
         cc = get_current_device().compute_capability
+
         cres = compile_cuda(
             self.py_func,
             types.void,
diff --git a/numba_cuda/numba/cuda/memory_management/nrt.py b/numba_cuda/numba/cuda/memory_management/nrt.py
index 8edf40b2e..d6e4f53ec 100644
--- a/numba_cuda/numba/cuda/memory_management/nrt.py
+++ b/numba_cuda/numba/cuda/memory_management/nrt.py
@@ -6,7 +6,7 @@
 from numba import cuda, config
 from numba.core.runtime.nrt import _nrt_mstats
 from numba.cuda.cudadrv.driver import (
-    Linker,
+    _Linker,
     driver,
     launch_kernel,
     USE_NV_BINDING,
@@ -80,7 +80,7 @@ def _compile_memsys_module(self):
         cc = get_current_device().compute_capability
 
         # Create a new linker instance and add the cu file
-        linker = Linker.new(cc=cc)
+        linker = _Linker.new(cc=cc)
         linker.add_cu_file(memsys_mod)
 
         # Complete the linker and create a module from it
diff --git a/numba_cuda/numba/cuda/simulator/cudadrv/driver.py b/numba_cuda/numba/cuda/simulator/cudadrv/driver.py
index 924924c93..8376969f1 100644
--- a/numba_cuda/numba/cuda/simulator/cudadrv/driver.py
+++ b/numba_cuda/numba/cuda/simulator/cudadrv/driver.py
@@ -34,10 +34,10 @@ def get_device_count(self):
 driver = FakeDriver()
 
 
-class Linker:
+class _Linker:
     @classmethod
     def new(cls, max_registers=0, lineinfo=False, cc=None):
-        return Linker()
+        return _Linker()
 
     @property
     def lto(self):
@@ -67,3 +67,7 @@ def launch_kernel(*args, **kwargs):
 
 if config.ENABLE_CUDASIM:
     config.CUDA_ENABLE_PYNVJITLINK = False
+
+
+def _have_nvjitlink():
+    return False
diff --git a/numba_cuda/numba/cuda/testing.py b/numba_cuda/numba/cuda/testing.py
index d0bb25b68..745514a9f 100644
--- a/numba_cuda/numba/cuda/testing.py
+++ b/numba_cuda/numba/cuda/testing.py
@@ -39,7 +39,7 @@ def skip_if_lto(self, reason):
         # Some linkers need the compute capability to be specified, so we
         # always specify it here.
         cc = devices.get_context().device.compute_capability
-        linker = driver.Linker.new(cc=cc)
+        linker = driver._Linker.new(cc=cc)
         if linker.lto:
             self.skipTest(reason)
 
diff --git a/numba_cuda/numba/cuda/tests/cudadrv/test_linker.py b/numba_cuda/numba/cuda/tests/cudadrv/test_linker.py
index 6b16216f4..ed503dab6 100644
--- a/numba_cuda/numba/cuda/tests/cudadrv/test_linker.py
+++ b/numba_cuda/numba/cuda/tests/cudadrv/test_linker.py
@@ -1,14 +1,14 @@
 import numpy as np
 import warnings
+from numba import config
 from numba.cuda.testing import unittest
 from numba.cuda.testing import skip_on_cudasim, skip_if_cuda_includes_missing
 from numba.cuda.testing import CUDATestCase, test_data_dir
-from numba.cuda.cudadrv.driver import CudaAPIError, Linker, LinkerError
-from numba.cuda.cudadrv.error import NvrtcError
+from numba.cuda.cudadrv.driver import CudaAPIError, _Linker, LinkerError
 from numba.cuda import require_context
 from numba.tests.support import ignore_internal_warnings
 from numba import cuda, void, float64, int64, int32, typeof, float32
-
+from numba.cuda.cudadrv.error import NvrtcError
 
 CONST1D = np.arange(10, dtype=np.float64)
 
@@ -107,7 +107,7 @@ class TestLinker(CUDATestCase):
     @require_context
     def test_linker_basic(self):
         """Simply go through the constructor and destructor"""
-        linker = Linker.new(cc=(5, 3))
+        linker = _Linker.new(cc=(7, 5))
         del linker
 
     def _test_linking(self, eager):
@@ -183,7 +183,13 @@ def test_linking_cu_error(self):
 
         link = str(test_data_dir / "error.cu")
 
-        with self.assertRaises(NvrtcError) as e:
+        if config.CUDA_USE_NVIDIA_BINDING:
+            from cuda.core.experimental._utils.cuda_utils import NVRTCError
+
+            errty = NVRTCError
+        else:
+            errty = NvrtcError
+        with self.assertRaises(errty) as e:
 
             @cuda.jit("void(int32)", link=[link])
             def kernel(x):
@@ -191,7 +197,12 @@ def kernel(x):
 
         msg = e.exception.args[0]
         # Check the error message refers to the NVRTC compile
-        self.assertIn("NVRTC Compilation failure", msg)
+        nvrtc_err_str = (
+            "NVRTC_ERROR_COMPILATION"
+            if config.CUDA_USE_NVIDIA_BINDING
+            else "NVRTC Compilation failure"
+        )
+        self.assertIn(nvrtc_err_str, msg)
         # Check the expected error in the CUDA source is reported
         self.assertIn('identifier "SYNTAX" is undefined', msg)
         # Check the filename is reported correctly
diff --git a/numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py b/numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py
index 8dca18901..97a580187 100644
--- a/numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py
+++ b/numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py
@@ -1,22 +1,12 @@
 from numba.cuda.testing import unittest
 from numba.cuda.testing import skip_on_cudasim
 from numba.cuda.testing import CUDATestCase
-from numba.cuda.cudadrv.driver import PyNvJitLinker
 from numba.cuda import get_current_device
+from numba.cuda.cudadrv.driver import _Linker, _have_nvjitlink
 
 from numba import cuda
 from numba import config
-from numba.tests.support import run_in_subprocess, override_config
 
-try:
-    import pynvjitlink  # noqa: F401
-
-    PYNVJITLINK_INSTALLED = True
-except ImportError:
-    PYNVJITLINK_INSTALLED = False
-
-
-import itertools
 import os
 import io
 import contextlib
@@ -52,85 +42,13 @@
 
 
 @unittest.skipIf(
-    not config.CUDA_ENABLE_PYNVJITLINK or not TEST_BIN_DIR,
-    "pynvjitlink not enabled",
+    not config.CUDA_USE_NVIDIA_BINDING
+    or not TEST_BIN_DIR
+    or not _have_nvjitlink(),
+    "NVIDIA cuda bindings not enabled or nvJitLink not installed or not new enough (>=12.3)",
 )
 @skip_on_cudasim("Linking unsupported in the simulator")
 class TestLinker(CUDATestCase):
-    def test_nvjitlink_create(self):
-        patched_linker = PyNvJitLinker(cc=(7, 5))
-        assert "-arch=sm_75" in patched_linker.options
-
-    def test_nvjitlink_create_no_cc_error(self):
-        # nvJitLink expects at least the architecture to be specified.
-        with self.assertRaisesRegex(
-            RuntimeError, "PyNvJitLinker requires CC to be specified"
-        ):
-            PyNvJitLinker()
-
-    def test_nvjitlink_invalid_arch_error(self):
-        from pynvjitlink.api import NvJitLinkError
-
-        # CC 0.0 is not a valid compute capability
-        with self.assertRaisesRegex(
-            NvJitLinkError, "NVJITLINK_ERROR_UNRECOGNIZED_OPTION error"
-        ):
-            PyNvJitLinker(cc=(0, 0))
-
-    def test_nvjitlink_invalid_cc_type_error(self):
-        with self.assertRaisesRegex(
-            TypeError, "`cc` must be a list or tuple of length 2"
-        ):
-            PyNvJitLinker(cc=0)
-
-    def test_nvjitlink_ptx_compile_options(self):
-        max_registers = (None, 32)
-        lineinfo = (False, True)
-        lto = (False, True)
-        additional_flags = (None, ("-g",), ("-g", "-time"))
-        for (
-            max_registers_i,
-            line_info_i,
-            lto_i,
-            additional_flags_i,
-        ) in itertools.product(max_registers, lineinfo, lto, additional_flags):
-            with self.subTest(
-                max_registers=max_registers_i,
-                lineinfo=line_info_i,
-                lto=lto_i,
-                additional_flags=additional_flags_i,
-            ):
-                patched_linker = PyNvJitLinker(
-                    cc=(7, 5),
-                    max_registers=max_registers_i,
-                    lineinfo=line_info_i,
-                    lto=lto_i,
-                    additional_flags=additional_flags_i,
-                )
-                assert "-arch=sm_75" in patched_linker.options
-
-                if max_registers_i:
-                    assert (
-                        f"-maxrregcount={max_registers_i}"
-                        in patched_linker.options
-                    )
-                else:
-                    assert "-maxrregcount" not in patched_linker.options
-
-                if line_info_i:
-                    assert "-lineinfo" in patched_linker.options
-                else:
-                    assert "-lineinfo" not in patched_linker.options
-
-                if lto_i:
-                    assert "-lto" in patched_linker.options
-                else:
-                    assert "-lto" not in patched_linker.options
-
-                if additional_flags_i:
-                    for flag in additional_flags_i:
-                        assert flag in patched_linker.options
-
     def test_nvjitlink_add_file_guess_ext_linkable_code(self):
         files = (
             test_device_functions_a,
@@ -142,24 +60,20 @@ def test_nvjitlink_add_file_guess_ext_linkable_code(self):
         )
         for file in files:
             with self.subTest(file=file):
-                patched_linker = PyNvJitLinker(
-                    cc=get_current_device().compute_capability
-                )
-                patched_linker.add_file_guess_ext(file)
+                linker = _Linker(cc=get_current_device().compute_capability)
+                linker.add_file_guess_ext(file)
 
     def test_nvjitlink_test_add_file_guess_ext_invalid_input(self):
         with open(test_device_functions_cubin, "rb") as f:
             content = f.read()
 
-        patched_linker = PyNvJitLinker(
-            cc=get_current_device().compute_capability
-        )
+        linker = _Linker(cc=get_current_device().compute_capability)
         with self.assertRaisesRegex(
             TypeError, "Expected path to file or a LinkableCode"
         ):
             # Feeding raw data as bytes to add_file_guess_ext should raise,
             # because there's no way to know what kind of file to treat it as
-            patched_linker.add_file_guess_ext(content)
+            linker.add_file_guess_ext(content)
 
     def test_nvjitlink_jit_with_linkable_code(self):
         files = (
@@ -261,77 +175,5 @@ def kernel():
                 pass
 
 
-@unittest.skipIf(
-    not PYNVJITLINK_INSTALLED or not TEST_BIN_DIR,
-    reason="pynvjitlink not enabled",
-)
-@skip_on_cudasim("Linking unsupported in the simulator")
-class TestLinkerUsage(CUDATestCase):
-    """Test that whether pynvjitlink can be enabled by both environment variable
-    and modification of config at runtime.
-    """
-
-    src = """if 1:
-        import os
-        from numba import cuda, config
-
-        {config}
-
-        TEST_BIN_DIR = os.getenv("NUMBA_CUDA_TEST_BIN_DIR")
-        if TEST_BIN_DIR:
-            test_device_functions_cubin = os.path.join(
-                TEST_BIN_DIR, "test_device_functions.cubin"
-            )
-
-        sig = "uint32(uint32, uint32)"
-        add_from_numba = cuda.declare_device("add_from_numba", sig)
-
-        @cuda.jit(link=[test_device_functions_cubin], lto=True)
-        def kernel(result):
-            result[0] = add_from_numba(1, 2)
-
-        result = cuda.device_array(1)
-        kernel[1, 1](result)
-        assert result[0] == 3
-        """
-
-    def test_linker_enabled_envvar(self):
-        env = os.environ.copy()
-        env.pop("NUMBA_CUDA_ENABLE_PYNVJITLINK", None)
-        run_in_subprocess(self.src.format(config=""), env=env)
-
-    def test_linker_disabled_envvar(self):
-        env = os.environ.copy()
-        env["NUMBA_CUDA_ENABLE_PYNVJITLINK"] = "0"
-        with self.assertRaisesRegex(
-            AssertionError, "LTO requires pynvjitlink, which is not enabled"
-        ):
-            # Actual error raised is `ValueError`, but `run_in_subprocess`
-            # reraises as AssertionError.
-            run_in_subprocess(self.src.format(config=""), env=env)
-
-    def test_linker_enabled_config(self):
-        env = os.environ.copy()
-        env.pop("NUMBA_CUDA_ENABLE_PYNVJITLINK", None)
-        run_in_subprocess(
-            self.src.format(config="config.CUDA_ENABLE_PYNVJITLINK = True"),
-            env=env,
-        )
-
-    def test_linker_disabled_config(self):
-        env = os.environ.copy()
-        env.pop("NUMBA_CUDA_ENABLE_PYNVJITLINK", None)
-        with override_config("CUDA_ENABLE_PYNVJITLINK", False):
-            with self.assertRaisesRegex(
-                AssertionError, "LTO requires pynvjitlink, which is not enabled"
-            ):
-                run_in_subprocess(
-                    self.src.format(
-                        config="config.CUDA_ENABLE_PYNVJITLINK = False"
-                    ),
-                    env=env,
-                )
-
-
 if __name__ == "__main__":
     unittest.main()
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_errors.py b/numba_cuda/numba/cuda/tests/cudapy/test_errors.py
index c663da0a6..b1f6a1835 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_errors.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_errors.py
@@ -1,7 +1,7 @@
 from numba import cuda
 from numba.core.errors import TypingError
 from numba.cuda.testing import unittest, CUDATestCase, skip_on_cudasim
-from numba.tests.support import override_config
+from numba import config
 
 
 def noop(x):
@@ -91,15 +91,17 @@ def kernel_func():
         self.assertIn("NameError: name 'floor' is not defined", excstr)
 
     @skip_on_cudasim("Simulator does not use pynvjitlink")
-    def test_lto_without_pynvjitlink_error(self):
-        with self.assertRaisesRegex(RuntimeError, "LTO requires pynvjitlink"):
-            with override_config("CUDA_ENABLE_PYNVJITLINK", False):
+    @unittest.skipIf(
+        config.CUDA_USE_NVIDIA_BINDING, "NVIDIA cuda bindings enabled"
+    )
+    def test_lto_without_nvjitlink_error(self):
+        with self.assertRaisesRegex(RuntimeError, "LTO requires nvjitlink"):
 
-                @cuda.jit(lto=True)
-                def f():
-                    pass
+            @cuda.jit(lto=True)
+            def f():
+                pass
 
-                f[1, 1]()
+            f[1, 1]()
 
 
 if __name__ == "__main__":
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_extending.py b/numba_cuda/numba/cuda/tests/cudapy/test_extending.py
index 3dc24b60b..fdfb54464 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_extending.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_extending.py
@@ -1,4 +1,5 @@
 from numba.cuda.testing import skip_on_cudasim, unittest, CUDATestCase
+from numba.cuda.cudadrv.driver import _have_nvjitlink
 from llvmlite import ir
 
 import numpy as np
@@ -210,7 +211,7 @@ def test_extension_adds_linkable_code(self):
             (test_device_functions_ltoir, cuda.LTOIR),
         )
 
-        lto = config.CUDA_ENABLE_PYNVJITLINK
+        lto = _have_nvjitlink()
 
         for path, ctor in files:
             if ctor == cuda.LTOIR and not lto:
diff --git a/numba_cuda/numba/cuda/tests/nrt/test_nrt.py b/numba_cuda/numba/cuda/tests/nrt/test_nrt.py
index 81591b58e..2e14549cc 100644
--- a/numba_cuda/numba/cuda/tests/nrt/test_nrt.py
+++ b/numba_cuda/numba/cuda/tests/nrt/test_nrt.py
@@ -169,7 +169,16 @@ def test_nrt_detect_linked_ptx_file(self):
         cc = get_current_device().compute_capability
         ptx, _ = compile(src, "external_nrt.cu", cc)
 
-        @cuda.jit(link=[PTXSource(ptx.encode(), nrt=True)])
+        @cuda.jit(
+            link=[
+                PTXSource(
+                    ptx.code
+                    if config.CUDA_USE_NVIDIA_BINDING
+                    else ptx.encode(),
+                    nrt=True,
+                )
+            ]
+        )
         def kernel():
             allocate_deallocate_handle()
 
diff --git a/pyproject.toml b/pyproject.toml
index 51cca1cba..476be59ef 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -23,6 +23,7 @@ dependencies = ["numba>=0.59.1"]
 [project.optional-dependencies]
 cu11 = [
     "cuda-bindings==11.8.*",
+    "cuda-core==0.3.*",
     "cuda-python==11.8.*",  # supports all CTK 11.x
     "nvidia-cuda-nvcc-cu11",  # for libNVVM
     "nvidia-cuda-runtime-cu11",
@@ -30,10 +31,12 @@ cu11 = [
 ]
 cu12 = [
     "cuda-bindings==12.9.*",
+    "cuda-core==0.3.*",
     "cuda-python==12.9.*",  # supports all CTK 12.x
     "nvidia-cuda-nvcc-cu12",  # for libNVVM
     "nvidia-cuda-runtime-cu12",
     "nvidia-cuda-nvrtc-cu12",
+    "nvidia-nvjitlink-cu12",
 ]
 test = [
     "psutil",