diff --git a/docs/source/reference/host.rst b/docs/source/reference/host.rst index c472c5ed6..acf883464 100644 --- a/docs/source/reference/host.rst +++ b/docs/source/reference/host.rst @@ -229,20 +229,3 @@ stream, and the stream must remain valid whilst the Numba ``Stream`` object is in use. .. autofunction:: numba.cuda.external_stream - - -Runtime -------- - -Numba generally uses the Driver API, but it provides a simple wrapper to the -Runtime API so that the version of the runtime in use can be queried. This is -accessed through ``cuda.runtime``, which is an instance of the -:class:`numba.cuda.cudadrv.runtime.Runtime` class: - -.. autoclass:: numba.cuda.cudadrv.runtime.Runtime - :members: get_version, is_supported_version, supported_versions - -Whether the current runtime is officially supported and tested with the current -version of Numba can also be queried: - -.. autofunction:: numba.cuda.is_supported_version diff --git a/numba_cuda/numba/cuda/__init__.py b/numba_cuda/numba/cuda/__init__.py index e944fe0bf..af5f95595 100644 --- a/numba_cuda/numba/cuda/__init__.py +++ b/numba_cuda/numba/cuda/__init__.py @@ -83,6 +83,19 @@ implementation = "NVIDIA" +# The default compute capability as set by the upstream Numba implementation. +config_default_cc = config.CUDA_DEFAULT_PTX_CC + +# The default compute capability for Numba-CUDA. This will usually override the +# upstream Numba built-in default of 5.0, unless the user has set it even +# higher, in which case we should use the user-specified value. This default is +# aligned with recent toolkit versions. 
+numba_cuda_default_ptx_cc = (7, 5) + +if numba_cuda_default_ptx_cc > config_default_cc: + config.CUDA_DEFAULT_PTX_CC = numba_cuda_default_ptx_cc + + def test(*args, **kwargs): if not is_available(): raise cuda_error() diff --git a/numba_cuda/numba/cuda/codegen.py b/numba_cuda/numba/cuda/codegen.py index 0b269ed52..d2ec579ad 100644 --- a/numba_cuda/numba/cuda/codegen.py +++ b/numba_cuda/numba/cuda/codegen.py @@ -2,7 +2,7 @@ from numba.core import config, serialize from numba.core.codegen import Codegen, CodeLibrary -from .cudadrv import devices, driver, nvvm, runtime +from .cudadrv import devices, driver, nvrtc, nvvm, runtime from numba.cuda.cudadrv.libs import get_cudalib from numba.cuda.cudadrv.linkable_code import LinkableCode from numba.cuda.memory_management.nrt import NRT_LIBRARY @@ -211,7 +211,7 @@ def get_asm_str(self, cc=None): if ptxes: return ptxes - arch = nvvm.get_arch_option(*cc) + arch = nvrtc.get_arch_option(*cc) options = self._nvvm_options.copy() options["arch"] = arch @@ -240,7 +240,7 @@ def get_ltoir(self, cc=None): if ltoir is not None: return ltoir - arch = nvvm.get_arch_option(*cc) + arch = nvrtc.get_arch_option(*cc) options = self._nvvm_options.copy() options["arch"] = arch options["gen-lto"] = None diff --git a/numba_cuda/numba/cuda/compiler.py b/numba_cuda/numba/cuda/compiler.py index 555b4c558..b58332c5a 100644 --- a/numba_cuda/numba/cuda/compiler.py +++ b/numba_cuda/numba/cuda/compiler.py @@ -35,7 +35,7 @@ from numba.cuda import nvvmutils from numba.cuda.api import get_current_device from numba.cuda.codegen import ExternalCodeLibrary -from numba.cuda.cudadrv import nvvm +from numba.cuda.cudadrv import nvvm, nvrtc from numba.cuda.descriptor import cuda_target from numba.cuda.flags import CUDAFlags from numba.cuda.target import CUDACABICallConv @@ -640,7 +640,7 @@ def compile( # If the user has used the config variable to specify a non-default that is # greater than the lowest non-deprecated one, then we should default to # their specified 
CC instead of the lowest non-deprecated one. - MIN_CC = max(config.CUDA_DEFAULT_PTX_CC, nvvm.LOWEST_CURRENT_CC) + MIN_CC = max(config.CUDA_DEFAULT_PTX_CC, nvrtc.get_lowest_supported_cc()) cc = cc or MIN_CC cres = compile_cuda( diff --git a/numba_cuda/numba/cuda/cuda_paths.py b/numba_cuda/numba/cuda/cuda_paths.py index 9bcccf9ce..66db135d0 100644 --- a/numba_cuda/numba/cuda/cuda_paths.py +++ b/numba_cuda/numba/cuda/cuda_paths.py @@ -132,16 +132,9 @@ def _get_nvvm_wheel(): return None -def get_major_cuda_version(): - # TODO: remove once cuda-python is - # a hard dependency - from numba.cuda.cudadrv.runtime import get_version - - return get_version()[0] - - def get_nvrtc_dso_path(): site_paths = [site.getusersitepackages()] + site.getsitepackages() + for sp in site_paths: lib_dir = os.path.join( sp, @@ -150,23 +143,28 @@ def get_nvrtc_dso_path(): ("bin" if IS_WIN32 else "lib") if sp else None, ) if lib_dir and os.path.exists(lib_dir): - try: - major = get_major_cuda_version() - if major == 11: - cu_ver = "112" if IS_WIN32 else "11.2" - elif major == 12: - cu_ver = "120" if IS_WIN32 else "12" - else: - raise NotImplementedError(f"CUDA {major} is not supported") - - return os.path.join( + chosen_path = None + + # Check for each version of the NVRTC DLL, preferring the most + # recent. 
+ versions = ( + "112" if IS_WIN32 else "11.2", + "120" if IS_WIN32 else "12", + "130" if IS_WIN32 else "13", + ) + + for version in versions: + dso_path = os.path.join( lib_dir, - f"nvrtc64_{cu_ver}_0.dll" + f"nvrtc64_{version}_0.dll" if IS_WIN32 - else f"libnvrtc.so.{cu_ver}", + else f"libnvrtc.so.{version}", ) - except RuntimeError: - continue + + if os.path.exists(dso_path) and os.path.isfile(dso_path): + chosen_path = dso_path + + return chosen_path def _get_nvrtc_wheel(): diff --git a/numba_cuda/numba/cuda/cudadrv/error.py b/numba_cuda/numba/cuda/cudadrv/error.py index f395f0721..b40f0abdb 100644 --- a/numba_cuda/numba/cuda/cudadrv/error.py +++ b/numba_cuda/numba/cuda/cudadrv/error.py @@ -38,3 +38,7 @@ class NvrtcBuiltinOperationFailure(NvrtcError): class NvrtcSupportError(ImportError): pass + + +class CCSupportError(RuntimeError): + pass diff --git a/numba_cuda/numba/cuda/cudadrv/libs.py b/numba_cuda/numba/cuda/cudadrv/libs.py index ce4d99113..b3105942a 100644 --- a/numba_cuda/numba/cuda/cudadrv/libs.py +++ b/numba_cuda/numba/cuda/cudadrv/libs.py @@ -154,7 +154,7 @@ def test(): print(f"\t\t{location}") # Checks for dynamic libraries - libs = "nvvm nvrtc cudart".split() + libs = "nvvm nvrtc".split() for lib in libs: path = get_cudalib(lib) print("Finding {} from {}".format(lib, _get_source_variable(lib))) diff --git a/numba_cuda/numba/cuda/cudadrv/nvrtc.py b/numba_cuda/numba/cuda/cudadrv/nvrtc.py index 2513e2a48..0c4074a73 100644 --- a/numba_cuda/numba/cuda/cudadrv/nvrtc.py +++ b/numba_cuda/numba/cuda/cudadrv/nvrtc.py @@ -1,6 +1,7 @@ from ctypes import byref, c_char, c_char_p, c_int, c_size_t, c_void_p, POINTER from enum import IntEnum from numba.cuda.cudadrv.error import ( + CCSupportError, NvrtcError, NvrtcBuiltinOperationFailure, NvrtcCompilationError, @@ -79,20 +80,6 @@ class NVRTC: (for Numba) open_cudalib function to load the NVRTC library. 
""" - _CU11_2ONLY_PROTOTYPES = { - # nvrtcResult nvrtcGetNumSupportedArchs(int *numArchs); - "nvrtcGetNumSupportedArchs": (nvrtc_result, POINTER(c_int)), - # nvrtcResult nvrtcGetSupportedArchs(int *supportedArchs); - "nvrtcGetSupportedArchs": (nvrtc_result, POINTER(c_int)), - } - - _CU12ONLY_PROTOTYPES = { - # nvrtcResult nvrtcGetLTOIRSize(nvrtcProgram prog, size_t *ltoSizeRet); - "nvrtcGetLTOIRSize": (nvrtc_result, nvrtc_program, POINTER(c_size_t)), - # nvrtcResult nvrtcGetLTOIR(nvrtcProgram prog, char *lto); - "nvrtcGetLTOIR": (nvrtc_result, nvrtc_program, c_char_p), - } - _PROTOTYPES = { # nvrtcResult nvrtcVersion(int *major, int *minor) "nvrtcVersion": (nvrtc_result, POINTER(c_int), POINTER(c_int)), @@ -140,6 +127,14 @@ class NVRTC: ), # nvrtcResult nvrtcGetProgramLog(nvrtcProgram prog, char *log); "nvrtcGetProgramLog": (nvrtc_result, nvrtc_program, c_char_p), + # nvrtcResult nvrtcGetNumSupportedArchs(int *numArchs); + "nvrtcGetNumSupportedArchs": (nvrtc_result, POINTER(c_int)), + # nvrtcResult nvrtcGetSupportedArchs(int *supportedArchs); + "nvrtcGetSupportedArchs": (nvrtc_result, POINTER(c_int)), + # nvrtcResult nvrtcGetLTOIRSize(nvrtcProgram prog, size_t *ltoSizeRet); + "nvrtcGetLTOIRSize": (nvrtc_result, nvrtc_program, POINTER(c_size_t)), + # nvrtcResult nvrtcGetLTOIR(nvrtcProgram prog, char *lto); + "nvrtcGetLTOIR": (nvrtc_result, nvrtc_program, c_char_p), } # Singleton reference @@ -157,18 +152,18 @@ def __new__(cls): cls.__INSTANCE = None raise NvrtcSupportError("NVRTC cannot be loaded") from e - from numba.cuda.cudadrv.runtime import get_version - - if get_version() >= (11, 2): - inst._PROTOTYPES |= inst._CU11_2ONLY_PROTOTYPES - if get_version() >= (12, 0): - inst._PROTOTYPES |= inst._CU12ONLY_PROTOTYPES - # Find & populate functions for name, proto in inst._PROTOTYPES.items(): - func = getattr(lib, name) - func.restype = proto[0] - func.argtypes = proto[1:] + try: + func = getattr(lib, name) + func.restype = proto[0] + func.argtypes = proto[1:] + except 
AttributeError: + if "LTOIR" in name: + # CUDA 11 does not have LTOIR functions; ignore + continue + else: + raise @functools.wraps(func) def checked_call(*args, func=func, name=name): @@ -195,52 +190,16 @@ def checked_call(*args, func=func, name=name): return cls.__INSTANCE + @functools.cache def get_supported_archs(self): """ Get Supported Architectures by NVRTC as list of arch tuples. """ - ver = self.get_version() - if ver < (11, 0): - raise RuntimeError( - "Unsupported CUDA version. CUDA 11.0 or higher is required." - ) - elif ver == (11, 0): - return [ - (3, 0), - (3, 2), - (3, 5), - (3, 7), - (5, 0), - (5, 2), - (5, 3), - (6, 0), - (6, 1), - (6, 2), - (7, 0), - (7, 2), - (7, 5), - ] - elif ver == (11, 1): - return [ - (3, 5), - (3, 7), - (5, 0), - (5, 2), - (5, 3), - (6, 0), - (6, 1), - (6, 2), - (7, 0), - (7, 2), - (7, 5), - (8, 0), - ] - else: - num = c_int() - self.nvrtcGetNumSupportedArchs(byref(num)) - archs = (c_int * num.value)() - self.nvrtcGetSupportedArchs(archs) - return [(archs[i] // 10, archs[i] % 10) for i in range(num.value)] + num = c_int() + self.nvrtcGetNumSupportedArchs(byref(num)) + archs = (c_int * num.value)() + self.nvrtcGetSupportedArchs(archs) + return [(archs[i] // 10, archs[i] % 10) for i in range(num.value)] def get_version(self): """ @@ -349,9 +308,9 @@ def compile(src, name, cc, ltoir=False): version = nvrtc.get_version() ver_str = lambda v: ".".join(v) - if version < (11, 0): + if version < (11, 2): raise RuntimeError( - "Unsupported CUDA version. CUDA 11.0 or higher is required." + "Unsupported CUDA version. CUDA 11.2 or higher is required." 
) else: supported_arch = nvrtc.get_supported_archs() @@ -383,8 +342,10 @@ def compile(src, name, cc, ltoir=False): else: arch = f"--gpu-architecture=compute_{major}{minor}" - cuda_include = [ - f"{get_cuda_paths()['include_dir'].info}", + cuda_include_dir = get_cuda_paths()["include_dir"].info + cuda_includes = [ + f"{cuda_include_dir}", + f"{os.path.join(cuda_include_dir, 'cccl')}", ] nvrtc_version = nvrtc.get_version() @@ -405,7 +366,7 @@ def compile(src, name, cc, ltoir=False): nrt_include = os.path.join(numba_cuda_path, "memory_management") - includes = [numba_include, *cuda_includes, nrt_include, *extra_includes] + includes = [numba_include, *cuda_includes, nrt_include, *extra_includes] if config.CUDA_USE_NVIDIA_BINDING: options = ProgramOptions( @@ -474,3 +435,51 @@ def write(self, msg): else: ptx = nvrtc.get_ptx(program) return ptx, log + + +def find_closest_arch(mycc): + """ + Given a compute capability, return the closest compute capability supported + by the CUDA toolkit. + + :param mycc: Compute capability as a tuple ``(MAJOR, MINOR)`` + :return: Closest supported CC as a tuple ``(MAJOR, MINOR)`` + """ + supported_ccs = get_supported_ccs() + + for i, cc in enumerate(supported_ccs): + if cc == mycc: + # Matches + return cc + elif cc > mycc: + # Exceeded + if i == 0: + # CC lower than supported + msg = ( + "GPU compute capability %d.%d is not supported " + "(requires >=%d.%d)" % (mycc + cc) + ) + raise CCSupportError(msg) + else: + # return the previous CC + return supported_ccs[i - 1] + + # CC higher than supported + return supported_ccs[-1] # Choose the highest + + +def get_arch_option(major, minor): + """Matches with the closest architecture option""" + if config.FORCE_CUDA_CC: + arch = config.FORCE_CUDA_CC + else: + arch = find_closest_arch((major, minor)) + return "compute_%d%d" % arch + + +def get_lowest_supported_cc(): + return min(get_supported_ccs()) + + +def get_supported_ccs(): + return NVRTC().get_supported_archs() diff --git 
a/numba_cuda/numba/cuda/cudadrv/nvvm.py b/numba_cuda/numba/cuda/cudadrv/nvvm.py index 0dde2359c..235604567 100644 --- a/numba_cuda/numba/cuda/cudadrv/nvvm.py +++ b/numba_cuda/numba/cuda/cudadrv/nvvm.py @@ -14,7 +14,7 @@ from .error import NvvmError, NvvmSupportError, NvvmWarning from .libs import get_libdevice, open_libdevice, open_cudalib -from numba.core import cgutils, config +from numba.core import cgutils logger = logging.getLogger(__name__) @@ -179,7 +179,6 @@ def __init__(self): self._minorIR = ir_versions[1] self._majorDbg = ir_versions[2] self._minorDbg = ir_versions[3] - self._supported_ccs = get_supported_ccs() @property def data_layout(self): @@ -188,10 +187,6 @@ def data_layout(self): else: return _datalayout_i128 - @property - def supported_ccs(self): - return self._supported_ccs - def get_version(self): major = c_int() minor = c_int() @@ -350,197 +345,6 @@ def get_log(self): return "" -COMPUTE_CAPABILITIES = ( - (3, 5), - (3, 7), - (5, 0), - (5, 2), - (5, 3), - (6, 0), - (6, 1), - (6, 2), - (7, 0), - (7, 2), - (7, 5), - (8, 0), - (8, 6), - (8, 7), - (8, 9), - (9, 0), - (10, 0), - (10, 1), - (10, 3), - (12, 0), - (12, 1), -) - - -# Maps CTK version -> (min supported cc, max supported cc) ranges, bounds inclusive -_CUDA_CC_MIN_MAX_SUPPORT = { - (11, 2): [ - ((3, 5), (8, 6)), - ], - (11, 3): [ - ((3, 5), (8, 6)), - ], - (11, 4): [ - ((3, 5), (8, 7)), - ], - (11, 5): [ - ((3, 5), (8, 7)), - ], - (11, 6): [ - ((3, 5), (8, 7)), - ], - (11, 7): [ - ((3, 5), (8, 7)), - ], - (11, 8): [ - ((3, 5), (9, 0)), - ], - (12, 0): [ - ((5, 0), (9, 0)), - ], - (12, 1): [ - ((5, 0), (9, 0)), - ], - (12, 2): [ - ((5, 0), (9, 0)), - ], - (12, 3): [ - ((5, 0), (9, 0)), - ], - (12, 4): [ - ((5, 0), (9, 0)), - ], - (12, 5): [ - ((5, 0), (9, 0)), - ], - (12, 6): [ - ((5, 0), (9, 0)), - ], - (12, 8): [ - ((5, 0), (10, 1)), - ((12, 0), (12, 0)), - ], - (12, 9): [ - ((5, 0), (12, 1)), - ], -} - -# From CUDA 12.9 Release notes, Section 1.5.4, "Deprecated Architectures" -# 
https://docs.nvidia.com/cuda/archive/12.9.0/cuda-toolkit-release-notes/index.html#deprecated-architectures -# -# "Maxwell, Pascal, and Volta architectures are now feature-complete with no -# further enhancements planned. While CUDA Toolkit 12.x series will continue -# to support building applications for these architectures, offline -# compilation and library support will be removed in the next major CUDA -# Toolkit version release. Users should plan migration to newer -# architectures, as future toolkits will be unable to target Maxwell, Pascal, -# and Volta GPUs." -# -# In order to maintain compatibility with future toolkits, we use Turing (7.5) -# as the default CC if it is not otherwise specified. -LOWEST_CURRENT_CC = (7, 5) - - -def ccs_supported_by_ctk(ctk_version): - try: - # For supported versions, we look up the range of supported CCs - return tuple( - [ - cc - for min_cc, max_cc in _CUDA_CC_MIN_MAX_SUPPORT[ctk_version] - for cc in COMPUTE_CAPABILITIES - if min_cc <= cc <= max_cc - ] - ) - except KeyError: - # For unsupported CUDA toolkit versions, all we can do is assume all - # non-deprecated versions we are aware of are supported. - # - # If the user has specified a non-default CC that is greater than the - # lowest non-deprecated one, then we should assume that instead. - MIN_CC = max(config.CUDA_DEFAULT_PTX_CC, LOWEST_CURRENT_CC) - - return tuple([cc for cc in COMPUTE_CAPABILITIES if cc >= MIN_CC]) - - -def get_supported_ccs(): - try: - from numba.cuda.cudadrv.runtime import runtime - - cudart_version = runtime.get_version() - except: # noqa: E722 - # We can't support anything if there's an error getting the runtime - # version (e.g. 
if it's not present or there's another issue) - _supported_cc = () - return _supported_cc - - # Ensure the minimum CTK version requirement is met - min_cudart = min(_CUDA_CC_MIN_MAX_SUPPORT) - if cudart_version < min_cudart: - _supported_cc = () - ctk_ver = f"{cudart_version[0]}.{cudart_version[1]}" - unsupported_ver = ( - f"CUDA Toolkit {ctk_ver} is unsupported by Numba - " - f"{min_cudart[0]}.{min_cudart[1]} is the minimum " - "required version." - ) - warnings.warn(unsupported_ver) - return _supported_cc - - _supported_cc = ccs_supported_by_ctk(cudart_version) - return _supported_cc - - -def find_closest_arch(mycc): - """ - Given a compute capability, return the closest compute capability supported - by the CUDA toolkit. - - :param mycc: Compute capability as a tuple ``(MAJOR, MINOR)`` - :return: Closest supported CC as a tuple ``(MAJOR, MINOR)`` - """ - supported_ccs = NVVM().supported_ccs - - if not supported_ccs: - msg = ( - "No supported GPU compute capabilities found. " - "Please check your cudatoolkit version matches your CUDA version." - ) - raise NvvmSupportError(msg) - - for i, cc in enumerate(supported_ccs): - if cc == mycc: - # Matches - return cc - elif cc > mycc: - # Exceeded - if i == 0: - # CC lower than supported - msg = ( - "GPU compute capability %d.%d is not supported" - "(requires >=%d.%d)" % (mycc + cc) - ) - raise NvvmSupportError(msg) - else: - # return the previous CC - return supported_ccs[i - 1] - - # CC higher than supported - return supported_ccs[-1] # Choose the highest - - -def get_arch_option(major, minor): - """Matches with the closest architecture option""" - if config.FORCE_CUDA_CC: - arch = config.FORCE_CUDA_CC - else: - arch = find_closest_arch((major, minor)) - return "compute_%d%d" % arch - - MISSING_LIBDEVICE_FILE_MSG = """Missing libdevice file. Please ensure you have a CUDA Toolkit 11.2 or higher. 
For CUDA 12, ``cuda-nvcc`` and ``cuda-nvrtc`` are required: diff --git a/numba_cuda/numba/cuda/cudadrv/runtime.py b/numba_cuda/numba/cuda/cudadrv/runtime.py index d665f4db1..79c478283 100644 --- a/numba_cuda/numba/cuda/cudadrv/runtime.py +++ b/numba_cuda/numba/cuda/cudadrv/runtime.py @@ -1,147 +1,16 @@ """ -CUDA Runtime wrapper. +Former CUDA Runtime wrapper. -This provides a very minimal set of bindings, since the Runtime API is not -really used in Numba except for querying the Runtime version. +The toolkit version can now be obtained from NVRTC, so we don't use a binding +to the runtime anymore. This file is provided to maintain the existing API. """ -import ctypes -import functools -import sys - -from numba.core import config -from numba.cuda.cudadrv.driver import ERROR_MAP, make_logger -from numba.cuda.cudadrv.error import CudaSupportError, CudaRuntimeError -from numba.cuda.cudadrv.libs import open_cudalib -from numba.cuda.cudadrv.rtapi import API_PROTOTYPES -from numba.cuda.cudadrv import enums - - -class CudaRuntimeAPIError(CudaRuntimeError): - """ - Raised when there is an error accessing a C API from the CUDA Runtime. - """ - - def __init__(self, code, msg): - self.code = code - self.msg = msg - super().__init__(code, msg) - - def __str__(self): - return "[%s] %s" % (self.code, self.msg) +from numba.cuda.cudadrv.nvrtc import NVRTC class Runtime: - """ - Runtime object that lazily binds runtime API functions. - """ - - def __init__(self): - self.is_initialized = False - - def _initialize(self): - # lazily initialize logger - global _logger - _logger = make_logger() - - if config.DISABLE_CUDA: - msg = ( - "CUDA is disabled due to setting NUMBA_DISABLE_CUDA=1 " - "in the environment, or because CUDA is unsupported on " - "32-bit systems." 
- ) - raise CudaSupportError(msg) - self.lib = open_cudalib("cudart") - - self.is_initialized = True - - def __getattr__(self, fname): - # First request of a runtime API function - try: - proto = API_PROTOTYPES[fname] - except KeyError: - raise AttributeError(fname) - restype = proto[0] - argtypes = proto[1:] - - if not self.is_initialized: - self._initialize() - - # Find function in runtime library - libfn = self._find_api(fname) - libfn.restype = restype - libfn.argtypes = argtypes - - safe_call = self._wrap_api_call(fname, libfn) - setattr(self, fname, safe_call) - return safe_call - - def _wrap_api_call(self, fname, libfn): - @functools.wraps(libfn) - def safe_cuda_api_call(*args): - _logger.debug("call runtime api: %s", libfn.__name__) - retcode = libfn(*args) - self._check_error(fname, retcode) - - return safe_cuda_api_call - - def _check_error(self, fname, retcode): - if retcode != enums.CUDA_SUCCESS: - errname = ERROR_MAP.get(retcode, "cudaErrorUnknown") - msg = "Call to %s results in %s" % (fname, errname) - _logger.error(msg) - raise CudaRuntimeAPIError(retcode, msg) - - def _find_api(self, fname): - try: - return getattr(self.lib, fname) - except AttributeError: - pass - - # Not found. - # Delay missing function error to use - def absent_function(*args, **kws): - msg = "runtime missing function: %s." - raise CudaRuntimeError(msg % fname) - - setattr(self, fname, absent_function) - return absent_function - def get_version(self): - """ - Returns the CUDA Runtime version as a tuple (major, minor). - """ - rtver = ctypes.c_int() - self.cudaRuntimeGetVersion(ctypes.byref(rtver)) - # The version is encoded as (1000 * major) + (10 * minor) - major = rtver.value // 1000 - minor = (rtver.value - (major * 1000)) // 10 - return (major, minor) - - def is_supported_version(self): - """ - Returns True if the CUDA Runtime is a supported version. 
- """ - - return self.get_version() in self.supported_versions - - @property - def supported_versions(self): - """A tuple of all supported CUDA toolkit versions. Versions are given in - the form ``(major_version, minor_version)``.""" - if sys.platform not in ("linux", "win32") or config.MACHINE_BITS != 64: - # Only 64-bit Linux and Windows are supported - return () - return ( - (11, 0), - (11, 1), - (11, 2), - (11, 3), - (11, 4), - (11, 5), - (11, 6), - (11, 7), - ) + return NVRTC().get_version() runtime = Runtime() diff --git a/numba_cuda/numba/cuda/testing.py b/numba_cuda/numba/cuda/testing.py index 7e3cd6f85..217f7d370 100644 --- a/numba_cuda/numba/cuda/testing.py +++ b/numba_cuda/numba/cuda/testing.py @@ -51,20 +51,6 @@ def tearDown(self): reset() -def ensure_supported_ccs_initialized(): - from numba.cuda import is_available as cuda_is_available - from numba.cuda.cudadrv import nvvm - - if cuda_is_available(): - # Ensure that cudart.so is loaded and the list of supported compute - # capabilities in the nvvm module is populated before a fork. This is - # needed because some compilation tests don't require a CUDA context, - # but do use NVVM, and it is required that libcudart.so should be - # loaded before a fork (note that the requirement is not explicitly - # documented). 
- nvvm.get_supported_ccs() - - def skip_on_cudasim(reason): """Skip this test if running on the CUDA simulator""" return unittest.skipIf(config.ENABLE_CUDASIM, reason) diff --git a/numba_cuda/numba/cuda/tests/__init__.py b/numba_cuda/numba/cuda/tests/__init__.py index d04d546ed..59aae1cee 100644 --- a/numba_cuda/numba/cuda/tests/__init__.py +++ b/numba_cuda/numba/cuda/tests/__init__.py @@ -1,5 +1,4 @@ from fnmatch import fnmatch -from numba.cuda.testing import ensure_supported_ccs_initialized from numba.testing import unittest from numba import cuda from os.path import dirname, isfile, join, normpath, relpath, splitext @@ -42,7 +41,6 @@ def load_testsuite(loader, dir): def load_tests(loader, tests, pattern): suite = unittest.TestSuite() this_dir = dirname(__file__) - ensure_supported_ccs_initialized() suite.addTests(load_testsuite(loader, join(this_dir, "nocuda"))) if cuda.is_available(): suite.addTests(load_testsuite(loader, join(this_dir, "cudasim"))) diff --git a/numba_cuda/numba/cuda/tests/cudadrv/__init__.py b/numba_cuda/numba/cuda/tests/cudadrv/__init__.py index d8eb827b8..e0df12c1b 100644 --- a/numba_cuda/numba/cuda/tests/cudadrv/__init__.py +++ b/numba_cuda/numba/cuda/tests/cudadrv/__init__.py @@ -1,8 +1,6 @@ -from numba.cuda.testing import ensure_supported_ccs_initialized from numba.cuda.tests import load_testsuite import os def load_tests(loader, tests, pattern): - ensure_supported_ccs_initialized() return load_testsuite(loader, os.path.dirname(__file__)) diff --git a/numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py b/numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py index 049804e7a..ac1b36a07 100644 --- a/numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +++ b/numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py @@ -109,7 +109,21 @@ def test_attached_non_primary(self): if driver.USE_NV_BINDING: flags = 0 dev = driver.binding.CUdevice(0) - hctx = the_driver.cuCtxCreate(flags, dev) + + result, version = 
driver.binding.cuDriverGetVersion() + self.assertEqual( + result, + driver.binding.CUresult.CUDA_SUCCESS, + "Error getting CUDA driver version", + ) + + # CUDA 13's cuCtxCreate has an optional parameter prepended + if version >= 13000: + args = (None, flags, dev) + else: + args = (flags, dev) + + hctx = the_driver.cuCtxCreate(*args) else: hctx = driver.drvapi.cu_context() the_driver.cuCtxCreate(byref(hctx), 0, 0) diff --git a/numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py b/numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py new file mode 100644 index 000000000..c605a6e64 --- /dev/null +++ b/numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py @@ -0,0 +1,27 @@ +from numba.cuda.cudadrv import nvrtc +from numba.cuda.testing import skip_on_cudasim + +import unittest + + +@skip_on_cudasim("NVVM Driver unsupported in the simulator") +class TestArchOption(unittest.TestCase): + def test_get_arch_option(self): + # Test returning the nearest lowest arch. + self.assertEqual(nvrtc.get_arch_option(7, 5), "compute_75") + self.assertEqual(nvrtc.get_arch_option(7, 7), "compute_75") + self.assertEqual(nvrtc.get_arch_option(8, 5), "compute_80") + self.assertEqual(nvrtc.get_arch_option(9, 1), "compute_90") + # Test known arch. 
+ supported_cc = nvrtc.NVRTC().get_supported_archs() + for arch in supported_cc: + self.assertEqual( + nvrtc.get_arch_option(*arch), "compute_%d%d" % arch + ) + self.assertEqual( + nvrtc.get_arch_option(1000, 0), "compute_%d%d" % supported_cc[-1] + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py b/numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py index ca027ccef..779967d43 100644 --- a/numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +++ b/numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py @@ -1,7 +1,7 @@ import warnings from llvmlite import ir -from numba.cuda.cudadrv import nvvm, runtime +from numba.cuda.cudadrv import nvrtc, nvvm, runtime from numba.cuda.testing import unittest from numba.cuda.cudadrv.nvvm import LibDevice, NvvmError, NVVM from numba.cuda.testing import skip_on_cudasim @@ -30,7 +30,7 @@ def test_nvvm_compile_nullary_option(self): self.skipTest("-gen-lto unavailable in this toolkit version") nvvmir = self.get_nvvmir() - arch = "compute_%d%d" % nvvm.LOWEST_CURRENT_CC + arch = "compute_%d%d" % nvrtc.get_lowest_supported_cc() ltoir = nvvm.compile_ir(nvvmir, opt=3, gen_lto=None, arch=arch) # Verify we correctly passed the option by checking if we got LTOIR @@ -110,7 +110,7 @@ def _test_nvvm_support(self, arch): def test_nvvm_support(self): """Test supported CC by NVVM""" - for arch in nvvm.get_supported_ccs(): + for arch in nvrtc.get_supported_ccs(): self._test_nvvm_support(arch=arch) def test_nvvm_warning(self): @@ -135,22 +135,6 @@ def test_nvvm_warning(self): self.assertIn("overriding noinline attribute", str(w[0])) -@skip_on_cudasim("NVVM Driver unsupported in the simulator") -class TestArchOption(unittest.TestCase): - def test_get_arch_option(self): - # Test returning the nearest lowest arch. 
- self.assertEqual(nvvm.get_arch_option(7, 5), "compute_75") - self.assertEqual(nvvm.get_arch_option(7, 7), "compute_75") - self.assertEqual(nvvm.get_arch_option(8, 8), "compute_87") - # Test known arch. - supported_cc = nvvm.get_supported_ccs() - for arch in supported_cc: - self.assertEqual(nvvm.get_arch_option(*arch), "compute_%d%d" % arch) - self.assertEqual( - nvvm.get_arch_option(1000, 0), "compute_%d%d" % supported_cc[-1] - ) - - @skip_on_cudasim("NVVM Driver unsupported in the simulator") class TestLibDevice(unittest.TestCase): def test_libdevice_load(self): diff --git a/numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py b/numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py index 4cb3d09cf..6dafa4d0a 100644 --- a/numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +++ b/numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py @@ -1,9 +1,6 @@ import multiprocessing import os -from numba.core import config -from numba.cuda.cudadrv.runtime import runtime -from numba.cuda.testing import unittest, SerialMixin, skip_on_cudasim -from unittest.mock import patch +from numba.cuda.testing import unittest, SerialMixin def set_visible_devices_and_check(q): @@ -18,39 +15,6 @@ def set_visible_devices_and_check(q): q.put(-1) -if config.ENABLE_CUDASIM: - SUPPORTED_VERSIONS = ((-1, -1),) -else: - SUPPORTED_VERSIONS = ( - (11, 0), - (11, 1), - (11, 2), - (11, 3), - (11, 4), - (11, 5), - (11, 6), - (11, 7), - ) - - -class TestRuntime(unittest.TestCase): - def test_is_supported_version_true(self): - for v in SUPPORTED_VERSIONS: - with patch.object(runtime, "get_version", return_value=v): - self.assertTrue(runtime.is_supported_version()) - - @skip_on_cudasim("The simulator always simulates a supported runtime") - def test_is_supported_version_false(self): - # Check with an old unsupported version and some potential future - # versions - for v in ((10, 2), (11, 8), (12, 0)): - with patch.object(runtime, "get_version", return_value=v): - self.assertFalse(runtime.is_supported_version()) 
- - def test_supported_versions(self): - self.assertEqual(SUPPORTED_VERSIONS, runtime.supported_versions) - - class TestVisibleDevices(unittest.TestCase, SerialMixin): def test_visible_devices_set_after_import(self): # See Issue #6149. This test checks that we can set diff --git a/numba_cuda/numba/cuda/tests/cudapy/__init__.py b/numba_cuda/numba/cuda/tests/cudapy/__init__.py index d8eb827b8..e0df12c1b 100644 --- a/numba_cuda/numba/cuda/tests/cudapy/__init__.py +++ b/numba_cuda/numba/cuda/tests/cudapy/__init__.py @@ -1,8 +1,6 @@ -from numba.cuda.testing import ensure_supported_ccs_initialized from numba.cuda.tests import load_testsuite import os def load_tests(loader, tests, pattern): - ensure_supported_ccs_initialized() return load_testsuite(loader, os.path.dirname(__file__)) diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_compiler.py b/numba_cuda/numba/cuda/tests/cudapy/test_compiler.py index 8d99603f9..62cc40df5 100644 --- a/numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +++ b/numba_cuda/numba/cuda/tests/cudapy/test_compiler.py @@ -265,7 +265,7 @@ def add(x, y): # Check we target the current device's compute capability, or the # closest compute capability supported by the current toolkit. 
device_cc = cuda.get_current_device().compute_capability - cc = cuda.cudadrv.nvvm.find_closest_arch(device_cc) + cc = cuda.cudadrv.nvrtc.find_closest_arch(device_cc) target = f".target sm_{cc[0]}{cc[1]}" self.assertIn(target, ptx) diff --git a/numba_cuda/numba/cuda/tests/nocuda/__init__.py b/numba_cuda/numba/cuda/tests/nocuda/__init__.py index d8eb827b8..e0df12c1b 100644 --- a/numba_cuda/numba/cuda/tests/nocuda/__init__.py +++ b/numba_cuda/numba/cuda/tests/nocuda/__init__.py @@ -1,8 +1,6 @@ -from numba.cuda.testing import ensure_supported_ccs_initialized from numba.cuda.tests import load_testsuite import os def load_tests(loader, tests, pattern): - ensure_supported_ccs_initialized() return load_testsuite(loader, os.path.dirname(__file__)) diff --git a/numba_cuda/numba/cuda/tests/nrt/__init__.py b/numba_cuda/numba/cuda/tests/nrt/__init__.py index d8eb827b8..e0df12c1b 100644 --- a/numba_cuda/numba/cuda/tests/nrt/__init__.py +++ b/numba_cuda/numba/cuda/tests/nrt/__init__.py @@ -1,8 +1,6 @@ -from numba.cuda.testing import ensure_supported_ccs_initialized from numba.cuda.tests import load_testsuite import os def load_tests(loader, tests, pattern): - ensure_supported_ccs_initialized() return load_testsuite(loader, os.path.dirname(__file__))