diff --git a/docs/source/reference/host.rst b/docs/source/reference/host.rst index c472c5ed6..acf883464 100644 --- a/docs/source/reference/host.rst +++ b/docs/source/reference/host.rst @@ -229,20 +229,3 @@ stream, and the stream must remain valid whilst the Numba ``Stream`` object is in use. .. autofunction:: numba.cuda.external_stream - - -Runtime -------- - -Numba generally uses the Driver API, but it provides a simple wrapper to the -Runtime API so that the version of the runtime in use can be queried. This is -accessed through ``cuda.runtime``, which is an instance of the -:class:`numba.cuda.cudadrv.runtime.Runtime` class: - -.. autoclass:: numba.cuda.cudadrv.runtime.Runtime - :members: get_version, is_supported_version, supported_versions - -Whether the current runtime is officially supported and tested with the current -version of Numba can also be queried: - -.. autofunction:: numba.cuda.is_supported_version diff --git a/numba_cuda/numba/cuda/__init__.py b/numba_cuda/numba/cuda/__init__.py index e944fe0bf..af5f95595 100644 --- a/numba_cuda/numba/cuda/__init__.py +++ b/numba_cuda/numba/cuda/__init__.py @@ -83,6 +83,19 @@ implementation = "NVIDIA" +# The default compute capability as set by the upstream Numba implementation. +config_default_cc = config.CUDA_DEFAULT_PTX_CC + +# The default compute capability for Numba-CUDA. This will usually override the +# upstream Numba built-in default of 5.0, unless the user has set it even +# higher, in which case we should use the user-specified value. This default is +# aligned with recent toolkit versions. 
+numba_cuda_default_ptx_cc = (7, 5) + +if numba_cuda_default_ptx_cc > config_default_cc: + config.CUDA_DEFAULT_PTX_CC = numba_cuda_default_ptx_cc + + def test(*args, **kwargs): if not is_available(): raise cuda_error() diff --git a/numba_cuda/numba/cuda/codegen.py b/numba_cuda/numba/cuda/codegen.py index 0b269ed52..d2ec579ad 100644 --- a/numba_cuda/numba/cuda/codegen.py +++ b/numba_cuda/numba/cuda/codegen.py @@ -2,7 +2,7 @@ from numba.core import config, serialize from numba.core.codegen import Codegen, CodeLibrary -from .cudadrv import devices, driver, nvvm, runtime +from .cudadrv import devices, driver, nvrtc, nvvm, runtime from numba.cuda.cudadrv.libs import get_cudalib from numba.cuda.cudadrv.linkable_code import LinkableCode from numba.cuda.memory_management.nrt import NRT_LIBRARY @@ -211,7 +211,7 @@ def get_asm_str(self, cc=None): if ptxes: return ptxes - arch = nvvm.get_arch_option(*cc) + arch = nvrtc.get_arch_option(*cc) options = self._nvvm_options.copy() options["arch"] = arch @@ -240,7 +240,7 @@ def get_ltoir(self, cc=None): if ltoir is not None: return ltoir - arch = nvvm.get_arch_option(*cc) + arch = nvrtc.get_arch_option(*cc) options = self._nvvm_options.copy() options["arch"] = arch options["gen-lto"] = None diff --git a/numba_cuda/numba/cuda/compiler.py b/numba_cuda/numba/cuda/compiler.py index 555b4c558..b58332c5a 100644 --- a/numba_cuda/numba/cuda/compiler.py +++ b/numba_cuda/numba/cuda/compiler.py @@ -35,7 +35,7 @@ from numba.cuda import nvvmutils from numba.cuda.api import get_current_device from numba.cuda.codegen import ExternalCodeLibrary -from numba.cuda.cudadrv import nvvm +from numba.cuda.cudadrv import nvvm, nvrtc from numba.cuda.descriptor import cuda_target from numba.cuda.flags import CUDAFlags from numba.cuda.target import CUDACABICallConv @@ -640,7 +640,7 @@ def compile( # If the user has used the config variable to specify a non-default that is # greater than the lowest non-deprecated one, then we should default to # their specified 
CC instead of the lowest non-deprecated one. - MIN_CC = max(config.CUDA_DEFAULT_PTX_CC, nvvm.LOWEST_CURRENT_CC) + MIN_CC = max(config.CUDA_DEFAULT_PTX_CC, nvrtc.get_lowest_supported_cc()) cc = cc or MIN_CC cres = compile_cuda( diff --git a/numba_cuda/numba/cuda/cuda_paths.py b/numba_cuda/numba/cuda/cuda_paths.py index 9bcccf9ce..66db135d0 100644 --- a/numba_cuda/numba/cuda/cuda_paths.py +++ b/numba_cuda/numba/cuda/cuda_paths.py @@ -132,16 +132,9 @@ def _get_nvvm_wheel(): return None -def get_major_cuda_version(): - # TODO: remove once cuda-python is - # a hard dependency - from numba.cuda.cudadrv.runtime import get_version - - return get_version()[0] - - def get_nvrtc_dso_path(): site_paths = [site.getusersitepackages()] + site.getsitepackages() + for sp in site_paths: lib_dir = os.path.join( sp, @@ -150,23 +143,28 @@ def get_nvrtc_dso_path(): ("bin" if IS_WIN32 else "lib") if sp else None, ) if lib_dir and os.path.exists(lib_dir): - try: - major = get_major_cuda_version() - if major == 11: - cu_ver = "112" if IS_WIN32 else "11.2" - elif major == 12: - cu_ver = "120" if IS_WIN32 else "12" - else: - raise NotImplementedError(f"CUDA {major} is not supported") - - return os.path.join( + chosen_path = None + + # Check for each version of the NVRTC DLL, preferring the most + # recent. 
+ versions = ( + "112" if IS_WIN32 else "11.2", + "120" if IS_WIN32 else "12", + "130" if IS_WIN32 else "13", + ) + + for version in versions: + dso_path = os.path.join( lib_dir, - f"nvrtc64_{cu_ver}_0.dll" + f"nvrtc64_{version}_0.dll" if IS_WIN32 - else f"libnvrtc.so.{cu_ver}", + else f"libnvrtc.so.{version}", ) - except RuntimeError: - continue + + if os.path.exists(dso_path) and os.path.isfile(dso_path): + chosen_path = dso_path + + return chosen_path def _get_nvrtc_wheel(): diff --git a/numba_cuda/numba/cuda/cudadrv/error.py b/numba_cuda/numba/cuda/cudadrv/error.py index f395f0721..b40f0abdb 100644 --- a/numba_cuda/numba/cuda/cudadrv/error.py +++ b/numba_cuda/numba/cuda/cudadrv/error.py @@ -38,3 +38,7 @@ class NvrtcBuiltinOperationFailure(NvrtcError): class NvrtcSupportError(ImportError): pass + + +class CCSupportError(RuntimeError): + pass diff --git a/numba_cuda/numba/cuda/cudadrv/libs.py b/numba_cuda/numba/cuda/cudadrv/libs.py index ce4d99113..b3105942a 100644 --- a/numba_cuda/numba/cuda/cudadrv/libs.py +++ b/numba_cuda/numba/cuda/cudadrv/libs.py @@ -154,7 +154,7 @@ def test(): print(f"\t\t{location}") # Checks for dynamic libraries - libs = "nvvm nvrtc cudart".split() + libs = "nvvm nvrtc".split() for lib in libs: path = get_cudalib(lib) print("Finding {} from {}".format(lib, _get_source_variable(lib))) diff --git a/numba_cuda/numba/cuda/cudadrv/nvrtc.py b/numba_cuda/numba/cuda/cudadrv/nvrtc.py index 2513e2a48..0c4074a73 100644 --- a/numba_cuda/numba/cuda/cudadrv/nvrtc.py +++ b/numba_cuda/numba/cuda/cudadrv/nvrtc.py @@ -1,6 +1,7 @@ from ctypes import byref, c_char, c_char_p, c_int, c_size_t, c_void_p, POINTER from enum import IntEnum from numba.cuda.cudadrv.error import ( + CCSupportError, NvrtcError, NvrtcBuiltinOperationFailure, NvrtcCompilationError, @@ -79,20 +80,6 @@ class NVRTC: (for Numba) open_cudalib function to load the NVRTC library. 
""" - _CU11_2ONLY_PROTOTYPES = { - # nvrtcResult nvrtcGetNumSupportedArchs(int *numArchs); - "nvrtcGetNumSupportedArchs": (nvrtc_result, POINTER(c_int)), - # nvrtcResult nvrtcGetSupportedArchs(int *supportedArchs); - "nvrtcGetSupportedArchs": (nvrtc_result, POINTER(c_int)), - } - - _CU12ONLY_PROTOTYPES = { - # nvrtcResult nvrtcGetLTOIRSize(nvrtcProgram prog, size_t *ltoSizeRet); - "nvrtcGetLTOIRSize": (nvrtc_result, nvrtc_program, POINTER(c_size_t)), - # nvrtcResult nvrtcGetLTOIR(nvrtcProgram prog, char *lto); - "nvrtcGetLTOIR": (nvrtc_result, nvrtc_program, c_char_p), - } - _PROTOTYPES = { # nvrtcResult nvrtcVersion(int *major, int *minor) "nvrtcVersion": (nvrtc_result, POINTER(c_int), POINTER(c_int)), @@ -140,6 +127,14 @@ class NVRTC: ), # nvrtcResult nvrtcGetProgramLog(nvrtcProgram prog, char *log); "nvrtcGetProgramLog": (nvrtc_result, nvrtc_program, c_char_p), + # nvrtcResult nvrtcGetNumSupportedArchs(int *numArchs); + "nvrtcGetNumSupportedArchs": (nvrtc_result, POINTER(c_int)), + # nvrtcResult nvrtcGetSupportedArchs(int *supportedArchs); + "nvrtcGetSupportedArchs": (nvrtc_result, POINTER(c_int)), + # nvrtcResult nvrtcGetLTOIRSize(nvrtcProgram prog, size_t *ltoSizeRet); + "nvrtcGetLTOIRSize": (nvrtc_result, nvrtc_program, POINTER(c_size_t)), + # nvrtcResult nvrtcGetLTOIR(nvrtcProgram prog, char *lto); + "nvrtcGetLTOIR": (nvrtc_result, nvrtc_program, c_char_p), } # Singleton reference @@ -157,18 +152,18 @@ def __new__(cls): cls.__INSTANCE = None raise NvrtcSupportError("NVRTC cannot be loaded") from e - from numba.cuda.cudadrv.runtime import get_version - - if get_version() >= (11, 2): - inst._PROTOTYPES |= inst._CU11_2ONLY_PROTOTYPES - if get_version() >= (12, 0): - inst._PROTOTYPES |= inst._CU12ONLY_PROTOTYPES - # Find & populate functions for name, proto in inst._PROTOTYPES.items(): - func = getattr(lib, name) - func.restype = proto[0] - func.argtypes = proto[1:] + try: + func = getattr(lib, name) + func.restype = proto[0] + func.argtypes = proto[1:] + except 
AttributeError: + if "LTOIR" in name: + # CUDA 11 does not have LTOIR functions; ignore + continue + else: + raise @functools.wraps(func) def checked_call(*args, func=func, name=name): @@ -195,52 +190,16 @@ def checked_call(*args, func=func, name=name): return cls.__INSTANCE + @functools.cache def get_supported_archs(self): """ Get Supported Architectures by NVRTC as list of arch tuples. """ - ver = self.get_version() - if ver < (11, 0): - raise RuntimeError( - "Unsupported CUDA version. CUDA 11.0 or higher is required." - ) - elif ver == (11, 0): - return [ - (3, 0), - (3, 2), - (3, 5), - (3, 7), - (5, 0), - (5, 2), - (5, 3), - (6, 0), - (6, 1), - (6, 2), - (7, 0), - (7, 2), - (7, 5), - ] - elif ver == (11, 1): - return [ - (3, 5), - (3, 7), - (5, 0), - (5, 2), - (5, 3), - (6, 0), - (6, 1), - (6, 2), - (7, 0), - (7, 2), - (7, 5), - (8, 0), - ] - else: - num = c_int() - self.nvrtcGetNumSupportedArchs(byref(num)) - archs = (c_int * num.value)() - self.nvrtcGetSupportedArchs(archs) - return [(archs[i] // 10, archs[i] % 10) for i in range(num.value)] + num = c_int() + self.nvrtcGetNumSupportedArchs(byref(num)) + archs = (c_int * num.value)() + self.nvrtcGetSupportedArchs(archs) + return [(archs[i] // 10, archs[i] % 10) for i in range(num.value)] def get_version(self): """ @@ -349,9 +308,9 @@ def compile(src, name, cc, ltoir=False): version = nvrtc.get_version() ver_str = lambda v: ".".join(v) - if version < (11, 0): + if version < (11, 2): raise RuntimeError( - "Unsupported CUDA version. CUDA 11.0 or higher is required." + "Unsupported CUDA version. CUDA 11.2 or higher is required." 
) else: supported_arch = nvrtc.get_supported_archs() @@ -383,8 +342,10 @@ def compile(src, name, cc, ltoir=False): else: arch = f"--gpu-architecture=compute_{major}{minor}" - cuda_include = [ - f"{get_cuda_paths()['include_dir'].info}", + cuda_include_dir = get_cuda_paths()["include_dir"].info + cuda_includes = [ + f"{cuda_include_dir}", + f"{os.path.join(cuda_include_dir, 'cccl')}", ] nvrtc_version = nvrtc.get_version() @@ -405,7 +366,7 @@ def compile(src, name, cc, ltoir=False): nrt_include = os.path.join(numba_cuda_path, "memory_management") - includes = [numba_include, *cuda_includes, nrt_include, *extra_includes] + includes = [numba_include, *cuda_includes, nrt_include, *extra_includes] if config.CUDA_USE_NVIDIA_BINDING: options = ProgramOptions( @@ -474,3 +435,51 @@ def write(self, msg): else: ptx = nvrtc.get_ptx(program) return ptx, log + + +def find_closest_arch(mycc): + """ + Given a compute capability, return the closest compute capability supported + by the CUDA toolkit. + + :param mycc: Compute capability as a tuple ``(MAJOR, MINOR)`` + :return: Closest supported CC as a tuple ``(MAJOR, MINOR)`` + """ + supported_ccs = get_supported_ccs() + + for i, cc in enumerate(supported_ccs): + if cc == mycc: + # Matches + return cc + elif cc > mycc: + # Exceeded + if i == 0: + # CC lower than supported + msg = ( + "GPU compute capability %d.%d is not supported " + "(requires >=%d.%d)" % (mycc + cc) + ) + raise CCSupportError(msg) + else: + # return the previous CC + return supported_ccs[i - 1] + + # CC higher than supported + return supported_ccs[-1] # Choose the highest + + +def get_arch_option(major, minor): + """Matches with the closest architecture option""" + if config.FORCE_CUDA_CC: + arch = config.FORCE_CUDA_CC + else: + arch = find_closest_arch((major, minor)) + return "compute_%d%d" % arch + + +def get_lowest_supported_cc(): + return min(get_supported_ccs()) + + +def get_supported_ccs(): + return NVRTC().get_supported_archs() diff --git 
a/numba_cuda/numba/cuda/cudadrv/nvvm.py b/numba_cuda/numba/cuda/cudadrv/nvvm.py index 0dde2359c..235604567 100644 --- a/numba_cuda/numba/cuda/cudadrv/nvvm.py +++ b/numba_cuda/numba/cuda/cudadrv/nvvm.py @@ -14,7 +14,7 @@ from .error import NvvmError, NvvmSupportError, NvvmWarning from .libs import get_libdevice, open_libdevice, open_cudalib -from numba.core import cgutils, config +from numba.core import cgutils logger = logging.getLogger(__name__) @@ -179,7 +179,6 @@ def __init__(self): self._minorIR = ir_versions[1] self._majorDbg = ir_versions[2] self._minorDbg = ir_versions[3] - self._supported_ccs = get_supported_ccs() @property def data_layout(self): @@ -188,10 +187,6 @@ def data_layout(self): else: return _datalayout_i128 - @property - def supported_ccs(self): - return self._supported_ccs - def get_version(self): major = c_int() minor = c_int() @@ -350,197 +345,6 @@ def get_log(self): return "" -COMPUTE_CAPABILITIES = ( - (3, 5), - (3, 7), - (5, 0), - (5, 2), - (5, 3), - (6, 0), - (6, 1), - (6, 2), - (7, 0), - (7, 2), - (7, 5), - (8, 0), - (8, 6), - (8, 7), - (8, 9), - (9, 0), - (10, 0), - (10, 1), - (10, 3), - (12, 0), - (12, 1), -) - - -# Maps CTK version -> (min supported cc, max supported cc) ranges, bounds inclusive -_CUDA_CC_MIN_MAX_SUPPORT = { - (11, 2): [ - ((3, 5), (8, 6)), - ], - (11, 3): [ - ((3, 5), (8, 6)), - ], - (11, 4): [ - ((3, 5), (8, 7)), - ], - (11, 5): [ - ((3, 5), (8, 7)), - ], - (11, 6): [ - ((3, 5), (8, 7)), - ], - (11, 7): [ - ((3, 5), (8, 7)), - ], - (11, 8): [ - ((3, 5), (9, 0)), - ], - (12, 0): [ - ((5, 0), (9, 0)), - ], - (12, 1): [ - ((5, 0), (9, 0)), - ], - (12, 2): [ - ((5, 0), (9, 0)), - ], - (12, 3): [ - ((5, 0), (9, 0)), - ], - (12, 4): [ - ((5, 0), (9, 0)), - ], - (12, 5): [ - ((5, 0), (9, 0)), - ], - (12, 6): [ - ((5, 0), (9, 0)), - ], - (12, 8): [ - ((5, 0), (10, 1)), - ((12, 0), (12, 0)), - ], - (12, 9): [ - ((5, 0), (12, 1)), - ], -} - -# From CUDA 12.9 Release notes, Section 1.5.4, "Deprecated Architectures" -# 
https://docs.nvidia.com/cuda/archive/12.9.0/cuda-toolkit-release-notes/index.html#deprecated-architectures -# -# "Maxwell, Pascal, and Volta architectures are now feature-complete with no -# further enhancements planned. While CUDA Toolkit 12.x series will continue -# to support building applications for these architectures, offline -# compilation and library support will be removed in the next major CUDA -# Toolkit version release. Users should plan migration to newer -# architectures, as future toolkits will be unable to target Maxwell, Pascal, -# and Volta GPUs." -# -# In order to maintain compatibility with future toolkits, we use Turing (7.5) -# as the default CC if it is not otherwise specified. -LOWEST_CURRENT_CC = (7, 5) - - -def ccs_supported_by_ctk(ctk_version): - try: - # For supported versions, we look up the range of supported CCs - return tuple( - [ - cc - for min_cc, max_cc in _CUDA_CC_MIN_MAX_SUPPORT[ctk_version] - for cc in COMPUTE_CAPABILITIES - if min_cc <= cc <= max_cc - ] - ) - except KeyError: - # For unsupported CUDA toolkit versions, all we can do is assume all - # non-deprecated versions we are aware of are supported. - # - # If the user has specified a non-default CC that is greater than the - # lowest non-deprecated one, then we should assume that instead. - MIN_CC = max(config.CUDA_DEFAULT_PTX_CC, LOWEST_CURRENT_CC) - - return tuple([cc for cc in COMPUTE_CAPABILITIES if cc >= MIN_CC]) - - -def get_supported_ccs(): - try: - from numba.cuda.cudadrv.runtime import runtime - - cudart_version = runtime.get_version() - except: # noqa: E722 - # We can't support anything if there's an error getting the runtime - # version (e.g. 
if it's not present or there's another issue) - _supported_cc = () - return _supported_cc - - # Ensure the minimum CTK version requirement is met - min_cudart = min(_CUDA_CC_MIN_MAX_SUPPORT) - if cudart_version < min_cudart: - _supported_cc = () - ctk_ver = f"{cudart_version[0]}.{cudart_version[1]}" - unsupported_ver = ( - f"CUDA Toolkit {ctk_ver} is unsupported by Numba - " - f"{min_cudart[0]}.{min_cudart[1]} is the minimum " - "required version." - ) - warnings.warn(unsupported_ver) - return _supported_cc - - _supported_cc = ccs_supported_by_ctk(cudart_version) - return _supported_cc - - -def find_closest_arch(mycc): - """ - Given a compute capability, return the closest compute capability supported - by the CUDA toolkit. - - :param mycc: Compute capability as a tuple ``(MAJOR, MINOR)`` - :return: Closest supported CC as a tuple ``(MAJOR, MINOR)`` - """ - supported_ccs = NVVM().supported_ccs - - if not supported_ccs: - msg = ( - "No supported GPU compute capabilities found. " - "Please check your cudatoolkit version matches your CUDA version." - ) - raise NvvmSupportError(msg) - - for i, cc in enumerate(supported_ccs): - if cc == mycc: - # Matches - return cc - elif cc > mycc: - # Exceeded - if i == 0: - # CC lower than supported - msg = ( - "GPU compute capability %d.%d is not supported" - "(requires >=%d.%d)" % (mycc + cc) - ) - raise NvvmSupportError(msg) - else: - # return the previous CC - return supported_ccs[i - 1] - - # CC higher than supported - return supported_ccs[-1] # Choose the highest - - -def get_arch_option(major, minor): - """Matches with the closest architecture option""" - if config.FORCE_CUDA_CC: - arch = config.FORCE_CUDA_CC - else: - arch = find_closest_arch((major, minor)) - return "compute_%d%d" % arch - - MISSING_LIBDEVICE_FILE_MSG = """Missing libdevice file. Please ensure you have a CUDA Toolkit 11.2 or higher. 
For CUDA 12, ``cuda-nvcc`` and ``cuda-nvrtc`` are required: diff --git a/numba_cuda/numba/cuda/cudadrv/runtime.py b/numba_cuda/numba/cuda/cudadrv/runtime.py index d665f4db1..79c478283 100644 --- a/numba_cuda/numba/cuda/cudadrv/runtime.py +++ b/numba_cuda/numba/cuda/cudadrv/runtime.py @@ -1,147 +1,16 @@ """ -CUDA Runtime wrapper. +Former CUDA Runtime wrapper. -This provides a very minimal set of bindings, since the Runtime API is not -really used in Numba except for querying the Runtime version. +The toolkit version can now be obtained from NVRTC, so we don't use a binding +to the runtime anymore. This file is provided to maintain the existing API. """ -import ctypes -import functools -import sys - -from numba.core import config -from numba.cuda.cudadrv.driver import ERROR_MAP, make_logger -from numba.cuda.cudadrv.error import CudaSupportError, CudaRuntimeError -from numba.cuda.cudadrv.libs import open_cudalib -from numba.cuda.cudadrv.rtapi import API_PROTOTYPES -from numba.cuda.cudadrv import enums - - -class CudaRuntimeAPIError(CudaRuntimeError): - """ - Raised when there is an error accessing a C API from the CUDA Runtime. - """ - - def __init__(self, code, msg): - self.code = code - self.msg = msg - super().__init__(code, msg) - - def __str__(self): - return "[%s] %s" % (self.code, self.msg) +from numba.cuda.cudadrv.nvrtc import NVRTC class Runtime: - """ - Runtime object that lazily binds runtime API functions. - """ - - def __init__(self): - self.is_initialized = False - - def _initialize(self): - # lazily initialize logger - global _logger - _logger = make_logger() - - if config.DISABLE_CUDA: - msg = ( - "CUDA is disabled due to setting NUMBA_DISABLE_CUDA=1 " - "in the environment, or because CUDA is unsupported on " - "32-bit systems." 
- ) - raise CudaSupportError(msg) - self.lib = open_cudalib("cudart") - - self.is_initialized = True - - def __getattr__(self, fname): - # First request of a runtime API function - try: - proto = API_PROTOTYPES[fname] - except KeyError: - raise AttributeError(fname) - restype = proto[0] - argtypes = proto[1:] - - if not self.is_initialized: - self._initialize() - - # Find function in runtime library - libfn = self._find_api(fname) - libfn.restype = restype - libfn.argtypes = argtypes - - safe_call = self._wrap_api_call(fname, libfn) - setattr(self, fname, safe_call) - return safe_call - - def _wrap_api_call(self, fname, libfn): - @functools.wraps(libfn) - def safe_cuda_api_call(*args): - _logger.debug("call runtime api: %s", libfn.__name__) - retcode = libfn(*args) - self._check_error(fname, retcode) - - return safe_cuda_api_call - - def _check_error(self, fname, retcode): - if retcode != enums.CUDA_SUCCESS: - errname = ERROR_MAP.get(retcode, "cudaErrorUnknown") - msg = "Call to %s results in %s" % (fname, errname) - _logger.error(msg) - raise CudaRuntimeAPIError(retcode, msg) - - def _find_api(self, fname): - try: - return getattr(self.lib, fname) - except AttributeError: - pass - - # Not found. - # Delay missing function error to use - def absent_function(*args, **kws): - msg = "runtime missing function: %s." - raise CudaRuntimeError(msg % fname) - - setattr(self, fname, absent_function) - return absent_function - def get_version(self): - """ - Returns the CUDA Runtime version as a tuple (major, minor). - """ - rtver = ctypes.c_int() - self.cudaRuntimeGetVersion(ctypes.byref(rtver)) - # The version is encoded as (1000 * major) + (10 * minor) - major = rtver.value // 1000 - minor = (rtver.value - (major * 1000)) // 10 - return (major, minor) - - def is_supported_version(self): - """ - Returns True if the CUDA Runtime is a supported version. 
- """ - - return self.get_version() in self.supported_versions - - @property - def supported_versions(self): - """A tuple of all supported CUDA toolkit versions. Versions are given in - the form ``(major_version, minor_version)``.""" - if sys.platform not in ("linux", "win32") or config.MACHINE_BITS != 64: - # Only 64-bit Linux and Windows are supported - return () - return ( - (11, 0), - (11, 1), - (11, 2), - (11, 3), - (11, 4), - (11, 5), - (11, 6), - (11, 7), - ) + return NVRTC().get_version() runtime = Runtime() diff --git a/numba_cuda/numba/cuda/testing.py b/numba_cuda/numba/cuda/testing.py index 7e3cd6f85..217f7d370 100644 --- a/numba_cuda/numba/cuda/testing.py +++ b/numba_cuda/numba/cuda/testing.py @@ -51,20 +51,6 @@ def tearDown(self): reset() -def ensure_supported_ccs_initialized(): - from numba.cuda import is_available as cuda_is_available - from numba.cuda.cudadrv import nvvm - - if cuda_is_available(): - # Ensure that cudart.so is loaded and the list of supported compute - # capabilities in the nvvm module is populated before a fork. This is - # needed because some compilation tests don't require a CUDA context, - # but do use NVVM, and it is required that libcudart.so should be - # loaded before a fork (note that the requirement is not explicitly - # documented). 
- nvvm.get_supported_ccs() - - def skip_on_cudasim(reason): """Skip this test if running on the CUDA simulator""" return unittest.skipIf(config.ENABLE_CUDASIM, reason) diff --git a/numba_cuda/numba/cuda/tests/__init__.py b/numba_cuda/numba/cuda/tests/__init__.py index d04d546ed..59aae1cee 100644 --- a/numba_cuda/numba/cuda/tests/__init__.py +++ b/numba_cuda/numba/cuda/tests/__init__.py @@ -1,5 +1,4 @@ from fnmatch import fnmatch -from numba.cuda.testing import ensure_supported_ccs_initialized from numba.testing import unittest from numba import cuda from os.path import dirname, isfile, join, normpath, relpath, splitext @@ -42,7 +41,6 @@ def load_testsuite(loader, dir): def load_tests(loader, tests, pattern): suite = unittest.TestSuite() this_dir = dirname(__file__) - ensure_supported_ccs_initialized() suite.addTests(load_testsuite(loader, join(this_dir, "nocuda"))) if cuda.is_available(): suite.addTests(load_testsuite(loader, join(this_dir, "cudasim"))) diff --git a/numba_cuda/numba/cuda/tests/cudadrv/__init__.py b/numba_cuda/numba/cuda/tests/cudadrv/__init__.py index d8eb827b8..e0df12c1b 100644 --- a/numba_cuda/numba/cuda/tests/cudadrv/__init__.py +++ b/numba_cuda/numba/cuda/tests/cudadrv/__init__.py @@ -1,8 +1,6 @@ -from numba.cuda.testing import ensure_supported_ccs_initialized from numba.cuda.tests import load_testsuite import os def load_tests(loader, tests, pattern): - ensure_supported_ccs_initialized() return load_testsuite(loader, os.path.dirname(__file__)) diff --git a/numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py b/numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py index 049804e7a..ac1b36a07 100644 --- a/numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +++ b/numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py @@ -109,7 +109,21 @@ def test_attached_non_primary(self): if driver.USE_NV_BINDING: flags = 0 dev = driver.binding.CUdevice(0) - hctx = the_driver.cuCtxCreate(flags, dev) + + result, version = 
driver.binding.cuDriverGetVersion() + self.assertEqual( + result, + driver.binding.CUresult.CUDA_SUCCESS, + "Error getting CUDA driver version", + ) + + # CUDA 13's cuCtxCreate has an optional parameter prepended + if version >= 13000: + args = (None, flags, dev) + else: + args = (flags, dev) + + hctx = the_driver.cuCtxCreate(*args) else: hctx = driver.drvapi.cu_context() the_driver.cuCtxCreate(byref(hctx), 0, 0) diff --git a/numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py b/numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py new file mode 100644 index 000000000..c605a6e64 --- /dev/null +++ b/numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py @@ -0,0 +1,27 @@ +from numba.cuda.cudadrv import nvrtc +from numba.cuda.testing import skip_on_cudasim + +import unittest + + +@skip_on_cudasim("NVVM Driver unsupported in the simulator") +class TestArchOption(unittest.TestCase): + def test_get_arch_option(self): + # Test returning the nearest lowest arch. + self.assertEqual(nvrtc.get_arch_option(7, 5), "compute_75") + self.assertEqual(nvrtc.get_arch_option(7, 7), "compute_75") + self.assertEqual(nvrtc.get_arch_option(8, 5), "compute_80") + self.assertEqual(nvrtc.get_arch_option(9, 1), "compute_90") + # Test known arch. 
+ supported_cc = nvrtc.NVRTC().get_supported_archs() + for arch in supported_cc: + self.assertEqual( + nvrtc.get_arch_option(*arch), "compute_%d%d" % arch + ) + self.assertEqual( + nvrtc.get_arch_option(1000, 0), "compute_%d%d" % supported_cc[-1] + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py b/numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py index ca027ccef..779967d43 100644 --- a/numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +++ b/numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py @@ -1,7 +1,7 @@ import warnings from llvmlite import ir -from numba.cuda.cudadrv import nvvm, runtime +from numba.cuda.cudadrv import nvrtc, nvvm, runtime from numba.cuda.testing import unittest from numba.cuda.cudadrv.nvvm import LibDevice, NvvmError, NVVM from numba.cuda.testing import skip_on_cudasim @@ -30,7 +30,7 @@ def test_nvvm_compile_nullary_option(self): self.skipTest("-gen-lto unavailable in this toolkit version") nvvmir = self.get_nvvmir() - arch = "compute_%d%d" % nvvm.LOWEST_CURRENT_CC + arch = "compute_%d%d" % nvrtc.get_lowest_supported_cc() ltoir = nvvm.compile_ir(nvvmir, opt=3, gen_lto=None, arch=arch) # Verify we correctly passed the option by checking if we got LTOIR @@ -110,7 +110,7 @@ def _test_nvvm_support(self, arch): def test_nvvm_support(self): """Test supported CC by NVVM""" - for arch in nvvm.get_supported_ccs(): + for arch in nvrtc.get_supported_ccs(): self._test_nvvm_support(arch=arch) def test_nvvm_warning(self): @@ -135,22 +135,6 @@ def test_nvvm_warning(self): self.assertIn("overriding noinline attribute", str(w[0])) -@skip_on_cudasim("NVVM Driver unsupported in the simulator") -class TestArchOption(unittest.TestCase): - def test_get_arch_option(self): - # Test returning the nearest lowest arch. 
- self.assertEqual(nvvm.get_arch_option(7, 5), "compute_75") - self.assertEqual(nvvm.get_arch_option(7, 7), "compute_75") - self.assertEqual(nvvm.get_arch_option(8, 8), "compute_87") - # Test known arch. - supported_cc = nvvm.get_supported_ccs() - for arch in supported_cc: - self.assertEqual(nvvm.get_arch_option(*arch), "compute_%d%d" % arch) - self.assertEqual( - nvvm.get_arch_option(1000, 0), "compute_%d%d" % supported_cc[-1] - ) - - @skip_on_cudasim("NVVM Driver unsupported in the simulator") class TestLibDevice(unittest.TestCase): def test_libdevice_load(self): diff --git a/numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py b/numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py index 4cb3d09cf..6dafa4d0a 100644 --- a/numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +++ b/numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py @@ -1,9 +1,6 @@ import multiprocessing import os -from numba.core import config -from numba.cuda.cudadrv.runtime import runtime -from numba.cuda.testing import unittest, SerialMixin, skip_on_cudasim -from unittest.mock import patch +from numba.cuda.testing import unittest, SerialMixin def set_visible_devices_and_check(q): @@ -18,39 +15,6 @@ def set_visible_devices_and_check(q): q.put(-1) -if config.ENABLE_CUDASIM: - SUPPORTED_VERSIONS = ((-1, -1),) -else: - SUPPORTED_VERSIONS = ( - (11, 0), - (11, 1), - (11, 2), - (11, 3), - (11, 4), - (11, 5), - (11, 6), - (11, 7), - ) - - -class TestRuntime(unittest.TestCase): - def test_is_supported_version_true(self): - for v in SUPPORTED_VERSIONS: - with patch.object(runtime, "get_version", return_value=v): - self.assertTrue(runtime.is_supported_version()) - - @skip_on_cudasim("The simulator always simulates a supported runtime") - def test_is_supported_version_false(self): - # Check with an old unsupported version and some potential future - # versions - for v in ((10, 2), (11, 8), (12, 0)): - with patch.object(runtime, "get_version", return_value=v): - self.assertFalse(runtime.is_supported_version()) 
- - def test_supported_versions(self): - self.assertEqual(SUPPORTED_VERSIONS, runtime.supported_versions) - - class TestVisibleDevices(unittest.TestCase, SerialMixin): def test_visible_devices_set_after_import(self): # See Issue #6149. This test checks that we can set diff --git a/numba_cuda/numba/cuda/tests/cudapy/__init__.py b/numba_cuda/numba/cuda/tests/cudapy/__init__.py index d8eb827b8..e0df12c1b 100644 --- a/numba_cuda/numba/cuda/tests/cudapy/__init__.py +++ b/numba_cuda/numba/cuda/tests/cudapy/__init__.py @@ -1,8 +1,6 @@ -from numba.cuda.testing import ensure_supported_ccs_initialized from numba.cuda.tests import load_testsuite import os def load_tests(loader, tests, pattern): - ensure_supported_ccs_initialized() return load_testsuite(loader, os.path.dirname(__file__)) diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_compiler.py b/numba_cuda/numba/cuda/tests/cudapy/test_compiler.py index 8d99603f9..62cc40df5 100644 --- a/numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +++ b/numba_cuda/numba/cuda/tests/cudapy/test_compiler.py @@ -265,7 +265,7 @@ def add(x, y): # Check we target the current device's compute capability, or the # closest compute capability supported by the current toolkit. 
device_cc = cuda.get_current_device().compute_capability - cc = cuda.cudadrv.nvvm.find_closest_arch(device_cc) + cc = cuda.cudadrv.nvrtc.find_closest_arch(device_cc) target = f".target sm_{cc[0]}{cc[1]}" self.assertIn(target, ptx) diff --git a/numba_cuda/numba/cuda/tests/nocuda/__init__.py b/numba_cuda/numba/cuda/tests/nocuda/__init__.py index d8eb827b8..e0df12c1b 100644 --- a/numba_cuda/numba/cuda/tests/nocuda/__init__.py +++ b/numba_cuda/numba/cuda/tests/nocuda/__init__.py @@ -1,8 +1,6 @@ -from numba.cuda.testing import ensure_supported_ccs_initialized from numba.cuda.tests import load_testsuite import os def load_tests(loader, tests, pattern): - ensure_supported_ccs_initialized() return load_testsuite(loader, os.path.dirname(__file__)) diff --git a/numba_cuda/numba/cuda/tests/nrt/__init__.py b/numba_cuda/numba/cuda/tests/nrt/__init__.py index d8eb827b8..e0df12c1b 100644 --- a/numba_cuda/numba/cuda/tests/nrt/__init__.py +++ b/numba_cuda/numba/cuda/tests/nrt/__init__.py @@ -1,8 +1,6 @@ -from numba.cuda.testing import ensure_supported_ccs_initialized from numba.cuda.tests import load_testsuite import os def load_tests(loader, tests, pattern): - ensure_supported_ccs_initialized() return load_testsuite(loader, os.path.dirname(__file__))