17 changes: 0 additions & 17 deletions docs/source/reference/host.rst
@@ -229,20 +229,3 @@ stream, and the stream must remain valid whilst the Numba ``Stream`` object is
in use.

.. autofunction:: numba.cuda.external_stream
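
For example, a stream created by another library can be wrapped as follows (a
minimal sketch; ``raw_ptr`` stands in for a stream handle obtained elsewhere)::

    from numba import cuda

    # raw_ptr: an integer CUDA stream handle owned by another library; it
    # must remain valid while nb_stream is in use.
    nb_stream = cuda.external_stream(raw_ptr)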


Runtime
-------

Numba generally uses the Driver API, but it provides a simple wrapper to the
Runtime API so that the version of the runtime in use can be queried. This is
accessed through ``cuda.runtime``, which is an instance of the
:class:`numba.cuda.cudadrv.runtime.Runtime` class:

.. autoclass:: numba.cuda.cudadrv.runtime.Runtime
:members: get_version, is_supported_version, supported_versions

Whether the runtime in use is officially supported and tested with the current
version of Numba can also be queried:

.. autofunction:: numba.cuda.is_supported_version
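
For reference, a minimal usage sketch of this wrapper (return values are
illustrative and depend on the installed toolkit)::

    from numba import cuda

    print(cuda.runtime.get_version())   # e.g. (12, 0)
    print(cuda.is_supported_version())  # True if this runtime is tested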
13 changes: 13 additions & 0 deletions numba_cuda/numba/cuda/__init__.py
Expand Up @@ -83,6 +83,19 @@
implementation = "NVIDIA"


# The default compute capability as set by the upstream Numba implementation.
config_default_cc = config.CUDA_DEFAULT_PTX_CC

# The default compute capability for Numba-CUDA. This overrides the upstream
# Numba built-in default of 5.0, unless the user has configured an even higher
# value, in which case the user-specified value is used. This default is
# aligned with recent toolkit versions.
numba_cuda_default_ptx_cc = (7, 5)

if numba_cuda_default_ptx_cc > config_default_cc:
config.CUDA_DEFAULT_PTX_CC = numba_cuda_default_ptx_cc
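
# A sketch of the resulting behaviour (CC values are illustrative):
#
#   config.CUDA_DEFAULT_PTX_CC == (5, 0) (upstream default) -> set to (7, 5)
#   config.CUDA_DEFAULT_PTX_CC == (8, 6) (user-configured)  -> left at (8, 6)
#
# i.e. the effective default is max(config_default_cc, numba_cuda_default_ptx_cc).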


def test(*args, **kwargs):
if not is_available():
raise cuda_error()
6 changes: 3 additions & 3 deletions numba_cuda/numba/cuda/codegen.py
Expand Up @@ -2,7 +2,7 @@

from numba.core import config, serialize
from numba.core.codegen import Codegen, CodeLibrary
from .cudadrv import devices, driver, nvvm, runtime
from .cudadrv import devices, driver, nvrtc, nvvm, runtime
from numba.cuda.cudadrv.libs import get_cudalib
from numba.cuda.cudadrv.linkable_code import LinkableCode
from numba.cuda.memory_management.nrt import NRT_LIBRARY
@@ -211,7 +211,7 @@ def get_asm_str(self, cc=None):
if ptxes:
return ptxes

arch = nvvm.get_arch_option(*cc)
arch = nvrtc.get_arch_option(*cc)
options = self._nvvm_options.copy()
options["arch"] = arch

@@ -240,7 +240,7 @@ def get_ltoir(self, cc=None):
if ltoir is not None:
return ltoir

arch = nvvm.get_arch_option(*cc)
arch = nvrtc.get_arch_option(*cc)
options = self._nvvm_options.copy()
options["arch"] = arch
options["gen-lto"] = None
4 changes: 2 additions & 2 deletions numba_cuda/numba/cuda/compiler.py
@@ -35,7 +35,7 @@
from numba.cuda import nvvmutils
from numba.cuda.api import get_current_device
from numba.cuda.codegen import ExternalCodeLibrary
from numba.cuda.cudadrv import nvvm
from numba.cuda.cudadrv import nvvm, nvrtc
from numba.cuda.descriptor import cuda_target
from numba.cuda.flags import CUDAFlags
from numba.cuda.target import CUDACABICallConv
@@ -640,7 +640,7 @@ def compile(
# If the user has used the config variable to specify a non-default CC that is
# greater than the lowest non-deprecated one, then we should default to their
# specified CC instead of the lowest non-deprecated one.
MIN_CC = max(config.CUDA_DEFAULT_PTX_CC, nvvm.LOWEST_CURRENT_CC)
MIN_CC = max(config.CUDA_DEFAULT_PTX_CC, nvrtc.get_lowest_supported_cc())
cc = cc or MIN_CC
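
# An illustrative example (actual values depend on configuration and the
# installed NVRTC): with the upstream default CC of (5, 0) and a lowest
# supported CC of (7, 5), MIN_CC is (7, 5); with the config variable set to
# 8.6 (NUMBA_CUDA_DEFAULT_PTX_CC in the environment), MIN_CC is (8, 6).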

cres = compile_cuda(
42 changes: 20 additions & 22 deletions numba_cuda/numba/cuda/cuda_paths.py
@@ -132,16 +132,9 @@ def _get_nvvm_wheel():
return None


def get_major_cuda_version():
# TODO: remove once cuda-python is
# a hard dependency
from numba.cuda.cudadrv.runtime import get_version

return get_version()[0]


def get_nvrtc_dso_path():
site_paths = [site.getusersitepackages()] + site.getsitepackages()

for sp in site_paths:
lib_dir = os.path.join(
sp,
@@ -150,23 +143,28 @@ def get_nvrtc_dso_path():
("bin" if IS_WIN32 else "lib") if sp else None,
)
if lib_dir and os.path.exists(lib_dir):
try:
major = get_major_cuda_version()
if major == 11:
cu_ver = "112" if IS_WIN32 else "11.2"
elif major == 12:
cu_ver = "120" if IS_WIN32 else "12"
else:
raise NotImplementedError(f"CUDA {major} is not supported")

return os.path.join(
chosen_path = None

# Check for each version of the NVRTC library, preferring the most
# recent.
versions = (
"112" if IS_WIN32 else "11.2",
"120" if IS_WIN32 else "12",
"130" if IS_WIN32 else "13",
)

for version in versions:
dso_path = os.path.join(
lib_dir,
f"nvrtc64_{cu_ver}_0.dll"
f"nvrtc64_{version}_0.dll"
if IS_WIN32
else f"libnvrtc.so.{cu_ver}",
else f"libnvrtc.so.{version}",
)
except RuntimeError:
continue

if os.path.exists(dso_path) and os.path.isfile(dso_path):
chosen_path = dso_path

return chosen_path
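

# An illustrative sketch of the lookup above: on Linux the candidates probed
# are libnvrtc.so.11.2, libnvrtc.so.12 and libnvrtc.so.13; on Windows they are
# nvrtc64_112_0.dll, nvrtc64_120_0.dll and nvrtc64_130_0.dll. Because the loop
# runs in ascending version order and overwrites chosen_path on each hit, the
# newest library present wins.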


def _get_nvrtc_wheel():
4 changes: 4 additions & 0 deletions numba_cuda/numba/cuda/cudadrv/error.py
@@ -38,3 +38,7 @@ class NvrtcBuiltinOperationFailure(NvrtcError):

class NvrtcSupportError(ImportError):
pass


class CCSupportError(RuntimeError):
pass
2 changes: 1 addition & 1 deletion numba_cuda/numba/cuda/cudadrv/libs.py
@@ -154,7 +154,7 @@ def test():
print(f"\t\t{location}")

# Checks for dynamic libraries
libs = "nvvm nvrtc cudart".split()
libs = "nvvm nvrtc".split()
for lib in libs:
path = get_cudalib(lib)
print("Finding {} from {}".format(lib, _get_source_variable(lib)))
151 changes: 80 additions & 71 deletions numba_cuda/numba/cuda/cudadrv/nvrtc.py
@@ -1,6 +1,7 @@
from ctypes import byref, c_char, c_char_p, c_int, c_size_t, c_void_p, POINTER
from enum import IntEnum
from numba.cuda.cudadrv.error import (
CCSupportError,
NvrtcError,
NvrtcBuiltinOperationFailure,
NvrtcCompilationError,
@@ -79,20 +80,6 @@ class NVRTC:
(for Numba) open_cudalib function to load the NVRTC library.
"""

_CU11_2ONLY_PROTOTYPES = {
# nvrtcResult nvrtcGetNumSupportedArchs(int *numArchs);
"nvrtcGetNumSupportedArchs": (nvrtc_result, POINTER(c_int)),
# nvrtcResult nvrtcGetSupportedArchs(int *supportedArchs);
"nvrtcGetSupportedArchs": (nvrtc_result, POINTER(c_int)),
}

_CU12ONLY_PROTOTYPES = {
# nvrtcResult nvrtcGetLTOIRSize(nvrtcProgram prog, size_t *ltoSizeRet);
"nvrtcGetLTOIRSize": (nvrtc_result, nvrtc_program, POINTER(c_size_t)),
# nvrtcResult nvrtcGetLTOIR(nvrtcProgram prog, char *lto);
"nvrtcGetLTOIR": (nvrtc_result, nvrtc_program, c_char_p),
}

_PROTOTYPES = {
# nvrtcResult nvrtcVersion(int *major, int *minor)
"nvrtcVersion": (nvrtc_result, POINTER(c_int), POINTER(c_int)),
@@ -140,6 +127,14 @@ class NVRTC:
),
# nvrtcResult nvrtcGetProgramLog(nvrtcProgram prog, char *log);
"nvrtcGetProgramLog": (nvrtc_result, nvrtc_program, c_char_p),
# nvrtcResult nvrtcGetNumSupportedArchs(int *numArchs);
"nvrtcGetNumSupportedArchs": (nvrtc_result, POINTER(c_int)),
# nvrtcResult nvrtcGetSupportedArchs(int *supportedArchs);
"nvrtcGetSupportedArchs": (nvrtc_result, POINTER(c_int)),
# nvrtcResult nvrtcGetLTOIRSize(nvrtcProgram prog, size_t *ltoSizeRet);
"nvrtcGetLTOIRSize": (nvrtc_result, nvrtc_program, POINTER(c_size_t)),
# nvrtcResult nvrtcGetLTOIR(nvrtcProgram prog, char *lto);
"nvrtcGetLTOIR": (nvrtc_result, nvrtc_program, c_char_p),
}

# Singleton reference
@@ -157,18 +152,18 @@ def __new__(cls):
cls.__INSTANCE = None
raise NvrtcSupportError("NVRTC cannot be loaded") from e

from numba.cuda.cudadrv.runtime import get_version

if get_version() >= (11, 2):
inst._PROTOTYPES |= inst._CU11_2ONLY_PROTOTYPES
if get_version() >= (12, 0):
inst._PROTOTYPES |= inst._CU12ONLY_PROTOTYPES

# Find & populate functions
for name, proto in inst._PROTOTYPES.items():
func = getattr(lib, name)
func.restype = proto[0]
func.argtypes = proto[1:]
try:
func = getattr(lib, name)
func.restype = proto[0]
func.argtypes = proto[1:]
except AttributeError:
if "LTOIR" in name:
# CUDA 11 does not have LTOIR functions; ignore
continue
else:
raise

@functools.wraps(func)
def checked_call(*args, func=func, name=name):
@@ -195,52 +190,16 @@ def checked_call(*args, func=func, name=name):

return cls.__INSTANCE

@functools.cache
def get_supported_archs(self):
"""
Get the architectures supported by NVRTC as a list of compute capability tuples.
"""
ver = self.get_version()
if ver < (11, 0):
raise RuntimeError(
"Unsupported CUDA version. CUDA 11.0 or higher is required."
)
elif ver == (11, 0):
return [
(3, 0),
(3, 2),
(3, 5),
(3, 7),
(5, 0),
(5, 2),
(5, 3),
(6, 0),
(6, 1),
(6, 2),
(7, 0),
(7, 2),
(7, 5),
]
elif ver == (11, 1):
return [
(3, 5),
(3, 7),
(5, 0),
(5, 2),
(5, 3),
(6, 0),
(6, 1),
(6, 2),
(7, 0),
(7, 2),
(7, 5),
(8, 0),
]
else:
num = c_int()
self.nvrtcGetNumSupportedArchs(byref(num))
archs = (c_int * num.value)()
self.nvrtcGetSupportedArchs(archs)
return [(archs[i] // 10, archs[i] % 10) for i in range(num.value)]
num = c_int()
self.nvrtcGetNumSupportedArchs(byref(num))
archs = (c_int * num.value)()
self.nvrtcGetSupportedArchs(archs)
return [(archs[i] // 10, archs[i] % 10) for i in range(num.value)]
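
# For example, NVRTC reports supported architectures as integers such as 75
# or 90, which the comprehension above decodes into CC tuples:
#
#   >>> [divmod(arch, 10) for arch in (75, 80, 90)]
#   [(7, 5), (8, 0), (9, 0)]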

def get_version(self):
"""
@@ -349,9 +308,9 @@ def compile(src, name, cc, ltoir=False):

version = nvrtc.get_version()
ver_str = lambda v: ".".join(str(x) for x in v)
if version < (11, 0):
if version < (11, 2):
raise RuntimeError(
"Unsupported CUDA version. CUDA 11.0 or higher is required."
"Unsupported CUDA version. CUDA 11.2 or higher is required."
)
else:
supported_arch = nvrtc.get_supported_archs()
@@ -383,8 +342,10 @@ def compile(src, name, cc, ltoir=False):
else:
arch = f"--gpu-architecture=compute_{major}{minor}"

cuda_include = [
f"{get_cuda_paths()['include_dir'].info}",
cuda_include_dir = get_cuda_paths()["include_dir"].info
cuda_includes = [
f"{cuda_include_dir}",
f"{os.path.join(cuda_include_dir, 'cccl')}",
]

nvrtc_version = nvrtc.get_version()
@@ -405,7 +366,7 @@

nrt_include = os.path.join(numba_cuda_path, "memory_management")

includes = [numba_include, *cuda_include, nrt_include, *extra_includes]
includes = [numba_include, *cuda_includes, nrt_include, *extra_includes]

if config.CUDA_USE_NVIDIA_BINDING:
options = ProgramOptions(
@@ -474,3 +435,51 @@ def write(self, msg):
else:
ptx = nvrtc.get_ptx(program)
return ptx, log


def find_closest_arch(mycc):
"""
Given a compute capability, return the closest compute capability supported
by the CUDA toolkit.

:param mycc: Compute capability as a tuple ``(MAJOR, MINOR)``
:return: Closest supported CC as a tuple ``(MAJOR, MINOR)``
"""
supported_ccs = get_supported_ccs()

for i, cc in enumerate(supported_ccs):
if cc == mycc:
# Matches
return cc
elif cc > mycc:
# Exceeded
if i == 0:
# CC lower than supported
msg = (
"GPU compute capability %d.%d is not supported "
"(requires >=%d.%d)" % (mycc + cc)
)
raise CCSupportError(msg)
else:
# return the previous CC
return supported_ccs[i - 1]

# CC higher than supported
return supported_ccs[-1] # Choose the highest


def get_arch_option(major, minor):
"""Matches with the closest architecture option"""
if config.FORCE_CUDA_CC:
arch = config.FORCE_CUDA_CC
else:
arch = find_closest_arch((major, minor))
return "compute_%d%d" % arch


def get_lowest_supported_cc():
return min(get_supported_ccs())


def get_supported_ccs():
return NVRTC().get_supported_archs()