diff --git a/ci/test_conda.sh b/ci/test_conda.sh index 2f86e4f1e..2fbc53b71 100755 --- a/ci/test_conda.sh +++ b/ci/test_conda.sh @@ -6,7 +6,7 @@ set -euo pipefail . /opt/conda/etc/profile.d/conda.sh if [ "${CUDA_VER%.*.*}" = "11" ]; then - CTK_PACKAGES="cudatoolkit" + CTK_PACKAGES="cudatoolkit=11" else CTK_PACKAGES="cuda-cccl cuda-nvcc-impl cuda-nvrtc libcurand-dev" apt-get update @@ -53,6 +53,28 @@ EXITCODE=0 trap "EXITCODE=1" ERR set +e + +GET_TEST_BINARY_DIR=" +import numba_cuda +root = numba_cuda.__file__.rstrip('__init__.py') +test_dir = root + \"numba/cuda/tests/test_binary_generation/\" +print(test_dir) +" + +CUDA_VER_MAJOR_MINOR=${CUDA_VER%.*} +if [ "${CUDA_VER_MAJOR_MINOR%.*}" == "11" ] +then + rapids-logger "Skipping test build for CUDA 11" +else + rapids-logger "Build tests" + + export NUMBA_CUDA_TEST_BIN_DIR=$(python -c "$GET_TEST_BINARY_DIR") + pushd $NUMBA_CUDA_TEST_BIN_DIR + make + popd +fi + + rapids-logger "Run Tests" python -m numba.runtests numba.cuda.tests -v diff --git a/ci/test_wheel.sh b/ci/test_wheel.sh index 071c3ea8e..113810665 100755 --- a/ci/test_wheel.sh +++ b/ci/test_wheel.sh @@ -3,14 +3,37 @@ set -euo pipefail +CUDA_VER_MAJOR_MINOR=${CUDA_VER%.*} + rapids-logger "Install testing dependencies" # TODO: Replace with rapids-dependency-file-generator python -m pip install \ psutil \ cffi \ - cuda-python \ + "cuda-python==${CUDA_VER_MAJOR_MINOR%.*}.*" \ pytest + +GET_TEST_BINARY_DIR=" +import numba_cuda +root = numba_cuda.__file__.rstrip('__init__.py') +test_dir = root + \"numba/cuda/tests/test_binary_generation/\" +print(test_dir) +" + +if [ "${CUDA_VER_MAJOR_MINOR%.*}" == "11" ] +then + rapids-logger "Skipping test build for CUDA 11" +else + rapids-logger "Build tests" + + export NUMBA_CUDA_TEST_BIN_DIR=$(python -c "$GET_TEST_BINARY_DIR") + pushd $NUMBA_CUDA_TEST_BIN_DIR + make + popd +fi + + rapids-logger "Install wheel" package=$(realpath wheel/numba_cuda*.whl) echo "Wheel path: $package" diff --git a/numba_cuda/numba/cuda/cudadrv/driver.py b/numba_cuda/numba/cuda/cudadrv/driver.py index 2741d6ae7..452239d71 100644 --- a/numba_cuda/numba/cuda/cudadrv/driver.py +++ b/numba_cuda/numba/cuda/cudadrv/driver.py @@ -2797,6 +2797,10 @@ def add_cu(self, cu, name): ptx_name = os.path.splitext(name)[0] + ".ptx" self.add_ptx(ptx.encode(), ptx_name) + @abstractmethod + def add_data(self, data, kind, name): + """Add in-memory data to the link""" + @abstractmethod def add_file(self, path, kind): """Add code from a file to the link""" @@ -2948,6 +2952,10 @@ def add_ptx(self, ptx, name=""): except CubinLinkerError as e: raise LinkerError from e + def add_data(self, data, kind, name): + msg = "Adding in-memory data unsupported in the MVC linker" + raise LinkerError(msg) + def add_file(self, path, kind): try: from cubinlinker import CubinLinkerError @@ -3046,17 +3054,32 @@ def info_log(self): def error_log(self): return self.linker_errors_buf.value.decode("utf8") - def add_ptx(self, ptx, name=""): - ptxbuf = c_char_p(ptx) - namebuf = c_char_p(name.encode("utf8")) - self._keep_alive += [ptxbuf, namebuf] + def add_cubin(self, cubin, name=""): + return self._add_data(enums.CU_JIT_INPUT_CUBIN, cubin, name) + + def add_ptx(self, ptx, name=""): + return self._add_data(enums.CU_JIT_INPUT_PTX, ptx, name) + + def add_object(self, object_, name=""): + return self._add_data(enums.CU_JIT_INPUT_OBJECT, object_, name) + + def add_fatbin(self, fatbin, name=""): + return self._add_data(enums.CU_JIT_INPUT_FATBINARY, fatbin, name) + + def add_library(self, library, name=""): + return self._add_data(enums.CU_JIT_INPUT_LIBRARY, library, name) + + def _add_data(self, input_type, data, name): + data_buffer = c_char_p(data) + name_buffer = c_char_p(name.encode("utf8")) + self._keep_alive += [data_buffer, name_buffer] try: driver.cuLinkAddData( self.handle, - enums.CU_JIT_INPUT_PTX, - ptxbuf, - len(ptx), - namebuf, + input_type, + data_buffer, + len(data), + name_buffer, 0, None, None, @@ -3064,6 +3087,28 @@ def add_ptx(self, ptx, name=""): except CudaAPIError as e: raise LinkerError("%s\n%s" % (e, self.error_log)) + def add_data(self, data, kind, name=None): + # We pass the name as **kwargs to ensure the default name for the input + # type is used if none is supplied + kws = {} + if name is not None: + kws["name"] = name + + if kind == FILE_EXTENSION_MAP["cubin"]: + self.add_cubin(data, **kws) + elif kind == FILE_EXTENSION_MAP["fatbin"]: + self.add_fatbin(data, **kws) + elif kind == FILE_EXTENSION_MAP["a"]: + self.add_library(data, **kws) + elif kind == FILE_EXTENSION_MAP["ptx"]: + self.add_ptx(data, **kws) + elif kind == FILE_EXTENSION_MAP["o"]: + self.add_object(data, **kws) + elif kind == FILE_EXTENSION_MAP["ltoir"]: + raise LinkerError("Ctypes linker cannot link LTO-IR") + else: + raise LinkerError(f"Don't know how to link {kind}") + def add_file(self, path, kind): pathbuf = c_char_p(path.encode("utf8")) self._keep_alive.append(pathbuf) @@ -3151,17 +3196,58 @@ def info_log(self): def error_log(self): return self.linker_errors_buf.decode("utf8") - def add_ptx(self, ptx, name=""): - namebuf = name.encode("utf8") - self._keep_alive += [ptx, namebuf] + def add_cubin(self, cubin, name=""): + input_type = binding.CUjitInputType.CU_JIT_INPUT_CUBIN + return self._add_data(input_type, cubin, name) + + def add_ptx(self, ptx, name=""): + input_type = binding.CUjitInputType.CU_JIT_INPUT_PTX + return self._add_data(input_type, ptx, name) + + def add_object(self, object_, name=""): + input_type = binding.CUjitInputType.CU_JIT_INPUT_OBJECT + return self._add_data(input_type, object_, name) + + def add_fatbin(self, fatbin, name=""): + input_type = binding.CUjitInputType.CU_JIT_INPUT_FATBINARY + return self._add_data(input_type, fatbin, name) + + def add_library(self, library, name=""): + input_type = binding.CUjitInputType.CU_JIT_INPUT_LIBRARY + return self._add_data(input_type, library, name) + + def _add_data(self, input_type, data, name): + name_buffer = name.encode("utf8") + self._keep_alive += [data, name_buffer] try: - input_ptx = binding.CUjitInputType.CU_JIT_INPUT_PTX driver.cuLinkAddData( - self.handle, input_ptx, ptx, len(ptx), namebuf, 0, [], [] + self.handle, input_type, data, len(data), name_buffer, 0, [], [] ) except CudaAPIError as e: raise LinkerError("%s\n%s" % (e, self.error_log)) + def add_data(self, data, kind, name=None): + # We pass the name as **kwargs to ensure the default name for the input + # type is used if none is supplied + kws = {} + if name is not None: + kws["name"] = name + + if kind == FILE_EXTENSION_MAP["cubin"]: + self.add_cubin(data, **kws) + elif kind == FILE_EXTENSION_MAP["fatbin"]: + self.add_fatbin(data, **kws) + elif kind == FILE_EXTENSION_MAP["a"]: + self.add_library(data, **kws) + elif kind == FILE_EXTENSION_MAP["ptx"]: + self.add_ptx(data, **kws) + elif kind == FILE_EXTENSION_MAP["o"]: + self.add_object(data, **kws) + elif kind == FILE_EXTENSION_MAP["ltoir"]: + raise LinkerError("CudaPythonLinker cannot link LTO-IR") + else: + raise LinkerError(f"Don't know how to link {kind}") + def add_file(self, path, kind): pathbuf = path.encode("utf8") self._keep_alive.append(pathbuf) diff --git a/numba_cuda/numba/cuda/cudadrv/linkable_code.py b/numba_cuda/numba/cuda/cudadrv/linkable_code.py index 15aab3fe2..244a51741 100644 --- a/numba_cuda/numba/cuda/cudadrv/linkable_code.py +++ b/numba_cuda/numba/cuda/cudadrv/linkable_code.py @@ -87,5 +87,5 @@ class Object(LinkableCode): class LTOIR(LinkableCode): """An LTOIR file in memory.""" - kind = "ltoir" + kind = FILE_EXTENSION_MAP["ltoir"] default_name = "" diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_extending.py b/numba_cuda/numba/cuda/tests/cudapy/test_extending.py index 8d3028d0c..9f78ec851 100644 --- a/numba_cuda/numba/cuda/tests/cudapy/test_extending.py +++ b/numba_cuda/numba/cuda/tests/cudapy/test_extending.py @@ -1,6 +1,8 @@ from numba.cuda.testing import skip_on_cudasim, unittest, CUDATestCase +from llvmlite import ir import numpy as np +import os from numba import config, cuda, njit, types @@ -160,5 +162,94 @@ def f(r, x): np.testing.assert_allclose(r, expected) +TEST_BIN_DIR = os.getenv("NUMBA_CUDA_TEST_BIN_DIR") +if TEST_BIN_DIR: + test_device_functions_a = os.path.join( + TEST_BIN_DIR, "test_device_functions.a" + ) + test_device_functions_cubin = os.path.join( + TEST_BIN_DIR, "test_device_functions.cubin" + ) + test_device_functions_cu = os.path.join( + TEST_BIN_DIR, "test_device_functions.cu" + ) + test_device_functions_fatbin = os.path.join( + TEST_BIN_DIR, "test_device_functions.fatbin" + ) + test_device_functions_fatbin_multi = os.path.join( + TEST_BIN_DIR, "test_device_functions_multi.fatbin" + ) + test_device_functions_o = os.path.join( + TEST_BIN_DIR, "test_device_functions.o" + ) + test_device_functions_ptx = os.path.join( + TEST_BIN_DIR, "test_device_functions.ptx" + ) + test_device_functions_ltoir = os.path.join( + TEST_BIN_DIR, "test_device_functions.ltoir" + ) + + +class TestExtendingLinkage(CUDATestCase): + def test_extension_adds_linkable_code(self): + cuda_major_version = cuda.runtime.get_version()[0] + + if cuda_major_version < 12: + self.skipTest("CUDA 12 required for linking in-memory data") + + files = ( + (test_device_functions_a, cuda.Archive), + (test_device_functions_cubin, cuda.Cubin), + (test_device_functions_cu, cuda.CUSource), + (test_device_functions_fatbin, cuda.Fatbin), + (test_device_functions_o, cuda.Object), + (test_device_functions_ptx, cuda.PTXSource), + (test_device_functions_ltoir, cuda.LTOIR), + ) + + lto = config.CUDA_ENABLE_PYNVJITLINK + + for path, ctor in files: + if ctor == cuda.LTOIR and not lto: + # Don't try to test with LTOIR if LTO is not enabled + continue + + with open(path, "rb") as f: + code_object = ctor(f.read()) + + def external_add(x, y): + return x + y + + @type_callable(external_add) + def type_external_add(context): + def typer(x, y): + if x == types.uint32 and y == types.uint32: + return types.uint32 + + return typer + + @lower_builtin(external_add, types.uint32, types.uint32) + def lower_external_add(context, builder, sig, args): + context.active_code_library.add_linking_file(code_object) + i32 = ir.IntType(32) + fnty = ir.FunctionType(i32, [i32, i32]) + fn = cgutils.get_or_insert_function( + builder.module, fnty, "add_cabi" + ) + return builder.call(fn, args) + + @cuda.jit(lto=lto) + def use_external_add(r, x, y): + r[0] = external_add(x[0], y[0]) + + r = np.zeros(1, dtype=np.uint32) + x = np.ones(1, dtype=np.uint32) + y = np.ones(1, dtype=np.uint32) * 2 + + use_external_add[1, 1](r, x, y) + + np.testing.assert_equal(r[0], 3) + + if __name__ == "__main__": unittest.main() diff --git a/numba_cuda/numba/cuda/tests/test_binary_generation/test_device_functions.cu b/numba_cuda/numba/cuda/tests/test_binary_generation/test_device_functions.cu index f1499dc69..20c66f234 100644 --- a/numba_cuda/numba/cuda/tests/test_binary_generation/test_device_functions.cu +++ b/numba_cuda/numba/cuda/tests/test_binary_generation/test_device_functions.cu @@ -17,3 +17,7 @@ extern "C" __device__ int add_from_numba(uint32_t *result, uint32_t a, *result = a + b; return 0; } + +extern "C" __device__ uint32_t add_cabi(uint32_t a, uint32_t b) { + return a + b; +}