Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 23 additions & 1 deletion ci/test_conda.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ set -euo pipefail
. /opt/conda/etc/profile.d/conda.sh

if [ "${CUDA_VER%.*.*}" = "11" ]; then
CTK_PACKAGES="cudatoolkit"
CTK_PACKAGES="cudatoolkit=11"
else
CTK_PACKAGES="cuda-cccl cuda-nvcc-impl cuda-nvrtc libcurand-dev"
apt-get update
Expand Down Expand Up @@ -53,6 +53,28 @@ EXITCODE=0
trap "EXITCODE=1" ERR
set +e


GET_TEST_BINARY_DIR="
import numba_cuda
root = numba_cuda.__file__.rstrip('__init__.py')
test_dir = root + \"numba/cuda/tests/test_binary_generation/\"
print(test_dir)
"

CUDA_VER_MAJOR_MINOR=${CUDA_VER%.*}
if [ "${CUDA_VER_MAJOR_MINOR%.*}" == "11" ]
then
rapids-logger "Skipping test build for CUDA 11"
else
rapids-logger "Build tests"

export NUMBA_CUDA_TEST_BIN_DIR=$(python -c "$GET_TEST_BINARY_DIR")
pushd $NUMBA_CUDA_TEST_BIN_DIR
make
popd
fi


rapids-logger "Run Tests"
python -m numba.runtests numba.cuda.tests -v

Expand Down
25 changes: 24 additions & 1 deletion ci/test_wheel.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,37 @@

set -euo pipefail

CUDA_VER_MAJOR_MINOR=${CUDA_VER%.*}

rapids-logger "Install testing dependencies"
# TODO: Replace with rapids-dependency-file-generator
python -m pip install \
psutil \
cffi \
cuda-python \
"cuda-python==${CUDA_VER_MAJOR_MINOR%.*}.*" \
pytest


GET_TEST_BINARY_DIR="
import numba_cuda
root = numba_cuda.__file__.rstrip('__init__.py')
test_dir = root + \"numba/cuda/tests/test_binary_generation/\"
print(test_dir)
"

if [ "${CUDA_VER_MAJOR_MINOR%.*}" == "11" ]
then
rapids-logger "Skipping test build for CUDA 11"
else
rapids-logger "Build tests"

export NUMBA_CUDA_TEST_BIN_DIR=$(python -c "$GET_TEST_BINARY_DIR")
pushd $NUMBA_CUDA_TEST_BIN_DIR
make
popd
fi


rapids-logger "Install wheel"
package=$(realpath wheel/numba_cuda*.whl)
echo "Wheel path: $package"
Expand Down
112 changes: 99 additions & 13 deletions numba_cuda/numba/cuda/cudadrv/driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -2797,6 +2797,10 @@ def add_cu(self, cu, name):
ptx_name = os.path.splitext(name)[0] + ".ptx"
self.add_ptx(ptx.encode(), ptx_name)

@abstractmethod
def add_data(self, data, kind, name):
"""Add in-memory data to the link"""

@abstractmethod
def add_file(self, path, kind):
"""Add code from a file to the link"""
Expand Down Expand Up @@ -2948,6 +2952,10 @@ def add_ptx(self, ptx, name="<cudapy-ptx>"):
except CubinLinkerError as e:
raise LinkerError from e

def add_data(self, data, kind, name):
msg = "Adding in-memory data unsupported in the MVC linker"
raise LinkerError(msg)

def add_file(self, path, kind):
try:
from cubinlinker import CubinLinkerError
Expand Down Expand Up @@ -3046,24 +3054,61 @@ def info_log(self):
def error_log(self):
return self.linker_errors_buf.value.decode("utf8")

def add_ptx(self, ptx, name="<cudapy-ptx>"):
ptxbuf = c_char_p(ptx)
namebuf = c_char_p(name.encode("utf8"))
self._keep_alive += [ptxbuf, namebuf]
def add_cubin(self, cubin, name="<unnamed-cubin>"):
return self._add_data(enums.CU_JIT_INPUT_CUBIN, cubin, name)

def add_ptx(self, ptx, name="<unnamed-ptx>"):
return self._add_data(enums.CU_JIT_INPUT_PTX, ptx, name)

def add_object(self, object_, name="<unnamed-object>"):
return self._add_data(enums.CU_JIT_INPUT_OBJECT, object_, name)

def add_fatbin(self, fatbin, name="<unnamed-fatbin>"):
return self._add_data(enums.CU_JIT_INPUT_FATBINARY, fatbin, name)

def add_library(self, library, name="<unnamed-library>"):
return self._add_data(enums.CU_JIT_INPUT_LIBRARY, library, name)

def _add_data(self, input_type, data, name):
data_buffer = c_char_p(data)
name_buffer = c_char_p(name.encode("utf8"))
self._keep_alive += [data_buffer, name_buffer]
try:
driver.cuLinkAddData(
self.handle,
enums.CU_JIT_INPUT_PTX,
ptxbuf,
len(ptx),
namebuf,
input_type,
data_buffer,
len(data),
name_buffer,
0,
None,
None,
)
except CudaAPIError as e:
raise LinkerError("%s\n%s" % (e, self.error_log))

def add_data(self, data, kind, name=None):
# We pass the name as **kwargs to ensure the default name for the input
# type is used if none is supplied
kws = {}
if name is not None:
kws["name"] = name

if kind == FILE_EXTENSION_MAP["cubin"]:
self.add_cubin(data, **kws)
elif kind == FILE_EXTENSION_MAP["fatbin"]:
self.add_fatbin(data, **kws)
elif kind == FILE_EXTENSION_MAP["a"]:
self.add_library(data, **kws)
elif kind == FILE_EXTENSION_MAP["ptx"]:
self.add_ptx(data, **kws)
elif kind == FILE_EXTENSION_MAP["o"]:
self.add_object(data, **kws)
elif kind == FILE_EXTENSION_MAP["ltoir"]:
raise LinkerError("Ctypes linker cannot link LTO-IR")
else:
raise LinkerError(f"Don't know how to link {kind}")

def add_file(self, path, kind):
pathbuf = c_char_p(path.encode("utf8"))
self._keep_alive.append(pathbuf)
Expand Down Expand Up @@ -3151,17 +3196,58 @@ def info_log(self):
def error_log(self):
return self.linker_errors_buf.decode("utf8")

def add_ptx(self, ptx, name="<cudapy-ptx>"):
namebuf = name.encode("utf8")
self._keep_alive += [ptx, namebuf]
def add_cubin(self, cubin, name="<unnamed-cubin>"):
input_type = binding.CUjitInputType.CU_JIT_INPUT_CUBIN
return self._add_data(input_type, cubin, name)

def add_ptx(self, ptx, name="<unnamed-ptx>"):
input_type = binding.CUjitInputType.CU_JIT_INPUT_PTX
return self._add_data(input_type, ptx, name)

def add_object(self, object_, name="<unnamed-object>"):
input_type = binding.CUjitInputType.CU_JIT_INPUT_OBJECT
return self._add_data(input_type, object_, name)

def add_fatbin(self, fatbin, name="<unnamed-fatbin>"):
input_type = binding.CUjitInputType.CU_JIT_INPUT_FATBINARY
return self._add_data(input_type, fatbin, name)

def add_library(self, library, name="<unnamed-library>"):
input_type = binding.CUjitInputType.CU_JIT_INPUT_LIBRARY
return self._add_data(input_type, library, name)

def _add_data(self, input_type, data, name):
name_buffer = name.encode("utf8")
self._keep_alive += [data, name_buffer]
try:
input_ptx = binding.CUjitInputType.CU_JIT_INPUT_PTX
driver.cuLinkAddData(
self.handle, input_ptx, ptx, len(ptx), namebuf, 0, [], []
self.handle, input_type, data, len(data), name_buffer, 0, [], []
)
except CudaAPIError as e:
raise LinkerError("%s\n%s" % (e, self.error_log))

def add_data(self, data, kind, name=None):
# We pass the name as **kwargs to ensure the default name for the input
# type is used if none is supplied
kws = {}
if name is not None:
kws["name"] = name

if kind == FILE_EXTENSION_MAP["cubin"]:
self.add_cubin(data, **kws)
elif kind == FILE_EXTENSION_MAP["fatbin"]:
self.add_fatbin(data, **kws)
elif kind == FILE_EXTENSION_MAP["a"]:
self.add_library(data, **kws)
elif kind == FILE_EXTENSION_MAP["ptx"]:
self.add_ptx(data, **kws)
elif kind == FILE_EXTENSION_MAP["o"]:
self.add_object(data, **kws)
elif kind == FILE_EXTENSION_MAP["ltoir"]:
raise LinkerError("CudaPythonLinker cannot link LTO-IR")
else:
raise LinkerError(f"Don't know how to link {kind}")

def add_file(self, path, kind):
pathbuf = path.encode("utf8")
self._keep_alive.append(pathbuf)
Expand Down
2 changes: 1 addition & 1 deletion numba_cuda/numba/cuda/cudadrv/linkable_code.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,5 +87,5 @@ class Object(LinkableCode):
class LTOIR(LinkableCode):
"""An LTOIR file in memory."""

kind = "ltoir"
kind = FILE_EXTENSION_MAP["ltoir"]
default_name = "<unnamed-ltoir>"
91 changes: 91 additions & 0 deletions numba_cuda/numba/cuda/tests/cudapy/test_extending.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from numba.cuda.testing import skip_on_cudasim, unittest, CUDATestCase
from llvmlite import ir

import numpy as np
import os
from numba import config, cuda, njit, types


Expand Down Expand Up @@ -160,5 +162,94 @@ def f(r, x):
np.testing.assert_allclose(r, expected)


TEST_BIN_DIR = os.getenv("NUMBA_CUDA_TEST_BIN_DIR")
if TEST_BIN_DIR:
test_device_functions_a = os.path.join(
TEST_BIN_DIR, "test_device_functions.a"
)
test_device_functions_cubin = os.path.join(
TEST_BIN_DIR, "test_device_functions.cubin"
)
test_device_functions_cu = os.path.join(
TEST_BIN_DIR, "test_device_functions.cu"
)
test_device_functions_fatbin = os.path.join(
TEST_BIN_DIR, "test_device_functions.fatbin"
)
test_device_functions_fatbin_multi = os.path.join(
TEST_BIN_DIR, "test_device_functions_multi.fatbin"
)
test_device_functions_o = os.path.join(
TEST_BIN_DIR, "test_device_functions.o"
)
test_device_functions_ptx = os.path.join(
TEST_BIN_DIR, "test_device_functions.ptx"
)
test_device_functions_ltoir = os.path.join(
TEST_BIN_DIR, "test_device_functions.ltoir"
)


class TestExtendingLinkage(CUDATestCase):
def test_extension_adds_linkable_code(self):
cuda_major_version = cuda.runtime.get_version()[0]

if cuda_major_version < 12:
self.skipTest("CUDA 12 required for linking in-memory data")

files = (
(test_device_functions_a, cuda.Archive),
(test_device_functions_cubin, cuda.Cubin),
(test_device_functions_cu, cuda.CUSource),
(test_device_functions_fatbin, cuda.Fatbin),
(test_device_functions_o, cuda.Object),
(test_device_functions_ptx, cuda.PTXSource),
(test_device_functions_ltoir, cuda.LTOIR),
)
Comment on lines +200 to +208
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Side question, do we want to add pytest to numba-cuda?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes - I haven't had time to do anything about it though.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can help with that next week


lto = config.CUDA_ENABLE_PYNVJITLINK

for path, ctor in files:
if ctor == cuda.LTOIR and not lto:
# Don't try to test with LTOIR if LTO is not enabled
continue

with open(path, "rb") as f:
code_object = ctor(f.read())

def external_add(x, y):
return x + y

@type_callable(external_add)
def type_external_add(context):
def typer(x, y):
if x == types.uint32 and y == types.uint32:
return types.uint32

return typer

@lower_builtin(external_add, types.uint32, types.uint32)
def lower_external_add(context, builder, sig, args):
context.active_code_library.add_linking_file(code_object)
i32 = ir.IntType(32)
fnty = ir.FunctionType(i32, [i32, i32])
fn = cgutils.get_or_insert_function(
builder.module, fnty, "add_cabi"
)
return builder.call(fn, args)

@cuda.jit(lto=lto)
def use_external_add(r, x, y):
r[0] = external_add(x[0], y[0])

r = np.zeros(1, dtype=np.uint32)
x = np.ones(1, dtype=np.uint32)
y = np.ones(1, dtype=np.uint32) * 2

use_external_add[1, 1](r, x, y)

np.testing.assert_equal(r[0], 3)


if __name__ == "__main__":
unittest.main()
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,7 @@ extern "C" __device__ int add_from_numba(uint32_t *result, uint32_t a,
*result = a + b;
return 0;
}

extern "C" __device__ uint32_t add_cabi(uint32_t a, uint32_t b) {
return a + b;
}