Skip to content
2 changes: 1 addition & 1 deletion docs/source/user/ufunc.rst
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ All CUDA ufunc kernels have the ability to call other CUDA device functions::
from numba import vectorize, cuda

# define a device function
@cuda.jit('float32(float32, float32, float32)', device=True, inline=True)
@cuda.jit('float32(float32, float32, float32)', device=True, inline="always")
def cu_device_fn(x, y, z):
return x ** y / z

Expand Down
14 changes: 13 additions & 1 deletion numba_cuda/numba/cuda/decorators.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
def jit(
func_or_sig=None,
device=False,
inline=False,
inline="never",
link=[],
debug=None,
opt=None,
Expand Down Expand Up @@ -81,6 +81,15 @@ def jit(
msg = _msg_deprecated_signature_arg.format("bind")
raise DeprecationError(msg)

if isinstance(inline, bool):
DeprecationWarning(
"Passing bool to inline argument is deprecated, please refer to "
"Numba's documentation on inlining: "
"https://numba.readthedocs.io/en/stable/developer/inlining.html"
)

inline = "always" if inline else "never"

debug = config.CUDA_DEBUGINFO_DEFAULT if debug is None else debug
opt = (config.OPT != 0) if opt is None else opt
fastmath = kws.get("fastmath", False)
Expand Down Expand Up @@ -130,6 +139,7 @@ def _jit(func):
targetoptions["opt"] = opt
targetoptions["fastmath"] = fastmath
targetoptions["device"] = device
targetoptions["inline"] = inline
targetoptions["extensions"] = extensions

disp = CUDADispatcher(func, targetoptions=targetoptions)
Expand Down Expand Up @@ -171,6 +181,7 @@ def autojitwrapper(func):
return jit(
func,
device=device,
inline=inline,
debug=debug,
opt=opt,
lineinfo=lineinfo,
Expand All @@ -194,6 +205,7 @@ def autojitwrapper(func):
targetoptions["link"] = link
targetoptions["fastmath"] = fastmath
targetoptions["device"] = device
targetoptions["inline"] = inline
targetoptions["extensions"] = extensions
disp = CUDADispatcher(func_or_sig, targetoptions=targetoptions)

Expand Down
2 changes: 1 addition & 1 deletion numba_cuda/numba/cuda/tests/cudapy/test_array_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

class TestCudaArrayArg(CUDATestCase):
def test_array_ary(self):
@cuda.jit("double(double[:],int64)", device=True, inline=True)
@cuda.jit("double(double[:],int64)", device=True, inline="always")
def device_function(a, c):
return a[c]

Expand Down
2 changes: 1 addition & 1 deletion numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ def test_blackscholes(self):
VOLATILITY,
)

@cuda.jit(double(double), device=True, inline=True)
@cuda.jit(double(double), device=True, inline="always")
def cnd_cuda(d):
K = 1.0 / (1.0 + 0.2316419 * math.fabs(d))
ret_val = (
Expand Down
59 changes: 59 additions & 0 deletions numba_cuda/numba/cuda/tests/cudapy/test_inline.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
import re
import numpy as np
from numba import cuda, types
from numba.cuda.testing import (
unittest,
CUDATestCase,
skip_on_cudasim,
)


class TestCudaInline(CUDATestCase):
    @skip_on_cudasim("Cudasim does not support inline")
    def _test_call_inline(self, inline):
        """Compile a kernel that calls a device function decorated with
        ``@cuda.jit(inline=...)``, check the kernel produced the expected
        result, then inspect the generated LLVM IR to confirm whether the
        inner call was actually inlined.
        """
        buf = np.ones(2, dtype=np.int32)
        sig = (types.int32[::1],)

        # Device-callable function whose inlining behaviour is under test.
        @cuda.jit(inline=inline)
        def set_zero(a):
            a[0] = 0

        @cuda.jit(sig)
        def call_set_zero(a):
            set_zero(a)

        call_set_zero[1, 2](buf)

        # set_zero clears only element 0, so [1, 1] becomes [0, 1].
        self.assertTrue(np.all(buf == np.arange(2, dtype=np.int32)))

        # A surviving "call ... @" in the IR means the call was not inlined.
        ir = call_set_zero.inspect_llvm(sig)
        call_site = re.search(r"call [a-zA-Z0-9]* @", ir)

        if inline is True or inline == "always":
            # The call must have been folded into the caller.
            self.assertIsNone(call_site, msg=ir)
        else:
            assert inline is False or inline == "never"

            # The call instruction must still be present.
            self.assertIsNotNone(call_site, msg=ir)

    def test_call_inline_always(self):
        self._test_call_inline("always")

    def test_call_inline_never(self):
        self._test_call_inline("never")

    def test_call_inline_true(self):
        self._test_call_inline(True)

    def test_call_inline_false(self):
        self._test_call_inline(False)


# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()
2 changes: 1 addition & 1 deletion numba_cuda/numba/cuda/tests/cudapy/test_laplace.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

class TestCudaLaplace(CUDATestCase):
def test_laplace_small(self):
@cuda.jit(float64(float64, float64), device=True, inline=True)
@cuda.jit(float64(float64, float64), device=True, inline="always")
def get_max(a, b):
if a > b:
return a
Expand Down
2 changes: 1 addition & 1 deletion numba_cuda/numba/cuda/vectorizers.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ def __vectorized_{name}({args}, __out__):

class CUDAVectorize(deviceufunc.DeviceVectorize):
def _compile_core(self, sig):
cudevfn = cuda.jit(sig, device=True, inline=True)(self.pyfunc)
cudevfn = cuda.jit(sig, device=True, inline="always")(self.pyfunc)
return cudevfn, cudevfn.overloads[sig.args].signature.return_type

def _get_globals(self, corefn):
Expand Down