Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion numba_cuda/numba/cuda/core/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -933,7 +933,12 @@ def compile_subroutine(
If *caching* evaluates True, the function keeps the compiled function
for reuse in *.cached_internal_func*.
"""
cache_key = (impl.__code__, sig, type(self.error_model))
cache_key = (
impl.__code__,
sig,
type(self.error_model),
self.enable_nrt,
)
if not caching:
cached = None
else:
Expand Down
31 changes: 31 additions & 0 deletions numba_cuda/numba/cuda/tests/nrt/test_nrt.py
Original file line number Diff line number Diff line change
Expand Up @@ -382,6 +382,37 @@ def foo():
self.assertEqual(stats.free, stats_free)
self.assertEqual(stats.mi_free, stats_mi_free)

def test_nrt_toggle_enabled(self):
def array_reshape1d(arr, newshape, got):
y = arr.reshape(newshape)
for i in range(y.shape[0]):
got[i] = y[i]

def array_reshape(arr, newshape):
return arr.reshape(newshape)

with override_config("CUDA_ENABLE_NRT", True):
# compile a kernel that caches an NRT enabled reshape primitive
@cuda.jit
def kernel(out):
out = out.reshape(out.shape)
out[0] = 1

out = cuda.to_device(np.zeros(1, dtype=np.float64))
kernel[1, 1](out)

with override_config("CUDA_ENABLE_NRT", False):
# compile and launch a new kernel that gets a cache hit on the
# NRT enabled reshape, but tries to launch with NRT disabled
# globally
new_kernel = cuda.jit(array_reshape1d)
arr = np.arange(24)
expected = array_reshape(arr, (24,))
got = np.zeros(expected.shape, dtype=arr.dtype)
new_kernel[1, 1](arr, (24,), got)

self.assertTrue(np.array_equal(expected, got))


# Allow running this test module directly (e.g. ``python test_nrt.py``).
if __name__ == "__main__":
    unittest.main()
Loading