From 13395d4af1173c872288792fe4579676c9f8aa1f Mon Sep 17 00:00:00 2001 From: Atmn Patel Date: Mon, 29 Sep 2025 09:21:08 -0700 Subject: [PATCH 1/3] [Testing] Add some numpy array testing for the CUDA target --- numba_cuda/numba/cuda/__init__.py | 6 + numba_cuda/numba/cuda/np/arrayobj.py | 6 +- numba_cuda/numba/cuda/simulator/api.py | 4 + numba_cuda/numba/cuda/target.py | 9 +- .../numba/cuda/tests/test_array_attr.py | 424 ++++++++++++ .../numba/cuda/tests/test_array_constants.py | 275 ++++++++ .../numba/cuda/tests/test_array_iterators.py | 638 ++++++++++++++++++ numba_cuda/numba/cuda/typing/context.py | 8 +- numba_cuda/numba/cuda/ufuncs.py | 8 +- 9 files changed, 1368 insertions(+), 10 deletions(-) create mode 100644 numba_cuda/numba/cuda/tests/test_array_attr.py create mode 100644 numba_cuda/numba/cuda/tests/test_array_constants.py create mode 100644 numba_cuda/numba/cuda/tests/test_array_iterators.py diff --git a/numba_cuda/numba/cuda/__init__.py b/numba_cuda/numba/cuda/__init__.py index 64dd06e65..8ff7391a8 100644 --- a/numba_cuda/numba/cuda/__init__.py +++ b/numba_cuda/numba/cuda/__init__.py @@ -60,3 +60,9 @@ ) from numba.cuda.np.ufunc import vectorize, guvectorize + +# Re-export literally and literal_unroll +from numba.cuda.misc.special import ( + literally, + literal_unroll, +) diff --git a/numba_cuda/numba/cuda/np/arrayobj.py b/numba_cuda/numba/cuda/np/arrayobj.py index 3e06d84cd..303376f72 100644 --- a/numba_cuda/numba/cuda/np/arrayobj.py +++ b/numba_cuda/numba/cuda/np/arrayobj.py @@ -16,9 +16,9 @@ import numpy as np -from numba import pndindex, literal_unroll -from numba.core import types, errors -from numba.cuda import typing +from numba import pndindex +from numba.cuda import literal_unroll +from numba.core import types, typing, errors from numba.cuda import cgutils, extending from numba.cuda.np.numpy_support import ( as_dtype, diff --git a/numba_cuda/numba/cuda/simulator/api.py b/numba_cuda/numba/cuda/simulator/api.py index 9d22d2845..c54aadef7 100644 --- 
a/numba_cuda/numba/cuda/simulator/api.py +++ b/numba_cuda/numba/cuda/simulator/api.py @@ -161,3 +161,7 @@ def jitwrapper(fn): def defer_cleanup(): # No effect for simulator yield + + +class grid(object): + pass diff --git a/numba_cuda/numba/cuda/target.py b/numba_cuda/numba/cuda/target.py index 31e5f0f18..5caa07595 100644 --- a/numba_cuda/numba/cuda/target.py +++ b/numba_cuda/numba/cuda/target.py @@ -36,7 +36,7 @@ # Typing -class CUDATypingContext(typing.BaseContext): +class CUDATypingContext(typing.Context): def load_additional_registries(self): from . import ( cudadecl, @@ -46,7 +46,7 @@ def load_additional_registries(self): libdevicedecl, vector_types, ) - from numba.cuda.typing import enumdecl, cffi_utils + from numba.cuda.typing import enumdecl, cffi_utils, npydecl self.install_registry(cudadecl.registry) self.install_registry(cffi_utils.registry) @@ -57,6 +57,7 @@ def load_additional_registries(self): self.install_registry(vector_types.typing_registry) self.install_registry(fp16.typing_registry) self.install_registry(bf16.typing_registry) + self.install_registry(npydecl.registry) def resolve_value_type(self, val): # treat other dispatcher object as another device function @@ -182,6 +183,8 @@ def load_additional_registries(self): arrayobj, npdatetime, polynomial, + arraymath, + npyimpl, ) from . import ( cudaimpl, @@ -222,6 +225,8 @@ def load_additional_registries(self): self.install_registry(polynomial.registry) self.install_registry(npdatetime.registry) self.install_registry(arrayobj.registry) + self.install_registry(arraymath.registry) + self.install_registry(npyimpl.registry) # Install only implementations that are defined outside of numba (i.e., # in third-party extensions) from Numba's builtin_registry. 
diff --git a/numba_cuda/numba/cuda/tests/test_array_attr.py b/numba_cuda/numba/cuda/tests/test_array_attr.py new file mode 100644 index 000000000..8f52a7c4e --- /dev/null +++ b/numba_cuda/numba/cuda/tests/test_array_attr.py @@ -0,0 +1,424 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: BSD-2-Clause + +import numpy as np + +import unittest +from numba.cuda.np.numpy_support import from_dtype +from numba import typeof +from numba.cuda import jit +from numba.core import types +from numba.cuda.tests.support import MemoryLeakMixin +from numba.cuda.testing import CUDATestCase +from numba.core.errors import TypingError +from numba.cuda.tests.support import override_config + + +def array_itemsize(a, res): + res[0] = a.itemsize + + +def array_nbytes(a, res): + res[0] = a.nbytes + + +def array_shape(a, i, res): + res[0] = a.shape[i] + + +def array_strides(a, i, res): + res[0] = a.strides[i] + + +def array_ndim(a, res): + res[0] = a.ndim + + +def array_size(a, res): + res[0] = a.size + + +def array_flags_contiguous(a, res): + res[0] = a.flags.contiguous + + +def array_flags_c_contiguous(a, res): + res[0] = a.flags.c_contiguous + + +def array_flags_f_contiguous(a, res): + res[0] = a.flags.f_contiguous + + +def nested_array_itemsize(a, res): + res[0] = a.f.itemsize + + +def nested_array_nbytes(a, res): + res[0] = a.f.nbytes + + +def nested_array_shape(a, res): + res[0] = a.f.shape[0] + res[1] = a.f.shape[1] + + +def nested_array_strides(a, res): + res[0] = a.f.strides[0] + res[1] = a.f.strides[1] + + +def nested_array_ndim(a, res): + res[0] = a.f.ndim + + +def nested_array_size(a, res): + res[0] = a.f.size + + +def size_after_slicing_usecase(buf, i, res): + sliced = buf[i] + # Make sure size attribute is not lost + res[0] = sliced.size + + +def array_real(arr, res): + if arr.ndim == 1: + for i in range(arr.shape[0]): + res[i] = arr.real[i] + else: + for i in range(arr.shape[0]): + for j in 
range(arr.shape[1]): + res[i, j] = arr.real[i, j] + + +def array_imag(arr, res): + if arr.ndim == 1: + for i in range(arr.shape[0]): + res[i] = arr.imag[i] + else: + for i in range(arr.shape[0]): + for j in range(arr.shape[1]): + res[i, j] = arr.imag[i, j] + + +class TestArrayAttr(MemoryLeakMixin, CUDATestCase): + def setUp(self): + super(TestArrayAttr, self).setUp() + self.a = np.arange(20, dtype=np.int32).reshape(4, 5) + + def check_unary(self, pyfunc, arr): + out = np.zeros(1) + aryty = typeof(arr) + cfunc = self.get_cfunc(pyfunc, (aryty, typeof(out))) + cout = np.zeros(1) + pyfunc(arr, out) + cfunc[1, 1](arr, cout) + self.assertPreciseEqual(out[0], cout[0]) + # Retry with forced any layout + cfunc = self.get_cfunc(pyfunc, (aryty.copy(layout="A"), typeof(out))) + cout = np.zeros(1) + cfunc[1, 1](arr, cout) + self.assertPreciseEqual(cout[0], out[0]) + + def check_unary_with_arrays( + self, + pyfunc, + ): + self.check_unary(pyfunc, self.a) + self.check_unary(pyfunc, self.a.T) + # 0-d array + arr = np.array([42]).reshape(()) + self.check_unary(pyfunc, arr) + # array with an empty dimension + arr = np.zeros(0) + self.check_unary(pyfunc, arr) + + # check with reshape + self.check_unary(pyfunc, arr.reshape((1, 0, 2))) + + def get_cfunc(self, pyfunc, argspec): + return jit(argspec)(pyfunc) + + def test_shape(self): + pyfunc = array_shape + cfunc = self.get_cfunc( + pyfunc, (types.int32[:, :], types.int32, types.float64[:]) + ) + + for i in range(self.a.ndim): + out = np.zeros(1) + cout = np.zeros(1) + pyfunc(self.a, i, out) + cfunc[1, 1](self.a, i, cout) + self.assertEqual(out[0], cout[0]) + + def test_strides(self): + pyfunc = array_strides + cfunc = self.get_cfunc( + pyfunc, (types.int32[:, :], types.int32, types.float64[:]) + ) + + for i in range(self.a.ndim): + out = np.zeros(1) + cout = np.zeros(1) + pyfunc(self.a, i, out) + cfunc[1, 1](self.a, i, cout) + self.assertEqual(out[0], cout[0]) + + def test_ndim(self): + self.check_unary_with_arrays(array_ndim) + + def 
test_size(self): + self.check_unary_with_arrays(array_size) + + def test_itemsize(self): + self.check_unary_with_arrays(array_itemsize) + + def test_nbytes(self): + self.check_unary_with_arrays(array_nbytes) + + def test_flags_contiguous(self): + with override_config("CUDA_ENABLE_NRT", True): + self.check_unary_with_arrays(array_flags_contiguous) + + def test_flags_c_contiguous(self): + with override_config("CUDA_ENABLE_NRT", True): + self.check_unary_with_arrays(array_flags_c_contiguous) + + def test_flags_f_contiguous(self): + with override_config("CUDA_ENABLE_NRT", True): + self.check_unary_with_arrays(array_flags_f_contiguous) + + +class TestNestedArrayAttr(MemoryLeakMixin, CUDATestCase): + def setUp(self): + super(TestNestedArrayAttr, self).setUp() + dtype = np.dtype([("a", np.int32), ("f", np.int32, (2, 5))]) + self.a = np.recarray(1, dtype)[0] + self.nbrecord = from_dtype(self.a.dtype) + + def get_cfunc(self, pyfunc): + return jit((self.nbrecord, types.float64[:]))(pyfunc) + + def test_shape(self): + pyfunc = nested_array_shape + cfunc = self.get_cfunc(pyfunc) + + out = np.zeros(2) + cout = np.zeros(2) + pyfunc(self.a, out) + cfunc[1, 1](self.a, cout) + self.assertEqual(out[0], cout[0]) + self.assertEqual(out[1], cout[1]) + + def test_strides(self): + pyfunc = nested_array_strides + cfunc = self.get_cfunc(pyfunc) + + out = np.zeros(2) + cout = np.zeros(2) + pyfunc(self.a, out) + cfunc[1, 1](self.a, cout) + self.assertEqual(out[0], cout[0]) + self.assertEqual(out[1], cout[1]) + + def test_ndim(self): + pyfunc = nested_array_ndim + cfunc = self.get_cfunc(pyfunc) + + out = np.zeros(1) + cout = np.zeros(1) + pyfunc(self.a, out) + cfunc[1, 1](self.a, cout) + self.assertEqual(out[0], cout[0]) + + def test_nbytes(self): + pyfunc = nested_array_nbytes + cfunc = self.get_cfunc(pyfunc) + + out = np.zeros(1) + cout = np.zeros(1) + pyfunc(self.a, out) + cfunc[1, 1](self.a, cout) + self.assertEqual(out[0], cout[0]) + + def test_size(self): + pyfunc = nested_array_size + 
cfunc = self.get_cfunc(pyfunc) + + out = np.zeros(1) + cout = np.zeros(1) + pyfunc(self.a, out) + cfunc[1, 1](self.a, cout) + self.assertEqual(out[0], cout[0]) + + def test_itemsize(self): + pyfunc = nested_array_itemsize + cfunc = self.get_cfunc(pyfunc) + + out = np.zeros(1) + cout = np.zeros(1) + pyfunc(self.a, out) + cfunc[1, 1](self.a, cout) + self.assertEqual(out[0], cout[0]) + + +class TestSlicedArrayAttr(MemoryLeakMixin, CUDATestCase): + def test_size_after_slicing(self): + pyfunc = size_after_slicing_usecase + cfunc = jit(pyfunc) + arr = np.arange(2 * 5).reshape(2, 5) + for i in range(arr.shape[0]): + out = np.zeros(1) + cout = np.zeros(1) + pyfunc(arr, i, out) + cfunc[1, 1](arr, i, cout) + self.assertEqual(out[0], cout[0]) + arr = np.arange(2 * 5 * 3).reshape(2, 5, 3) + for i in range(arr.shape[0]): + out = np.zeros(1) + cout = np.zeros(1) + pyfunc(arr, i, out) + cfunc[1, 1](arr, i, cout) + self.assertEqual(out[0], cout[0]) + + +class TestRealImagAttr(MemoryLeakMixin, CUDATestCase): + def setUp(self): + override_config("CUDA_ENABLE_NRT", True) + super(TestRealImagAttr, self).setUp() + + def check_complex(self, pyfunc): + cfunc = jit(pyfunc) + # test 1D + size = 10 + arr = np.arange(size) + np.arange(size) * 10j + out = np.zeros(arr.shape) + cout = np.zeros(arr.shape) + pyfunc(arr, out) + cfunc[1, 1](arr, cout) + self.assertPreciseEqual(out, cout) + # test 2D + arr = arr.reshape(2, 5) + out = np.zeros(arr.shape) + cout = np.zeros(arr.shape) + pyfunc(arr, out) + cfunc[1, 1](arr, cout) + self.assertPreciseEqual(out, cout) + + def test_complex_real(self): + self.check_complex(array_real) + + def test_complex_imag(self): + self.check_complex(array_imag) + + def check_number_real(self, dtype): + pyfunc = array_real + cfunc = jit(pyfunc) + # test 1D + size = 10 + arr = np.arange(size, dtype=dtype) + out = np.zeros(arr.shape) + cout = np.zeros(arr.shape) + pyfunc(arr, out) + cfunc[1, 1](arr, cout) + self.assertPreciseEqual(out, cout) + # test 2D + arr = 
arr.reshape(2, 5) + out = np.zeros(arr.shape) + cout = np.zeros(arr.shape) + pyfunc(arr, out) + cfunc[1, 1](arr, cout) + self.assertPreciseEqual(out, cout) + # test identity + out = np.zeros(arr.shape) + cout = np.zeros(arr.shape) + pyfunc(arr, out) + cfunc[1, 1](arr, cout) + self.assertEqual(arr.data, out.data) + self.assertEqual(arr.data, cout.data) + # test writable + out = np.zeros(arr.shape) + cout = np.zeros(arr.shape) + cfunc[1, 1](arr, cout) + self.assertNotEqual(cout[0, 0], 5) + cout[0, 0] = 5 + self.assertEqual(cout[0, 0], 5) + + def test_number_real(self): + """ + Testing .real of non-complex dtypes + """ + for dtype in [np.uint8, np.int32, np.float32, np.float64]: + self.check_number_real(dtype) + + def check_number_imag(self, dtype): + pyfunc = array_imag + cfunc = jit(pyfunc) + # test 1D + size = 10 + arr = np.arange(size, dtype=dtype) + out = np.zeros(arr.shape) + cout = np.zeros(arr.shape) + pyfunc(arr, out) + cfunc[1, 1](arr, cout) + self.assertPreciseEqual(out, cout) + # test 2D + arr = arr.reshape(2, 5) + out = np.zeros(arr.shape) + cout = np.zeros(arr.shape) + pyfunc(arr, out) + cfunc[1, 1](arr, cout) + self.assertPreciseEqual(out, cout) + # test are zeros + cout = np.zeros(arr.shape) + cfunc[1, 1](arr, cout) + self.assertEqual(cout.tolist(), np.zeros_like(arr).tolist()) + + def test_number_imag(self): + """ + Testing .imag of non-complex dtypes + """ + with override_config("CUDA_ENABLE_NRT", True): + for dtype in [np.uint8, np.int32, np.float32, np.float64]: + self.check_number_imag(dtype) + + def test_record_real(self): + rectyp = np.dtype([("real", np.float32), ("imag", np.complex64)]) + arr = np.zeros(3, dtype=rectyp) + arr["real"] = np.random.random(arr.size) + arr["imag"] = np.random.random(arr.size) * 1.3j + + # check numpy behavior + # .real is identity + out = np.zeros(arr.shape, dtype=arr.dtype) + array_real(arr, out) + self.assertPreciseEqual(out, arr) + # .imag is zero_like + out = np.zeros(arr.shape, dtype=arr.dtype) + 
array_imag(arr, out) + self.assertEqual(out.tolist(), np.zeros_like(arr).tolist()) + + # check numba behavior + # it's most likely a user error, anyway + jit_array_real = jit(array_real) + jit_array_imag = jit(array_imag) + + cout = np.zeros(arr.shape, dtype=arr.dtype) + with self.assertRaises(TypingError) as raises: + jit_array_real[1, 1](arr, cout) + self.assertIn( + "cannot access .real of array of Record", str(raises.exception) + ) + + cout = np.zeros(arr.shape, dtype=arr.dtype) + with self.assertRaises(TypingError) as raises: + jit_array_imag[1, 1](arr, cout) + self.assertIn( + "cannot access .imag of array of Record", str(raises.exception) + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/numba_cuda/numba/cuda/tests/test_array_constants.py b/numba_cuda/numba/cuda/tests/test_array_constants.py new file mode 100644 index 000000000..e5a927e52 --- /dev/null +++ b/numba_cuda/numba/cuda/tests/test_array_constants.py @@ -0,0 +1,275 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: BSD-2-Clause + +import numpy as np + +import unittest +from numba.cuda import jit, grid, to_device, device_array +from numba.cuda.testing import CUDATestCase, skip_on_cudasim + + +a0 = np.array(42) + +s1 = np.int32(64) + +a1 = np.arange(12) +a2 = a1[::2] +a3 = a1.reshape((3, 4)).T + +dt = np.dtype([("x", np.int8), ("y", "S3")]) + +a4 = np.arange(32, dtype=np.int8).view(dt) +a5 = a4[::-2] + +# A recognizable data string +a6 = np.frombuffer(b"XXXX_array_contents_XXXX", dtype=np.float32) + + +myarray = np.array( + [ + 1, + ] +) + + +@jit +def getitem0_kernel(input_array, output_array, size): + i = grid(1) + if i < size: + output_array[i] = input_array[()] + + +@jit +def getitem1_kernel(input_array, output_array, size): + i = grid(1) + if i < size: + output_array[i] = input_array[i] + + +@jit +def getitem2_kernel(input_array, output_array, size): + i = grid(1) + if i < size: + output_array[i] = input_array[i] + + +@jit +def getitem3_kernel(input_array, output_array, size): + i = grid(1) + if i < size: + # For 2D arrays, flatten the indexing + if input_array.ndim > 1: + # Calculate 2D indices from flat index + flat_idx = i + if flat_idx < input_array.size: + row = flat_idx // input_array.shape[1] + col = flat_idx % input_array.shape[1] + output_array[i] = input_array[row, col] + else: + output_array[i] = input_array[i] + + +@jit +def getitem4_kernel(input_array, output_array, size): + i = grid(1) + if i < size: + output_array[i] = input_array[i] + + +@jit +def getitem5_kernel(input_array, output_array, size): + i = grid(1) + if i < size: + output_array[i] = input_array[i] + + +@jit +def getitem6_kernel(input_array, output_array, size): + i = grid(1) + if i < size: + output_array[i] = input_array[i] + + +@jit +def use_arrayscalar_const_kernel(output_array, size): + i = grid(1) + if i < size: + output_array[i] = s1 + + +@jit +def write_to_global_array_kernel(global_array): + i = grid(1) + if i < 1: + global_array[0] = 1 + + +@jit +def 
bytes_as_const_array_kernel(output_array, size): + i = grid(1) + if i < size: + # Use hardcoded bytes values instead of frombuffer + # "foo" as uint8 values: f=102, o=111, o=111 + if i % 3 == 0: + output_array[i] = 102 # 'f' + elif i % 3 == 1: + output_array[i] = 111 # 'o' + else: + output_array[i] = 111 # 'o' + + +@skip_on_cudasim +class TestConstantArray(CUDATestCase): + """ + Test array constants. + """ + + def check_array_const(self, kernel_func, input_array, expected_size): + # Convert input array to device (make contiguous if needed) + if ( + not input_array.flags["C_CONTIGUOUS"] + and not input_array.flags["F_CONTIGUOUS"] + ): + input_array = np.ascontiguousarray(input_array) + d_input = to_device(input_array) + d_output = device_array(expected_size, dtype=input_array.dtype) + + # Launch kernel + kernel_func[1, expected_size](d_input, d_output, expected_size) + + # Get result + result = d_output.copy_to_host() + + # Verify result matches expected values + for i in range(expected_size): + if input_array.ndim == 0: + # For 0D arrays, all results should be the same + expected_val = input_array[()] + np.testing.assert_array_equal(result[i], expected_val) + elif i < input_array.size: + if input_array.ndim == 1: + expected_val = input_array[i] + else: + # For multi-dimensional arrays, flatten the indexing + flat_idx = i + if flat_idx < input_array.size: + expected_val = input_array.flat[flat_idx] + else: + continue + np.testing.assert_array_equal(result[i], expected_val) + + def test_array_const_0d(self): + self.check_array_const(getitem0_kernel, a0, 3) + + def test_array_const_1d_contig(self): + self.check_array_const(getitem1_kernel, a1, 3) + + def test_array_const_1d_noncontig(self): + self.check_array_const(getitem2_kernel, a2, 3) + + def test_array_const_2d(self): + self.check_array_const(getitem3_kernel, a3, 3) + + def test_record_array_const_contig(self): + self.check_array_const(getitem4_kernel, a4, 3) + + def test_record_array_const_noncontig(self): + 
self.check_array_const(getitem5_kernel, a5, 3) + + def test_array_const_alignment(self): + """ + Issue #1933: the array declaration in the LLVM IR must have + the right alignment specified. + """ + # Test the kernel with the alignment array + self.check_array_const(getitem6_kernel, a6, 3) + + def test_arrayscalar_const(self): + # Test arrayscalar constant in CUDA kernel + d_output = device_array(1, dtype=np.int32) + use_arrayscalar_const_kernel[1, 1](d_output, 1) + result = d_output.copy_to_host() + self.assertEqual(result[0], s1) + + def test_write_to_global_array(self): + # Test that writing to global array works in CUDA + d_myarray = to_device(myarray.copy()) # Make a writable copy + write_to_global_array_kernel[1, 1](d_myarray) + # Copy back to host and verify the global array was modified + result = d_myarray.copy_to_host() + self.assertEqual(result[0], 1) + + def test_issue_1850(self): + """ + This issue is caused by an unresolved bug in numpy since version 1.6. + See numpy GH issue #3147. + """ + constarr = np.array([86]) + + @jit + def issue_1850_kernel(output_array, size): + i = grid(1) + if i < size: + output_array[i] = constarr[0] + + d_output = device_array(1, dtype=np.int32) + issue_1850_kernel[1, 1](d_output, 1) + out = d_output.copy_to_host() + self.assertEqual(out[0], 86) + + def test_too_big_to_freeze(self): + """ + Test issue https://github.com/numba/numba/issues/2188 where freezing + a constant array into the code takes prohibitively long and consumes + too much RAM. 
+ """ + nelem = 10**4 # Reduced size for CUDA testing + + @jit + def big_array_kernel(input_array, output_array, size): + i = grid(1) + if i < size: + if input_array.ndim == 1: + output_array[i] = input_array[i] + else: + # For multi-dimensional arrays, flatten the indexing + flat_idx = i + if flat_idx < input_array.size: + output_array[i] = input_array.flat[flat_idx] + + c_array = np.arange(nelem).reshape(nelem) + f_array = np.asfortranarray(np.random.random((2, nelem // 2))) + + # Test C contig + d_input = to_device(c_array) + d_output = device_array(nelem, dtype=c_array.dtype) + # Use proper block size for CUDA (max 1024 threads per block) + block_size = min(nelem, 1024) + grid_size = (nelem + block_size - 1) // block_size + big_array_kernel[grid_size, block_size](d_input, d_output, nelem) + result = d_output.copy_to_host() + np.testing.assert_array_equal(c_array, result) + + # Test F contig + d_input = to_device(f_array) + d_output = device_array(f_array.size, dtype=f_array.dtype) + block_size = min(f_array.size, 1024) + grid_size = (f_array.size + block_size - 1) // block_size + big_array_kernel[grid_size, block_size](d_input, d_output, f_array.size) + result = d_output.copy_to_host() + np.testing.assert_array_equal(f_array.flatten(), result) + + +@skip_on_cudasim +class TestConstantBytes(CUDATestCase): + def test_constant_bytes(self): + # Test constant bytes array in CUDA kernel + d_output = device_array(3, dtype=np.uint8) + bytes_as_const_array_kernel[1, 3](d_output, 3) + result = d_output.copy_to_host() + expected = np.frombuffer(b"foo", dtype=np.uint8) + np.testing.assert_array_equal(result, expected) + + +if __name__ == "__main__": + unittest.main() diff --git a/numba_cuda/numba/cuda/tests/test_array_iterators.py b/numba_cuda/numba/cuda/tests/test_array_iterators.py new file mode 100644 index 000000000..0cd9c8d12 --- /dev/null +++ b/numba_cuda/numba/cuda/tests/test_array_iterators.py @@ -0,0 +1,638 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA 
CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: BSD-2-Clause + + +import numpy as np + +from numba.cuda import jit +from numba import typeof +from numba.core import types +from numba.cuda.tests.support import MemoryLeakMixin, override_config +from numba.cuda.testing import CUDATestCase +import unittest +import itertools + + +def array_iter(arr, out): + total = 0 + for i, v in enumerate(arr): + total += i * v + out[0] = total + + +def array_view_iter(arr, idx, out): + total = 0 + for i, v in enumerate(arr[idx]): + total += i * v + out[0] = total + + +def array_flat(arr, out): + for i, v in enumerate(arr.flat): + out[i] = v + + +def array_flat_getitem(arr, ind, out): + out[0] = arr.flat[ind] + + +def array_flat_setitem(arr, ind, val): + arr.flat[ind] = val + + +def array_flat_sum(arr, out): + s = 0 + for i, v in enumerate(arr.flat): + s = s + (i + 1) * v + out[0] = s + + +def array_flat_len(arr, out): + out[0] = len(arr.flat) + + +def array_ndenumerate_sum(arr, out): + s = 0 + for (i, j), v in np.ndenumerate(arr): + s = s + (i + 1) * (j + 1) * v + out[0] = s + + +def np_ndindex_empty(out): + s = 0 + for ind in np.ndindex(()): + s += s + len(ind) + 1 + out[0] = s + + +def np_ndindex(x, y, out): + s = 0 + n = 0 # noqa: F841 + for i, j in np.ndindex(x, y): + s = s + (i + 1) * (j + 1) + out[0] = s + + +def np_ndindex_array(arr, out): + s = 0 + n = 0 # noqa: F841 + for indices in np.ndindex(arr.shape): + for i, j in enumerate(indices): + s = s + (i + 1) * (j + 1) + out[0] = s + + +def np_nditer1a(a, out): + for u in np.nditer(a): + out = u.item() # noqa: F841 + + +def np_nditer1b(a, out): + i = 0 + for u in np.nditer(a): + out[i] = u.item() + i += 1 + + +def np_nditer2a(a, b, out): + for u, v in np.nditer((a, b)): + out[0] = u.item() + out[1] = v.item() + + +def np_nditer2b(a, b, out): + i = 0 + for u, v in np.nditer((a, b)): + out[i] = u.item() + out[i + 1] = v.item() + i += 2 + + +def np_nditer2b_err(a, b, out): + i = 0 + for u, v in 
np.nditer((a, b)): + out[i] = u.item() + out[i + 1] = v.item() + i += 2 + + +def np_nditer3(a, b, c, out): + i = 0 + for u, v, w in np.nditer((a, b, c)): + out[i] = u.item() + out[i + 1] = v.item() + out[i + 2] = w.item() + i += 3 + + +def iter_next(arr, out): + it = iter(arr) + it2 = iter(arr) + out[0] = next(it) + out[1] = next(it) + out[2] = next(it2) + + +# +# Test premature free (see issue #2112). +# The following test allocates an array ``x`` inside the body. +# The compiler will put a ``del x`` right after the last use of ``x``, +# which is right after the creation of the array iterator and +# before the loop is entered. If the iterator does not incref the array, +# the iterator will be reading garbage data from freed memory. +# + + +def array_flat_premature_free(size, out): + x = np.arange(size) + res = np.zeros_like(x, dtype=np.intp) + for i, v in enumerate(x.flat): + res[i] = v + for i in range(len(res)): + out[i] = res[i] + + +def array_ndenumerate_premature_free(size, out): + x = np.arange(size) + res = np.zeros_like(x, dtype=np.intp) + for i, v in np.ndenumerate(x): + res[i] = v + for i in range(len(res)): + out[i] = res[i] + + +class TestArrayIterators(MemoryLeakMixin, CUDATestCase): + """ + Test array.flat, etc. 
+ """ + + def setUp(self): + super(TestArrayIterators, self).setUp() + + def check_array_iter_1d(self, arr): + out = np.zeros(1, dtype=np.int32) + cout = np.zeros(1, dtype=np.int32) + pyfunc = array_iter + cfunc = jit((typeof(arr), typeof(out)))(pyfunc) + pyfunc(arr, out) + cfunc[1, 1](arr, cout) + self.assertPreciseEqual(out[0], cout[0]) + + def check_array_view_iter(self, arr, index): + out = np.zeros(1) + cout = np.zeros(1) + pyfunc = array_view_iter + cfunc = jit( + ( + typeof(arr), + typeof(index), + typeof(out), + ) + )(pyfunc) + pyfunc(arr, index, out) + cfunc[1, 1](arr, index, cout) + self.assertPreciseEqual(out[0], cout[0]) + + def check_array_flat(self, arr, arrty=None): + out = np.zeros(arr.size, dtype=arr.dtype) + nb_out = out.copy() + if arrty is None: + arrty = typeof(arr) + + cfunc = jit( + ( + arrty, + typeof(out), + ) + )(array_flat) + + array_flat(arr, out) + cfunc[1, 1](arr, nb_out) + + self.assertPreciseEqual(out, nb_out) + + def check_array_unary(self, arr, arrty, func): + out = np.zeros(3) + cout = np.zeros(3) + cfunc = jit((arrty, typeof(out)))(func) + func(arr, out) + cfunc[1, 1](arr, cout) + self.assertPreciseEqual(out, cout) + + def check_array_ndenumerate_sum(self, arr, arrty): + self.check_array_unary(arr, arrty, array_ndenumerate_sum) + + def test_array_iter(self): + # Test iterating over arrays + arr = np.arange(6) + self.check_array_iter_1d(arr) + arr = arr[::2] + self.assertFalse(arr.flags.c_contiguous) + self.assertFalse(arr.flags.f_contiguous) + self.check_array_iter_1d(np.ascontiguousarray(arr)) + arr = np.bool_([1, 0, 0, 1]) + self.check_array_iter_1d(arr) + + def test_array_view_iter(self): + # Test iterating over a 1d view over a 2d array + arr = np.arange(12).reshape((3, 4)) + self.check_array_view_iter(arr, 1) + self.check_array_view_iter(arr.T, 1) + arr = arr[::2] + self.check_array_view_iter(np.ascontiguousarray(arr), 1) + arr = np.bool_([1, 0, 0, 1]).reshape((2, 2)) + self.check_array_view_iter(arr, 1) + + def 
test_array_flat_3d(self): + arr = np.arange(24).reshape(4, 2, 3) + + arrty = typeof(arr) + self.assertEqual(arrty.ndim, 3) + self.assertEqual(arrty.layout, "C") + self.assertTrue(arr.flags.c_contiguous) + # Test with C-contiguous array + self.check_array_flat(arr) + # Test with Fortran-contiguous array + arr = arr.transpose() + self.assertFalse(arr.flags.c_contiguous) + self.assertTrue(arr.flags.f_contiguous) + self.assertEqual(typeof(arr).layout, "F") + self.check_array_flat(arr) + # Test with non-contiguous array + arr = arr[::2] + self.assertFalse(arr.flags.c_contiguous) + self.assertFalse(arr.flags.f_contiguous) + self.assertEqual(typeof(arr).layout, "A") + self.check_array_flat(np.ascontiguousarray(arr)) + # Boolean array + arr = np.bool_([1, 0, 0, 1] * 2).reshape((2, 2, 2)) + self.check_array_flat(np.ascontiguousarray(arr)) + + def test_array_flat_empty(self): + # Test .flat with various shapes of empty arrays, contiguous + # and non-contiguous (see issue #846). + + # Define a local checking function, Numba's `typeof` ends up aliasing + # 0d C and F ordered arrays, so the check needs to go via the compile + # result entry point to bypass type checking. 
+ def check(arr, arrty): + out = np.zeros(1, dtype=np.int32) + cout = np.zeros(1, dtype=np.int32) + cfunc = jit((arrty, typeof(out)))(array_flat_sum) + array_flat_sum(arr, out) + cfunc[1, 1](arr, cout) + self.assertPreciseEqual(out[0], cout[0]) + + arr = np.zeros(0, dtype=np.int32) + arr = arr.reshape(0, 2) + arrty = types.Array(types.int32, 2, layout="C") + check(arr, arrty) + arrty = types.Array(types.int32, 2, layout="F") + check(arr, arrty) + arrty = types.Array(types.int32, 2, layout="A") + check(arr, arrty) + arr = arr.reshape(2, 0) + arrty = types.Array(types.int32, 2, layout="C") + check(arr, arrty) + arrty = types.Array(types.int32, 2, layout="F") + check(arr, arrty) + arrty = types.Array(types.int32, 2, layout="A") + check(arr, arrty) + + def test_array_flat_getitem(self): + # Test indexing of array.flat object + pyfunc = array_flat_getitem + cfunc = jit(pyfunc) + + def check(arr, ind): + out = np.zeros(1, dtype=np.int32) + cout = np.zeros(1, dtype=np.int32) + pyfunc(arr, ind, out) + cfunc[1, 1](arr, ind, cout) + self.assertEqual(cout[0], out[0]) + + arr = np.arange(24).reshape(4, 2, 3) + for i in range(arr.size): + check(arr, i) + arr = arr.T + for i in range(arr.size): + check(arr, i) + arr = arr[::2] + for i in range(arr.size): + check(np.ascontiguousarray(arr), i) + arr = np.array([42]).reshape(()) + for i in range(arr.size): + check(arr, i) + # Boolean array + arr = np.bool_([1, 0, 0, 1]) + for i in range(arr.size): + check(arr, i) + arr = arr[::2] + for i in range(arr.size): + check(np.ascontiguousarray(arr), i) + + def test_array_flat_setitem(self): + # Test assignment via indexing of array.flat object + pyfunc = array_flat_setitem + cfunc = jit(pyfunc) + + def check(arr, ind): + # Use np.copy() to keep the layout + expected = np.copy(arr) + got = np.copy(arr) + pyfunc(expected, ind, 123) + cfunc[1, 1](got, ind, 123) + self.assertPreciseEqual(got, expected) + + arr = np.arange(24).reshape(4, 2, 3) + for i in range(arr.size): + check(arr, i) + arr = arr.T + for i 
in range(arr.size): + check(arr, i) + arr = arr[::2] + for i in range(arr.size): + check(np.ascontiguousarray(arr), i) + arr = np.array([42]).reshape(()) + for i in range(arr.size): + check(arr, i) + # Boolean array + arr = np.bool_([1, 0, 0, 1]) + for i in range(arr.size): + check(arr, i) + arr = arr[::2] + for i in range(arr.size): + check(arr, i) + + def test_array_flat_len(self): + # Test len(array.flat) + pyfunc = array_flat_len + cfunc = jit(array_flat_len) + + def check(arr): + out = np.zeros(1, dtype=np.int32) + cout = np.zeros(1, dtype=np.int32) + pyfunc(arr, out) + cfunc[1, 1](arr, cout) + self.assertEqual(cout[0], out[0]) + + arr = np.arange(24).reshape(4, 2, 3) + check(arr) + arr = arr.T + check(arr) + arr = np.array([42]).reshape(()) + check(arr) + + def test_array_flat_premature_free(self): + with override_config("CUDA_ENABLE_NRT", True): + out = np.zeros(6) + cout = np.zeros(6) + cfunc = jit((types.intp, typeof(out)))(array_flat_premature_free) + array_flat_premature_free(6, out) + cfunc[1, 1](6, cout) + self.assertTrue(cout.sum()) + self.assertPreciseEqual(out, cout) + + def test_array_ndenumerate_2d(self): + arr = np.arange(12).reshape(4, 3) + arrty = typeof(arr) + self.assertEqual(arrty.ndim, 2) + self.assertEqual(arrty.layout, "C") + self.assertTrue(arr.flags.c_contiguous) + # Test with C-contiguous array + self.check_array_ndenumerate_sum(arr, arrty) + # Test with Fortran-contiguous array + arr = arr.transpose() + self.assertFalse(arr.flags.c_contiguous) + self.assertTrue(arr.flags.f_contiguous) + arrty = typeof(arr) + self.assertEqual(arrty.layout, "F") + self.check_array_ndenumerate_sum(arr, arrty) + # Test with non-contiguous array + arr = arr[::2] + self.assertFalse(arr.flags.c_contiguous) + self.assertFalse(arr.flags.f_contiguous) + arrty = typeof(arr) + self.assertEqual(arrty.layout, "A") + self.check_array_ndenumerate_sum(np.ascontiguousarray(arr), arrty) + # Boolean array + arr = np.bool_([1, 0, 0, 1]).reshape((2, 2)) + 
self.check_array_ndenumerate_sum(np.ascontiguousarray(arr), typeof(arr)) + + def test_array_ndenumerate_empty(self): + # Define a local checking function, Numba's `typeof` ends up aliasing + # 0d C and F ordered arrays, so the check needs to go via the compile + # result entry point to bypass type checking. + def check(arr, arrty): + out = np.zeros(1, dtype=np.int32) + cout = np.zeros(1, dtype=np.int32) + cfunc = jit((arrty, typeof(out)))(array_ndenumerate_sum) + array_ndenumerate_sum(arr, out) + cfunc[1, 1](arr, cout) + np.testing.assert_allclose(out[0], cout[0]) + + arr = np.zeros(0, dtype=np.int32) + arr = arr.reshape(0, 2) + arrty = types.Array(types.int32, 2, layout="C") + check(arr, arrty) + arrty = types.Array(types.int32, 2, layout="F") + check(arr, arrty) + arrty = types.Array(types.int32, 2, layout="A") + check(arr, arrty) + arr = arr.reshape(2, 0) + arrty = types.Array(types.int32, 2, layout="C") + check(arr, arrty) + arrty = types.Array(types.int32, 2, layout="F") + check(arr, arrty) + arrty = types.Array(types.int32, 2, layout="A") + check(arr, arrty) + + def test_array_ndenumerate_premature_free(self): + with override_config("CUDA_ENABLE_NRT", True): + out = np.zeros(6) + cout = np.zeros(6) + cfunc = jit((types.intp, typeof(out)))( + array_ndenumerate_premature_free + ) + array_ndenumerate_premature_free(6, out) + cfunc[1, 1](6, cout) + self.assertTrue(cout.sum()) + self.assertPreciseEqual(out, cout) + + def test_np_ndindex(self): + func = np_ndindex + out = np.zeros(1) + cout = np.zeros(1) + cfunc = jit( + ( + types.int32, + types.int32, + typeof(out), + ) + )(func) + func(3, 4, out) + cfunc[1, 1](3, 4, cout) + self.assertPreciseEqual(out, cout) + func(3, 0, out) + cfunc[1, 1](3, 0, cout) + self.assertPreciseEqual(out, cout) + func(0, 3, out) + cfunc[1, 1](0, 3, cout) + self.assertPreciseEqual(out, cout) + func(0, 0, out) + cfunc[1, 1](0, 0, cout) + self.assertPreciseEqual(out, cout) + + def test_np_ndindex_array(self): + func = np_ndindex_array + 
arr = np.arange(12, dtype=np.int32) + 10 + self.check_array_unary(arr, typeof(arr), func) + arr = arr.reshape((4, 3)) + self.check_array_unary(arr, typeof(arr), func) + arr = arr.reshape((2, 2, 3)) + self.check_array_unary(arr, typeof(arr), func) + + def test_iter_next(self): + # This also checks memory management with iter() and next() + func = iter_next + arr = np.arange(12, dtype=np.int32) + 10 + self.check_array_unary(arr, typeof(arr), func) + + +class TestNdIter(MemoryLeakMixin, CUDATestCase): + """ + Test np.nditer() + """ + + def inputs_a(self): + # scalars + yield np.float32(100) + + # 0-d arrays + yield np.array(102, dtype=np.int16) + + def inputs_b(self): + # All those inputs are compatible with a (3, 4) main shape + # 1-d arrays + yield np.arange(4).astype(np.complex64) + yield np.arange(8)[::2] + + # 2-d arrays + a = np.arange(12).reshape((3, 4)) + yield a + yield a.copy(order="F") + a = np.arange(24).reshape((6, 4))[::2] + yield a + + def basic_inputs(self): + yield np.arange(4).astype(np.complex64) + yield np.arange(8)[::2] + a = np.arange(12).reshape((3, 4)) + yield a + yield a.copy(order="F") + + def check_result(self, got, expected): + self.assertEqual(set(got), set(expected), (got, expected)) + + def test_nditer1a(self): + pyfunc = np_nditer1a + cfunc = jit(pyfunc) + for a in self.inputs_a(): + out = np.zeros(a.size, dtype=a.dtype) + cout = np.zeros(a.size, dtype=a.dtype) + pyfunc(a, out) + cfunc[1, 1](a, cout) + self.assertPreciseEqual(out, cout) + + def test_nditer1b(self): + pyfunc = np_nditer1b + cfunc = jit(pyfunc) + for a in self.inputs_b(): + out = np.zeros(a.size, dtype=a.dtype) + cout = np.zeros(a.size, dtype=a.dtype) + pyfunc(np.ascontiguousarray(a), out) + cfunc[1, 1](np.ascontiguousarray(a), cout) + self.assertPreciseEqual(out, cout) + + def test_nditer2a(self): + pyfunc = np_nditer2a + cfunc = jit(pyfunc) + for a, b in itertools.product(self.inputs_a(), self.inputs_a()): + out = np.zeros( + a.size + b.size, 
dtype=np.result_type(a.dtype, b.dtype) + ) + cout = np.zeros( + a.size + b.size, dtype=np.result_type(a.dtype, b.dtype) + ) + pyfunc(a, b, out) + cfunc[1, 1](a, b, cout) + self.check_result(out, cout) + + def test_nditer2b(self): + pyfunc = np_nditer2b + cfunc = jit(pyfunc) + for a, b in itertools.product(self.inputs_b(), self.inputs_b()): + out = np.zeros( + a.size * b.size, dtype=np.result_type(a.dtype, b.dtype) + ) + cout = np.zeros( + a.size * b.size, dtype=np.result_type(a.dtype, b.dtype) + ) + pyfunc(np.ascontiguousarray(a), np.ascontiguousarray(b), out) + cfunc[1, 1](np.ascontiguousarray(a), np.ascontiguousarray(b), cout) + self.check_result(out, cout) + + def test_nditer3(self): + pyfunc = np_nditer3 + cfunc = jit(pyfunc) + # Use a restricted set of inputs, to shorten test time + inputs = self.basic_inputs + for a, b, c in itertools.product(inputs(), inputs(), inputs()): + out = np.zeros( + a.size * b.size * c.size, + dtype=np.result_type(a.dtype, b.dtype, c.dtype), + ) + cout = np.zeros( + a.size * b.size * c.size, + dtype=np.result_type(a.dtype, b.dtype, c.dtype), + ) + pyfunc( + np.ascontiguousarray(a), + np.ascontiguousarray(b), + np.ascontiguousarray(c), + out, + ) + cfunc[1, 1]( + np.ascontiguousarray(a), + np.ascontiguousarray(b), + np.ascontiguousarray(c), + cout, + ) + self.check_result(out, cout) + + def test_errors(self): + # Incompatible shapes + pyfunc = np_nditer2b_err + cfunc = jit(debug=True)(pyfunc) + + self.disable_leak_check() + + def check_incompatible(a, b): + with self.assertRaises(SystemError) as raises: + out = np.zeros( + a.size * b.size, dtype=np.result_type(a.dtype, b.dtype) + ) + cfunc[1, 1](a, b, out) + self.assertIn( + "unknown error", + str(raises.exception), + ) + + check_incompatible(np.arange(2), np.arange(3)) + a = np.arange(12).reshape((3, 4)) + b = np.arange(3) + check_incompatible(a, b) + + +if __name__ == "__main__": + unittest.main() diff --git a/numba_cuda/numba/cuda/typing/context.py 
b/numba_cuda/numba/cuda/typing/context.py index 62c6cf938..324d4ab41 100644 --- a/numba_cuda/numba/cuda/typing/context.py +++ b/numba_cuda/numba/cuda/typing/context.py @@ -522,7 +522,13 @@ def is_external(obj): else: # A type was already inserted, see if we can add to it newty = existing.augment(gty) - if newty is None: + if ( + newty is None + and ".".join( + str(existing.__class__.__module__).split(".")[:2] + ) + != "numba.core" + ): raise TypeError( "cannot augment %s with %s" % (existing, gty) ) diff --git a/numba_cuda/numba/cuda/ufuncs.py b/numba_cuda/numba/cuda/ufuncs.py index 01d99e115..2b8b2374a 100644 --- a/numba_cuda/numba/cuda/ufuncs.py +++ b/numba_cuda/numba/cuda/ufuncs.py @@ -13,10 +13,6 @@ import numpy as np from functools import lru_cache from numba.cuda import typing -from numba.cuda.mathimpl import ( - get_unary_impl_for_fn_and_ty, - get_binary_impl_for_fn_and_ty, -) def get_ufunc_info(ufunc_key): @@ -29,6 +25,10 @@ def ufunc_db(): from numba.cuda.cpython import cmathimpl, mathimpl, numbers from numba.cuda.np import npyfuncs from numba.cuda.np.numpy_support import numpy_version + from numba.cuda.mathimpl import ( + get_unary_impl_for_fn_and_ty, + get_binary_impl_for_fn_and_ty, + ) def np_unary_impl(fn, context, builder, sig, args): npyfuncs._check_arity_and_homogeneity(sig, args, 1) From 43641c72e9da49da37717499af505453efd08215 Mon Sep 17 00:00:00 2001 From: Atmn Patel Date: Mon, 20 Oct 2025 14:44:02 -0700 Subject: [PATCH 2/3] fix sim failures --- numba_cuda/numba/cuda/simulator/memory_management/nrt.py | 8 +++++++- numba_cuda/numba/cuda/tests/test_array_attr.py | 4 ++++ numba_cuda/numba/cuda/tests/test_array_iterators.py | 4 ++++ 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/numba_cuda/numba/cuda/simulator/memory_management/nrt.py b/numba_cuda/numba/cuda/simulator/memory_management/nrt.py index 6097af3b7..c3c974028 100644 --- a/numba_cuda/numba/cuda/simulator/memory_management/nrt.py +++ 
b/numba_cuda/numba/cuda/simulator/memory_management/nrt.py @@ -2,6 +2,9 @@ # SPDX-License-Identifier: BSD-2-Clause from numba.cuda import config +from collections import namedtuple + +_nrt_mstats = namedtuple("nrt_mstats", ["alloc", "free", "mi_alloc", "mi_free"]) class RTSys: @@ -11,9 +14,12 @@ def __init__(self, *args, **kwargs): def memsys_enable_stats(self): pass - def get_allocation_stats(self): + def memsys_disable_stats(self): pass + def get_allocation_stats(self): + return _nrt_mstats(alloc=0, free=0, mi_alloc=0, mi_free=0) + rtsys = RTSys() diff --git a/numba_cuda/numba/cuda/tests/test_array_attr.py b/numba_cuda/numba/cuda/tests/test_array_attr.py index 8f52a7c4e..4a49b5bb6 100644 --- a/numba_cuda/numba/cuda/tests/test_array_attr.py +++ b/numba_cuda/numba/cuda/tests/test_array_attr.py @@ -12,6 +12,10 @@ from numba.cuda.testing import CUDATestCase from numba.core.errors import TypingError from numba.cuda.tests.support import override_config +from numba.cuda import config + +if config.ENABLE_CUDASIM: + raise unittest.SkipTest("Array attribute tests not done in simulator") def array_itemsize(a, res): diff --git a/numba_cuda/numba/cuda/tests/test_array_iterators.py b/numba_cuda/numba/cuda/tests/test_array_iterators.py index 0cd9c8d12..65345d8e1 100644 --- a/numba_cuda/numba/cuda/tests/test_array_iterators.py +++ b/numba_cuda/numba/cuda/tests/test_array_iterators.py @@ -11,6 +11,10 @@ from numba.cuda.testing import CUDATestCase import unittest import itertools +from numba.cuda import config + +if config.ENABLE_CUDASIM: + raise unittest.SkipTest("Array iterator tests not done in simulator") def array_iter(arr, out): From 675fd400a04068a6efdf76af5bd58bf244f3eecf Mon Sep 17 00:00:00 2001 From: Atmn Patel Date: Mon, 3 Nov 2025 12:54:40 -0800 Subject: [PATCH 3/3] update skip_on_cudasim to have messages, and update errors import to use vendored in numba.cuda.core.errors --- numba_cuda/numba/cuda/tests/test_array_attr.py | 2 +- 
numba_cuda/numba/cuda/tests/test_array_constants.py | 4 ++-- numba_cuda/numba/cuda/tests/test_array_iterators.py | 2 +- numba_cuda/numba/cuda/typing/context.py | 8 +------- 4 files changed, 5 insertions(+), 11 deletions(-) diff --git a/numba_cuda/numba/cuda/tests/test_array_attr.py b/numba_cuda/numba/cuda/tests/test_array_attr.py index 4a49b5bb6..09f461674 100644 --- a/numba_cuda/numba/cuda/tests/test_array_attr.py +++ b/numba_cuda/numba/cuda/tests/test_array_attr.py @@ -10,7 +10,7 @@ from numba.core import types from numba.cuda.tests.support import MemoryLeakMixin from numba.cuda.testing import CUDATestCase -from numba.core.errors import TypingError +from numba.cuda.core.errors import TypingError from numba.cuda.tests.support import override_config from numba.cuda import config diff --git a/numba_cuda/numba/cuda/tests/test_array_constants.py b/numba_cuda/numba/cuda/tests/test_array_constants.py index e5a927e52..80c46a073 100644 --- a/numba_cuda/numba/cuda/tests/test_array_constants.py +++ b/numba_cuda/numba/cuda/tests/test_array_constants.py @@ -118,7 +118,7 @@ def bytes_as_const_array_kernel(output_array, size): output_array[i] = 111 # 'o' -@skip_on_cudasim +@skip_on_cudasim("CUDA simulator does not support array constants") class TestConstantArray(CUDATestCase): """ Test array constants. 
@@ -260,7 +260,7 @@ def big_array_kernel(input_array, output_array, size): np.testing.assert_array_equal(f_array.flatten(), result) -@skip_on_cudasim +@skip_on_cudasim("CUDA simulator does not support array constants") class TestConstantBytes(CUDATestCase): def test_constant_bytes(self): # Test constant bytes array in CUDA kernel diff --git a/numba_cuda/numba/cuda/tests/test_array_iterators.py b/numba_cuda/numba/cuda/tests/test_array_iterators.py index 65345d8e1..6653a9f5c 100644 --- a/numba_cuda/numba/cuda/tests/test_array_iterators.py +++ b/numba_cuda/numba/cuda/tests/test_array_iterators.py @@ -617,7 +617,7 @@ def test_nditer3(self): def test_errors(self): # Incompatible shapes pyfunc = np_nditer2b_err - cfunc = jit(debug=True)(pyfunc) + cfunc = jit(debug=True, opt=False)(pyfunc) self.disable_leak_check() diff --git a/numba_cuda/numba/cuda/typing/context.py b/numba_cuda/numba/cuda/typing/context.py index 81c5caef7..6c2c933bf 100644 --- a/numba_cuda/numba/cuda/typing/context.py +++ b/numba_cuda/numba/cuda/typing/context.py @@ -491,13 +491,7 @@ def is_external(obj): else: # A type was already inserted, see if we can add to it newty = existing.augment(gty) - if ( - newty is None - and ".".join( - str(existing.__class__.__module__).split(".")[:2] - ) - != "numba.core" - ): + if newty is None: raise TypeError( "cannot augment %s with %s" % (existing, gty) )