diff --git a/numba_cuda/numba/cuda/cudadrv/devicearray.py b/numba_cuda/numba/cuda/cudadrv/devicearray.py
index 7ffbca924..73180e43e 100644
--- a/numba_cuda/numba/cuda/cudadrv/devicearray.py
+++ b/numba_cuda/numba/cuda/cudadrv/devicearray.py
@@ -121,16 +121,10 @@ def __init__(self, shape, strides, dtype, stream=0, gpu_data=None):
 
     @property
     def __cuda_array_interface__(self):
-        if _driver.USE_NV_BINDING:
-            if self.device_ctypes_pointer is not None:
-                ptr = int(self.device_ctypes_pointer)
-            else:
-                ptr = 0
+        if self.device_ctypes_pointer.value is not None:
+            ptr = self.device_ctypes_pointer.value
         else:
-            if self.device_ctypes_pointer.value is not None:
-                ptr = self.device_ctypes_pointer.value
-            else:
-                ptr = 0
+            ptr = 0
 
         return {
             "shape": tuple(self.shape),
@@ -204,10 +198,7 @@ def _numba_type_(self):
     def device_ctypes_pointer(self):
         """Returns the ctypes pointer to the GPU data buffer"""
         if self.gpu_data is None:
-            if _driver.USE_NV_BINDING:
-                return _driver.binding.CUdeviceptr(0)
-            else:
-                return c_void_p(0)
+            return c_void_p(0)
         else:
             return self.gpu_data.device_ctypes_pointer
 
diff --git a/numba_cuda/numba/cuda/cudadrv/driver.py b/numba_cuda/numba/cuda/cudadrv/driver.py
index b14fec399..a04f69753 100644
--- a/numba_cuda/numba/cuda/cudadrv/driver.py
+++ b/numba_cuda/numba/cuda/cudadrv/driver.py
@@ -2090,6 +2090,8 @@ def view(self, start, stop=None):
 
     @property
     def device_ctypes_pointer(self):
+        if USE_NV_BINDING:
+            return drvapi.cu_device_ptr(int(self.device_pointer))
         return self.device_pointer
 
     @property
@@ -3431,8 +3433,8 @@ def device_extents(devmem):
     """
    devptr = device_ctypes_pointer(devmem)
    if USE_NV_BINDING:
-        s, n = driver.cuMemGetAddressRange(devptr)
-        return s, binding.CUdeviceptr(int(s) + n)
+        s, n = driver.cuMemGetAddressRange(devptr.value)
+        return int(s), int(binding.CUdeviceptr(int(s) + n))
     else:
         s = drvapi.cu_device_ptr()
         n = c_size_t()
@@ -3449,10 +3451,7 @@ def device_memory_size(devmem):
     sz = getattr(devmem, "_cuda_memsize_", None)
     if sz is None:
         s, e = device_extents(devmem)
-        if USE_NV_BINDING:
-            sz = int(e) - int(s)
-        else:
-            sz = e - s
+        sz = e - s
         devmem._cuda_memsize_ = sz
     assert sz >= 0, "{} length array".format(sz)
     return sz
@@ -3518,10 +3517,7 @@ def host_memory_size(obj):
 
 def device_pointer(obj):
     "Get the device pointer as an integer"
-    if USE_NV_BINDING:
-        return obj.device_ctypes_pointer
-    else:
-        return device_ctypes_pointer(obj).value
+    return device_ctypes_pointer(obj).value
 
 
 def device_ctypes_pointer(obj):
diff --git a/numba_cuda/numba/cuda/dispatcher.py b/numba_cuda/numba/cuda/dispatcher.py
index 1522aa4df..250377580 100644
--- a/numba_cuda/numba/cuda/dispatcher.py
+++ b/numba_cuda/numba/cuda/dispatcher.py
@@ -588,8 +588,6 @@ def _prepare_args(self, ty, val, stream, retr, kernelargs):
         elif isinstance(ty, types.Record):
             devrec = wrap_arg(val).to_device(retr, stream)
             ptr = devrec.device_ctypes_pointer
-            if driver.USE_NV_BINDING:
-                ptr = ctypes.c_void_p(int(ptr))
             kernelargs.append(ptr)
 
         elif isinstance(ty, types.BaseTuple):
diff --git a/numba_cuda/numba/cuda/memory_management/nrt.py b/numba_cuda/numba/cuda/memory_management/nrt.py
index c978ce023..8edf40b2e 100644
--- a/numba_cuda/numba/cuda/memory_management/nrt.py
+++ b/numba_cuda/numba/cuda/memory_management/nrt.py
@@ -113,10 +113,13 @@ def allocate(self, stream=None):
             self._compile_memsys_module()
 
         # Allocate space for NRT_MemSys
-        ptr, nbytes = self._memsys_module.get_global_symbol("memsys_size")
         memsys_size = ctypes.c_uint64()
+        ptr, nbytes = self._memsys_module.get_global_symbol("memsys_size")
+        device_memsys_size = ptr.device_ctypes_pointer
+        if USE_NV_BINDING:
+            device_memsys_size = device_memsys_size.value
         driver.cuMemcpyDtoH(
-            ctypes.addressof(memsys_size), ptr.device_ctypes_pointer, nbytes
+            ctypes.addressof(memsys_size), device_memsys_size, nbytes
         )
         self._memsys = device_array(
             (memsys_size.value,), dtype="i1", stream=stream
@@ -145,18 +148,6 @@ def _single_thread_launch(self, module, stream, name, params=()):
             cooperative=False,
         )
 
-    def _ctypes_pointer(self, array):
-        """
-        Given an array, return a ctypes pointer to the data suitable for
-        passing to ``launch_kernel``.
-        """
-        ptr = array.device_ctypes_pointer
-
-        if USE_NV_BINDING:
-            ptr = ctypes.c_void_p(int(ptr))
-
-        return ptr
-
     def ensure_initialized(self, stream=None):
         """
         If memsys is not initialized, initialize memsys
@@ -206,7 +197,7 @@ def memsys_stats_enabled(self, stream=None):
         context
         """
         enabled_ar = cuda.managed_array(1, np.uint8)
-        enabled_ptr = self._ctypes_pointer(enabled_ar)
+        enabled_ptr = enabled_ar.device_ctypes_pointer
 
         self._single_thread_launch(
             self._memsys_module,
@@ -233,7 +224,7 @@ def _copy_memsys_to_host(self, stream):
         )
 
         stats_for_read = cuda.managed_array(1, dt)
-        stats_ptr = self._ctypes_pointer(stats_for_read)
+        stats_ptr = stats_for_read.device_ctypes_pointer
 
         self._single_thread_launch(
             self._memsys_module, stream, "NRT_MemSys_read", [stats_ptr]
@@ -264,7 +255,7 @@ def _get_single_stat(self, stat, stream=None):
         Get a single stat from the memsys
         """
         got = cuda.managed_array(1, np.uint64)
-        got_ptr = self._ctypes_pointer(got)
+        got_ptr = got.device_ctypes_pointer
 
         self._single_thread_launch(
             self._memsys_module, stream, f"NRT_MemSys_read_{stat}", [got_ptr]
@@ -327,7 +318,7 @@ def set_memsys_to_module(self, module, stream=None):
                 "Please allocate NRT Memsys first before setting to module."
             )
-        memsys_ptr = self._ctypes_pointer(self._memsys)
+        memsys_ptr = self._memsys.device_ctypes_pointer
 
         self._single_thread_launch(
             module, stream, "NRT_MemSys_set", [memsys_ptr]
         )
diff --git a/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py b/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py
index 6972a44ed..931cb5c1a 100644
--- a/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py
+++ b/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py
@@ -1,4 +1,4 @@
-from ctypes import byref, c_int, c_void_p, sizeof
+from ctypes import byref, c_int, sizeof
 
 from numba.cuda.cudadrv.driver import (
     host_to_device,
@@ -94,7 +94,6 @@ def test_cuda_driver_basic(self):
         stream = 0
 
         if _driver.USE_NV_BINDING:
-            ptr = c_void_p(int(ptr))
             stream = _driver.binding.CUstream(stream)
 
         launch_kernel(
@@ -129,8 +128,6 @@ def test_cuda_driver_stream_operations(self):
             host_to_device(memory, array, sizeof(array), stream=stream)
 
         ptr = memory.device_ctypes_pointer
-        if _driver.USE_NV_BINDING:
-            ptr = c_void_p(int(ptr))
 
         launch_kernel(
             function.handle,  # Kernel
diff --git a/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py b/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py
index 5d187411f..f5bbe5c39 100644
--- a/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py
+++ b/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py
@@ -20,10 +20,7 @@ def tearDown(self):
     def _template(self, obj):
         self.assertTrue(driver.is_device_memory(obj))
         driver.require_device_memory(obj)
-        if driver.USE_NV_BINDING:
-            expected_class = driver.binding.CUdeviceptr
-        else:
-            expected_class = drvapi.cu_device_ptr
+        expected_class = drvapi.cu_device_ptr
         self.assertTrue(isinstance(obj.device_ctypes_pointer, expected_class))
 
     def test_device_memory(self):
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py b/numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py
index 96c4087ad..3b716edea 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py
@@ -12,10 +12,9 @@
 @skip_on_cudasim("CUDA Array Interface is not supported in the simulator")
 class TestCudaArrayInterface(ContextResettingTestCase):
     def assertPointersEqual(self, a, b):
-        if driver.USE_NV_BINDING:
-            self.assertEqual(
-                int(a.device_ctypes_pointer), int(b.device_ctypes_pointer)
-            )
+        self.assertEqual(
+            a.device_ctypes_pointer.value, b.device_ctypes_pointer.value
+        )
 
     def test_as_cuda_array(self):
         h_arr = np.arange(10)
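Note: as a minimal sketch of the pattern this patch converges on (not part of the
patch itself; the cu_device_ptr / CUdeviceptr / MemoryPointer classes below are
simplified stand-ins, not the real Numba or NV-binding types), the
device_ctypes_pointer property now normalizes both binding modes to a
ctypes-style pointer, so callers rely on .value instead of branching on
USE_NV_BINDING at every call site:

import ctypes

# Hypothetical stand-in for drvapi.cu_device_ptr (a ctypes void-pointer type).
class cu_device_ptr(ctypes.c_void_p):
    pass

# Hypothetical stand-in for the NV binding's CUdeviceptr: convertible to int,
# but without a ctypes-style .value attribute.
class CUdeviceptr:
    def __init__(self, addr):
        self._addr = addr

    def __int__(self):
        return self._addr

USE_NV_BINDING = True

class MemoryPointer:
    def __init__(self, device_pointer):
        self.device_pointer = device_pointer

    @property
    def device_ctypes_pointer(self):
        # Post-patch behavior: convert the NV-binding pointer to a ctypes
        # pointer here, once, instead of at every caller.
        if USE_NV_BINDING:
            return cu_device_ptr(int(self.device_pointer))
        return self.device_pointer

mem = MemoryPointer(CUdeviceptr(0xDEAD0000))
ptr = mem.device_ctypes_pointer
assert isinstance(ptr, cu_device_ptr)
assert ptr.value == 0xDEAD0000  # uniform .value access, no binding branch

# A NULL pointer surfaces as .value is None, which is the case the
# __cuda_array_interface__ hunk above checks for.
assert cu_device_ptr(0).value is None

This keeps the binding-specific conversion in one place (the property), which is
why the diff can delete _ctypes_pointer in nrt.py and the ad-hoc
c_void_p(int(ptr)) conversions in dispatcher.py and the driver tests.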