Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 4 additions & 13 deletions numba_cuda/numba/cuda/cudadrv/devicearray.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,16 +121,10 @@ def __init__(self, shape, strides, dtype, stream=0, gpu_data=None):

@property
def __cuda_array_interface__(self):
if _driver.USE_NV_BINDING:
if self.device_ctypes_pointer is not None:
ptr = int(self.device_ctypes_pointer)
else:
ptr = 0
if self.device_ctypes_pointer.value is not None:
ptr = self.device_ctypes_pointer.value
else:
if self.device_ctypes_pointer.value is not None:
ptr = self.device_ctypes_pointer.value
else:
ptr = 0
ptr = 0

return {
"shape": tuple(self.shape),
Expand Down Expand Up @@ -204,10 +198,7 @@ def _numba_type_(self):
def device_ctypes_pointer(self):
"""Returns the ctypes pointer to the GPU data buffer"""
if self.gpu_data is None:
if _driver.USE_NV_BINDING:
return _driver.binding.CUdeviceptr(0)
else:
return c_void_p(0)
return c_void_p(0)
else:
return self.gpu_data.device_ctypes_pointer

Expand Down
16 changes: 6 additions & 10 deletions numba_cuda/numba/cuda/cudadrv/driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -2090,6 +2090,8 @@ def view(self, start, stop=None):

@property
def device_ctypes_pointer(self):
if USE_NV_BINDING:
return drvapi.cu_device_ptr(int(self.device_pointer))
return self.device_pointer

@property
Expand Down Expand Up @@ -3431,8 +3433,8 @@ def device_extents(devmem):
"""
devptr = device_ctypes_pointer(devmem)
if USE_NV_BINDING:
s, n = driver.cuMemGetAddressRange(devptr)
return s, binding.CUdeviceptr(int(s) + n)
s, n = driver.cuMemGetAddressRange(devptr.value)
return int(s), int(binding.CUdeviceptr(int(s) + n))
else:
s = drvapi.cu_device_ptr()
n = c_size_t()
Expand All @@ -3449,10 +3451,7 @@ def device_memory_size(devmem):
sz = getattr(devmem, "_cuda_memsize_", None)
if sz is None:
s, e = device_extents(devmem)
if USE_NV_BINDING:
sz = int(e) - int(s)
else:
sz = e - s
sz = e - s
devmem._cuda_memsize_ = sz
assert sz >= 0, "{} length array".format(sz)
return sz
Expand Down Expand Up @@ -3518,10 +3517,7 @@ def host_memory_size(obj):

def device_pointer(obj):
"Get the device pointer as an integer"
if USE_NV_BINDING:
return obj.device_ctypes_pointer
else:
return device_ctypes_pointer(obj).value
return device_ctypes_pointer(obj).value


def device_ctypes_pointer(obj):
Expand Down
2 changes: 0 additions & 2 deletions numba_cuda/numba/cuda/dispatcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -588,8 +588,6 @@ def _prepare_args(self, ty, val, stream, retr, kernelargs):
elif isinstance(ty, types.Record):
devrec = wrap_arg(val).to_device(retr, stream)
ptr = devrec.device_ctypes_pointer
if driver.USE_NV_BINDING:
ptr = ctypes.c_void_p(int(ptr))
kernelargs.append(ptr)

elif isinstance(ty, types.BaseTuple):
Expand Down
27 changes: 9 additions & 18 deletions numba_cuda/numba/cuda/memory_management/nrt.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,10 +113,13 @@ def allocate(self, stream=None):
self._compile_memsys_module()

# Allocate space for NRT_MemSys
ptr, nbytes = self._memsys_module.get_global_symbol("memsys_size")
memsys_size = ctypes.c_uint64()
ptr, nbytes = self._memsys_module.get_global_symbol("memsys_size")
device_memsys_size = ptr.device_ctypes_pointer
if USE_NV_BINDING:
device_memsys_size = device_memsys_size.value
driver.cuMemcpyDtoH(
ctypes.addressof(memsys_size), ptr.device_ctypes_pointer, nbytes
ctypes.addressof(memsys_size), device_memsys_size, nbytes
)
self._memsys = device_array(
(memsys_size.value,), dtype="i1", stream=stream
Expand Down Expand Up @@ -145,18 +148,6 @@ def _single_thread_launch(self, module, stream, name, params=()):
cooperative=False,
)

def _ctypes_pointer(self, array):
"""
Given an array, return a ctypes pointer to the data suitable for
passing to ``launch_kernel``.
"""
ptr = array.device_ctypes_pointer

if USE_NV_BINDING:
ptr = ctypes.c_void_p(int(ptr))

return ptr

def ensure_initialized(self, stream=None):
"""
If memsys is not initialized, initialize memsys
Expand Down Expand Up @@ -206,7 +197,7 @@ def memsys_stats_enabled(self, stream=None):
context
"""
enabled_ar = cuda.managed_array(1, np.uint8)
enabled_ptr = self._ctypes_pointer(enabled_ar)
enabled_ptr = enabled_ar.device_ctypes_pointer

self._single_thread_launch(
self._memsys_module,
Expand All @@ -233,7 +224,7 @@ def _copy_memsys_to_host(self, stream):
)

stats_for_read = cuda.managed_array(1, dt)
stats_ptr = self._ctypes_pointer(stats_for_read)
stats_ptr = stats_for_read.device_ctypes_pointer

self._single_thread_launch(
self._memsys_module, stream, "NRT_MemSys_read", [stats_ptr]
Expand Down Expand Up @@ -264,7 +255,7 @@ def _get_single_stat(self, stat, stream=None):
Get a single stat from the memsys
"""
got = cuda.managed_array(1, np.uint64)
got_ptr = self._ctypes_pointer(got)
got_ptr = got.device_ctypes_pointer

self._single_thread_launch(
self._memsys_module, stream, f"NRT_MemSys_read_{stat}", [got_ptr]
Expand Down Expand Up @@ -327,7 +318,7 @@ def set_memsys_to_module(self, module, stream=None):
"Please allocate NRT Memsys first before setting to module."
)

memsys_ptr = self._ctypes_pointer(self._memsys)
memsys_ptr = self._memsys.device_ctypes_pointer

self._single_thread_launch(
module, stream, "NRT_MemSys_set", [memsys_ptr]
Expand Down
5 changes: 1 addition & 4 deletions numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from ctypes import byref, c_int, c_void_p, sizeof
from ctypes import byref, c_int, sizeof

from numba.cuda.cudadrv.driver import (
host_to_device,
Expand Down Expand Up @@ -94,7 +94,6 @@ def test_cuda_driver_basic(self):
stream = 0

if _driver.USE_NV_BINDING:
ptr = c_void_p(int(ptr))
stream = _driver.binding.CUstream(stream)

launch_kernel(
Expand Down Expand Up @@ -129,8 +128,6 @@ def test_cuda_driver_stream_operations(self):
host_to_device(memory, array, sizeof(array), stream=stream)

ptr = memory.device_ctypes_pointer
if _driver.USE_NV_BINDING:
ptr = c_void_p(int(ptr))

launch_kernel(
function.handle, # Kernel
Expand Down
5 changes: 1 addition & 4 deletions numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,7 @@ def tearDown(self):
def _template(self, obj):
self.assertTrue(driver.is_device_memory(obj))
driver.require_device_memory(obj)
if driver.USE_NV_BINDING:
expected_class = driver.binding.CUdeviceptr
else:
expected_class = drvapi.cu_device_ptr
expected_class = drvapi.cu_device_ptr
self.assertTrue(isinstance(obj.device_ctypes_pointer, expected_class))

def test_device_memory(self):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,9 @@
@skip_on_cudasim("CUDA Array Interface is not supported in the simulator")
class TestCudaArrayInterface(ContextResettingTestCase):
def assertPointersEqual(self, a, b):
if driver.USE_NV_BINDING:
self.assertEqual(
int(a.device_ctypes_pointer), int(b.device_ctypes_pointer)
)
self.assertEqual(
a.device_ctypes_pointer.value, b.device_ctypes_pointer.value
)

def test_as_cuda_array(self):
h_arr = np.arange(10)
Expand Down