3 changes: 1 addition & 2 deletions numba_cuda/numba/cuda/api.py
```diff
@@ -39,10 +39,9 @@ def from_cuda_array_interface(desc, owner=None, sync=True):
 
     shape = desc["shape"]
     strides = desc.get("strides")
-    dtype = np.dtype(desc["typestr"])
 
     shape, strides, dtype = prepare_shape_strides_dtype(
-        shape, strides, dtype, order="C"
+        shape, strides, desc["typestr"], order="C"
     )
     size = driver.memory_size_from_info(shape, strides, dtype.itemsize)
```
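The point of this change: `prepare_shape_strides_dtype` already calls `np.dtype` on whatever it receives, so constructing the dtype at the call site was redundant work on a hot path. A minimal illustration (not part of the diff) of the idempotence that makes passing the raw `typestr` through safe:

```python
import numpy as np

# np.dtype accepts either a typestr or an existing dtype and yields the
# same dtype either way, so the old code built the identical dtype twice.
assert np.dtype("<f8") == np.dtype(np.dtype("<f8"))
```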
22 changes: 16 additions & 6 deletions numba_cuda/numba/cuda/api_util.py
```diff
@@ -3,6 +3,8 @@
 
 import numpy as np
 
+import functools
+
 
 def prepare_shape_strides_dtype(shape, strides, dtype, order):
     dtype = np.dtype(dtype)
@@ -14,25 +16,33 @@ def prepare_shape_strides_dtype(shape, strides, dtype, order):
         raise TypeError("shape must be an integer or tuple of integers")
     if isinstance(shape, int):
         shape = (shape,)
+    else:
+        shape = tuple(shape)
     if isinstance(strides, int):
         strides = (strides,)
     else:
-        strides = strides or _fill_stride_by_order(shape, dtype, order)
+        if not strides:
+            strides = _fill_stride_by_order(shape, dtype, order)
+        else:
+            strides = tuple(strides)
     return shape, strides, dtype
 
 
+@functools.cache
 def _fill_stride_by_order(shape, dtype, order):
-    nd = len(shape)
-    if nd == 0:
+    ndims = len(shape)
+    if not ndims:
         return ()
-    strides = [0] * nd
+    strides = [0] * ndims
     if order == "C":
         strides[-1] = dtype.itemsize
-        for d in reversed(range(nd - 1)):
+        # -2 because we subtract one for zero-based indexing and another
+        # one for skipping the already-filled-in last element
+        for d in range(ndims - 2, -1, -1):
             strides[d] = strides[d + 1] * shape[d + 1]
     elif order == "F":
         strides[0] = dtype.itemsize
-        for d in range(1, nd):
+        for d in range(1, ndims):
             strides[d] = strides[d - 1] * shape[d - 1]
     else:
         raise ValueError("must be either C/F order")
```
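`functools.cache` keys on the hash of its arguments, which is why `prepare_shape_strides_dtype` now normalizes `shape` and `strides` to tuples before `_fill_stride_by_order` is ever called. A sketch of the C-order stride rule being cached, assuming shape `(3, 4)` and `float64` purely for illustration:

```python
import numpy as np

# C order: the last axis moves by one item; each earlier axis moves by
# the product of all later dimensions.
shape = (3, 4)
itemsize = np.dtype(np.float64).itemsize  # 8 bytes
strides = [0] * len(shape)
strides[-1] = itemsize
for d in range(len(shape) - 2, -1, -1):
    strides[d] = strides[d + 1] * shape[d + 1]
assert tuple(strides) == np.zeros(shape).strides  # (32, 8)
```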
16 changes: 9 additions & 7 deletions numba_cuda/numba/cuda/cudadrv/devicearray.py
```diff
@@ -86,8 +86,13 @@ def __init__(self, shape, strides, dtype, stream=0, gpu_data=None):
         """
         if isinstance(shape, int):
             shape = (shape,)
+        else:
+            shape = tuple(shape)
         if isinstance(strides, int):
             strides = (strides,)
+        else:
+            if strides:
+                strides = tuple(strides)
         dtype = np.dtype(dtype)
         itemsize = dtype.itemsize
         self.ndim = ndim = len(shape)
@@ -96,9 +101,6 @@ def __init__(self, shape, strides, dtype, stream=0, gpu_data=None):
         self._dummy = dummy = dummyarray.Array.from_desc(
             0, shape, strides, itemsize
         )
-        # confirm that all elements of shape are ints
-        if not all(isinstance(dim, (int, np.integer)) for dim in shape):
-            raise TypeError("all elements of shape must be ints")
         self.shape = shape = dummy.shape
         self.strides = strides = dummy.strides
         self.dtype = dtype
```

Contributor Author, on the removed check: I moved this into `Array.from_desc` (see dummyarray.py below) because caching avoids having to run this code every time this constructor is called, saving a good chunk of time in the constructor.

```diff
@@ -121,17 +123,17 @@ def __init__(self, shape, strides, dtype, stream=0, gpu_data=None):
 
     @property
     def __cuda_array_interface__(self):
-        if self.device_ctypes_pointer.value is not None:
-            ptr = self.device_ctypes_pointer.value
+        if (value := self.device_ctypes_pointer.value) is not None:
+            ptr = value
         else:
             ptr = 0
 
         return {
-            "shape": tuple(self.shape),
+            "shape": self.shape,
             "strides": None if is_contiguous(self) else tuple(self.strides),
             "data": (ptr, False),
             "typestr": self.dtype.str,
-            "stream": int(self.stream) if self.stream != 0 else None,
+            "stream": int(stream) if (stream := self.stream) != 0 else None,
             "version": 3,
         }
```
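Since the constructor now guarantees `shape` is a tuple, the property can return `self.shape` directly instead of rebuilding a tuple on every access. For context, a small contiguous device array would export an interface dict along these lines (values are illustrative, assuming a 16-element float64 array on the default stream):

```python
{
    "shape": (16,),
    "strides": None,                # contiguous, so strides are elided
    "data": (139812345856, False),  # (device pointer, read-only flag)
    "typestr": "<f8",
    "stream": None,                 # default stream
    "version": 3,
}
```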
1 change: 1 addition & 0 deletions numba_cuda/numba/cuda/cudadrv/driver.py
```diff
@@ -3023,6 +3023,7 @@ def host_memory_extents(obj):
     return mviewbuf.memoryview_get_extents(obj)
 
 
+@functools.cache
 def memory_size_from_info(shape, strides, itemsize):
     """Get the byte size of a contiguous memory buffer given the shape, strides
     and itemsize.
```
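Caching is safe here because the byte size is a pure function of `(shape, strides, itemsize)`, and callers such as `from_cuda_array_interface` now pass hashable tuples. A sketch of the quantity involved, assuming non-negative strides (an illustration, not the driver's actual implementation):

```python
def buffer_extent(shape, strides, itemsize):
    # Furthest reachable byte: step (dim - 1) strides along each axis
    # from the start of the buffer, then add one final item.
    if not shape:
        return itemsize
    return sum((d - 1) * s for d, s in zip(shape, strides)) + itemsize

assert buffer_extent((3, 4), (32, 8), 8) == 3 * 4 * 8  # contiguous case
```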
12 changes: 8 additions & 4 deletions numba_cuda/numba/cuda/cudadrv/dummyarray.py
```diff
@@ -5,6 +5,7 @@
 import itertools
 import functools
 import operator
+import numpy as np
 
 
 Extent = namedtuple("Extent", ["begin", "end"])
@@ -245,9 +246,12 @@ class Array(object):
     is_array = True
 
     @classmethod
+    @functools.cache
     def from_desc(cls, offset, shape, strides, itemsize):
         dims = []
         for ashape, astride in zip(shape, strides):
+            if not isinstance(ashape, (int, np.integer)):
+                raise TypeError("all elements of shape must be ints")
             dim = Dim(
                 offset, offset + ashape * astride, ashape, astride, single=False
             )
@@ -442,8 +446,8 @@ def reshape(self, *newdims, **kws):
 
         ret = self.from_desc(
             self.extent.begin,
-            shape=newdims,
-            strides=newstrides,
+            shape=tuple(newdims),
+            strides=tuple(newstrides),
             itemsize=self.itemsize,
         )
@@ -471,8 +475,8 @@ def squeeze(self, axis=None):
             newstrides.append(stride)
         newarr = self.from_desc(
             self.extent.begin,
-            shape=newshape,
-            strides=newstrides,
+            shape=tuple(newshape),
+            strides=tuple(newstrides),
             itemsize=self.itemsize,
         )
         return newarr, list(self.iter_contiguous_extent())
```
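The `tuple(...)` conversions in `reshape` and `squeeze` exist because the now-cached `from_desc` hashes its arguments; passing lists would raise. A toy sketch of the constraint, using a hypothetical `Desc` class:

```python
import functools

class Desc:
    @classmethod
    @functools.cache  # same decorator order as the diff above
    def from_desc(cls, offset, shape, strides, itemsize):
        return (offset, shape, strides, itemsize)

Desc.from_desc(0, (3, 4), (32, 8), 8)    # fine: tuples are hashable
# Desc.from_desc(0, [3, 4], [32, 8], 8)  # TypeError: unhashable type: 'list'
```

One trade-off of this cache: identical descriptors now share a single `Array` instance, and cache entries live for the life of the process.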
8 changes: 6 additions & 2 deletions numba_cuda/numba/cuda/dispatcher.py
```diff
@@ -1629,11 +1629,15 @@ def typeof_pyval(self, val):
         try:
             return typeof(val, Purpose.argument)
         except ValueError:
-            if cuda.is_cuda_array(val):
+            if (
+                interface := getattr(val, "__cuda_array_interface__", None)
+            ) is not None:
                 # When typing, we don't need to synchronize on the array's
                 # stream - this is done when the kernel is launched.
+
                 return typeof(
-                    cuda.as_cuda_array(val, sync=False), Purpose.argument
+                    cuda.from_cuda_array_interface(interface, sync=False),
+                    Purpose.argument,
                 )
             else:
                 raise
```
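This replaces an `is_cuda_array` check plus an `as_cuda_array` call, each of which looks up `__cuda_array_interface__` itself, with a single attribute read. The same one-read idiom in isolation, with a hypothetical consumer function:

```python
def describe(obj):
    # Fetch __cuda_array_interface__ once; the None default makes the
    # walrus test double as the "is this a CUDA array?" check.
    if (interface := getattr(obj, "__cuda_array_interface__", None)) is not None:
        return interface["shape"], interface["typestr"]
    raise TypeError("not a CUDA array")
```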
2 changes: 1 addition & 1 deletion pixi.lock

(Generated lockfile; diff not rendered.)

2 changes: 1 addition & 1 deletion pyproject.toml
```diff
@@ -198,7 +198,7 @@ benchcmp = { cmd = [
     "numba.cuda.tests.benchmarks",
     "--benchmark-only",
     "--benchmark-enable",
-    "--benchmark-group-by=func",
+    "--benchmark-group-by=name",
     "--benchmark-compare",
 ] }
```

Contributor Author: This is a more useful grouping when comparing benchmarks.