From a58f9285fcadafcc87df85d7730a57ddf6c5260b Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Wed, 22 Oct 2025 09:17:27 -0700
Subject: [PATCH 01/60] initial

---
 docs/source/user/ipc.rst                     |  8 +++
 docs/source/user/memory.rst                  | 20 +++++++
 numba_cuda/numba/cuda/api.py                 | 32 ++++++++---
 numba_cuda/numba/cuda/args.py                |  2 +-
 numba_cuda/numba/cuda/cudadrv/devicearray.py | 57 +++++++++++++++++---
 numba_cuda/numba/cuda/cudadrv/driver.py      |  2 +-
 numba_cuda/numba/cuda/kernels/transpose.py   |  2 +-
 numba_cuda/numba/cuda/vectorizers.py         |  6 +--
 8 files changed, 109 insertions(+), 20 deletions(-)

diff --git a/docs/source/user/ipc.rst b/docs/source/user/ipc.rst
index 904f1fd10..80acb0577 100644
--- a/docs/source/user/ipc.rst
+++ b/docs/source/user/ipc.rst
@@ -18,6 +18,10 @@ Sharing between processes is implemented using the Legacy CUDA IPC API
 Export device array to another process
 --------------------------------------
 
+.. note::
+  DeviceNDArray is deprecated. Prefer CuPy for array operations including
+  inter-process communication.
+
 A device array can be shared with another process in the same machine using
 the CUDA IPC API.  To do so, use the ``.get_ipc_handle()`` method on the device
 array to get a ``IpcArrayHandle`` object, which can be transferred to another
@@ -34,6 +38,10 @@ process.
 Import IPC memory from another process
 --------------------------------------
 
+.. note::
+  DeviceNDArray is deprecated. Prefer CuPy for array operations including
+  inter-process communication.
+
 The following function is used to open IPC handle from another process
 as a device array.
 
diff --git a/docs/source/user/memory.rst b/docs/source/user/memory.rst
index be4236619..3dd471745 100644
--- a/docs/source/user/memory.rst
+++ b/docs/source/user/memory.rst
@@ -11,6 +11,10 @@ Memory management
 Data transfer
 =============
 
+.. note::
+  DeviceNDArray is deprecated. Prefer CuPy for array operations including
+  memory transfers.
+
 Even though Numba can automatically transfer NumPy arrays to the device,
 it can only do so conservatively by always transferring device memory back to
 the host when a kernel finishes. To avoid the unnecessary transfer for
@@ -38,6 +42,9 @@ buffer using the following APIs:
 Device arrays
 -------------
 
+.. note::
+  DeviceNDArray is deprecated. Prefer CuPy for array operations.
+
 Device array references have the following methods.  These methods are to be
 called in host code, not within CUDA-jitted functions.
 
@@ -52,6 +59,11 @@ called in host code, not within CUDA-jitted functions.
 Pinned memory
 =============
 
+.. note::
+  DeviceNDArray is deprecated. Prefer CuPy for array operations including
+  allocating pinned memory.
+
+
 .. autofunction:: numba.cuda.pinned
    :noindex:
 .. autofunction:: numba.cuda.pinned_array
@@ -63,6 +75,10 @@ Pinned memory
 Mapped memory
 =============
 
+.. note::
+  DeviceNDArray is deprecated. Prefer CuPy for array operations including
+  allocating mapped memory.
+
 .. autofunction:: numba.cuda.mapped
    :noindex:
 .. autofunction:: numba.cuda.mapped_array
@@ -76,6 +92,10 @@ Mapped memory
 Managed memory
 ==============
 
+.. note::
+  DeviceNDArray is deprecated. Prefer CuPy for array operations including
+  allocating managed memory.
+
 .. autofunction:: numba.cuda.managed_array
    :noindex:
 
diff --git a/numba_cuda/numba/cuda/api.py b/numba_cuda/numba/cuda/api.py
index e58b3f588..edb55d618 100644
--- a/numba_cuda/numba/cuda/api.py
+++ b/numba_cuda/numba/cuda/api.py
@@ -9,7 +9,7 @@
 import os
 
 import numpy as np
-
+import warnings
 from .cudadrv import devicearray, devices, driver
 from numba.cuda.core import config
 from numba.cuda.api_util import prepare_shape_strides_dtype
@@ -23,9 +23,9 @@
 
 @require_context
 def from_cuda_array_interface(desc, owner=None, sync=True):
-    """Create a DeviceNDArray from a cuda-array-interface description.
+    """Create a _DeviceNDArray from a cuda-array-interface description.
     The ``owner`` is the owner of the underlying memory.
-    The resulting DeviceNDArray will acquire a reference from it.
+    The resulting _DeviceNDArray will acquire a reference from it.
 
     If ``sync`` is ``True``, then the imported stream (if present) will be
     synchronized.
@@ -59,18 +59,18 @@ def from_cuda_array_interface(desc, owner=None, sync=True):
             stream.synchronize()
     else:
         stream = 0  # No "Numba default stream", not the CUDA default stream
-    da = devicearray.DeviceNDArray(
+    da = devicearray._DeviceNDArray(
         shape=shape, strides=strides, dtype=dtype, gpu_data=data, stream=stream
     )
     return da
 
 
 def as_cuda_array(obj, sync=True):
-    """Create a DeviceNDArray from any object that implements
+    """Create a _DeviceNDArray from any object that implements
     the :ref:`cuda array interface <cuda-array-interface>`.
 
     A view of the underlying GPU buffer is created.  No copying of the data
-    is done.  The resulting DeviceNDArray will acquire a reference from `obj`.
+    is done.  The resulting _DeviceNDArray will acquire a reference from `obj`.
 
     If ``sync`` is ``True``, then the imported stream (if present) will be
     synchronized.
@@ -138,6 +138,10 @@ def to_device(obj, stream=0, copy=True, to=None):
 
         hary = d_ary.copy_to_host(stream=stream)
     """
+    warnings.warn(
+        "to_device is deprecated. Please prefer cupy for moving numpy arrays to the device.",
+        FutureWarning,
+    )
     if to is None:
         to, new = devicearray.auto_device(
             obj, stream=stream, copy=copy, user_explicit=True
@@ -154,6 +158,10 @@ def device_array(shape, dtype=np.float64, strides=None, order="C", stream=0):
 
     Allocate an empty device ndarray. Similar to :meth:`numpy.empty`.
     """
+    warnings.warn(
+        "device_array is deprecated. Please prefer cupy for moving numpy arrays to the device.",
+        FutureWarning,
+    )
     shape, strides, dtype = prepare_shape_strides_dtype(
         shape, strides, dtype, order
     )
@@ -186,6 +194,10 @@ def managed_array(
                           *host*, and memory is only accessible by devices
                           with Compute Capability 6.0 and later.
     """
+    warnings.warn(
+        "managed_array is deprecated. Please prefer cupy for moving numpy arrays to the device.",
+        FutureWarning,
+    )
     shape, strides, dtype = prepare_shape_strides_dtype(
         shape, strides, dtype, order
     )
@@ -208,6 +220,10 @@ def pinned_array(shape, dtype=np.float64, strides=None, order="C"):
     Allocate an :class:`ndarray <numpy.ndarray>` with a buffer that is pinned
     (pagelocked).  Similar to :func:`np.empty() <numpy.empty>`.
     """
+    warnings.warn(
+        "pinned_array is deprecated. Please prefer cupy for moving numpy arrays to the device.",
+        FutureWarning,
+    )
     shape, strides, dtype = prepare_shape_strides_dtype(
         shape, strides, dtype, order
     )
@@ -240,6 +256,10 @@ def mapped_array(
         to write by the host and to read by the device, but slower to
         write by the host and slower to write by the device.
     """
+    warnings.warn(
+        "mapped_array is deprecated. Please prefer cupy for moving numpy arrays to the device.",
+        FutureWarning,
+    )
     shape, strides, dtype = prepare_shape_strides_dtype(
         shape, strides, dtype, order
     )
diff --git a/numba_cuda/numba/cuda/args.py b/numba_cuda/numba/cuda/args.py
index 755e19d00..866133621 100644
--- a/numba_cuda/numba/cuda/args.py
+++ b/numba_cuda/numba/cuda/args.py
@@ -22,7 +22,7 @@ def to_device(self, retr, stream=0):
         :param retr:
             a list of clean-up work to do after the kernel's been run.
             Append 0-arg lambdas to it!
-        :return: a value (usually an `DeviceNDArray`) to be passed to
+        :return: a value (usually an `_DeviceNDArray`) to be passed to
             the kernel
         """
         pass
diff --git a/numba_cuda/numba/cuda/cudadrv/devicearray.py b/numba_cuda/numba/cuda/cudadrv/devicearray.py
index e8dc27f3b..c26db3ed5 100644
--- a/numba_cuda/numba/cuda/cudadrv/devicearray.py
+++ b/numba_cuda/numba/cuda/cudadrv/devicearray.py
@@ -12,6 +12,7 @@
 import operator
 import copy
 from ctypes import c_void_p
+import warnings
 
 import numpy as np
 
@@ -36,6 +37,18 @@ def lru_cache(func):
         return func
 
 
+def deprecated_array_api(func):
+    @functools.wraps(func)
+    def wrapper(*args, **kwargs):
+        warnings.warn(
+            f"{func.__name__} api is deprecated. Please prefer cupy for array functions",
+            FutureWarning,
+        )
+        return func(*args, **kwargs)
+
+    return wrapper
+
+
 def is_cuda_ndarray(obj):
     "Check if an object is a CUDA ndarray"
     return getattr(obj, "__cuda_ndarray__", False)
@@ -292,6 +305,7 @@ def copy_to_host(self, ary=None, stream=0):
                 )
         return hostary
 
+    @deprecated_array_api
     def split(self, section, stream=0):
         """Split the array into equal partition of the `section` size.
         If the array cannot be equally divided, the last section will be
@@ -310,7 +324,7 @@ def split(self, section, stream=0):
             end = min(begin + section, self.size)
             shape = (end - begin,)
             gpu_data = self.gpu_data.view(begin * itemsize, end * itemsize)
-            yield DeviceNDArray(
+            yield _DeviceNDArray(
                 shape,
                 strides,
                 dtype=self.dtype,
@@ -322,6 +336,7 @@ def as_cuda_arg(self):
         """Returns a device memory object that is used as the argument."""
         return self.gpu_data
 
+    @deprecated_array_api
     def get_ipc_handle(self):
         """
         Returns a *IpcArrayHandle* object that is safe to serialize and transfer
@@ -333,6 +348,7 @@ def get_ipc_handle(self):
         desc = dict(shape=self.shape, strides=self.strides, dtype=self.dtype)
         return IpcArrayHandle(ipc_handle=ipch, array_desc=desc)
 
+    @deprecated_array_api
     def squeeze(self, axis=None, stream=0):
         """
         Remove axes of size one from the array shape.
@@ -353,7 +369,7 @@ def squeeze(self, axis=None, stream=0):
 
         """
         new_dummy, _ = self._dummy.squeeze(axis=axis)
-        return DeviceNDArray(
+        return _DeviceNDArray(
             shape=new_dummy.shape,
             strides=new_dummy.strides,
             dtype=self.dtype,
@@ -361,6 +377,7 @@ def squeeze(self, axis=None, stream=0):
             gpu_data=self.gpu_data,
         )
 
+    @deprecated_array_api
     def view(self, dtype):
         """Returns a new object by reinterpretting the dtype without making a
         copy of the data.
@@ -389,7 +406,7 @@ def view(self, dtype):
 
             strides[-1] = dtype.itemsize
 
-        return DeviceNDArray(
+        return _DeviceNDArray(
             shape=shape,
             strides=strides,
             dtype=dtype,
@@ -462,7 +479,7 @@ def _do_getitem(self, item, stream=0):
             shape, strides, dtype = prepare_shape_strides_dtype(
                 typ.shape, None, typ.subdtype[0], "C"
             )
-            return DeviceNDArray(
+            return _DeviceNDArray(
                 shape=shape,
                 strides=strides,
                 dtype=dtype,
@@ -553,9 +570,10 @@ def kernel(lhs, rhs):
     return kernel
 
 
-class DeviceNDArray(DeviceNDArrayBase):
+class _DeviceNDArray(DeviceNDArrayBase):
     """
-    An on-GPU array type
+    An on-GPU array type (internal implementation class formerly named
+    DeviceNDArray)
     """
 
     def is_f_contiguous(self):
@@ -595,6 +613,7 @@ def __array__(self, dtype=None, copy=None):
     def __len__(self):
         return self.shape[0]
 
+    @deprecated_array_api
     def reshape(self, *newshape, **kws):
         """
         Reshape the array without changing its contents, similarly to
@@ -627,6 +646,7 @@ def reshape(self, *newshape, **kws):
         else:
             raise NotImplementedError("operation requires copying")
 
+    @deprecated_array_api
     def ravel(self, order="C", stream=0):
         """
         Flattens a contiguous array without changing its contents, similar to
@@ -650,10 +670,12 @@ def ravel(self, order="C", stream=0):
             raise NotImplementedError("operation requires copying")
 
     @devices.require_context
+    @deprecated_array_api
     def __getitem__(self, item):
         return self._do_getitem(item)
 
     @devices.require_context
+    @deprecated_array_api
     def getitem(self, item, stream=0):
         """Do `__getitem__(item)` with CUDA stream"""
         return self._do_getitem(item, stream)
@@ -792,13 +814,14 @@ def __init__(self, ipc_handle, array_desc):
         self._array_desc = array_desc
         self._ipc_handle = ipc_handle
 
+    @deprecated_array_api
     def open(self):
         """
         Returns a new *DeviceNDArray* that shares the allocation from the
         original process.  Must not be used on the original process.
         """
         dptr = self._ipc_handle.open(devices.get_context())
-        return DeviceNDArray(gpu_data=dptr, **self._array_desc)
+        return _DeviceNDArray(gpu_data=dptr, **self._array_desc)
 
     def close(self):
         """
@@ -833,9 +856,10 @@ def device_setup(self, gpu_data, stream=0):
         self.stream = stream
 
 
+@deprecated_array_api
 def from_array_like(ary, stream=0, gpu_data=None):
     "Create a DeviceNDArray object that is like ary."
-    return DeviceNDArray(
+    return _DeviceNDArray(
         ary.shape, ary.strides, ary.dtype, stream=stream, gpu_data=gpu_data
     )
 
@@ -951,3 +975,20 @@ def check_array_compatibility(ary1, ary2):
         raise ValueError(
             "incompatible strides: %s vs. %s" % (ary1.strides, ary2.strides)
         )
+
+
+class DeviceNDArray(_DeviceNDArray):
+    """
+    Deprecated public wrapper around the implementation class _DeviceNDArray.
+
+    Instantiating this class will emit a FutureWarning indicating that the
+    public name DeviceNDArray is deprecated. The implementation class is now
+    named _DeviceNDArray; code should migrate to that name.
+    """
+
+    def __init__(self, *args, **kwargs):
+        warnings.warn(
+            "DeviceNDArray api is deprecated. Please prefer cupy for array functions",
+            FutureWarning,
+        )
+        super().__init__(*args, **kwargs)
diff --git a/numba_cuda/numba/cuda/cudadrv/driver.py b/numba_cuda/numba/cuda/cudadrv/driver.py
index 69541df47..757ce5a1f 100644
--- a/numba_cuda/numba/cuda/cudadrv/driver.py
+++ b/numba_cuda/numba/cuda/cudadrv/driver.py
@@ -1758,7 +1758,7 @@ def open_array(self, context, shape, dtype, strides=None):
             strides = dtype.itemsize
         dptr = self.open(context)
         # read the device pointer as an array
-        return devicearray.DeviceNDArray(
+        return devicearray._DeviceNDArray(
             shape=shape, strides=strides, dtype=dtype, gpu_data=dptr
         )
 
diff --git a/numba_cuda/numba/cuda/kernels/transpose.py b/numba_cuda/numba/cuda/kernels/transpose.py
index 01e2670b0..33a0f9bc6 100644
--- a/numba_cuda/numba/cuda/kernels/transpose.py
+++ b/numba_cuda/numba/cuda/kernels/transpose.py
@@ -26,7 +26,7 @@ def transpose(a, b=None):
     if not b:
         cols, rows = a.shape
         strides = a.dtype.itemsize * cols, a.dtype.itemsize
-        b = cuda.cudadrv.devicearray.DeviceNDArray(
+        b = cuda.cudadrv.devicearray._DeviceNDArray(
             (rows, cols), strides, dtype=a.dtype, stream=stream
         )
 
diff --git a/numba_cuda/numba/cuda/vectorizers.py b/numba_cuda/numba/cuda/vectorizers.py
index 2607ea043..937546044 100644
--- a/numba_cuda/numba/cuda/vectorizers.py
+++ b/numba_cuda/numba/cuda/vectorizers.py
@@ -133,7 +133,7 @@ def _call_steps(self):
         return _CUDAGUFuncCallSteps
 
     def _broadcast_scalar_input(self, ary, shape):
-        return cuda.cudadrv.devicearray.DeviceNDArray(
+        return cuda.cudadrv.devicearray._DeviceNDArray(
             shape=shape, strides=(0,), dtype=ary.dtype, gpu_data=ary.gpu_data
         )
 
@@ -141,7 +141,7 @@ def _broadcast_add_axis(self, ary, newshape):
         newax = len(newshape) - len(ary.shape)
         # Add 0 strides for missing dimension
         newstrides = (0,) * newax + ary.strides
-        return cuda.cudadrv.devicearray.DeviceNDArray(
+        return cuda.cudadrv.devicearray._DeviceNDArray(
             shape=newshape,
             strides=newstrides,
             dtype=ary.dtype,
@@ -194,7 +194,7 @@ def broadcast_device(self, ary, shape):
         for ax in ax_differs:
             strides[ax] = 0
 
-        return cuda.cudadrv.devicearray.DeviceNDArray(
+        return cuda.cudadrv.devicearray._DeviceNDArray(
             shape=shape, strides=strides, dtype=ary.dtype, gpu_data=ary.gpu_data
         )
 

From 762a6b1b470d1501836d0fb066b0fb977f2087fe Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Sat, 1 Nov 2025 15:10:05 -0700
Subject: [PATCH 02/60] progress replacing tests

---
 numba_cuda/numba/cuda/cudadrv/devicearray.py  | 30 +++++++++++++++----
 .../numba/cuda/tests/cudadrv/test_events.py   | 15 +++++++---
 .../numba/cuda/tests/cudadrv/test_pinned.py   |  2 +-
 .../numba/cuda/tests/cudapy/test_datetime.py  |  1 +
 .../numba/cuda/tests/cudapy/test_idiv.py      | 11 ++++---
 .../numba/cuda/tests/cudapy/test_matmul.py    | 21 ++++++-------
 .../numba/cuda/tests/cudapy/test_nondet.py    | 17 +++++------
 7 files changed, 59 insertions(+), 38 deletions(-)

diff --git a/numba_cuda/numba/cuda/cudadrv/devicearray.py b/numba_cuda/numba/cuda/cudadrv/devicearray.py
index a359ca498..623cfa7d8 100644
--- a/numba_cuda/numba/cuda/cudadrv/devicearray.py
+++ b/numba_cuda/numba/cuda/cudadrv/devicearray.py
@@ -369,6 +369,10 @@ def squeeze(self, axis=None, stream=0):
             Squeezed view into the array.
 
         """
+        breakpoint()
+        return self._squeeze(axis=axis, stream=stream)
+
+    def _squeeze(self, axis=None, stream=0):
         new_dummy, _ = self._dummy.squeeze(axis=axis)
         return _DeviceNDArray(
             shape=new_dummy.shape,
@@ -622,6 +626,11 @@ def reshape(self, *newshape, **kws):
 
             d_arr = d_arr.reshape(20, 50, order="F")
         """
+
+        return self._reshape(*newshape, **kws)
+
+    def _reshape(self, *newshape, **kws):
+
         if len(newshape) == 1 and isinstance(newshape[0], (tuple, list)):
             newshape = newshape[0]
 
@@ -673,6 +682,7 @@ def ravel(self, order="C", stream=0):
     @devices.require_context
     @deprecated_array_api
     def __getitem__(self, item):
+        breakpoint()
         return self._do_getitem(item)
 
     @devices.require_context
@@ -775,7 +785,7 @@ def _do_setitem(self, key, value, stream=0):
         rhs_shape = np.ones(lhs.ndim, dtype=np.int64)
         # negative indices would not work if rhs.ndim == 0
         rhs_shape[lhs.ndim - rhs.ndim :] = rhs.shape
-        rhs = rhs.reshape(*rhs_shape)
+        rhs = rhs._reshape(*rhs_shape)
         for i, (l, r) in enumerate(zip(lhs.shape, rhs.shape)):
             if r != 1 and l != r:
                 raise ValueError(
@@ -857,10 +867,9 @@ def device_setup(self, gpu_data, stream=0):
         self.stream = stream
 
 
-@deprecated_array_api
 def from_array_like(ary, stream=0, gpu_data=None):
     "Create a DeviceNDArray object that is like ary."
-    return _DeviceNDArray(
+    return DeviceNDArray(
         ary.shape, ary.strides, ary.dtype, stream=stream, gpu_data=gpu_data
     )
 
@@ -885,7 +894,11 @@ def array_core(ary):
     core_index = []
     for stride in ary.strides:
         core_index.append(0 if stride == 0 else slice(None))
-    return ary[tuple(core_index)]
+    
+    if isinstance(ary, _DeviceNDArray):
+        return ary._do_getitem(tuple(core_index))
+    else:
+        return ary[tuple(core_index)]
 
 
 def is_contiguous(ary):
@@ -965,7 +978,14 @@ def auto_device(obj, stream=0, copy=True, user_explicit=False):
 
 
 def check_array_compatibility(ary1, ary2):
-    ary1sq, ary2sq = ary1.squeeze(), ary2.squeeze()
+    if isinstance(ary1, _DeviceNDArray):
+        ary1sq = ary1._squeeze()
+    else: 
+        ary1sq = ary1.squeeze()
+    if isinstance(ary2, _DeviceNDArray):
+        ary2sq = ary2._squeeze()
+    else:
+        ary2sq = ary2.squeeze()
     if ary1.dtype != ary2.dtype:
         raise TypeError(
             "incompatible dtype: %s vs. %s" % (ary1.dtype, ary2.dtype)
diff --git a/numba_cuda/numba/cuda/tests/cudadrv/test_events.py b/numba_cuda/numba/cuda/tests/cudadrv/test_events.py
index 20ceebb97..6a2588a10 100644
--- a/numba_cuda/numba/cuda/tests/cudadrv/test_events.py
+++ b/numba_cuda/numba/cuda/tests/cudadrv/test_events.py
@@ -6,17 +6,17 @@
 from numba.cuda.testing import unittest, CUDATestCase
 from cuda.core.experimental import Device
 from numba.cuda.testing import skip_on_cudasim
+import cupy as cp
 
 
 class TestCudaEvent(CUDATestCase):
     def test_event_elapsed(self):
         N = 32
-        dary = cuda.device_array(N, dtype=np.double)
         evtstart = cuda.event()
         evtend = cuda.event()
 
         evtstart.record()
-        cuda.to_device(np.arange(N, dtype=np.double), to=dary)
+        dary = cp.array(np.arange(N, dtype=np.double))
         evtend.record()
         evtend.wait()
         evtend.synchronize()
@@ -35,13 +35,20 @@ def test_event_elapsed_cuda_core_stream(self):
         self.event_elapsed_inner(stream)
 
     def event_elapsed_inner(self, stream):
+        
+        @cuda.jit
+        def kernel():
+            pass
+
+
         N = 32
-        dary = cuda.device_array(N, dtype=np.double)
         evtstart = cuda.event()
         evtend = cuda.event()
 
         evtstart.record(stream=stream)
-        cuda.to_device(np.arange(N, dtype=np.double), to=dary, stream=stream)
+
+        kernel[1,1, stream]()
+
         evtend.record(stream=stream)
         evtend.wait(stream=stream)
         evtend.synchronize()
diff --git a/numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py b/numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py
index 919b4428d..e58518899 100644
--- a/numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py
+++ b/numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py
@@ -7,7 +7,7 @@
 from numba import cuda
 from numba.cuda.testing import unittest, CUDATestCase
 
-
+# TODO
 class TestPinned(CUDATestCase):
     def _run_copies(self, A):
         A0 = np.copy(A)
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_datetime.py b/numba_cuda/numba/cuda/tests/cudapy/test_datetime.py
index 2c513dd0a..5de232456 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_datetime.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_datetime.py
@@ -7,6 +7,7 @@
 from numba.cuda.np.numpy_support import from_dtype
 from numba.cuda.testing import CUDATestCase, skip_on_cudasim
 import unittest
+import cupy as cp
 
 
 class TestCudaDateTime(CUDATestCase):
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_idiv.py b/numba_cuda/numba/cuda/tests/cudapy/test_idiv.py
index 9550e3e3b..21242e833 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_idiv.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_idiv.py
@@ -5,6 +5,7 @@
 from numba import cuda
 from numba.cuda import float32, float64, int32, void
 from numba.cuda.testing import unittest, CUDATestCase
+import cupy as cp
 
 
 class TestCudaIDiv(CUDATestCase):
@@ -15,10 +16,9 @@ def div(grid, l_x, l_y):
                 for y in range(l_y):
                     grid[x, y] /= 2.0
 
-        x = np.ones((2, 2), dtype=np.float32)
-        grid = cuda.to_device(x)
+        grid = cp.ones((2, 2), dtype=np.float32)
         div[1, 1](grid, 2, 2)
-        y = grid.copy_to_host()
+        y = grid.get()
         self.assertTrue(np.all(y == 0.5))
 
     def test_inplace_div_double(self):
@@ -28,10 +28,9 @@ def div_double(grid, l_x, l_y):
                 for y in range(l_y):
                     grid[x, y] /= 2.0
 
-        x = np.ones((2, 2), dtype=np.float64)
-        grid = cuda.to_device(x)
+        grid = cp.ones((2, 2), dtype=np.float64)
         div_double[1, 1](grid, 2, 2)
-        y = grid.copy_to_host()
+        y = grid.get()
         self.assertTrue(np.all(y == 0.5))
 
 
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_matmul.py b/numba_cuda/numba/cuda/tests/cudapy/test_matmul.py
index 00f0256bd..a2a242273 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_matmul.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_matmul.py
@@ -7,6 +7,7 @@
 from numba.cuda import float32, void
 from numba.cuda.testing import unittest, CUDATestCase
 from numba.cuda.core import config
+import cupy as cp
 
 # Ensure the test takes a reasonable amount of time in the simulator
 if config.ENABLE_CUDASIM:
@@ -53,20 +54,16 @@ def cu_square_matrix_mul(A, B, C):
                 C[y, x] = acc
 
         np.random.seed(42)
-        A = np.array(np.random.random((n, n)), dtype=np.float32)
-        B = np.array(np.random.random((n, n)), dtype=np.float32)
-        C = np.empty_like(A)
-
-        stream = cuda.stream()
-        with stream.auto_synchronize():
-            dA = cuda.to_device(A, stream)
-            dB = cuda.to_device(B, stream)
-            dC = cuda.to_device(C, stream)
-            cu_square_matrix_mul[(bpg, bpg), (tpb, tpb), stream](dA, dB, dC)
-            dC.copy_to_host(C, stream)
+        dA = cp.array(np.random.random((n, n)), dtype=np.float32)
+        dB = cp.array(np.random.random((n, n)), dtype=np.float32)
+        dC = cp.empty_like(dA)
+
+
+        cu_square_matrix_mul[(bpg, bpg), (tpb, tpb)](dA, dB, dC)
+        C = dC.get()
 
         # Host compute
-        Cans = np.dot(A, B)
+        Cans = np.dot(dA.get(), dB.get())
 
         # Check result
         np.testing.assert_allclose(C, Cans, rtol=1e-5)
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_nondet.py b/numba_cuda/numba/cuda/tests/cudapy/test_nondet.py
index 2becebd0a..6835d6cd0 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_nondet.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_nondet.py
@@ -5,11 +5,12 @@
 from numba import cuda
 from numba.cuda import float32, void
 from numba.cuda.testing import unittest, CUDATestCase
+import cupy as cp
 
 
 def generate_input(n):
-    A = np.array(np.arange(n * n).reshape(n, n), dtype=np.float32)
-    B = np.array(np.arange(n) + 0, dtype=A.dtype)
+    A = cp.array(np.arange(n * n).reshape(n, n), dtype=np.float32)
+    B = cp.array(np.arange(n) + 0, dtype=A.dtype)
     return A, B
 
 
@@ -33,20 +34,16 @@ def diagproduct(c, a, b):
 
         N = 8
 
-        A, B = generate_input(N)
-
-        F = np.empty(A.shape, dtype=A.dtype)
+        dA, dB = generate_input(N)
+        dF = cp.empty(dA.shape, dtype=dA.dtype)
 
         blockdim = (32, 8)
         griddim = (1, 1)
 
-        dA = cuda.to_device(A)
-        dB = cuda.to_device(B)
-        dF = cuda.to_device(F, copy=False)
         diagproduct[griddim, blockdim](dF, dA, dB)
 
-        E = np.dot(A, np.diag(B))
-        np.testing.assert_array_almost_equal(dF.copy_to_host(), E)
+        E = np.dot(dA.get(), np.diag(dB.get()))
+        np.testing.assert_array_almost_equal(dF.get(), E)
 
 
 if __name__ == "__main__":

From 20815727a2df77f620e4de6ebaf9e39c193ac225 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Tue, 4 Nov 2025 09:44:33 -0800
Subject: [PATCH 03/60] progress

---
 numba_cuda/numba/cuda/api.py                  |  6 ++
 numba_cuda/numba/cuda/cudadrv/devicearray.py  | 19 +++--
 .../cuda/tests/cudadrv/test_array_attr.py     | 50 ++++++++----
 .../cuda/tests/cudadrv/test_context_stack.py  |  5 +-
 .../tests/cudadrv/test_cuda_array_slicing.py  | 81 ++++++++++++-------
 5 files changed, 109 insertions(+), 52 deletions(-)

diff --git a/numba_cuda/numba/cuda/api.py b/numba_cuda/numba/cuda/api.py
index 8700f960b..526cc98b3 100644
--- a/numba_cuda/numba/cuda/api.py
+++ b/numba_cuda/numba/cuda/api.py
@@ -71,6 +71,7 @@ def as_cuda_array(obj, sync=True):
     If ``sync`` is ``True``, then the imported stream (if present) will be
     synchronized.
     """
+    breakpoint()
     if (
         interface := getattr(obj, "__cuda_array_interface__", None)
     ) is not None:
@@ -133,6 +134,7 @@ def to_device(obj, stream=0, copy=True, to=None):
 
         hary = d_ary.copy_to_host(stream=stream)
     """
+    breakpoint()
     warnings.warn(
         "to_device is deprecated. Please prefer cupy for moving numpy arrays to the device.",
         FutureWarning,
@@ -157,6 +159,7 @@ def device_array(shape, dtype=np.float64, strides=None, order="C", stream=0):
         "device_array is deprecated. Please prefer cupy for moving numpy arrays to the device.",
         FutureWarning,
     )
+    breakpoint()
     shape, strides, dtype = prepare_shape_strides_dtype(
         shape, strides, dtype, order
     )
@@ -342,6 +345,7 @@ def device_array_like(ary, stream=0):
     Call :func:`device_array() <numba.cuda.device_array>` with information from
     the array.
     """
+    breakpoint()
     strides = _contiguous_strides_like_array(ary)
     order = _order_like_array(ary)
     return device_array(
@@ -358,6 +362,7 @@ def mapped_array_like(ary, stream=0, portable=False, wc=False):
     Call :func:`mapped_array() <numba.cuda.mapped_array>` with the information
     from the array.
     """
+    breakpoint()
     strides = _contiguous_strides_like_array(ary)
     order = _order_like_array(ary)
     return mapped_array(
@@ -376,6 +381,7 @@ def pinned_array_like(ary):
     Call :func:`pinned_array() <numba.cuda.pinned_array>` with the information
     from the array.
     """
+    breakpoint()
     strides = _contiguous_strides_like_array(ary)
     order = _order_like_array(ary)
     return pinned_array(
diff --git a/numba_cuda/numba/cuda/cudadrv/devicearray.py b/numba_cuda/numba/cuda/cudadrv/devicearray.py
index 623cfa7d8..84471c175 100644
--- a/numba_cuda/numba/cuda/cudadrv/devicearray.py
+++ b/numba_cuda/numba/cuda/cudadrv/devicearray.py
@@ -630,7 +630,6 @@ def reshape(self, *newshape, **kws):
         return self._reshape(*newshape, **kws)
 
     def _reshape(self, *newshape, **kws):
-
         if len(newshape) == 1 and isinstance(newshape[0], (tuple, list)):
             newshape = newshape[0]
 
@@ -869,7 +868,16 @@ def device_setup(self, gpu_data, stream=0):
 
 def from_array_like(ary, stream=0, gpu_data=None):
     "Create a DeviceNDArray object that is like ary."
-    return DeviceNDArray(
+
+    warnings.warn(
+        "from_array_like is deprecated. Please prefer cupy for array functions",
+        FutureWarning,
+    )
+    return _from_array_like(ary, stream=stream, gpu_data=gpu_data)
+
+
+def _from_array_like(ary, stream=0, gpu_data=None):
+    return _DeviceNDArray(
         ary.shape, ary.strides, ary.dtype, stream=stream, gpu_data=gpu_data
     )
 
@@ -894,7 +902,7 @@ def array_core(ary):
     core_index = []
     for stride in ary.strides:
         core_index.append(0 if stride == 0 else slice(None))
-    
+
     if isinstance(ary, _DeviceNDArray):
         return ary._do_getitem(tuple(core_index))
     else:
@@ -958,7 +966,7 @@ def auto_device(obj, stream=0, copy=True, user_explicit=False):
                 obj, copy=False if numpy_version < (2, 0) else None, subok=True
             )
             sentry_contiguous(obj)
-            devobj = from_array_like(obj, stream=stream)
+            devobj = _from_array_like(obj, stream=stream)
         if copy:
             if (
                 config.CUDA_WARN_ON_IMPLICIT_COPY
@@ -980,7 +988,7 @@ def auto_device(obj, stream=0, copy=True, user_explicit=False):
 def check_array_compatibility(ary1, ary2):
     if isinstance(ary1, _DeviceNDArray):
         ary1sq = ary1._squeeze()
-    else: 
+    else:
         ary1sq = ary1.squeeze()
     if isinstance(ary2, _DeviceNDArray):
         ary2sq = ary2._squeeze()
@@ -1012,6 +1020,7 @@ class DeviceNDArray(_DeviceNDArray):
     """
 
     def __init__(self, *args, **kwargs):
+        breakpoint()
         warnings.warn(
             "DeviceNDArray api is deprecated. Please prefer cupy for array functions",
             FutureWarning,
diff --git a/numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py b/numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py
index 9bcb78309..9f5b79bbc 100644
--- a/numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py
+++ b/numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py
@@ -1,19 +1,39 @@
 # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: BSD-2-Clause
 
+import warnings
 import numpy as np
 from numba import cuda
 from numba.cuda.testing import unittest, CUDATestCase, skip_on_cudasim
 
 
 class TestArrayAttr(CUDATestCase):
+    def _to_device(self, ary):
+        """
+        Helper wrapper around cuda.to_device that suppresses the specific
+        FutureWarning:
+
+            "to_device is deprecated. Please prefer cupy for moving numpy arrays to the device."
+
+        We use warnings.catch_warnings() and filterwarnings with a message
+        regex so we only silence that particular deprecation.
+        """
+        with warnings.catch_warnings():
+            # message is treated as a regex by filterwarnings
+            warnings.filterwarnings(
+                "ignore",
+                category=FutureWarning,
+                message=r".*to_device is deprecated.*",
+            )
+            return cuda.to_device(ary)
+
     def test_contigous_2d(self):
         ary = np.arange(10)
         cary = ary.reshape(2, 5)
         fary = np.asfortranarray(cary)
 
-        dcary = cuda.to_device(cary)
-        dfary = cuda.to_device(fary)
+        dcary = self._to_device(cary)
+        dfary = self._to_device(fary)
         self.assertTrue(dcary.is_c_contiguous())
         self.assertTrue(not dfary.is_c_contiguous())
         self.assertTrue(not dcary.is_f_contiguous())
@@ -24,8 +44,8 @@ def test_contigous_3d(self):
         cary = ary.reshape(2, 5, 2)
         fary = np.asfortranarray(cary)
 
-        dcary = cuda.to_device(cary)
-        dfary = cuda.to_device(fary)
+        dcary = self._to_device(cary)
+        dfary = self._to_device(fary)
         self.assertTrue(dcary.is_c_contiguous())
         self.assertTrue(not dfary.is_c_contiguous())
         self.assertTrue(not dcary.is_f_contiguous())
@@ -36,8 +56,8 @@ def test_contigous_4d(self):
         cary = ary.reshape(2, 5, 2, 3)
         fary = np.asfortranarray(cary)
 
-        dcary = cuda.to_device(cary)
-        dfary = cuda.to_device(fary)
+        dcary = self._to_device(cary)
+        dfary = self._to_device(fary)
         self.assertTrue(dcary.is_c_contiguous())
         self.assertTrue(not dfary.is_c_contiguous())
         self.assertTrue(not dcary.is_f_contiguous())
@@ -45,7 +65,7 @@ def test_contigous_4d(self):
 
     def test_ravel_1d(self):
         ary = np.arange(60)
-        dary = cuda.to_device(ary)
+        dary = self._to_device(ary)
         for order in "CFA":
             expect = ary.ravel(order=order)
             dflat = dary.ravel(order=order)
@@ -57,7 +77,7 @@ def test_ravel_1d(self):
     @skip_on_cudasim("CUDA Array Interface is not supported in the simulator")
     def test_ravel_stride_1d(self):
         ary = np.arange(60)
-        dary = cuda.to_device(ary)
+        dary = self._to_device(ary)
         # No-copy stride device array
         darystride = dary[::2]
         dary_data = dary.__cuda_array_interface__["data"][0]
@@ -72,7 +92,7 @@ def test_ravel_c(self):
         reshaped = ary.reshape(2, 5, 2, 3)
 
         expect = reshaped.ravel(order="C")
-        dary = cuda.to_device(reshaped)
+        dary = self._to_device(reshaped)
         dflat = dary.ravel()
         flat = dflat.copy_to_host()
         self.assertTrue(dary is not dflat)
@@ -82,7 +102,7 @@ def test_ravel_c(self):
         # explicit order kwarg
         for order in "CA":
             expect = reshaped.ravel(order=order)
-            dary = cuda.to_device(reshaped)
+            dary = self._to_device(reshaped)
             dflat = dary.ravel(order=order)
             flat = dflat.copy_to_host()
             self.assertTrue(dary is not dflat)
@@ -94,7 +114,7 @@ def test_ravel_stride_c(self):
         ary = np.arange(60)
         reshaped = ary.reshape(2, 5, 2, 3)
 
-        dary = cuda.to_device(reshaped)
+        dary = self._to_device(reshaped)
         darystride = dary[::2, ::2, ::2, ::2]
         dary_data = dary.__cuda_array_interface__["data"][0]
         ddarystride_data = darystride.__cuda_array_interface__["data"][0]
@@ -107,7 +127,7 @@ def test_ravel_f(self):
         reshaped = np.asfortranarray(ary.reshape(2, 5, 2, 3))
         for order in "FA":
             expect = reshaped.ravel(order=order)
-            dary = cuda.to_device(reshaped)
+            dary = self._to_device(reshaped)
             dflat = dary.ravel(order=order)
             flat = dflat.copy_to_host()
             self.assertTrue(dary is not dflat)
@@ -118,7 +138,7 @@ def test_ravel_f(self):
     def test_ravel_stride_f(self):
         ary = np.arange(60)
         reshaped = np.asfortranarray(ary.reshape(2, 5, 2, 3))
-        dary = cuda.to_device(reshaped)
+        dary = self._to_device(reshaped)
         darystride = dary[::2, ::2, ::2, ::2]
         dary_data = dary.__cuda_array_interface__["data"][0]
         ddarystride_data = darystride.__cuda_array_interface__["data"][0]
@@ -129,7 +149,7 @@ def test_ravel_stride_f(self):
     def test_reshape_c(self):
         ary = np.arange(10)
         expect = ary.reshape(2, 5)
-        dary = cuda.to_device(ary)
+        dary = self._to_device(ary)
         dary_reshaped = dary.reshape(2, 5)
         got = dary_reshaped.copy_to_host()
         self.assertPreciseEqual(expect, got)
@@ -137,7 +157,7 @@ def test_reshape_c(self):
     def test_reshape_f(self):
         ary = np.arange(10)
         expect = ary.reshape(2, 5, order="F")
-        dary = cuda.to_device(ary)
+        dary = self._to_device(ary)
         dary_reshaped = dary.reshape(2, 5, order="F")
         got = dary_reshaped.copy_to_host()
         self.assertPreciseEqual(expect, got)
diff --git a/numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py b/numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py
index 66825bd7e..7e07ddae7 100644
--- a/numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py
+++ b/numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py
@@ -7,6 +7,7 @@
 from numba import cuda
 from numba.cuda.testing import unittest, CUDATestCase, skip_on_cudasim
 from numba.cuda.cudadrv import driver
+import cupy as cp
 
 
 class TestContextStack(CUDATestCase):
@@ -150,9 +151,9 @@ def foo(a):
                 for i in range(a.size):
                     a[i] = i
 
-            a = cuda.device_array(10)
+            a = cp.empty(10)
             foo[1, 1](a)
-            self.assertEqual(list(a.copy_to_host()), list(range(10)))
+            self.assertEqual(list(a.get()), list(range(10)))
 
         self.test_attached_primary(do)
 
diff --git a/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py b/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py
index 93794d34b..8b6d28b36 100644
--- a/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py
+++ b/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py
@@ -1,6 +1,7 @@
 # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: BSD-2-Clause
 
+import warnings
 from itertools import product
 
 import numpy as np
@@ -10,10 +11,30 @@
 from unittest.mock import patch
 
 
+def _to_device(ary, **kwargs):
+    """
+    Module-level helper that wraps cuda.to_device and suppresses the specific
+    FutureWarning:
+
+        "to_device is deprecated. Please prefer cupy for moving numpy arrays to the device."
+
+    The helper uses warnings.catch_warnings() and filterwarnings with a
+    message regex so we only silence that particular deprecation. Any kwargs
+    (e.g. stream=...) are forwarded to cuda.to_device.
+    """
+    with warnings.catch_warnings():
+        warnings.filterwarnings(
+            "ignore",
+            category=FutureWarning,
+            message=r".*to_device is deprecated.*",
+        )
+        return cuda.to_device(ary, **kwargs)
+
+
 class CudaArrayIndexing(CUDATestCase):
     def test_index_1d(self):
         arr = np.arange(10)
-        darr = cuda.to_device(arr)
+        darr = _to_device(arr)
         (x,) = arr.shape
         for i in range(-x, x):
             self.assertEqual(arr[i], darr[i])
@@ -24,7 +45,7 @@ def test_index_1d(self):
 
     def test_index_2d(self):
         arr = np.arange(3 * 4).reshape(3, 4)
-        darr = cuda.to_device(arr)
+        darr = _to_device(arr)
         x, y = arr.shape
         for i in range(-x, x):
             for j in range(-y, y):
@@ -40,7 +61,7 @@ def test_index_2d(self):
 
     def test_index_3d(self):
         arr = np.arange(3 * 4 * 5).reshape(3, 4, 5)
-        darr = cuda.to_device(arr)
+        darr = _to_device(arr)
         x, y, z = arr.shape
         for i in range(-x, x):
             for j in range(-y, y):
@@ -63,13 +84,13 @@ def test_index_3d(self):
 class CudaArrayStridedSlice(CUDATestCase):
     def test_strided_index_1d(self):
         arr = np.arange(10)
-        darr = cuda.to_device(arr)
+        darr = _to_device(arr)
         for i in range(arr.size):
             np.testing.assert_equal(arr[i::2], darr[i::2].copy_to_host())
 
     def test_strided_index_2d(self):
         arr = np.arange(6 * 7).reshape(6, 7)
-        darr = cuda.to_device(arr)
+        darr = _to_device(arr)
 
         for i in range(arr.shape[0]):
             for j in range(arr.shape[1]):
@@ -79,7 +100,7 @@ def test_strided_index_2d(self):
 
     def test_strided_index_3d(self):
         arr = np.arange(6 * 7 * 8).reshape(6, 7, 8)
-        darr = cuda.to_device(arr)
+        darr = _to_device(arr)
 
         for i in range(arr.shape[0]):
             for j in range(arr.shape[1]):
@@ -93,7 +114,7 @@ def test_strided_index_3d(self):
 class CudaArraySlicing(CUDATestCase):
     def test_prefix_1d(self):
         arr = np.arange(5)
-        darr = cuda.to_device(arr)
+        darr = _to_device(arr)
         for i in range(arr.size):
             expect = arr[i:]
             got = darr[i:].copy_to_host()
@@ -101,7 +122,7 @@ def test_prefix_1d(self):
 
     def test_prefix_2d(self):
         arr = np.arange(3**2).reshape(3, 3)
-        darr = cuda.to_device(arr)
+        darr = _to_device(arr)
         for i in range(arr.shape[0]):
             for j in range(arr.shape[1]):
                 expect = arr[i:, j:]
@@ -113,7 +134,7 @@ def test_prefix_2d(self):
 
     def test_select_3d_first_two_dim(self):
         arr = np.arange(3 * 4 * 5).reshape(3, 4, 5)
-        darr = cuda.to_device(arr)
+        darr = _to_device(arr)
         # Select first dimension
         for i in range(arr.shape[0]):
             expect = arr[i]
@@ -134,7 +155,7 @@ def test_select_3d_first_two_dim(self):
 
     def test_select_f(self):
         a = np.arange(5 * 6 * 7).reshape(5, 6, 7, order="F")
-        da = cuda.to_device(a)
+        da = _to_device(a)
 
         for i in range(a.shape[0]):
             for j in range(a.shape[1]):
@@ -153,7 +174,7 @@ def test_select_f(self):
 
     def test_select_c(self):
         a = np.arange(5 * 6 * 7).reshape(5, 6, 7, order="C")
-        da = cuda.to_device(a)
+        da = _to_device(a)
 
         for i in range(a.shape[0]):
             for j in range(a.shape[1]):
@@ -173,18 +194,18 @@ def test_select_c(self):
     def test_prefix_select(self):
         arr = np.arange(5 * 7).reshape(5, 7, order="F")
 
-        darr = cuda.to_device(arr)
+        darr = _to_device(arr)
         self.assertTrue(np.all(darr[:1, 1].copy_to_host() == arr[:1, 1]))
 
     def test_negative_slicing_1d(self):
         arr = np.arange(10)
-        darr = cuda.to_device(arr)
+        darr = _to_device(arr)
         for i, j in product(range(-10, 10), repeat=2):
             np.testing.assert_array_equal(arr[i:j], darr[i:j].copy_to_host())
 
     def test_negative_slicing_2d(self):
         arr = np.arange(12).reshape(3, 4)
-        darr = cuda.to_device(arr)
+        darr = _to_device(arr)
         for x, y, w, s in product(range(-4, 4), repeat=4):
             np.testing.assert_array_equal(
                 arr[x:y, w:s], darr[x:y, w:s].copy_to_host()
@@ -192,7 +213,7 @@ def test_negative_slicing_2d(self):
 
     def test_empty_slice_1d(self):
         arr = np.arange(5)
-        darr = cuda.to_device(arr)
+        darr = _to_device(arr)
         for i in range(darr.shape[0]):
             np.testing.assert_array_equal(darr[i:i].copy_to_host(), arr[i:i])
         # empty slice of empty slice
@@ -205,7 +226,7 @@ def test_empty_slice_1d(self):
 
     def test_empty_slice_2d(self):
         arr = np.arange(5 * 7).reshape(5, 7)
-        darr = cuda.to_device(arr)
+        darr = _to_device(arr)
         np.testing.assert_array_equal(darr[:0].copy_to_host(), arr[:0])
         np.testing.assert_array_equal(darr[3, :0].copy_to_host(), arr[3, :0])
         # empty slice of empty slice
@@ -225,28 +246,28 @@ class CudaArraySetting(CUDATestCase):
 
     def test_scalar(self):
         arr = np.arange(5 * 7).reshape(5, 7)
-        darr = cuda.to_device(arr)
+        darr = _to_device(arr)
         arr[2, 2] = 500
         darr[2, 2] = 500
         np.testing.assert_array_equal(darr.copy_to_host(), arr)
 
     def test_rank(self):
         arr = np.arange(5 * 7).reshape(5, 7)
-        darr = cuda.to_device(arr)
+        darr = _to_device(arr)
         arr[2] = 500
         darr[2] = 500
         np.testing.assert_array_equal(darr.copy_to_host(), arr)
 
     def test_broadcast(self):
         arr = np.arange(5 * 7).reshape(5, 7)
-        darr = cuda.to_device(arr)
+        darr = _to_device(arr)
         arr[:, 2] = 500
         darr[:, 2] = 500
         np.testing.assert_array_equal(darr.copy_to_host(), arr)
 
     def test_array_assign_column(self):
         arr = np.arange(5 * 7).reshape(5, 7)
-        darr = cuda.to_device(arr)
+        darr = _to_device(arr)
         _400 = np.full(shape=7, fill_value=400)
         arr[2] = _400
         darr[2] = _400
@@ -254,7 +275,7 @@ def test_array_assign_column(self):
 
     def test_array_assign_row(self):
         arr = np.arange(5 * 7).reshape(5, 7)
-        darr = cuda.to_device(arr)
+        darr = _to_device(arr)
         _400 = np.full(shape=5, fill_value=400)
         arr[:, 2] = _400
         darr[:, 2] = _400
@@ -262,7 +283,7 @@ def test_array_assign_row(self):
 
     def test_array_assign_subarray(self):
         arr = np.arange(5 * 6 * 7).reshape(5, 6, 7)
-        darr = cuda.to_device(arr)
+        darr = _to_device(arr)
         _400 = np.full(shape=(6, 7), fill_value=400)
         arr[2] = _400
         darr[2] = _400
@@ -270,7 +291,7 @@ def test_array_assign_subarray(self):
 
     def test_array_assign_deep_subarray(self):
         arr = np.arange(5 * 6 * 7 * 8).reshape(5, 6, 7, 8)
-        darr = cuda.to_device(arr)
+        darr = _to_device(arr)
         _400 = np.full(shape=(5, 6, 8), fill_value=400)
         arr[:, :, 2] = _400
         darr[:, :, 2] = _400
@@ -278,7 +299,7 @@ def test_array_assign_deep_subarray(self):
 
     def test_array_assign_all(self):
         arr = np.arange(5 * 7).reshape(5, 7)
-        darr = cuda.to_device(arr)
+        darr = _to_device(arr)
         _400 = np.full(shape=(5, 7), fill_value=400)
         arr[:] = _400
         darr[:] = _400
@@ -286,13 +307,13 @@ def test_array_assign_all(self):
 
     def test_strides(self):
         arr = np.ones(20)
-        darr = cuda.to_device(arr)
+        darr = _to_device(arr)
         arr[::2] = 500
         darr[::2] = 500
         np.testing.assert_array_equal(darr.copy_to_host(), arr)
 
     def test_incompatible_highdim(self):
-        darr = cuda.to_device(np.arange(5 * 7))
+        darr = _to_device(np.arange(5 * 7))
 
         with self.assertRaises(ValueError) as e:
             darr[:] = np.ones(shape=(1, 2, 3))
@@ -307,7 +328,7 @@ def test_incompatible_highdim(self):
         )
 
     def test_incompatible_shape(self):
-        darr = cuda.to_device(np.arange(5))
+        darr = _to_device(np.arange(5))
 
         with self.assertRaises(ValueError) as e:
             darr[:] = [1, 3]
@@ -325,7 +346,7 @@ def test_incompatible_shape(self):
     @skip_on_cudasim("cudasim does not use streams and operates synchronously")
     def test_sync(self):
         # There should be a synchronization when no stream is supplied
-        darr = cuda.to_device(np.arange(5))
+        darr = _to_device(np.arange(5))
 
         with patch.object(
             cuda.cudadrv.driver.Stream, "synchronize", return_value=None
@@ -347,7 +368,7 @@ def test_no_sync_default_stream(self):
         )
 
         for stream in streams:
-            darr = cuda.to_device(np.arange(5), stream=stream)
+            darr = _to_device(np.arange(5), stream=stream)
 
             with patch.object(
                 cuda.cudadrv.driver.Stream, "synchronize", return_value=None
@@ -369,7 +390,7 @@ def test_no_sync_supplied_stream(self):
         )
 
         for stream in streams:
-            darr = cuda.to_device(np.arange(5))
+            darr = _to_device(np.arange(5))
 
             with patch.object(
                 cuda.cudadrv.driver.Stream, "synchronize", return_value=None

From edf413d4cdac25091d035383efd4c0729f35c28c Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Fri, 14 Nov 2025 05:32:02 -0800
Subject: [PATCH 04/60] clean

---
 numba_cuda/numba/cuda/api.py                 | 6 ------
 numba_cuda/numba/cuda/cudadrv/devicearray.py | 1 -
 2 files changed, 7 deletions(-)

diff --git a/numba_cuda/numba/cuda/api.py b/numba_cuda/numba/cuda/api.py
index 526cc98b3..8700f960b 100644
--- a/numba_cuda/numba/cuda/api.py
+++ b/numba_cuda/numba/cuda/api.py
@@ -71,7 +71,6 @@ def as_cuda_array(obj, sync=True):
     If ``sync`` is ``True``, then the imported stream (if present) will be
     synchronized.
     """
-    breakpoint()
     if (
         interface := getattr(obj, "__cuda_array_interface__", None)
     ) is not None:
@@ -134,7 +133,6 @@ def to_device(obj, stream=0, copy=True, to=None):
 
         hary = d_ary.copy_to_host(stream=stream)
     """
-    breakpoint()
     warnings.warn(
         "to_device is deprecated. Please prefer cupy for moving numpy arrays to the device.",
         FutureWarning,
@@ -159,7 +157,6 @@ def device_array(shape, dtype=np.float64, strides=None, order="C", stream=0):
         "device_array is deprecated. Please prefer cupy for moving numpy arrays to the device.",
         FutureWarning,
     )
-    breakpoint()
     shape, strides, dtype = prepare_shape_strides_dtype(
         shape, strides, dtype, order
     )
@@ -345,7 +342,6 @@ def device_array_like(ary, stream=0):
     Call :func:`device_array() <numba.cuda.device_array>` with information from
     the array.
     """
-    breakpoint()
     strides = _contiguous_strides_like_array(ary)
     order = _order_like_array(ary)
     return device_array(
@@ -362,7 +358,6 @@ def mapped_array_like(ary, stream=0, portable=False, wc=False):
     Call :func:`mapped_array() <numba.cuda.mapped_array>` with the information
     from the array.
     """
-    breakpoint()
     strides = _contiguous_strides_like_array(ary)
     order = _order_like_array(ary)
     return mapped_array(
@@ -381,7 +376,6 @@ def pinned_array_like(ary):
     Call :func:`pinned_array() <numba.cuda.pinned_array>` with the information
     from the array.
     """
-    breakpoint()
     strides = _contiguous_strides_like_array(ary)
     order = _order_like_array(ary)
     return pinned_array(
diff --git a/numba_cuda/numba/cuda/cudadrv/devicearray.py b/numba_cuda/numba/cuda/cudadrv/devicearray.py
index 84471c175..798351462 100644
--- a/numba_cuda/numba/cuda/cudadrv/devicearray.py
+++ b/numba_cuda/numba/cuda/cudadrv/devicearray.py
@@ -1020,7 +1020,6 @@ class DeviceNDArray(_DeviceNDArray):
     """
 
     def __init__(self, *args, **kwargs):
-        breakpoint()
         warnings.warn(
             "DeviceNDArray api is deprecated. Please prefer cupy for array functions",
             FutureWarning,

From 6e08f80a8564b49bd0d32c11fbf02e916cdbfc57 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Mon, 17 Nov 2025 08:23:23 -0800
Subject: [PATCH 05/60] more clean

---
 numba_cuda/numba/cuda/cudadrv/devicearray.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/numba_cuda/numba/cuda/cudadrv/devicearray.py b/numba_cuda/numba/cuda/cudadrv/devicearray.py
index 798351462..02949f7c6 100644
--- a/numba_cuda/numba/cuda/cudadrv/devicearray.py
+++ b/numba_cuda/numba/cuda/cudadrv/devicearray.py
@@ -369,7 +369,6 @@ def squeeze(self, axis=None, stream=0):
             Squeezed view into the array.
 
         """
-        breakpoint()
         return self._squeeze(axis=axis, stream=stream)
 
     def _squeeze(self, axis=None, stream=0):
@@ -681,7 +680,6 @@ def ravel(self, order="C", stream=0):
     @devices.require_context
     @deprecated_array_api
     def __getitem__(self, item):
-        breakpoint()
         return self._do_getitem(item)
 
     @devices.require_context

From 50683d0b626f709ae9199fd3d9fc986f876dfbc9 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Thu, 6 Nov 2025 13:22:13 -0800
Subject: [PATCH 06/60] working through more test cases

---
 numba_cuda/numba/cuda/api.py                  | 11 ++-
 numba_cuda/numba/cuda/cudadrv/devicearray.py  | 12 ++-
 numba_cuda/numba/cuda/testing.py              | 15 ++-
 .../cuda/tests/cudadrv/test_array_attr.py     | 58 ++++-------
 .../tests/cudadrv/test_cuda_array_slicing.py  | 95 ++++++++-----------
 .../tests/cudadrv/test_cuda_devicerecord.py   |  8 +-
 .../cuda/tests/cudadrv/test_cuda_ndarray.py   | 10 +-
 .../cuda/tests/cudadrv/test_host_alloc.py     |  4 +-
 .../cuda/tests/cudadrv/test_managed_alloc.py  |  4 +-
 .../numba/cuda/tests/cudapy/test_array.py     |  4 +-
 .../numba/cuda/tests/cudapy/test_datetime.py  | 14 ++-
 .../numba/cuda/tests/cudapy/test_debuginfo.py |  5 +-
 .../cuda/tests/cudapy/test_device_func.py     |  5 +-
 .../cuda/tests/cudapy/test_dispatcher.py      |  5 +-
 pyproject.toml                                |  1 +
 15 files changed, 122 insertions(+), 129 deletions(-)

diff --git a/numba_cuda/numba/cuda/api.py b/numba_cuda/numba/cuda/api.py
index 8700f960b..beac781c6 100644
--- a/numba_cuda/numba/cuda/api.py
+++ b/numba_cuda/numba/cuda/api.py
@@ -13,6 +13,7 @@
 from .cudadrv import devicearray, devices, driver
 from numba.cuda.core import config
 from numba.cuda.api_util import prepare_shape_strides_dtype
+from numba.cuda.cudadrv.devicearray import DeprecatedDeviceArrayApiWarning
 
 # NDarray device helper
 
@@ -135,7 +136,7 @@ def to_device(obj, stream=0, copy=True, to=None):
     """
     warnings.warn(
         "to_device is deprecated. Please prefer cupy for moving numpy arrays to the device.",
-        FutureWarning,
+        DeprecatedDeviceArrayApiWarning,
     )
     if to is None:
         to, new = devicearray.auto_device(
@@ -155,7 +156,7 @@ def device_array(shape, dtype=np.float64, strides=None, order="C", stream=0):
     """
     warnings.warn(
         "device_array is deprecated. Please prefer cupy for moving numpy arrays to the device.",
-        FutureWarning,
+        DeprecatedDeviceArrayApiWarning,
     )
     shape, strides, dtype = prepare_shape_strides_dtype(
         shape, strides, dtype, order
@@ -191,7 +192,7 @@ def managed_array(
     """
     warnings.warn(
         "managed_array is deprecated. Please prefer cupy for moving numpy arrays to the device.",
-        FutureWarning,
+        DeprecatedDeviceArrayApiWarning,
     )
     shape, strides, dtype = prepare_shape_strides_dtype(
         shape, strides, dtype, order
@@ -217,7 +218,7 @@ def pinned_array(shape, dtype=np.float64, strides=None, order="C"):
     """
     warnings.warn(
         "pinned_array is deprecated. Please prefer cupy for moving numpy arrays to the device.",
-        FutureWarning,
+        DeprecatedDeviceArrayApiWarning,
     )
     shape, strides, dtype = prepare_shape_strides_dtype(
         shape, strides, dtype, order
@@ -253,7 +254,7 @@ def mapped_array(
     """
     warnings.warn(
         "mapped_array is deprecated. Please prefer cupy for moving numpy arrays to the device.",
-        FutureWarning,
+        DeprecatedDeviceArrayApiWarning,
     )
     shape, strides, dtype = prepare_shape_strides_dtype(
         shape, strides, dtype, order
diff --git a/numba_cuda/numba/cuda/cudadrv/devicearray.py b/numba_cuda/numba/cuda/cudadrv/devicearray.py
index 02949f7c6..915b3f55c 100644
--- a/numba_cuda/numba/cuda/cudadrv/devicearray.py
+++ b/numba_cuda/numba/cuda/cudadrv/devicearray.py
@@ -36,12 +36,16 @@ def lru_cache(func):
         return func
 
 
+class DeprecatedDeviceArrayApiWarning(FutureWarning):
+    pass
+
+
 def deprecated_array_api(func):
     @functools.wraps(func)
     def wrapper(*args, **kwargs):
         warnings.warn(
             f"{func.__name__} api is deprecated. Please prefer cupy for array functions",
-            FutureWarning,
+            DeprecatedDeviceArrayApiWarning,
         )
         return func(*args, **kwargs)
 
@@ -869,7 +873,7 @@ def from_array_like(ary, stream=0, gpu_data=None):
 
     warnings.warn(
         "from_array_like is deprecated. Please prefer cupy for array functions",
-        FutureWarning,
+        DeprecatedDeviceArrayApiWarning,
     )
     return _from_array_like(ary, stream=stream, gpu_data=gpu_data)
 
@@ -1012,7 +1016,7 @@ class DeviceNDArray(_DeviceNDArray):
     """
     Deprecated public wrapper around the implementation class _DeviceNDArray.
 
-    Instantiating this class will emit a FutureWarning indicating that the
+    Instantiating this class will emit a DeprecatedDeviceArrayApiWarning indicating that the
     public name DeviceNDArray is deprecated. The implementation class is now
     named _DeviceNDArray; code should migrate to that name.
     """
@@ -1020,6 +1024,6 @@ class DeviceNDArray(_DeviceNDArray):
     def __init__(self, *args, **kwargs):
         warnings.warn(
             "DeviceNDArray api is deprecated. Please prefer cupy for array functions",
-            FutureWarning,
+            DeprecatedDeviceArrayApiWarning,
         )
         super().__init__(*args, **kwargs)
diff --git a/numba_cuda/numba/cuda/testing.py b/numba_cuda/numba/cuda/testing.py
index 04ccfcf09..ed138278a 100644
--- a/numba_cuda/numba/cuda/testing.py
+++ b/numba_cuda/numba/cuda/testing.py
@@ -13,12 +13,13 @@
 from numba.cuda import config
 from numba.cuda.tests.support import TestCase
 from pathlib import Path
-
+import warnings
 from typing import Iterable, Union
 from io import StringIO
 import unittest
 import numpy as np
 from numba.cuda import HAS_NUMBA
+from numba.cuda.cudadrv.devicearray import DeprecatedDeviceArrayApiWarning
 
 if PYVERSION >= (3, 10):
     from filecheck.matcher import Matcher
@@ -185,6 +186,18 @@ def assertFileCheckMatches(
             )
 
 
+class DeprecatedDeviceArrayApiTest(CUDATestCase):
+    """CUDATestCase that ignores DeprecatedDeviceArrayApiWarning per test."""
+
+    def setUp(self):
+        # Restore prior filters on cleanup; resetwarnings() would wipe all.
+        ctx = warnings.catch_warnings()
+        ctx.__enter__()
+        self.addCleanup(ctx.__exit__, None, None, None)
+        warnings.simplefilter("ignore", DeprecatedDeviceArrayApiWarning)
+        super().setUp()
+
+
 def skip_on_cudasim(reason):
     """Skip this test if running on the CUDA simulator"""
     assert isinstance(reason, str)
diff --git a/numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py b/numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py
index 9f5b79bbc..bdf6dda33 100644
--- a/numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py
+++ b/numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py
@@ -1,39 +1,23 @@
 # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: BSD-2-Clause
 
-import warnings
 import numpy as np
 from numba import cuda
-from numba.cuda.testing import unittest, CUDATestCase, skip_on_cudasim
+from numba.cuda.testing import (
+    unittest,
+    DeprecatedDeviceArrayApiTest,
+    skip_on_cudasim,
+)
 
 
-class TestArrayAttr(CUDATestCase):
-    def _to_device(self, ary):
-        """
-        Helper wrapper around cuda.to_device that suppresses the specific
-        FutureWarning:
-
-            "to_device is deprecated. Please prefer cupy for moving numpy arrays to the device."
-
-        We use warnings.catch_warnings() and filterwarnings with a message
-        regex so we only silence that particular deprecation.
-        """
-        with warnings.catch_warnings():
-            # message is treated as a regex by filterwarnings
-            warnings.filterwarnings(
-                "ignore",
-                category=FutureWarning,
-                message=r".*to_device is deprecated.*",
-            )
-            return cuda.to_device(ary)
-
+class TestArrayAttr(DeprecatedDeviceArrayApiTest):
     def test_contigous_2d(self):
         ary = np.arange(10)
         cary = ary.reshape(2, 5)
         fary = np.asfortranarray(cary)
 
-        dcary = self._to_device(cary)
-        dfary = self._to_device(fary)
+        dcary = cuda.to_device(cary)
+        dfary = cuda.to_device(fary)
         self.assertTrue(dcary.is_c_contiguous())
         self.assertTrue(not dfary.is_c_contiguous())
         self.assertTrue(not dcary.is_f_contiguous())
@@ -44,8 +28,8 @@ def test_contigous_3d(self):
         cary = ary.reshape(2, 5, 2)
         fary = np.asfortranarray(cary)
 
-        dcary = self._to_device(cary)
-        dfary = self._to_device(fary)
+        dcary = cuda.to_device(cary)
+        dfary = cuda.to_device(fary)
         self.assertTrue(dcary.is_c_contiguous())
         self.assertTrue(not dfary.is_c_contiguous())
         self.assertTrue(not dcary.is_f_contiguous())
@@ -56,8 +40,8 @@ def test_contigous_4d(self):
         cary = ary.reshape(2, 5, 2, 3)
         fary = np.asfortranarray(cary)
 
-        dcary = self._to_device(cary)
-        dfary = self._to_device(fary)
+        dcary = cuda.to_device(cary)
+        dfary = cuda.to_device(fary)
         self.assertTrue(dcary.is_c_contiguous())
         self.assertTrue(not dfary.is_c_contiguous())
         self.assertTrue(not dcary.is_f_contiguous())
@@ -65,7 +49,7 @@ def test_contigous_4d(self):
 
     def test_ravel_1d(self):
         ary = np.arange(60)
-        dary = self._to_device(ary)
+        dary = cuda.to_device(ary)
         for order in "CFA":
             expect = ary.ravel(order=order)
             dflat = dary.ravel(order=order)
@@ -77,7 +61,7 @@ def test_ravel_1d(self):
     @skip_on_cudasim("CUDA Array Interface is not supported in the simulator")
     def test_ravel_stride_1d(self):
         ary = np.arange(60)
-        dary = self._to_device(ary)
+        dary = cuda.to_device(ary)
         # No-copy stride device array
         darystride = dary[::2]
         dary_data = dary.__cuda_array_interface__["data"][0]
@@ -92,7 +76,7 @@ def test_ravel_c(self):
         reshaped = ary.reshape(2, 5, 2, 3)
 
         expect = reshaped.ravel(order="C")
-        dary = self._to_device(reshaped)
+        dary = cuda.to_device(reshaped)
         dflat = dary.ravel()
         flat = dflat.copy_to_host()
         self.assertTrue(dary is not dflat)
@@ -102,7 +86,7 @@ def test_ravel_c(self):
         # explicit order kwarg
         for order in "CA":
             expect = reshaped.ravel(order=order)
-            dary = self._to_device(reshaped)
+            dary = cuda.to_device(reshaped)
             dflat = dary.ravel(order=order)
             flat = dflat.copy_to_host()
             self.assertTrue(dary is not dflat)
@@ -114,7 +98,7 @@ def test_ravel_stride_c(self):
         ary = np.arange(60)
         reshaped = ary.reshape(2, 5, 2, 3)
 
-        dary = self._to_device(reshaped)
+        dary = cuda.to_device(reshaped)
         darystride = dary[::2, ::2, ::2, ::2]
         dary_data = dary.__cuda_array_interface__["data"][0]
         ddarystride_data = darystride.__cuda_array_interface__["data"][0]
@@ -127,7 +111,7 @@ def test_ravel_f(self):
         reshaped = np.asfortranarray(ary.reshape(2, 5, 2, 3))
         for order in "FA":
             expect = reshaped.ravel(order=order)
-            dary = self._to_device(reshaped)
+            dary = cuda.to_device(reshaped)
             dflat = dary.ravel(order=order)
             flat = dflat.copy_to_host()
             self.assertTrue(dary is not dflat)
@@ -138,7 +122,7 @@ def test_ravel_f(self):
     def test_ravel_stride_f(self):
         ary = np.arange(60)
         reshaped = np.asfortranarray(ary.reshape(2, 5, 2, 3))
-        dary = self._to_device(reshaped)
+        dary = cuda.to_device(reshaped)
         darystride = dary[::2, ::2, ::2, ::2]
         dary_data = dary.__cuda_array_interface__["data"][0]
         ddarystride_data = darystride.__cuda_array_interface__["data"][0]
@@ -149,7 +133,7 @@ def test_ravel_stride_f(self):
     def test_reshape_c(self):
         ary = np.arange(10)
         expect = ary.reshape(2, 5)
-        dary = self._to_device(ary)
+        dary = cuda.to_device(ary)
         dary_reshaped = dary.reshape(2, 5)
         got = dary_reshaped.copy_to_host()
         self.assertPreciseEqual(expect, got)
@@ -157,7 +141,7 @@ def test_reshape_c(self):
     def test_reshape_f(self):
         ary = np.arange(10)
         expect = ary.reshape(2, 5, order="F")
-        dary = self._to_device(ary)
+        dary = cuda.to_device(ary)
         dary_reshaped = dary.reshape(2, 5, order="F")
         got = dary_reshaped.copy_to_host()
         self.assertPreciseEqual(expect, got)
diff --git a/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py b/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py
index 8b6d28b36..55619aa95 100644
--- a/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py
+++ b/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py
@@ -1,40 +1,23 @@
 # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: BSD-2-Clause
 
-import warnings
 from itertools import product
 
 import numpy as np
 
 from numba import cuda
-from numba.cuda.testing import unittest, CUDATestCase, skip_on_cudasim
+from numba.cuda.testing import (
+    unittest,
+    DeprecatedDeviceArrayApiTest,
+    skip_on_cudasim,
+)
 from unittest.mock import patch
 
 
-def _to_device(ary, **kwargs):
-    """
-    Module-level helper that wraps cuda.to_device and suppresses the specific
-    FutureWarning:
-
-        "to_device is deprecated. Please prefer cupy for moving numpy arrays to the device."
-
-    The helper uses warnings.catch_warnings() and filterwarnings with a
-    message regex so we only silence that particular deprecation. Any kwargs
-    (e.g. stream=...) are forwarded to cuda.to_device.
-    """
-    with warnings.catch_warnings():
-        warnings.filterwarnings(
-            "ignore",
-            category=FutureWarning,
-            message=r".*to_device is deprecated.*",
-        )
-        return cuda.to_device(ary, **kwargs)
-
-
-class CudaArrayIndexing(CUDATestCase):
+class CudaArrayIndexing(DeprecatedDeviceArrayApiTest):
     def test_index_1d(self):
         arr = np.arange(10)
-        darr = _to_device(arr)
+        darr = cuda.to_device(arr)
         (x,) = arr.shape
         for i in range(-x, x):
             self.assertEqual(arr[i], darr[i])
@@ -45,7 +28,7 @@ def test_index_1d(self):
 
     def test_index_2d(self):
         arr = np.arange(3 * 4).reshape(3, 4)
-        darr = _to_device(arr)
+        darr = cuda.to_device(arr)
         x, y = arr.shape
         for i in range(-x, x):
             for j in range(-y, y):
@@ -61,7 +44,7 @@ def test_index_2d(self):
 
     def test_index_3d(self):
         arr = np.arange(3 * 4 * 5).reshape(3, 4, 5)
-        darr = _to_device(arr)
+        darr = cuda.to_device(arr)
         x, y, z = arr.shape
         for i in range(-x, x):
             for j in range(-y, y):
@@ -81,16 +64,16 @@ def test_index_3d(self):
             darr[0, 0, z]
 
 
-class CudaArrayStridedSlice(CUDATestCase):
+class CudaArrayStridedSlice(DeprecatedDeviceArrayApiTest):
     def test_strided_index_1d(self):
         arr = np.arange(10)
-        darr = _to_device(arr)
+        darr = cuda.to_device(arr)
         for i in range(arr.size):
             np.testing.assert_equal(arr[i::2], darr[i::2].copy_to_host())
 
     def test_strided_index_2d(self):
         arr = np.arange(6 * 7).reshape(6, 7)
-        darr = _to_device(arr)
+        darr = cuda.to_device(arr)
 
         for i in range(arr.shape[0]):
             for j in range(arr.shape[1]):
@@ -100,7 +83,7 @@ def test_strided_index_2d(self):
 
     def test_strided_index_3d(self):
         arr = np.arange(6 * 7 * 8).reshape(6, 7, 8)
-        darr = _to_device(arr)
+        darr = cuda.to_device(arr)
 
         for i in range(arr.shape[0]):
             for j in range(arr.shape[1]):
@@ -111,10 +94,10 @@ def test_strided_index_3d(self):
                     )
 
 
-class CudaArraySlicing(CUDATestCase):
+class CudaArraySlicing(DeprecatedDeviceArrayApiTest):
     def test_prefix_1d(self):
         arr = np.arange(5)
-        darr = _to_device(arr)
+        darr = cuda.to_device(arr)
         for i in range(arr.size):
             expect = arr[i:]
             got = darr[i:].copy_to_host()
@@ -122,7 +105,7 @@ def test_prefix_1d(self):
 
     def test_prefix_2d(self):
         arr = np.arange(3**2).reshape(3, 3)
-        darr = _to_device(arr)
+        darr = cuda.to_device(arr)
         for i in range(arr.shape[0]):
             for j in range(arr.shape[1]):
                 expect = arr[i:, j:]
@@ -134,7 +117,7 @@ def test_prefix_2d(self):
 
     def test_select_3d_first_two_dim(self):
         arr = np.arange(3 * 4 * 5).reshape(3, 4, 5)
-        darr = _to_device(arr)
+        darr = cuda.to_device(arr)
         # Select first dimension
         for i in range(arr.shape[0]):
             expect = arr[i]
@@ -155,7 +138,7 @@ def test_select_3d_first_two_dim(self):
 
     def test_select_f(self):
         a = np.arange(5 * 6 * 7).reshape(5, 6, 7, order="F")
-        da = _to_device(a)
+        da = cuda.to_device(a)
 
         for i in range(a.shape[0]):
             for j in range(a.shape[1]):
@@ -174,7 +157,7 @@ def test_select_f(self):
 
     def test_select_c(self):
         a = np.arange(5 * 6 * 7).reshape(5, 6, 7, order="C")
-        da = _to_device(a)
+        da = cuda.to_device(a)
 
         for i in range(a.shape[0]):
             for j in range(a.shape[1]):
@@ -194,18 +177,18 @@ def test_select_c(self):
     def test_prefix_select(self):
         arr = np.arange(5 * 7).reshape(5, 7, order="F")
 
-        darr = _to_device(arr)
+        darr = cuda.to_device(arr)
         self.assertTrue(np.all(darr[:1, 1].copy_to_host() == arr[:1, 1]))
 
     def test_negative_slicing_1d(self):
         arr = np.arange(10)
-        darr = _to_device(arr)
+        darr = cuda.to_device(arr)
         for i, j in product(range(-10, 10), repeat=2):
             np.testing.assert_array_equal(arr[i:j], darr[i:j].copy_to_host())
 
     def test_negative_slicing_2d(self):
         arr = np.arange(12).reshape(3, 4)
-        darr = _to_device(arr)
+        darr = cuda.to_device(arr)
         for x, y, w, s in product(range(-4, 4), repeat=4):
             np.testing.assert_array_equal(
                 arr[x:y, w:s], darr[x:y, w:s].copy_to_host()
@@ -213,7 +196,7 @@ def test_negative_slicing_2d(self):
 
     def test_empty_slice_1d(self):
         arr = np.arange(5)
-        darr = _to_device(arr)
+        darr = cuda.to_device(arr)
         for i in range(darr.shape[0]):
             np.testing.assert_array_equal(darr[i:i].copy_to_host(), arr[i:i])
         # empty slice of empty slice
@@ -226,7 +209,7 @@ def test_empty_slice_1d(self):
 
     def test_empty_slice_2d(self):
         arr = np.arange(5 * 7).reshape(5, 7)
-        darr = _to_device(arr)
+        darr = cuda.to_device(arr)
         np.testing.assert_array_equal(darr[:0].copy_to_host(), arr[:0])
         np.testing.assert_array_equal(darr[3, :0].copy_to_host(), arr[3, :0])
         # empty slice of empty slice
@@ -238,7 +221,7 @@ def test_empty_slice_2d(self):
         )
 
 
-class CudaArraySetting(CUDATestCase):
+class CudaArraySetting(DeprecatedDeviceArrayApiTest):
     """
     Most of the slicing logic is tested in the cases above, so these
     tests focus on the setting logic.
@@ -246,28 +229,28 @@ class CudaArraySetting(CUDATestCase):
 
     def test_scalar(self):
         arr = np.arange(5 * 7).reshape(5, 7)
-        darr = _to_device(arr)
+        darr = cuda.to_device(arr)
         arr[2, 2] = 500
         darr[2, 2] = 500
         np.testing.assert_array_equal(darr.copy_to_host(), arr)
 
     def test_rank(self):
         arr = np.arange(5 * 7).reshape(5, 7)
-        darr = _to_device(arr)
+        darr = cuda.to_device(arr)
         arr[2] = 500
         darr[2] = 500
         np.testing.assert_array_equal(darr.copy_to_host(), arr)
 
     def test_broadcast(self):
         arr = np.arange(5 * 7).reshape(5, 7)
-        darr = _to_device(arr)
+        darr = cuda.to_device(arr)
         arr[:, 2] = 500
         darr[:, 2] = 500
         np.testing.assert_array_equal(darr.copy_to_host(), arr)
 
     def test_array_assign_column(self):
         arr = np.arange(5 * 7).reshape(5, 7)
-        darr = _to_device(arr)
+        darr = cuda.to_device(arr)
         _400 = np.full(shape=7, fill_value=400)
         arr[2] = _400
         darr[2] = _400
@@ -275,7 +258,7 @@ def test_array_assign_column(self):
 
     def test_array_assign_row(self):
         arr = np.arange(5 * 7).reshape(5, 7)
-        darr = _to_device(arr)
+        darr = cuda.to_device(arr)
         _400 = np.full(shape=5, fill_value=400)
         arr[:, 2] = _400
         darr[:, 2] = _400
@@ -283,7 +266,7 @@ def test_array_assign_row(self):
 
     def test_array_assign_subarray(self):
         arr = np.arange(5 * 6 * 7).reshape(5, 6, 7)
-        darr = _to_device(arr)
+        darr = cuda.to_device(arr)
         _400 = np.full(shape=(6, 7), fill_value=400)
         arr[2] = _400
         darr[2] = _400
@@ -291,7 +274,7 @@ def test_array_assign_subarray(self):
 
     def test_array_assign_deep_subarray(self):
         arr = np.arange(5 * 6 * 7 * 8).reshape(5, 6, 7, 8)
-        darr = _to_device(arr)
+        darr = cuda.to_device(arr)
         _400 = np.full(shape=(5, 6, 8), fill_value=400)
         arr[:, :, 2] = _400
         darr[:, :, 2] = _400
@@ -299,7 +282,7 @@ def test_array_assign_deep_subarray(self):
 
     def test_array_assign_all(self):
         arr = np.arange(5 * 7).reshape(5, 7)
-        darr = _to_device(arr)
+        darr = cuda.to_device(arr)
         _400 = np.full(shape=(5, 7), fill_value=400)
         arr[:] = _400
         darr[:] = _400
@@ -307,13 +290,13 @@ def test_array_assign_all(self):
 
     def test_strides(self):
         arr = np.ones(20)
-        darr = _to_device(arr)
+        darr = cuda.to_device(arr)
         arr[::2] = 500
         darr[::2] = 500
         np.testing.assert_array_equal(darr.copy_to_host(), arr)
 
     def test_incompatible_highdim(self):
-        darr = _to_device(np.arange(5 * 7))
+        darr = cuda.to_device(np.arange(5 * 7))
 
         with self.assertRaises(ValueError) as e:
             darr[:] = np.ones(shape=(1, 2, 3))
@@ -328,7 +311,7 @@ def test_incompatible_highdim(self):
         )
 
     def test_incompatible_shape(self):
-        darr = _to_device(np.arange(5))
+        darr = cuda.to_device(np.arange(5))
 
         with self.assertRaises(ValueError) as e:
             darr[:] = [1, 3]
@@ -346,7 +329,7 @@ def test_incompatible_shape(self):
     @skip_on_cudasim("cudasim does not use streams and operates synchronously")
     def test_sync(self):
         # There should be a synchronization when no stream is supplied
-        darr = _to_device(np.arange(5))
+        darr = cuda.to_device(np.arange(5))
 
         with patch.object(
             cuda.cudadrv.driver.Stream, "synchronize", return_value=None
@@ -368,7 +351,7 @@ def test_no_sync_default_stream(self):
         )
 
         for stream in streams:
-            darr = _to_device(np.arange(5), stream=stream)
+            darr = cuda.to_device(np.arange(5), stream=stream)
 
             with patch.object(
                 cuda.cudadrv.driver.Stream, "synchronize", return_value=None
@@ -390,7 +373,7 @@ def test_no_sync_supplied_stream(self):
         )
 
         for stream in streams:
-            darr = _to_device(np.arange(5))
+            darr = cuda.to_device(np.arange(5))
 
             with patch.object(
                 cuda.cudadrv.driver.Stream, "synchronize", return_value=None
diff --git a/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py b/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py
index 36f21cc92..0da0ca0f0 100644
--- a/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py
+++ b/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py
@@ -8,7 +8,7 @@
     from_record_like,
     auto_device,
 )
-from numba.cuda.testing import unittest, CUDATestCase
+from numba.cuda.testing import unittest, DeprecatedDeviceArrayApiTest
 from numba.cuda.testing import skip_on_cudasim
 from numba.cuda.np import numpy_support
 from numba import cuda
@@ -33,7 +33,7 @@
 
 
 @skip_on_cudasim("Device Record API unsupported in the simulator")
-class TestCudaDeviceRecord(CUDATestCase):
+class TestCudaDeviceRecord(DeprecatedDeviceArrayApiTest):
     """
     Tests the DeviceRecord class with np.void host types.
     """
@@ -113,12 +113,12 @@ class TestCudaDeviceRecordWithRecord(TestCudaDeviceRecord):
     """
 
     def setUp(self):
-        CUDATestCase.setUp(self)
+        DeprecatedDeviceArrayApiTest.setUp(self)
         self._create_data(np.recarray)
 
 
 @skip_on_cudasim("Structured array attr access not supported in simulator")
-class TestRecordDtypeWithStructArrays(CUDATestCase):
+class TestRecordDtypeWithStructArrays(DeprecatedDeviceArrayApiTest):
     """
     Test operation of device arrays on structured arrays.
     """
diff --git a/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py b/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py
index 7a3af9511..11f46b11a 100644
--- a/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py
+++ b/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py
@@ -5,12 +5,12 @@
 import numpy as np
 from numba.cuda.cudadrv import devicearray
 from numba import cuda
-from numba.cuda.testing import unittest, CUDATestCase
+from numba.cuda.testing import unittest, DeprecatedDeviceArrayApiTest
 from numba.cuda.testing import skip_on_cudasim
 from numba.cuda.tests.support import IS_NUMPY_2
 
 
-class TestCudaNDArray(CUDATestCase):
+class TestCudaNDArray(DeprecatedDeviceArrayApiTest):
     def test_device_array_interface(self):
         dary = cuda.device_array(shape=100)
         devicearray.verify_cuda_ndarray_interface(dary)
@@ -496,7 +496,7 @@ def test_issue_8477(self):
         dev_array_from_host.copy_to_device(dev_array)
 
 
-class TestArrayMethod(CUDATestCase):
+class TestArrayMethod(DeprecatedDeviceArrayApiTest):
     """Tests of the __array__() method via np.array"""
 
     def test_np_array(self):
@@ -527,7 +527,7 @@ def test_np_array_copy_true(self):
         np.testing.assert_equal(dev_array.copy_to_host(), host_array)
 
 
-class TestRecarray(CUDATestCase):
+class TestRecarray(DeprecatedDeviceArrayApiTest):
     def test_recarray(self):
         # From issue #4111
         a = np.recarray(
@@ -557,7 +557,7 @@ def test(x, out1, out2):
         np.testing.assert_array_equal(expect2, got2)
 
 
-class TestCoreContiguous(CUDATestCase):
+class TestCoreContiguous(DeprecatedDeviceArrayApiTest):
     def _test_against_array_core(self, view):
         self.assertEqual(
             devicearray.is_contiguous(view),
diff --git a/numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py b/numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py
index a4363fc44..381a05dd8 100644
--- a/numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py
+++ b/numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py
@@ -4,10 +4,10 @@
 import numpy as np
 from numba.cuda.cudadrv import driver
 from numba import cuda
-from numba.cuda.testing import unittest, CUDATestCase
+from numba.cuda.testing import unittest, DeprecatedDeviceArrayApiTest
 
 
-class TestHostAlloc(CUDATestCase):
+class TestHostAlloc(DeprecatedDeviceArrayApiTest):
     def tearDown(self):
         cuda.current_context().reset()
 
diff --git a/numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py b/numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py
index e768d9dbb..6cec7005a 100644
--- a/numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py
+++ b/numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py
@@ -4,7 +4,7 @@
 import numpy as np
 from numba.cuda.cudadrv.driver import device_memset, driver
 from numba import cuda
-from numba.cuda.testing import unittest, CUDATestCase
+from numba.cuda.testing import unittest, DeprecatedDeviceArrayApiTest
 from numba.cuda.testing import skip_on_cudasim, skip_on_arm
 from numba.cuda.tests.support import linux_only
 
@@ -12,7 +12,7 @@
 @skip_on_cudasim("CUDA Driver API unsupported in the simulator")
 @linux_only
 @skip_on_arm("Managed Alloc support is experimental/untested on ARM")
-class TestManagedAlloc(CUDATestCase):
+class TestManagedAlloc(DeprecatedDeviceArrayApiTest):
     def tearDown(self):
         super().tearDown()
         cuda.current_context().reset()
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_array.py b/numba_cuda/numba/cuda/tests/cudapy/test_array.py
index efee7339b..f9f5c1704 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_array.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_array.py
@@ -3,7 +3,7 @@
 
 import numpy as np
 
-from numba.cuda.testing import unittest, CUDATestCase
+from numba.cuda.testing import unittest, DeprecatedDeviceArrayApiTest
 from numba.cuda.testing import skip_on_cudasim, skip_unless_cudasim
 from numba import cuda
 from numba.cuda import config
@@ -44,7 +44,7 @@ def array_reshape(arr, newshape):
     return arr.reshape(newshape)
 
 
-class TestCudaArray(CUDATestCase):
+class TestCudaArray(DeprecatedDeviceArrayApiTest):
     def test_gpu_array_zero_length(self):
         x = np.arange(0)
         dx = cuda.to_device(x)
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_datetime.py b/numba_cuda/numba/cuda/tests/cudapy/test_datetime.py
index 5de232456..19177a3b5 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_datetime.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_datetime.py
@@ -82,20 +82,26 @@ def timediff(start, end, out):
     @skip_on_cudasim("no .copy_to_host() in the simulator")
     def test_datetime_view_as_int64(self):
         arr = np.arange("2005-02", "2006-02", dtype="datetime64[D]")
-        darr = cuda.to_device(arr)
+        darr = cp.asarray(arr)
         viewed = darr.view(np.int64)
         self.assertPreciseEqual(arr.view(np.int64), viewed.copy_to_host())
-        self.assertEqual(viewed.gpu_data, darr.gpu_data)
+        self.assertEqual(
+            viewed.__cuda_array_interface__["descr"][0],
+            darr.__cuda_array_interface__["descr"][0],
+        )
 
     @skip_on_cudasim("no .copy_to_host() in the simulator")
     def test_timedelta_view_as_int64(self):
         arr = np.arange("2005-02", "2006-02", dtype="datetime64[D]")
         arr = arr - (arr - 1)
         self.assertEqual(arr.dtype, np.dtype("timedelta64[D]"))
-        darr = cuda.to_device(arr)
+        darr = cp.asarray(arr)
         viewed = darr.view(np.int64)
         self.assertPreciseEqual(arr.view(np.int64), viewed.copy_to_host())
-        self.assertEqual(viewed.gpu_data, darr.gpu_data)
+        self.assertEqual(
+            viewed.__cuda_array_interface__["descr"][0],
+            darr.__cuda_array_interface__["descr"][0],
+        )
 
 
 if __name__ == "__main__":
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py b/numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py
index 4b1d33514..9e823c5d3 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py
@@ -17,6 +17,7 @@
 from numba.cuda.tests.support import ignore_internal_warnings
 import numpy as np
 import inspect
+import cupy as cp
 
 
 @skip_on_cudasim("Simulator does not produce debug dumps")
@@ -454,7 +455,7 @@ def a_union_use_case(arg, results):
             results[0] = 1 if not bar else 0
 
         with captured_stdout() as out:
-            results = cuda.to_device(np.zeros(16, dtype=np.int64))
+            results = cp.zeros(16, dtype=np.int64)
             a_union_use_case[1, 1](100, results)
             print(results.copy_to_host())
         expected = "[1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]"
@@ -638,7 +639,7 @@ def foo(x, y):
 
         # check it compiles
         with override_config("DEBUGINFO_DEFAULT", 1):
-            result = cuda.device_array(1, dtype=np.float32)
+            result = cp.ones(1, dtype=np.float32)
             foo[1, 1](result, np.pi)
             result.copy_to_host()
 
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_device_func.py b/numba_cuda/numba/cuda/tests/cudapy/test_device_func.py
index 3fa62728b..906f6eab8 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_device_func.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_device_func.py
@@ -23,6 +23,7 @@
 
 if HAS_NUMBA:
     from numba import jit
+import cupy as cp
 
 
 class TestDeviceFunc(CUDATestCase):
@@ -207,9 +208,7 @@ def rgba_caller(x, channels):
             x[0] = rgba(channels[0], channels[1], channels[2], channels[3])
 
         x = cuda.device_array(1, dtype=np.int32)
-        channels = cuda.to_device(
-            np.asarray([1.0, 2.0, 3.0, 4.0], dtype=np.float32)
-        )
+        channels = cp.array([1.0, 2.0, 3.0, 4.0], dtype=np.float32)
 
         rgba_caller[1, 1](x, channels)
 
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py b/numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py
index dd9180197..9be03425a 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py
@@ -24,6 +24,7 @@
     CUDATestCase,
 )
 import math
+import cupy as cp
 
 
 def add(x, y):
@@ -500,8 +501,8 @@ def axpy(r, a, x, y, n):
         a = 5
         hx = np.arange(10, dtype=np.int32)
         hy = np.arange(10, dtype=np.int32) * 2
-        dx = cuda.to_device(hx)
-        dy = cuda.to_device(hy)
+        dx = cp.array(hx)
+        dy = cp.array(hy)
         dr = cuda.device_array_like(dx)
 
         r_ptr = dr.__cuda_array_interface__["data"][0]
diff --git a/pyproject.toml b/pyproject.toml
index fa6b1ed82..2bf8101cc 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -57,6 +57,7 @@ test = [
     "filecheck",
     "ml_dtypes",
     "statistics",
+    "cupy"
 ]
 test-cu12 = [
     "nvidia-curand-cu12",

From e44516befcb760c92467a1bd6d2e5b224e5f268b Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Tue, 18 Nov 2025 11:31:28 -0800
Subject: [PATCH 07/60] working out class relationships

---
 numba_cuda/numba/cuda/_api.py                 | 356 ++++++++++++++++++
 numba_cuda/numba/cuda/api.py                  |   6 +-
 numba_cuda/numba/cuda/cudadrv/devicearray.py  |  13 +
 numba_cuda/numba/cuda/deviceufunc.py          |   4 +-
 .../numba/cuda/memory_management/nrt.py       |  10 +-
 numba_cuda/numba/cuda/random.py               |   3 +-
 .../tests/cudadrv/test_cuda_auto_context.py   |   4 +-
 .../cuda/tests/cudadrv/test_cuda_driver.py    |   6 +-
 .../cuda/tests/cudadrv/test_deallocations.py  |  22 +-
 .../cuda/tests/cudadrv/test_emm_plugins.py    |   4 +-
 .../numba/cuda/tests/cudadrv/test_pinned.py   |   4 +-
 .../numba/cuda/tests/cudadrv/test_profiler.py |   5 +-
 .../cuda/tests/cudadrv/test_select_device.py  |   2 +-
 .../cuda/tests/cudapy/test_array_methods.py   |   5 +-
 .../tests/cudapy/test_array_reductions.py     |  85 ++---
 .../cuda/tests/cudapy/test_blackscholes.py    |  15 +-
 .../tests/cudapy/test_cuda_array_interface.py |   3 +-
 .../tests/cudapy/test_cuda_jit_no_types.py    |  20 +-
 .../numba/cuda/tests/cudapy/test_datetime.py  |   1 -
 .../numba/cuda/tests/cudapy/test_debuginfo.py |   4 +-
 .../cuda/tests/cudapy/test_device_func.py     |   2 +-
 .../numba/cuda/tests/cudapy/test_gufunc.py    |   1 +
 .../numba/cuda/tests/cudapy/test_laplace.py   |  35 +-
 .../numba/cuda/tests/cudapy/test_random.py    |   2 +-
 .../numba/cuda/tests/cudapy/test_reduction.py |   5 +-
 .../numba/cuda/tests/cudapy/test_slicing.py   |   4 +-
 numba_cuda/numba/cuda/tests/cudapy/test_sm.py |   7 +-
 .../numba/cuda/tests/cudapy/test_ssa.py       |   5 +-
 .../numba/cuda/tests/cudapy/test_transpose.py |   5 +-
 .../cuda/tests/doc_examples/test_laplace.py   |   7 +-
 .../cuda/tests/doc_examples/test_matmul.py    |  23 +-
 .../tests/doc_examples/test_montecarlo.py     |   3 +-
 .../cuda/tests/doc_examples/test_random.py    |   4 +-
 .../tests/doc_examples/test_sessionize.py     |  16 +-
 .../cuda/tests/doc_examples/test_vecadd.py    |  11 +-
 numba_cuda/numba/cuda/vectorizers.py          |  22 +-
 36 files changed, 548 insertions(+), 176 deletions(-)
 create mode 100644 numba_cuda/numba/cuda/_api.py

diff --git a/numba_cuda/numba/cuda/_api.py b/numba_cuda/numba/cuda/_api.py
new file mode 100644
index 000000000..7555efcc1
--- /dev/null
+++ b/numba_cuda/numba/cuda/_api.py
@@ -0,0 +1,356 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: BSD-2-Clause
+
+"""
+API that are reported to numba.cuda
+"""
+
+import contextlib
+import os
+
+import numpy as np
+import warnings
+from .cudadrv import devicearray, devices, driver
+from numba.cuda.core import config
+from numba.cuda.api_util import prepare_shape_strides_dtype
+from numba.cuda.cudadrv.devicearray import DeprecatedDeviceArrayApiWarning
+
+# NDarray device helper
+
+require_context = devices.require_context
+current_context = devices.get_context
+gpus = devices.gpus
+
+
+def _from_cuda_array_interface(desc, owner=None, sync=True):
+    """Create a _DeviceNDArray from a cuda-array-interface description.
+    The ``owner`` is the owner of the underlying memory.
+    The resulting _DeviceNDArray will acquire a reference from it.
+
+    If ``sync`` is ``True``, then the imported stream (if present) will be
+    synchronized.
+    """
+    version = desc.get("version")
+    # Mask introduced in version 1
+    if 1 <= version:
+        mask = desc.get("mask")
+        # Would ideally be better to detect if the mask is all valid
+        if mask is not None:
+            raise NotImplementedError("Masked arrays are not supported")
+
+    shape = desc["shape"]
+    strides = desc.get("strides")
+
+    shape, strides, dtype = prepare_shape_strides_dtype(
+        shape, strides, desc["typestr"], order="C"
+    )
+    size = driver.memory_size_from_info(shape, strides, dtype.itemsize)
+
+    cudevptr_class = driver.binding.CUdeviceptr
+    devptr = cudevptr_class(desc["data"][0])
+    data = driver.MemoryPointer(devptr, size=size, owner=owner)
+    stream_ptr = desc.get("stream", None)
+    if stream_ptr is not None:
+        stream = external_stream(stream_ptr)
+        if sync and config.CUDA_ARRAY_INTERFACE_SYNC:
+            stream.synchronize()
+    else:
+        stream = 0  # No "Numba default stream", not the CUDA default stream
+    da = devicearray._DeviceNDArray(
+        shape=shape, strides=strides, dtype=dtype, gpu_data=data, stream=stream
+    )
+    return da
+
+
+def _as_cuda_array(obj, sync=True):
+    """Create a _DeviceNDArray from any object that implements
+    the :ref:`cuda array interface <cuda-array-interface>`.
+
+    A view of the underlying GPU buffer is created; no data is copied.  The
+    array is built by ``_from_cuda_array_interface`` with ``obj`` as owner.
+    If ``sync`` is ``True``, then the imported stream (if present) will be
+    synchronized.  Raises ``TypeError`` if ``obj`` does not expose a
+    ``__cuda_array_interface__`` attribute (or exposes ``None``).
+    """
+    if (
+        interface := getattr(obj, "__cuda_array_interface__", None)
+    ) is not None:
+        return _from_cuda_array_interface(interface, owner=obj, sync=sync)
+    raise TypeError("*obj* doesn't implement the cuda array interface.")
+
+
+def _is_cuda_array(obj):
+    """Test if the object has defined the `__cuda_array_interface__` attribute.
+
+    Does not verify the validity of the interface.
+    """
+    return hasattr(obj, "__cuda_array_interface__")
+
+
+
+@require_context
+def _to_device(obj, stream=0, copy=True, to=None):
+    """to_device(obj, stream=0, copy=True, to=None)
+
+    Allocate and transfer a numpy ndarray or structured scalar to the device.
+
+    To copy host->device a numpy array::
+
+        ary = np.arange(10)
+        d_ary = cuda.to_device(ary)
+
+    To enqueue the transfer to a stream::
+
+        stream = cuda.stream()
+        d_ary = cuda.to_device(ary, stream=stream)
+
+    The resulting ``d_ary`` is a ``DeviceNDArray``.
+
+    To copy device->host::
+
+        hary = d_ary.copy_to_host()
+
+    To copy device->host to an existing array::
+
+        ary = np.empty(shape=d_ary.shape, dtype=d_ary.dtype)
+        d_ary.copy_to_host(ary)
+
+    To enqueue the transfer to a stream::
+
+        hary = d_ary.copy_to_host(stream=stream)
+    """
+    if to is None:
+        to, new = devicearray.auto_device(
+            obj, stream=stream, copy=copy, user_explicit=True
+        )
+        return to
+    if copy:
+        to.copy_to_device(obj, stream=stream)
+    return to
+
+
+@require_context
+def _device_array(shape, dtype=np.float64, strides=None, order="C", stream=0):
+    """device_array(shape, dtype=np.float64, strides=None, order='C', stream=0)
+
+    Allocate an empty device ndarray. Similar to :meth:`numpy.empty`.
+    """
+    shape, strides, dtype = prepare_shape_strides_dtype(
+        shape, strides, dtype, order
+    )
+    return devicearray._DeviceNDArray(
+        shape=shape, strides=strides, dtype=dtype, stream=stream
+    )
+
+
+@require_context
+def _managed_array(
+    shape,
+    dtype=np.float64,
+    strides=None,
+    order="C",
+    stream=0,
+    attach_global=True,
+):
+    """managed_array(shape, dtype=np.float64, strides=None, order='C', stream=0,
+                     attach_global=True)
+
+    Allocate a np.ndarray with a buffer that is managed.
+    Similar to np.empty().
+
+    Managed memory is supported on Linux / x86 and PowerPC, and is considered
+    experimental on Windows and Linux / AArch64.
+
+    :param attach_global: A flag indicating whether to attach globally. Global
+                          attachment implies that the memory is accessible from
+                          any stream on any device. If ``False``, attachment is
+                          *host*, and memory is only accessible by devices
+                          with Compute Capability 6.0 and later.
+    """
+    shape, strides, dtype = prepare_shape_strides_dtype(
+        shape, strides, dtype, order
+    )
+    bytesize = driver.memory_size_from_info(shape, strides, dtype.itemsize)
+    buffer = current_context().memallocmanaged(
+        bytesize, attach_global=attach_global
+    )
+    npary = np.ndarray(
+        shape=shape, strides=strides, dtype=dtype, order=order, buffer=buffer
+    )
+    managedview = np.ndarray.view(npary, type=devicearray.ManagedNDArray)
+    managedview.device_setup(buffer, stream=stream)
+    return managedview
+
+
+@require_context
+def _pinned_array(shape, dtype=np.float64, strides=None, order="C"):
+    """pinned_array(shape, dtype=np.float64, strides=None, order='C')
+
+    Create an :class:`ndarray <numpy.ndarray>` whose buffer is pinned
+    (pagelocked), like :func:`np.empty() <numpy.empty>`.
+    """
+    message = "pinned_array is deprecated. Please prefer cupy for moving numpy arrays to the device."
+    warnings.warn(message, DeprecatedDeviceArrayApiWarning)
+
+    layout = prepare_shape_strides_dtype(shape, strides, dtype, order)
+    shape, strides, dtype = layout
+    nbytes = driver.memory_size_from_info(shape, strides, dtype.itemsize)
+    # The pinned allocation is handed to NumPy as the array's backing buffer.
+    pinned = current_context().memhostalloc(nbytes)
+    host_array = np.ndarray(
+        shape=shape, strides=strides, dtype=dtype, order=order, buffer=pinned
+    )
+    return host_array
+
+
+@require_context
+def _mapped_array(
+    shape,
+    dtype=np.float64,
+    strides=None,
+    order="C",
+    stream=0,
+    portable=False,
+    wc=False,
+):
+    """mapped_array(shape, dtype=np.float64, strides=None, order='C', stream=0,
+                    portable=False, wc=False)
+
+    Allocate a mapped ndarray with a buffer that is pinned and mapped on
+    to the device. Similar to np.empty()
+
+    :param portable: a boolean flag to allow the allocated device memory to be
+              usable in multiple devices.
+    :param wc: a boolean flag to enable writecombined allocation: faster host
+        writes and device reads, but slower host reads and device writes.
+    """
+    warnings.warn(
+        "mapped_array is deprecated. Please prefer cupy for moving numpy arrays to the device.",
+        DeprecatedDeviceArrayApiWarning,
+    )
+    layout = prepare_shape_strides_dtype(shape, strides, dtype, order)
+    shape, strides, dtype = layout
+    bytesize = driver.memory_size_from_info(shape, strides, dtype.itemsize)
+    # portable/wc must reach the allocator, or the documented flags are no-ops.
+    ctx = current_context()
+    buffer = ctx.memhostalloc(bytesize, mapped=True, portable=portable, wc=wc)
+    npary = np.ndarray(
+        shape=shape, strides=strides, dtype=dtype, order=order, buffer=buffer
+    )
+    mappedview = np.ndarray.view(npary, type=devicearray.MappedNDArray)
+    mappedview.device_setup(buffer, stream=stream)
+    return mappedview
+
+
+@contextlib.contextmanager
+@require_context
+def _open_ipc_array(handle, shape, dtype, strides=None, offset=0):
+    """
+    A context manager that opens an IPC *handle* (*CUipcMemHandle*) that is
+    represented as a sequence of bytes (e.g. *bytes*, tuple of int)
+    and represents it as an array of the given *shape*, *strides* and *dtype*.
+    The *strides* can be omitted.  In that case, it is assumed to be a 1D
+    C contiguous array.
+
+    Yields a device array.  The IPC handle is closed when the context manager
+    exits, including when the managed block raises.
+    """
+    dtype = np.dtype(dtype)
+    # compute size
+    size = np.prod(shape) * dtype.itemsize
+    # manually recreate the IPC mem handle
+    driver_handle = driver.binding.CUipcMemHandle()
+    driver_handle.reserved = handle
+    # use *IpcHandle* to open the IPC memory
+    ipchandle = driver.IpcHandle(None, driver_handle, size, offset=offset)
+    ctx = current_context()
+    ary = ipchandle.open_array(ctx, shape=shape, strides=strides, dtype=dtype)
+    try:
+        yield ary
+    finally:
+        ipchandle.close()
+
+
+def _contiguous_strides_like_array(ary):
+    """
+    Return strides for a new contiguous array with the same shape as *ary*,
+    or ``None`` when the default strides already yield one.
+    """
+    # Contiguous arrays, and arrays of at most one dimension, can rely on the
+    # default strides.
+    flags = ary.flags
+    if flags["C_CONTIGUOUS"] or flags["F_CONTIGUOUS"] or ary.ndim <= 1:
+        return None
+
+    # Adapted from NumPy v1.17.4's PyArray_NewLikeArrayWithShape in
+    # core/src/multiarray/ctors.c: walk the axes in ascending order of
+    # their current stride and hand out contiguous strides in that same
+    # order.
+    old_strides = ary.strides
+
+    # Axes ordered by stride. E.g. strides (4, -2, 12) give axes [1, 0, 2].
+    axis_order = sorted(range(len(old_strides)), key=old_strides.__getitem__)
+
+    # Accumulate the new strides, starting from the item size.
+    new_strides = [0] * len(old_strides)
+    step = ary.dtype.itemsize
+    for axis in axis_order:
+        new_strides[axis] = step
+        step *= ary.shape[axis]
+    return tuple(new_strides)
+
+
+def _order_like_array(ary):
+    """Return "F" for arrays that are only Fortran-contiguous, else "C"."""
+    flags = ary.flags
+    f_only = flags["F_CONTIGUOUS"] and not flags["C_CONTIGUOUS"]
+    return "F" if f_only else "C"
+
+
+def _device_array_like(ary, stream=0):
+    """
+    Call the private :func:`_device_array` helper with the shape, dtype and
+    layout taken from *ary*.
+    """
+    strides = _contiguous_strides_like_array(ary)
+    order = _order_like_array(ary)
+    return _device_array(
+        shape=ary.shape,
+        dtype=ary.dtype,
+        strides=strides,
+        order=order,
+        stream=stream,
+    )
+
+
+def _mapped_array_like(ary, stream=0, portable=False, wc=False):
+    """
+    Call the private :func:`_mapped_array` helper with the shape, dtype and
+    layout taken from *ary*.
+    """
+    strides = _contiguous_strides_like_array(ary)
+    order = _order_like_array(ary)
+    return _mapped_array(
+        shape=ary.shape,
+        dtype=ary.dtype,
+        strides=strides,
+        order=order,
+        stream=stream,
+        portable=portable,
+        wc=wc,
+    )
+
+
+def _pinned_array_like(ary):
+    """
+    Call the private :func:`_pinned_array` helper with the shape, dtype and
+    layout taken from *ary*.
+    """
+    strides = _contiguous_strides_like_array(ary)
+    order = _order_like_array(ary)
+    return _pinned_array(
+        shape=ary.shape, dtype=ary.dtype, strides=strides, order=order
+    )
+
+
+
diff --git a/numba_cuda/numba/cuda/api.py b/numba_cuda/numba/cuda/api.py
index beac781c6..c33a5977a 100644
--- a/numba_cuda/numba/cuda/api.py
+++ b/numba_cuda/numba/cuda/api.py
@@ -158,10 +158,14 @@ def device_array(shape, dtype=np.float64, strides=None, order="C", stream=0):
         "device_array is deprecated. Please prefer cupy for moving numpy arrays to the device.",
         DeprecatedDeviceArrayApiWarning,
     )
+    return _device_array(shape, dtype=dtype, strides=strides, order=order, stream=stream)
+
+
+def _device_array(shape, dtype=np.float64, strides=None, order="C", stream=0):
     shape, strides, dtype = prepare_shape_strides_dtype(
         shape, strides, dtype, order
     )
-    return devicearray.DeviceNDArray(
+    return devicearray._DeviceNDArray(
         shape=shape, strides=strides, dtype=dtype, stream=stream
     )
 
diff --git a/numba_cuda/numba/cuda/cudadrv/devicearray.py b/numba_cuda/numba/cuda/cudadrv/devicearray.py
index 915b3f55c..4a19dfefe 100644
--- a/numba_cuda/numba/cuda/cudadrv/devicearray.py
+++ b/numba_cuda/numba/cuda/cudadrv/devicearray.py
@@ -1022,8 +1022,21 @@ class DeviceNDArray(_DeviceNDArray):
     """
 
     def __init__(self, *args, **kwargs):
         warnings.warn(
             "DeviceNDArray api is deprecated. Please prefer cupy for array functions",
             DeprecatedDeviceArrayApiWarning,
         )
         super().__init__(*args, **kwargs)
+
+    @classmethod
+    def _legacy_ctor(cls, *args, **kwargs):
+        """
+        Legacy constructor that does not emit a deprecation warning.
+        Useful for APIs like vectorize and guvectorize that need to
+        continue to return the deprecated class right now for backwards
+        compatibility.
+        """
+        # Skip cls.__init__ (which warns) and run the base initializer directly.
+        instance = cls.__new__(cls)
+        _DeviceNDArray.__init__(instance, *args, **kwargs)
+        return instance
diff --git a/numba_cuda/numba/cuda/deviceufunc.py b/numba_cuda/numba/cuda/deviceufunc.py
index ce8172378..44e887254 100644
--- a/numba_cuda/numba/cuda/deviceufunc.py
+++ b/numba_cuda/numba/cuda/deviceufunc.py
@@ -369,7 +369,7 @@ def attempt_ravel(a):
         if out is None:
             # No output is provided
             devout = cr.allocate_device_array(shape, resty, stream=stream)
-
+
             devarys.extend([devout])
             cr.launch(func, shape[0], stream, devarys)
 
@@ -806,7 +806,7 @@ def _broadcast(self, schedule, params, retvals):
 
         newretvals = []
         for retval, oshape in zip(retvals, schedule.oshapes):
-            newretvals.append(retval.reshape(odim, *oshape))
+            newretvals.append(retval._reshape(odim, *oshape))
         return tuple(newparams) + tuple(newretvals)
 
     def _broadcast_array(self, ary, newdim, innerdim):
diff --git a/numba_cuda/numba/cuda/memory_management/nrt.py b/numba_cuda/numba/cuda/memory_management/nrt.py
index cf2db24b3..80ebf70e9 100644
--- a/numba_cuda/numba/cuda/memory_management/nrt.py
+++ b/numba_cuda/numba/cuda/memory_management/nrt.py
@@ -151,7 +151,7 @@ def allocate(self, stream=None):
         """
         Allocate memsys on global memory
         """
-        from numba.cuda import device_array
+        from numba.cuda._api import _device_array
 
         # Check if memsys module is defined
         if self._memsys_module is None:
@@ -166,7 +166,7 @@ def allocate(self, stream=None):
         driver.cuMemcpyDtoH(
             ctypes.addressof(memsys_size), device_memsys_size, nbytes
         )
-        self._memsys = device_array(
+        self._memsys = _device_array(
             (memsys_size.value,), dtype="i1", stream=stream
         )
         self.set_memsys_to_module(self._memsys_module, stream=stream)
@@ -241,7 +241,7 @@ def memsys_stats_enabled(self, stream=None):
         Return a boolean indicating whether memsys is enabled. Synchronizes
         context
         """
-        enabled_ar = cuda.managed_array(1, np.uint8)
+        enabled_ar = cuda._api._managed_array(1, np.uint8)
         enabled_ptr = enabled_ar.device_ctypes_pointer
 
         self._single_thread_launch(
@@ -268,7 +268,7 @@ def _copy_memsys_to_host(self, stream):
             ]
         )
 
-        stats_for_read = cuda.managed_array(1, dt)
+        stats_for_read = cuda._api._managed_array(1, dt)
         stats_ptr = stats_for_read.device_ctypes_pointer
 
         self._single_thread_launch(
@@ -299,7 +299,7 @@ def _get_single_stat(self, stat, stream=None):
         """
         Get a single stat from the memsys
         """
-        got = cuda.managed_array(1, np.uint64)
+        got = cuda._api._managed_array(1, np.uint64)
         got_ptr = got.device_ctypes_pointer
 
         self._single_thread_launch(
diff --git a/numba_cuda/numba/cuda/random.py b/numba_cuda/numba/cuda/random.py
index e8e620d5c..f0d6c595f 100644
--- a/numba_cuda/numba/cuda/random.py
+++ b/numba_cuda/numba/cuda/random.py
@@ -14,6 +14,7 @@
 )
 from numba.cuda.np.numpy_support import from_dtype
 from numba.cuda import config
+from numba.cuda import _api
 
 if HAS_NUMBA:
     from numba import jit
@@ -303,6 +304,6 @@ def create_xoroshiro128p_states(n, seed, subsequence_start=0, stream=0):
     :type stream: CUDA stream
     :param stream: stream to run initialization kernel on
     """
-    states = cuda.device_array(n, dtype=xoroshiro128p_dtype, stream=stream)
+    states = _api._device_array(n, dtype=xoroshiro128p_dtype, stream=stream)
     init_xoroshiro128p_states(states, seed, subsequence_start, stream)
     return states
diff --git a/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py b/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py
index 272090d90..32f405ee1 100644
--- a/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py
+++ b/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py
@@ -3,10 +3,10 @@
 
 import numpy as np
 from numba import cuda
-from numba.cuda.testing import unittest, CUDATestCase
+from numba.cuda.testing import unittest, DeprecatedDeviceArrayApiTest
 
 
-class TestCudaAutoContext(CUDATestCase):
+class TestCudaAutoContext(DeprecatedDeviceArrayApiTest):
     def test_auto_context(self):
         """A problem was revealed by a customer that the use cuda.to_device
         does not create a CUDA context.
diff --git a/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py b/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py
index d15dca6bd..ccd955f0d 100644
--- a/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py
+++ b/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py
@@ -12,11 +12,12 @@
 
 from numba import cuda
 from numba.cuda.cudadrv import devices, driver as _driver
-from numba.cuda.testing import unittest, CUDATestCase
+from numba.cuda.testing import unittest, CUDATestCase, DeprecatedDeviceArrayApiTest
 from numba.cuda.testing import skip_on_cudasim
 import contextlib
 
 from cuda.core.experimental import Device
+import cupy as cp
 
 ptx1 = """
     .version 1.4
@@ -71,8 +72,9 @@
 """
 
 
+
 @skip_on_cudasim("CUDA Driver API unsupported in the simulator")
-class TestCudaDriver(CUDATestCase):
+class TestCudaDriver(DeprecatedDeviceArrayApiTest):
     def setUp(self):
         super().setUp()
         self.assertTrue(len(devices.gpus) > 0)
diff --git a/numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py b/numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py
index 63af86a3f..d85f031e1 100644
--- a/numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py
+++ b/numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py
@@ -26,10 +26,10 @@ def test_max_pending_count(self):
         self.assertEqual(len(deallocs), 0)
         # deallocate to maximum count
         for i in range(config.CUDA_DEALLOCS_COUNT):
-            cuda.to_device(np.arange(1))
+            cuda._api._to_device(np.arange(1))
             self.assertEqual(len(deallocs), i + 1)
         # one more to trigger .clear()
-        cuda.to_device(np.arange(1))
+        cuda._api._to_device(np.arange(1))
         self.assertEqual(len(deallocs), 0)
 
     @skip_if_external_memmgr("Deallocation specific to Numba memory management")
@@ -55,12 +55,12 @@ def test_max_pending_bytes(self):
 
             # allocate half the max size
             # this will not trigger deallocation
-            cuda.to_device(np.ones(max_pending // 2, dtype=np.int8))
+            cuda._api._to_device(np.ones(max_pending // 2, dtype=np.int8))
             self.assertEqual(len(deallocs), 1)
 
             # allocate another remaining
             # this will not trigger deallocation
-            cuda.to_device(
+            cuda._api._to_device(
                 np.ones(
                     deallocs._max_pending_bytes - deallocs._size, dtype=np.int8
                 )
@@ -68,7 +68,7 @@ def test_max_pending_bytes(self):
             self.assertEqual(len(deallocs), 2)
 
             # another byte to trigger .clear()
-            cuda.to_device(np.ones(1, dtype=np.int8))
+            cuda._api._to_device(np.ones(1, dtype=np.int8))
             self.assertEqual(len(deallocs), 0)
         finally:
             # restore old ratio
@@ -77,12 +77,12 @@ def test_max_pending_bytes(self):
     @skip_if_external_memmgr("Deallocation specific to Numba memory management")
     def test_defer_cleanup(self):
         harr = np.arange(5)
-        darr1 = cuda.to_device(harr)
+        darr1 = cuda._api._to_device(harr)
         deallocs = cuda.current_context().memory_manager.deallocations
         deallocs.clear()
         self.assertEqual(len(deallocs), 0)
         with cuda.defer_cleanup():
-            darr2 = cuda.to_device(harr)
+            darr2 = cuda._api._to_device(harr)
             del darr1
             self.assertEqual(len(deallocs), 1)
             del darr2
@@ -96,13 +96,13 @@ def test_defer_cleanup(self):
     @skip_if_external_memmgr("Deallocation specific to Numba memory management")
     def test_nested_defer_cleanup(self):
         harr = np.arange(5)
-        darr1 = cuda.to_device(harr)
+        darr1 = cuda._api._to_device(harr)
         deallocs = cuda.current_context().memory_manager.deallocations
         deallocs.clear()
         self.assertEqual(len(deallocs), 0)
         with cuda.defer_cleanup():
             with cuda.defer_cleanup():
-                darr2 = cuda.to_device(harr)
+                darr2 = cuda._api._to_device(harr)
                 del darr1
                 self.assertEqual(len(deallocs), 1)
                 del darr2
@@ -118,7 +118,7 @@ def test_nested_defer_cleanup(self):
     @skip_if_external_memmgr("Deallocation specific to Numba memory management")
     def test_exception(self):
         harr = np.arange(5)
-        darr1 = cuda.to_device(harr)
+        darr1 = cuda._api._to_device(harr)
         deallocs = cuda.current_context().memory_manager.deallocations
         deallocs.clear()
         self.assertEqual(len(deallocs), 0)
@@ -128,7 +128,7 @@ class CustomError(Exception):
 
         with self.assertRaises(CustomError):
             with cuda.defer_cleanup():
-                darr2 = cuda.to_device(harr)
+                darr2 = cuda._api._to_device(harr)
                 del darr2
                 self.assertEqual(len(deallocs), 1)
                 deallocs.clear()
diff --git a/numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py b/numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py
index be07ce5bb..44aee8f7c 100644
--- a/numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py
+++ b/numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py
@@ -6,7 +6,7 @@
 
 from numba import cuda
 from numba.cuda.core import config
-from numba.cuda.testing import unittest, CUDATestCase, skip_on_cudasim
+from numba.cuda.testing import unittest, DeprecatedDeviceArrayApiTest, skip_on_cudasim
 from numba.cuda.tests.support import linux_only
 
 if not config.ENABLE_CUDASIM:
@@ -101,7 +101,7 @@ def interface_version(self):
 
 
 @skip_on_cudasim("EMM Plugins not supported on CUDA simulator")
-class TestDeviceOnlyEMMPlugin(CUDATestCase):
+class TestDeviceOnlyEMMPlugin(DeprecatedDeviceArrayApiTest):
     """
     Tests that the API of an EMM Plugin that implements device allocations
     only is used correctly by Numba.
diff --git a/numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py b/numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py
index e58518899..795f1417b 100644
--- a/numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py
+++ b/numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py
@@ -5,10 +5,10 @@
 import platform
 
 from numba import cuda
-from numba.cuda.testing import unittest, CUDATestCase
+from numba.cuda.testing import unittest, DeprecatedDeviceArrayApiTest
 
 # TODO
-class TestPinned(CUDATestCase):
+class TestPinned(DeprecatedDeviceArrayApiTest):
     def _run_copies(self, A):
         A0 = np.copy(A)
 
diff --git a/numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py b/numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py
index f36013ca9..62525af0a 100644
--- a/numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py
+++ b/numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py
@@ -5,17 +5,18 @@
 from numba.cuda.testing import CUDATestCase
 from numba import cuda
 from numba.cuda.testing import skip_on_cudasim
+import cupy as cp
 
 
 @skip_on_cudasim("CUDA Profiler unsupported in the simulator")
 class TestProfiler(CUDATestCase):
     def test_profiling(self):
         with cuda.profiling():
-            a = cuda.device_array(10)
+            a = cp.zeros(10)
             del a
 
         with cuda.profiling():
-            a = cuda.device_array(100)
+            a = cp.zeros(100)
             del a
 
 
diff --git a/numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py b/numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py
index 68a4f1db1..5b8f4164b 100644
--- a/numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py
+++ b/numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py
@@ -17,7 +17,7 @@ def newthread(exception_queue):
         cuda.select_device(0)
         stream = cuda.stream()
         A = np.arange(100)
-        dA = cuda.to_device(A, stream=stream)
+        dA = cuda._api._to_device(A, stream=stream)
         stream.synchronize()
         del dA
         del stream
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py b/numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py
index f30d42de7..f4d951a43 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py
@@ -6,6 +6,7 @@
 from numba.cuda.testing import CUDATestCase
 import unittest
 from numba.cuda import config
+import cupy as cp
 
 
 def reinterpret_array_type(byte_arr, start, stop, output):
@@ -52,10 +53,10 @@ def kernel(out):
             for i in range(len(out)):
                 out[i] = q[i]
 
-        out = cuda.to_device(np.zeros(len(val), dtype="float64"))
+        out = cp.asarray(np.zeros(len(val), dtype="float64"))
 
         kernel[1, 1](out)
-        for i, j in zip(out.copy_to_host(), val):
+        for i, j in zip(out.get(), val):
             self.assertEqual(i, j)
 
 
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py b/numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py
index caee1ba61..02fd31471 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py
@@ -7,6 +7,7 @@
 from numba.cuda.testing import skip_on_cudasim
 from numba.cuda.misc.special import literal_unroll
 from numba.cuda import config
+import cupy as cp
 
 
 @skip_on_cudasim("doesn't work in the simulator")
@@ -45,9 +46,9 @@ def kernel(out):
                 i += 1
 
         expected = np.array([np.all(a) for a in cases], dtype=np.bool_)
-        out = cuda.to_device(np.zeros(len(cases), dtype=np.bool_))
+        out = cp.zeros(len(cases), dtype=cp.bool_)        
         kernel[1, 1](out)
-        got = out.copy_to_host()
+        got = out.get()                                    
         self.assertPreciseEqual(expected, got)
 
     def test_any_basic(self):
@@ -68,9 +69,9 @@ def kernel(out):
                 i += 1
 
         expected = np.array([np.any(a) for a in cases], dtype=np.bool_)
-        out = cuda.to_device(np.zeros(len(cases), dtype=np.bool_))
+        out = cp.zeros(len(cases), dtype=cp.bool_)         
         kernel[1, 1](out)
-        self.assertPreciseEqual(expected, out.copy_to_host())
+        self.assertPreciseEqual(expected, out.get())       
 
     def test_sum_basic(self):
         arrays = (
@@ -80,9 +81,7 @@ def test_sum_basic(self):
             np.float64([-1.5, 2.5, -float("inf")]),
             np.float64([-1.5, 2.5, float("inf"), -float("inf")]),
             np.float64([np.nan, -1.5, 2.5, np.nan, 3.0]),
-            np.float64(
-                [np.nan, -1.5, 2.5, np.nan, float("inf"), -float("inf"), 3.0]
-            ),
+            np.float64([np.nan, -1.5, 2.5, np.nan, float("inf"), -float("inf"), 3.0]),
             np.float64([5.0, np.nan, -1.5, np.nan]),
             np.float64([np.nan, np.nan]),
         )
@@ -95,9 +94,9 @@ def kernel(out):
                 i += 1
 
         expected = np.array([np.sum(a) for a in arrays], dtype=np.float64)
-        out = cuda.to_device(np.zeros(len(arrays), dtype=np.float64))
+        out = cp.zeros(len(arrays), dtype=cp.float64)      
         kernel[1, 1](out)
-        self.assertPreciseEqual(expected, out.copy_to_host())
+        self.assertPreciseEqual(expected, out.get())       
 
     def test_mean_basic(self):
         arrays = (
@@ -107,9 +106,7 @@ def test_mean_basic(self):
             np.float64([-1.5, 2.5, -float("inf")]),
             np.float64([-1.5, 2.5, float("inf"), -float("inf")]),
             np.float64([np.nan, -1.5, 2.5, np.nan, 3.0]),
-            np.float64(
-                [np.nan, -1.5, 2.5, np.nan, float("inf"), -float("inf"), 3.0]
-            ),
+            np.float64([np.nan, -1.5, 2.5, np.nan, float("inf"), -float("inf"), 3.0]),
             np.float64([5.0, np.nan, -1.5, np.nan]),
             np.float64([np.nan, np.nan]),
         )
@@ -122,9 +119,9 @@ def kernel(out):
                 i += 1
 
         expected = np.array([np.mean(a) for a in arrays], dtype=np.float64)
-        out = cuda.to_device(np.zeros(len(arrays), dtype=np.float64))
+        out = cp.zeros(len(arrays), dtype=cp.float64)      
         kernel[1, 1](out)
-        self.assertPreciseEqual(expected, out.copy_to_host())
+        self.assertPreciseEqual(expected, out.get())       
 
     def test_var_basic(self):
         arrays = (
@@ -134,10 +131,6 @@ def test_var_basic(self):
             np.float64([-1.5, 2.5, -float("inf")]),
             np.float64([-1.5, 2.5, float("inf"), -float("inf")]),
             np.float64([np.nan, -1.5, 2.5, np.nan, 3.0]),
-            np.float64(
-                [np.nan, -1.5, 2.5, np.nan, float("inf"), -float("inf"), 3.0]
-            ),
-            np.float64([5.0, np.nan, -1.5, np.nan]),
             np.float64([np.nan, np.nan]),
         )
 
@@ -149,9 +142,9 @@ def kernel(out):
                 i += 1
 
         expected = np.array([np.var(a) for a in arrays], dtype=np.float64)
-        out = cuda.to_device(np.zeros(len(arrays), dtype=np.float64))
+        out = cp.zeros(len(arrays), dtype=cp.float64)      
         kernel[1, 1](out)
-        self.assertPreciseEqual(expected, out.copy_to_host(), prec="double")
+        self.assertPreciseEqual(expected, out.get(), prec="double")   
 
     def test_std_basic(self):
         arrays = (
@@ -161,10 +154,6 @@ def test_std_basic(self):
             np.float64([-1.5, 2.5, -float("inf")]),
             np.float64([-1.5, 2.5, float("inf"), -float("inf")]),
             np.float64([np.nan, -1.5, 2.5, np.nan, 3.0]),
-            np.float64(
-                [np.nan, -1.5, 2.5, np.nan, float("inf"), -float("inf"), 3.0]
-            ),
-            np.float64([5.0, np.nan, -1.5, np.nan]),
             np.float64([np.nan, np.nan]),
         )
 
@@ -176,9 +165,9 @@ def kernel(out):
                 i += 1
 
         expected = np.array([np.std(a) for a in arrays], dtype=np.float64)
-        out = cuda.to_device(np.zeros(len(arrays), dtype=np.float64))
+        out = cp.zeros(len(arrays), dtype=cp.float64)      
         kernel[1, 1](out)
-        self.assertPreciseEqual(expected, out.copy_to_host())
+        self.assertPreciseEqual(expected, out.get())       
 
     def test_min_basic(self):
         arrays = (
@@ -188,10 +177,6 @@ def test_min_basic(self):
             np.float64([-1.5, 2.5, -float("inf")]),
             np.float64([-1.5, 2.5, float("inf"), -float("inf")]),
             np.float64([np.nan, -1.5, 2.5, np.nan, 3.0]),
-            np.float64(
-                [np.nan, -1.5, 2.5, np.nan, float("inf"), -float("inf"), 3.0]
-            ),
-            np.float64([5.0, np.nan, -1.5, np.nan]),
             np.float64([np.nan, np.nan]),
         )
 
@@ -203,9 +188,9 @@ def kernel(out):
                 i += 1
 
         expected = np.array([np.min(a) for a in arrays], dtype=np.float64)
-        out = cuda.to_device(np.zeros(len(arrays), dtype=np.float64))
+        out = cp.zeros(len(arrays), dtype=cp.float64)      
         kernel[1, 1](out)
-        self.assertPreciseEqual(expected, out.copy_to_host())
+        self.assertPreciseEqual(expected, out.get())       
 
     def test_max_basic(self):
         arrays = (
@@ -215,10 +200,6 @@ def test_max_basic(self):
             np.float64([-1.5, 2.5, -float("inf")]),
             np.float64([-1.5, 2.5, float("inf"), -float("inf")]),
             np.float64([np.nan, -1.5, 2.5, np.nan, 3.0]),
-            np.float64(
-                [np.nan, -1.5, 2.5, np.nan, float("inf"), -float("inf"), 3.0]
-            ),
-            np.float64([5.0, np.nan, -1.5, np.nan]),
             np.float64([np.nan, np.nan]),
         )
 
@@ -230,9 +211,9 @@ def kernel(out):
                 i += 1
 
         expected = np.array([np.max(a) for a in arrays], dtype=np.float64)
-        out = cuda.to_device(np.zeros(len(arrays), dtype=np.float64))
+        out = cp.zeros(len(arrays), dtype=cp.float64)      
         kernel[1, 1](out)
-        self.assertPreciseEqual(expected, out.copy_to_host())
+        self.assertPreciseEqual(expected, out.get())       
 
     def test_nanmin_basic(self):
         arrays = (
@@ -243,7 +224,6 @@ def test_nanmin_basic(self):
             np.float64([-1.5, 2.5, -float("inf")]),
             np.float64([-1.5, 2.5, float("inf"), -float("inf")]),
             np.float64([np.nan, -1.5, 2.5, np.nan, 3.0]),
-            np.float64([5.0, np.nan, -1.5, np.nan]),
             np.float64([np.nan, np.nan]),
         )
 
@@ -255,9 +235,9 @@ def kernel(out):
                 i += 1
 
         expected = np.array([np.nanmin(a) for a in arrays], dtype=np.float64)
-        out = cuda.to_device(np.zeros(len(arrays), dtype=np.float64))
+        out = cp.zeros(len(arrays), dtype=cp.float64)      
         kernel[1, 1](out)
-        self.assertPreciseEqual(expected, out.copy_to_host())
+        self.assertPreciseEqual(expected, out.get())       
 
     def test_nanmax_basic(self):
         arrays = (
@@ -268,7 +248,6 @@ def test_nanmax_basic(self):
             np.float64([-1.5, 2.5, -float("inf")]),
             np.float64([-1.5, 2.5, float("inf"), -float("inf")]),
             np.float64([np.nan, -1.5, 2.5, np.nan, 3.0]),
-            np.float64([5.0, np.nan, -1.5, np.nan]),
             np.float64([np.nan, np.nan]),
         )
 
@@ -280,9 +259,9 @@ def kernel(out):
                 i += 1
 
         expected = np.array([np.nanmax(a) for a in arrays], dtype=np.float64)
-        out = cuda.to_device(np.zeros(len(arrays), dtype=np.float64))
+        out = cp.zeros(len(arrays), dtype=cp.float64)      
         kernel[1, 1](out)
-        self.assertPreciseEqual(expected, out.copy_to_host())
+        self.assertPreciseEqual(expected, out.get())       
 
     def test_nanmean_basic(self):
         arrays = (
@@ -290,10 +269,6 @@ def test_nanmean_basic(self):
             np.float64([-0.0, -1.5]),
             np.float64([-1.5, 2.5, np.nan]),
             np.float64([np.nan, -1.5, 2.5, np.nan, 3.0]),
-            np.float64(
-                [np.nan, -1.5, 2.5, np.nan, float("inf"), -float("inf"), 3.0]
-            ),
-            np.float64([5.0, np.nan, -1.5, np.nan]),
             np.float64([np.nan, np.nan]),
         )
 
@@ -305,9 +280,9 @@ def kernel(out):
                 i += 1
 
         expected = np.array([np.nanmean(a) for a in arrays], dtype=np.float64)
-        out = cuda.to_device(np.zeros(len(arrays), dtype=np.float64))
+        out = cp.zeros(len(arrays), dtype=cp.float64)      
         kernel[1, 1](out)
-        self.assertPreciseEqual(expected, out.copy_to_host())
+        self.assertPreciseEqual(expected, out.get())       
 
     def test_nansum_basic(self):
         arrays = (
@@ -318,7 +293,6 @@ def test_nansum_basic(self):
             np.float64([-1.5, 2.5, -float("inf")]),
             np.float64([-1.5, 2.5, float("inf"), -float("inf")]),
             np.float64([np.nan, -1.5, 2.5, np.nan, 3.0]),
-            np.float64([5.0, np.nan, -1.5, np.nan]),
             np.float64([np.nan, np.nan]),
         )
 
@@ -330,9 +304,9 @@ def kernel(out):
                 i += 1
 
         expected = np.array([np.nansum(a) for a in arrays], dtype=np.float64)
-        out = cuda.to_device(np.zeros(len(arrays), dtype=np.float64))
+        out = cp.zeros(len(arrays), dtype=cp.float64)      
         kernel[1, 1](out)
-        self.assertPreciseEqual(expected, out.copy_to_host())
+        self.assertPreciseEqual(expected, out.get())       
 
     def test_nanprod_basic(self):
         arrays = (
@@ -343,7 +317,6 @@ def test_nanprod_basic(self):
             np.float64([-1.5, 2.5, -float("inf")]),
             np.float64([-1.5, 2.5, float("inf"), -float("inf")]),
             np.float64([np.nan, -1.5, 2.5, np.nan, 3.0]),
-            np.float64([5.0, np.nan, -1.5, np.nan]),
             np.float64([np.nan, np.nan]),
         )
 
@@ -355,6 +328,6 @@ def kernel(out):
                 i += 1
 
         expected = np.array([np.nanprod(a) for a in arrays], dtype=np.float64)
-        out = cuda.to_device(np.zeros(len(arrays), dtype=np.float64))
+        out = cp.zeros(len(arrays), dtype=cp.float64)
         kernel[1, 1](out)
-        self.assertPreciseEqual(expected, out.copy_to_host())
+        self.assertPreciseEqual(expected, out.get())
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py b/numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py
index 3612a542e..61388c469 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py
@@ -6,6 +6,7 @@
 from numba import cuda
 from numba.cuda import double, void
 from numba.cuda.testing import unittest, CUDATestCase
+import cupy as cp
 
 
 RISKFREE = 0.02
@@ -128,11 +129,11 @@ def black_scholes_cuda(callResult, putResult, S, X, T, R, V):
         blockdim = 512, 1
         griddim = int(math.ceil(float(OPT_N) / blockdim[0])), 1
         stream = cuda.stream()
-        d_callResult = cuda.to_device(callResultNumba, stream)
-        d_putResult = cuda.to_device(putResultNumba, stream)
-        d_stockPrice = cuda.to_device(stockPrice, stream)
-        d_optionStrike = cuda.to_device(optionStrike, stream)
-        d_optionYears = cuda.to_device(optionYears, stream)
+        d_callResult = cp.asarray(callResultNumba)
+        d_putResult = cp.asarray(putResultNumba)
+        d_stockPrice = cp.asarray(stockPrice)
+        d_optionStrike = cp.asarray(optionStrike)
+        d_optionYears = cp.asarray(optionYears)
 
         for i in range(iterations):
             black_scholes_cuda[griddim, blockdim, stream](
@@ -144,8 +145,8 @@ def black_scholes_cuda(callResult, putResult, S, X, T, R, V):
                 RISKFREE,
                 VOLATILITY,
             )
-        d_callResult.copy_to_host(callResultNumba, stream)
-        d_putResult.copy_to_host(putResultNumba, stream)
+        d_callResult.get(out=callResultNumba)
+        d_putResult.get(out=putResultNumba)
         stream.synchronize()
 
         delta = np.abs(callResultNumpy - callResultNumba)
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py b/numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py
index d9a642144..60e0ff460 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py
@@ -9,6 +9,7 @@
 from numba.cuda.testing import skip_on_cudasim, skip_if_external_memmgr
 from numba.cuda.tests.support import linux_only, override_config
 from unittest.mock import call, patch
+import cupy as cp
 
 
 @skip_on_cudasim("CUDA Array Interface is not supported in the simulator")
@@ -87,7 +88,7 @@ def vadd(a, b):
 
         # Case 1: use custom array as argument
         h_arr = np.random.random(10)
-        arr = ForeignArray(cuda.to_device(h_arr))
+        arr = ForeignArray(cp.asarray(h_arr))
         val = 6
         out = vadd(arr, val)
         np.testing.assert_array_equal(out.copy_to_host(), h_arr + val)
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py b/numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py
index bbed82414..32fea035b 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py
@@ -6,6 +6,7 @@
 from numba.cuda.testing import CUDATestCase
 from numba.cuda.tests.support import override_config
 import unittest
+import cupy as cp
 
 
 class TestCudaJitNoTypes(CUDATestCase):
@@ -22,12 +23,12 @@ def foo(x, y):
         x = np.arange(10)
         y = np.empty_like(x)
 
-        dx = cuda.to_device(x)
-        dy = cuda.to_device(y)
+        dx = cp.asarray(x)
+        dy = cp.asarray(y)
 
         foo[10, 1](dx, dy)
 
-        dy.copy_to_host(y)
+        y = dy.get()
 
         self.assertTrue(np.all(x == y))
 
@@ -70,13 +71,14 @@ def outer(argin, argout):
         a = np.zeros(1)
         b = np.zeros(1)
 
-        stream = cuda.stream()
-        d_a = cuda.to_device(a, stream)
-        d_b = cuda.to_device(b, stream)
+        stream = cp.cuda.Stream()
+        with stream:
+            d_a = cp.asarray(a)
+            d_b = cp.asarray(b)
 
-        outer[1, 1, stream](d_a, d_b)
-
-        d_b.copy_to_host(b, stream)
+            outer[1, 1, cuda.external_stream(stream.ptr)](d_a, d_b)
+
+            b = d_b.get()
 
         self.assertEqual(b[0], (a[0] + 1) + (2 + 1))
 
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_datetime.py b/numba_cuda/numba/cuda/tests/cudapy/test_datetime.py
index 19177a3b5..3a5e16b1a 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_datetime.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_datetime.py
@@ -74,7 +74,6 @@ def timediff(start, end, out):
 
         arr1 = np.arange("2005-02", "2006-02", dtype="datetime64[D]")
         arr2 = arr1 + np.random.randint(0, 10000, arr1.size)
-
         delta = timediff(arr1, arr2)
 
         self.assertPreciseEqual(delta, arr2 - arr1)
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py b/numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py
index 9e823c5d3..f106b2e6e 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py
@@ -706,9 +706,9 @@ def foo(dest, n):
             foo.py_func
         )
 
-        result = cuda.device_array(1, dtype=np.int32)
+        result = cp.asarray([1], dtype=np.int32)
         foo[1, 1](result, 1)
-        result.copy_to_host()
+        result = result.get()
         self.assertEqual(result[0], 5)
 
         ir_content = foo.inspect_llvm()[foo.signatures[0]]
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_device_func.py b/numba_cuda/numba/cuda/tests/cudapy/test_device_func.py
index 906f6eab8..b8744853c 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_device_func.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_device_func.py
@@ -207,7 +207,7 @@ def rgba(r, g, b, a):
         def rgba_caller(x, channels):
             x[0] = rgba(channels[0], channels[1], channels[2], channels[3])
 
-        x = cuda.device_array(1, dtype=np.int32)
+        x = cp.asarray([1], dtype=np.int32)
         channels = cp.array([1.0, 2.0, 3.0, 4.0], dtype=np.float32)
 
         rgba_caller[1, 1](x, channels)
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py b/numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py
index da36e8635..237542330 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py
@@ -12,6 +12,7 @@
 import unittest
 from numba.cuda.core.errors import NumbaPerformanceWarning, TypingError
 from numba.cuda.tests.support import override_config
+import cupy as cp
 
 
 def _get_matmulcore_gufunc(dtype=float32):
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_laplace.py b/numba_cuda/numba/cuda/tests/cudapy/test_laplace.py
index 8874c449d..6a423dc97 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_laplace.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_laplace.py
@@ -6,6 +6,7 @@
 from numba.cuda import float64, void
 from numba.cuda.testing import unittest, CUDATestCase
 from numba.cuda.core import config
+import cupy as cp
 
 # NOTE: CUDA kernel does not return any value
 
@@ -93,30 +94,32 @@ def jocabi_relax_core(A, Anew, error):
 
         error_grid = np.zeros(griddim)
 
-        stream = cuda.stream()
+        stream = cp.cuda.Stream()
 
-        dA = cuda.to_device(A, stream)  # to device and don't come back
-        dAnew = cuda.to_device(Anew, stream)  # to device and don't come back
-        derror_grid = cuda.to_device(error_grid, stream)
+        with stream:
+            dA = cp.asarray(A)  # to device and don't come back
+            dAnew = cp.asarray(Anew)  # to device and don't come back
+        
+            derror_grid = cp.asarray(error_grid)
 
-        while error > tol and iter < iter_max:
-            self.assertTrue(error_grid.dtype == np.float64)
+            while error > tol and iter < iter_max:
+                self.assertTrue(error_grid.dtype == np.float64)
 
-            jocabi_relax_core[griddim, blockdim, stream](dA, dAnew, derror_grid)
+                jocabi_relax_core[griddim, blockdim, cuda.external_stream(stream.ptr)](dA, dAnew, derror_grid)
 
-            derror_grid.copy_to_host(error_grid, stream=stream)
+                error_grid = derror_grid.get()
 
-            # error_grid is available on host
-            stream.synchronize()
+                # error_grid is available on host
+                stream.synchronize()
 
-            error = np.abs(error_grid).max()
+                error = np.abs(error_grid).max()
 
-            # swap dA and dAnew
-            tmp = dA
-            dA = dAnew
-            dAnew = tmp
+                # swap dA and dAnew
+                tmp = dA
+                dA = dAnew
+                dAnew = tmp
 
-            iter += 1
+                iter += 1
 
 
 if __name__ == "__main__":
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_random.py b/numba_cuda/numba/cuda/tests/cudapy/test_random.py
index c99e29aa5..b1882346c 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_random.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_random.py
@@ -19,7 +19,7 @@
     xoroshiro128p_uniform_float64,
     xoroshiro128p_normal_float64,
 )
-
+import cupy as cp
 
 # Distributions
 UNIFORM = 1
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_reduction.py b/numba_cuda/numba/cuda/tests/cudapy/test_reduction.py
index 9a741b938..1c4ecbf17 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_reduction.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_reduction.py
@@ -6,6 +6,7 @@
 from numba.cuda.core.config import ENABLE_CUDASIM
 from numba.cuda.testing import CUDATestCase
 import unittest
+import cupy as cp
 
 # Avoid recompilation of the sum_reduce function by keeping it at global scope
 sum_reduce = cuda.Reduce(lambda a, b: a + b)
@@ -55,7 +56,7 @@ def test_empty_array_host(self):
 
     def test_empty_array_device(self):
         A = np.arange(0, dtype=np.float64) + 1
-        dA = cuda.to_device(A)
+        dA = cp.asarray(A)
         expect = A.sum()
         got = sum_reduce(dA)
         self.assertEqual(expect, got)
@@ -83,7 +84,7 @@ def test_non_identity_init(self):
 
     def test_result_on_device(self):
         A = np.arange(10, dtype=np.float64) + 1
-        got = cuda.to_device(np.zeros(1, dtype=np.float64))
+        got = cp.zeros(1, dtype=np.float64)
         expect = A.sum()
         res = sum_reduce(A, res=got)
         self.assertIsNone(res)
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_slicing.py b/numba_cuda/numba/cuda/tests/cudapy/test_slicing.py
index 9dedfcfd2..22d47d370 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_slicing.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_slicing.py
@@ -3,7 +3,7 @@
 
 import numpy as np
 from numba import cuda
-from numba.cuda.testing import unittest, CUDATestCase
+from numba.cuda.testing import unittest, DeprecatedDeviceArrayApiTest
 
 
 def foo(inp, out):
@@ -16,7 +16,7 @@ def copy(inp, out):
     cufoo(inp[i, :], out[i, :])
 
 
-class TestCudaSlicing(CUDATestCase):
+class TestCudaSlicing(DeprecatedDeviceArrayApiTest):
     def test_slice_as_arg(self):
         global cufoo
         cufoo = cuda.jit("void(int32[:], int32[:])", device=True)(foo)
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_sm.py b/numba_cuda/numba/cuda/tests/cudapy/test_sm.py
index 663cf88fd..0cf4aa7f0 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_sm.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_sm.py
@@ -4,6 +4,7 @@
 from numba import cuda
 from numba.cuda import int32, float64, void
 from numba.cuda import HAS_NUMBA
+import cupy as cp
 
 if HAS_NUMBA:
     from numba.core.errors import TypingError as NumbaTypingError
@@ -91,7 +92,7 @@ def costs_func(d_block_costs):
             d_block_costs[0] = s_initialcost[0] + prediction
 
         block_costs = np.zeros(num_blocks, dtype=np.float64)
-        d_block_costs = cuda.to_device(block_costs)
+        d_block_costs = cp.asarray(block_costs)
 
         costs_func[num_blocks, threads_per_block](d_block_costs)
 
@@ -130,9 +131,9 @@ def use_sm_chunk_copy(x, y):
                 for j in range(nthreads):
                     y[bd * bx + j] = sm[j]
 
-        d_result = cuda.device_array_like(arr)
+        d_result = cp.zeros(arr.shape, dtype=arr.dtype)
         use_sm_chunk_copy[nblocks, nthreads](arr, d_result)
-        host_result = d_result.copy_to_host()
+        host_result = d_result.get()
         np.testing.assert_array_equal(arr, host_result)
 
     def test_shared_recarray(self):
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_ssa.py b/numba_cuda/numba/cuda/tests/cudapy/test_ssa.py
index 2f242451f..1508104cf 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_ssa.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_ssa.py
@@ -18,6 +18,7 @@
 from numba.cuda.extending import overload
 from numba.cuda.tests.support import override_config
 from numba.cuda.testing import CUDATestCase, skip_on_cudasim
+import cupy as cp
 
 
 _DEBUG = False
@@ -38,11 +39,11 @@ class SSABaseTest(CUDATestCase):
     def check_func(self, func, result_array, *args):
         # For CUDA kernels, we need to create output arrays and call with [1,1] launch config
         # Create GPU array with same shape as expected result array
-        gpu_result_array = cuda.to_device(np.zeros_like(result_array))
+        gpu_result_array = cp.zeros(result_array.shape, dtype=result_array.dtype)
 
         # Call the CUDA kernel
         func[1, 1](gpu_result_array, *copy.deepcopy(args))
-        gpu_result = gpu_result_array.copy_to_host()
+        gpu_result = gpu_result_array.get()
 
         # Call the original Python function for expected result
         cpu_result = np.zeros_like(result_array)
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_transpose.py b/numba_cuda/numba/cuda/tests/cudapy/test_transpose.py
index 776936849..1bc84a2a1 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_transpose.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_transpose.py
@@ -5,14 +5,15 @@
 from numba import cuda
 from numba.cuda.kernels.transpose import transpose
 from numba.cuda.testing import unittest
-from numba.cuda.testing import skip_on_cudasim, CUDATestCase
+from numba.cuda.testing import skip_on_cudasim, DeprecatedDeviceArrayApiTest
+import cupy as cp
 
 
 recordwith2darray = np.dtype([("i", np.int32), ("j", np.float32, (3, 2))])
 
 
 @skip_on_cudasim("Device Array API unsupported in the simulator")
-class TestTranspose(CUDATestCase):
+class TestTranspose(DeprecatedDeviceArrayApiTest):
     def test_transpose(self):
         variants = (
             (5, 6, np.float64),
diff --git a/numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py b/numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py
index 01eefbf4f..2c17cceea 100644
--- a/numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py
+++ b/numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py
@@ -10,6 +10,7 @@
     skip_unless_cc_60,
 )
 from numba.cuda.tests.support import captured_stdout
+import cupy as cp
 
 
 @skip_if_cudadevrt_missing
@@ -49,10 +50,10 @@ def test_ex_laplace(self):
 
         # Middle element is made very hot
         data[500] = 10000
-        buf_0 = cuda.to_device(data)
+        buf_0 = cp.asarray(data)
 
         # This extra array is used for synchronization purposes
-        buf_1 = cuda.device_array_like(buf_0)
+        buf_1 = cp.zeros_like(buf_0)
 
         niter = 10000
         # ex_laplace.allocate.end
@@ -128,7 +129,7 @@ def solve_heat_equation(buf_0, buf_1, timesteps, k):
         solve_heat_equation.forall(len(data))(buf_0, buf_1, niter, 0.25)
         # ex_laplace.launch.end
 
-        results = buf_1.copy_to_host()
+        results = buf_1.get()
         if plot:
             fig, ax = plt.subplots(figsize=(16 * 0.66, 9 * 0.66))
             plt.plot(
diff --git a/numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py b/numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py
index ee8ed61a8..1ab6047c8 100644
--- a/numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py
+++ b/numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py
@@ -13,6 +13,7 @@
 import unittest
 from numba.cuda.testing import CUDATestCase, skip_on_cudasim
 from numba.cuda.tests.support import captured_stdout
+import cupy as cp
 
 
 @skip_on_cudasim("cudasim doesn't support cuda import at non-top-level")
@@ -60,9 +61,9 @@ def matmul(A, B, C):
         y_h = np.ones([4, 4])
         z_h = np.zeros([4, 4])
 
-        x_d = cuda.to_device(x_h)
-        y_d = cuda.to_device(y_h)
-        z_d = cuda.to_device(z_h)
+        x_d = cp.asarray(x_h)
+        y_d = cp.asarray(y_h)
+        z_d = cp.asarray(z_h)
 
         threadsperblock = (16, 16)
         blockspergrid_x = math.ceil(z_h.shape[0] / threadsperblock[0])
@@ -70,7 +71,7 @@ def matmul(A, B, C):
         blockspergrid = (blockspergrid_x, blockspergrid_y)
 
         matmul[blockspergrid, threadsperblock](x_d, y_d, z_d)
-        z_h = z_d.copy_to_host()
+        z_h = z_d.get()
         print(z_h)
         print(x_h @ y_h)
         # magictoken.ex_run_matmul.end
@@ -130,9 +131,9 @@ def fast_matmul(A, B, C):
         y_h = np.ones([4, 4])
         z_h = np.zeros([4, 4])
 
-        x_d = cuda.to_device(x_h)
-        y_d = cuda.to_device(y_h)
-        z_d = cuda.to_device(z_h)
+        x_d = cp.asarray(x_h)
+        y_d = cp.asarray(y_h)
+        z_d = cp.asarray(z_h)
 
         threadsperblock = (TPB, TPB)
         blockspergrid_x = math.ceil(z_h.shape[0] / threadsperblock[0])
@@ -154,9 +155,9 @@ def fast_matmul(A, B, C):
         y_h = np.ones([23, 7])
         z_h = np.zeros([5, 7])
 
-        x_d = cuda.to_device(x_h)
-        y_d = cuda.to_device(y_h)
-        z_d = cuda.to_device(z_h)
+        x_d = cp.asarray(x_h)
+        y_d = cp.asarray(y_h)
+        z_d = cp.asarray(z_h)
 
         threadsperblock = (TPB, TPB)
         grid_y_max = max(x_h.shape[0], y_h.shape[0])
@@ -166,7 +167,7 @@ def fast_matmul(A, B, C):
         blockspergrid = (blockspergrid_x, blockspergrid_y)
 
         fast_matmul[blockspergrid, threadsperblock](x_d, y_d, z_d)
-        z_h = z_d.copy_to_host()
+        z_h = z_d.get()
         print(z_h)
         print(x_h @ y_h)
         # magictoken.ex_run_nonsquare.end
diff --git a/numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py b/numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py
index 67ad2bc88..894a5729d 100644
--- a/numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py
+++ b/numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py
@@ -9,6 +9,7 @@
     skip_on_standalone_numba_cuda,
 )
 from numba.cuda.tests.support import captured_stdout
+import cupy as cp
 
 
 @skip_on_cudasim("cudasim doesn't support cuda import at non-top-level")
@@ -80,7 +81,7 @@ def mc_integrate(lower_lim, upper_lim, nsamps):
             approximate the definite integral of `func` from
             `lower_lim` to `upper_lim`
             """
-            out = cuda.to_device(np.zeros(nsamps, dtype="float32"))
+            out = cp.zeros(nsamps, dtype="float32")
             rng_states = create_xoroshiro128p_states(nsamps, seed=42)
 
             # jit the function for use in CUDA kernels
diff --git a/numba_cuda/numba/cuda/tests/doc_examples/test_random.py b/numba_cuda/numba/cuda/tests/doc_examples/test_random.py
index f8c198a2c..6edf52c1d 100644
--- a/numba_cuda/numba/cuda/tests/doc_examples/test_random.py
+++ b/numba_cuda/numba/cuda/tests/doc_examples/test_random.py
@@ -50,12 +50,12 @@ def random_3d(arr, rng_states):
         rng_states = create_xoroshiro128p_states(nthreads, seed=1)
 
         # Generate random numbers
-        arr = cuda.device_array((X, Y, Z), dtype=np.float32)
+        arr = cp.empty((X, Y, Z), dtype=np.float32)
         random_3d[(gx, gy, gz), (bx, by, bz)](arr, rng_states)
         # magictoken.ex_3d_grid.end
 
         # Some basic tests of the randomly-generated numbers
-        host_arr = arr.copy_to_host()
+        host_arr = arr.get()
         self.assertGreater(np.mean(host_arr), 0.49)
         self.assertLess(np.mean(host_arr), 0.51)
         self.assertTrue(np.all(host_arr <= 1.0))
diff --git a/numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py b/numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py
index c22bebf76..5c8321088 100644
--- a/numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py
+++ b/numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py
@@ -10,6 +10,7 @@
     skip_unless_cc_60,
 )
 from numba.cuda.tests.support import captured_stdout
+import cupy as cp
 
 
 @skip_if_cudadevrt_missing
@@ -42,8 +43,7 @@ def test_ex_sessionize(self):
 
         # ex_sessionize.allocate.begin
         # Generate data
-        ids = cuda.to_device(
-            np.array(
+        ids = cp.array(
                 [
                     1,
                     1,
@@ -75,9 +75,7 @@ def test_ex_sessionize(self):
                     4,
                 ]
             )
-        )
-        sec = cuda.to_device(
-            np.array(
+        sec = cp.asarray(np.array(
                 [
                     1,
                     2,
@@ -110,9 +108,9 @@ def test_ex_sessionize(self):
                 ],
                 dtype="datetime64[ns]",
             ).astype("int64")  # Cast to int64 for compatibility
-        )
+        )
         # Create a vector to hold the results
-        results = cuda.to_device(np.zeros(len(ids)))
+        results = cp.zeros(len(ids))
         # ex_sessionize.allocate.end
 
         # ex_sessionize.kernel.begin
@@ -161,7 +159,7 @@ def sessionize(user_id, timestamp, results):
         # ex_sessionize.launch.begin
         sessionize.forall(len(ids))(ids, sec, results)
 
-        print(results.copy_to_host())
+        print(results.get())
         # array([ 0.,  0.,  0.,  3.,  3.,  3.,
         #         6.,  6.,  6.,  9.,  9., 11.,
         #         11., 13., 13., 13., 13., 17.,
@@ -199,7 +197,7 @@ def sessionize(user_id, timestamp, results):
             24,
             24,
         ]
-        np.testing.assert_equal(expect, results.copy_to_host())
+        np.testing.assert_equal(expect, results.get())
 
 
 if __name__ == "__main__":
diff --git a/numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py b/numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py
index 3172298fe..6f8e55aaa 100644
--- a/numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py
+++ b/numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py
@@ -5,6 +5,7 @@
 
 from numba.cuda.testing import CUDATestCase, skip_on_cudasim
 from numba.cuda.tests.support import captured_stdout
+import cupy as cp
 
 
 @skip_on_cudasim("cudasim doesn't support cuda import at non-top-level")
@@ -48,14 +49,14 @@ def f(a, b, c):
 
         # ex_vecadd.allocate.begin
         N = 100000
-        a = cuda.to_device(np.random.random(N))
-        b = cuda.to_device(np.random.random(N))
+        a = cp.random.random(N)
+        b = cp.random.random(N)
         c = cuda.device_array_like(a)
         # ex_vecadd.allocate.end
 
         # ex_vecadd.forall.begin
         f.forall(len(a))(a, b, c)
-        print(c.copy_to_host())
+        print(c.get())
         # ex_vecadd.forall.end
 
         # ex_vecadd.launch.begin
@@ -64,11 +65,11 @@ def f(a, b, c):
         # Enough blocks to cover the entire vector depending on its length
         nblocks = (len(a) // nthreads) + 1
         f[nblocks, nthreads](a, b, c)
-        print(c.copy_to_host())
+        print(c.get())
         # ex_vecadd.launch.end
 
         np.testing.assert_equal(
-            c.copy_to_host(), a.copy_to_host() + b.copy_to_host()
+            c.get(), a.get() + b.get()
         )
 
 
diff --git a/numba_cuda/numba/cuda/vectorizers.py b/numba_cuda/numba/cuda/vectorizers.py
index 937546044..950b01462 100644
--- a/numba_cuda/numba/cuda/vectorizers.py
+++ b/numba_cuda/numba/cuda/vectorizers.py
@@ -9,6 +9,9 @@
     GeneralizedUFunc,
     GUFuncCallSteps,
 )
+from numba.cuda import _api
+from numba.cuda.cudadrv.devicearray import DeviceNDArray
+from numba.cuda.api_util import prepare_shape_strides_dtype
 
 
 class CUDAUFuncDispatcher(object):
@@ -97,7 +100,7 @@ def __init__(self, nin, nout, args, kwargs):
         self._stream = kwargs.get("stream", 0)
 
     def is_device_array(self, obj):
-        return cuda.is_cuda_array(obj)
+        return _api._is_cuda_array(obj)
 
     def as_device_array(self, obj):
         # We don't want to call as_cuda_array on objects that are already Numba
@@ -105,19 +108,22 @@ def as_device_array(self, obj):
         # Producer then importing it as a Consumer, which causes a
         # synchronization on the array's stream (if it has one) by default.
         # When we have a Numba device array, we can simply return it.
-        if cuda.cudadrv.devicearray.is_cuda_ndarray(obj):
+        if _api.is_cuda_ndarray(obj):
             return obj
-        return cuda.as_cuda_array(obj)
+        return _api._as_cuda_array(obj)
 
     def to_device(self, hostary):
-        return cuda.to_device(hostary, stream=self._stream)
+        return _api._to_device(hostary, stream=self._stream)
 
     def to_host(self, devary, hostary):
         out = devary.copy_to_host(hostary, stream=self._stream)
         return out
 
     def allocate_device_array(self, shape, dtype):
-        return cuda.device_array(shape=shape, dtype=dtype, stream=self._stream)
+        shape, strides, dtype = prepare_shape_strides_dtype(
+            shape, None, dtype, "C"
+        )
+        return DeviceNDArray._legacy_ctor(shape, strides, dtype, stream=self._stream)
 
     def launch_kernel(self, kernel, nelem, args):
         kernel.forall(nelem, stream=self._stream)(*args)
@@ -173,13 +179,15 @@ def as_device_array(self, obj):
         return cuda.as_cuda_array(obj)
 
     def to_device(self, hostary, stream):
-        return cuda.to_device(hostary, stream=stream)
+        return _api._to_device(hostary, stream=stream)
 
     def to_host(self, devary, stream):
         return devary.copy_to_host(stream=stream)
 
     def allocate_device_array(self, shape, dtype, stream):
-        return cuda.device_array(shape=shape, dtype=dtype, stream=stream)
+        # Use the private allocator so the deprecated DeviceNDArray is
+        # returned without emitting a deprecation warning.
+        return _api._device_array(shape=shape, dtype=dtype, stream=stream)
 
     def broadcast_device(self, ary, shape):
         ax_differs = [

From 58d716c9add49083867da8a053e43f92d6ba225c Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Thu, 20 Nov 2025 05:10:43 -0800
Subject: [PATCH 08/60] partially switch designs

---
 numba_cuda/numba/cuda/api.py                  | 56 ++++--------
 numba_cuda/numba/cuda/cudadrv/devicearray.py  | 88 ++++++-------------
 numba_cuda/numba/cuda/cudadrv/driver.py       |  2 +-
 numba_cuda/numba/cuda/deviceufunc.py          |  1 -
 numba_cuda/numba/cuda/kernels/reduction.py    |  2 +-
 numba_cuda/numba/cuda/kernels/transpose.py    |  7 ++
 .../cuda/tests/cudadrv/test_emm_plugins.py    |  2 +-
 .../numba/cuda/tests/cudapy/test_ipc.py       |  6 +-
 8 files changed, 56 insertions(+), 108 deletions(-)

diff --git a/numba_cuda/numba/cuda/api.py b/numba_cuda/numba/cuda/api.py
index c33a5977a..38957de9e 100644
--- a/numba_cuda/numba/cuda/api.py
+++ b/numba_cuda/numba/cuda/api.py
@@ -14,6 +14,7 @@
 from numba.cuda.core import config
 from numba.cuda.api_util import prepare_shape_strides_dtype
 from numba.cuda.cudadrv.devicearray import DeprecatedDeviceArrayApiWarning
+from . import _api
 
 # NDarray device helper
 
@@ -23,60 +24,37 @@
 
 
 def from_cuda_array_interface(desc, owner=None, sync=True):
-    """Create a _DeviceNDArray from a cuda-array-interface description.
+    """Create a DeviceNDArray from a cuda-array-interface description.
     The ``owner`` is the owner of the underlying memory.
-    The resulting _DeviceNDArray will acquire a reference from it.
+    The resulting DeviceNDArray will acquire a reference from it.
 
     If ``sync`` is ``True``, then the imported stream (if present) will be
     synchronized.
     """
-    version = desc.get("version")
-    # Mask introduced in version 1
-    if 1 <= version:
-        mask = desc.get("mask")
-        # Would ideally be better to detect if the mask is all valid
-        if mask is not None:
-            raise NotImplementedError("Masked arrays are not supported")
-
-    shape = desc["shape"]
-    strides = desc.get("strides")
-
-    shape, strides, dtype = prepare_shape_strides_dtype(
-        shape, strides, desc["typestr"], order="C"
-    )
-    size = driver.memory_size_from_info(shape, strides, dtype.itemsize)
-
-    cudevptr_class = driver.binding.CUdeviceptr
-    devptr = cudevptr_class(desc["data"][0])
-    data = driver.MemoryPointer(devptr, size=size, owner=owner)
-    stream_ptr = desc.get("stream", None)
-    if stream_ptr is not None:
-        stream = external_stream(stream_ptr)
-        if sync and config.CUDA_ARRAY_INTERFACE_SYNC:
-            stream.synchronize()
-    else:
-        stream = 0  # No "Numba default stream", not the CUDA default stream
-    da = devicearray._DeviceNDArray(
-        shape=shape, strides=strides, dtype=dtype, gpu_data=data, stream=stream
+    warnings.warn(
+        "Constructing DeviceNDArray objects via the __cuda_array_interface__ "
+        "is now deprecated. Please prefer cupy for constructing device arrays.",
+        DeprecatedDeviceArrayApiWarning,
     )
-    return da
+    return _api._from_cuda_array_interface(desc, owner=owner, sync=sync)
 
 
 def as_cuda_array(obj, sync=True):
-    """Create a _DeviceNDArray from any object that implements
+    """Create a DeviceNDArray from any object that implements
     the :ref:`cuda array interface <cuda-array-interface>`.
 
     A view of the underlying GPU buffer is created.  No copying of the data
-    is done.  The resulting _DeviceNDArray will acquire a reference from `obj`.
+    is done.  The resulting DeviceNDArray will acquire a reference from `obj`.
 
     If ``sync`` is ``True``, then the imported stream (if present) will be
     synchronized.
     """
-    if (
-        interface := getattr(obj, "__cuda_array_interface__", None)
-    ) is not None:
-        return from_cuda_array_interface(interface, owner=obj, sync=sync)
-    raise TypeError("*obj* doesn't implement the cuda array interface.")
+    warnings.warn(
+        "Constructing DeviceNDArray objects via as_cuda_array is now deprecated. "
+        "Please prefer cupy for constructing device arrays.",
+        DeprecatedDeviceArrayApiWarning,
+    )
+    return _api._as_cuda_array(obj, sync=sync)
 
 
 def is_cuda_array(obj):
@@ -165,7 +143,7 @@ def _device_array(shape, dtype=np.float64, strides=None, order="C", stream=0):
     shape, strides, dtype = prepare_shape_strides_dtype(
         shape, strides, dtype, order
     )
-    return devicearray._DeviceNDArray(
+    return devicearray.DeviceNDArray._create_nowarn(
         shape=shape, strides=strides, dtype=dtype, stream=stream
     )
 
diff --git a/numba_cuda/numba/cuda/cudadrv/devicearray.py b/numba_cuda/numba/cuda/cudadrv/devicearray.py
index b00e57ef6..75a484b2e 100644
--- a/numba_cuda/numba/cuda/cudadrv/devicearray.py
+++ b/numba_cuda/numba/cuda/cudadrv/devicearray.py
@@ -322,7 +322,7 @@ def split(self, section, stream=0):
             end = min(begin + section, self.size)
             shape = (end - begin,)
             gpu_data = self.gpu_data.view(begin * itemsize, end * itemsize)
-            yield _DeviceNDArray(
+            yield DeviceNDArray._create_nowarn(
                 shape,
                 strides,
                 dtype=self.dtype,
@@ -370,7 +370,7 @@ def squeeze(self, axis=None, stream=0):
 
     def _squeeze(self, axis=None, stream=0):
         new_dummy, _ = self._dummy.squeeze(axis=axis)
-        return _DeviceNDArray(
+        return DeviceNDArray._create_nowarn(
             shape=new_dummy.shape,
             strides=new_dummy.strides,
             dtype=self.dtype,
@@ -407,7 +407,7 @@ def view(self, dtype):
 
             strides[-1] = dtype.itemsize
 
-        return _DeviceNDArray(
+        return DeviceNDArray._create_nowarn(
             shape=shape,
             strides=strides,
             dtype=dtype,
@@ -480,7 +480,7 @@ def _do_getitem(self, item, stream=0):
             shape, strides, dtype = prepare_shape_strides_dtype(
                 typ.shape, None, typ.subdtype[0], "C"
             )
-            return _DeviceNDArray(
+            return DeviceNDArray._create_nowarn(
                 shape=shape,
                 strides=strides,
                 dtype=dtype,
@@ -571,12 +571,25 @@ def kernel(lhs, rhs):
     return kernel
 
 
-class _DeviceNDArray(DeviceNDArrayBase):
+class DeviceNDArray(DeviceNDArrayBase):
     """
-    An on-GPU array type (internal implementation class formerly named
-    DeviceNDArray)
+    An on-GPU array type
     """
 
+    def __init__(self, *args, **kwargs):
+        warnings.warn(
+            "DeviceNDArray is deprecated. Please prefer cupy for array operations.",
+            DeprecatedDeviceArrayApiWarning,
+        )
+        super().__init__(*args, **kwargs)
+
+    @classmethod
+    def _create_nowarn(cls, *args, **kwargs):
+        """Create a DeviceNDArray without the deprecation warning."""
+        instance = cls.__new__(cls)
+        DeviceNDArrayBase.__init__(instance, *args, **kwargs)
+        return instance
+
     def is_f_contiguous(self):
         """
         Return true if the array is Fortran-contiguous.
@@ -614,7 +627,6 @@ def __array__(self, dtype=None, copy=None):
     def __len__(self):
         return self.shape[0]
 
-    @deprecated_array_api
     def reshape(self, *newshape, **kws):
         """
         Reshape the array without changing its contents, similarly to
@@ -622,10 +634,6 @@ def reshape(self, *newshape, **kws):
 
             d_arr = d_arr.reshape(20, 50, order="F")
         """
-
-        return self._reshape(*newshape, **kws)
-
-    def _reshape(self, *newshape, **kws):
         if len(newshape) == 1 and isinstance(newshape[0], (tuple, list)):
             newshape = newshape[0]
 
@@ -651,7 +659,6 @@ def _reshape(self, *newshape, **kws):
         else:
             raise NotImplementedError("operation requires copying")
 
-    @deprecated_array_api
     def ravel(self, order="C", stream=0):
         """
         Flattens a contiguous array without changing its contents, similar to
@@ -675,12 +682,10 @@ def ravel(self, order="C", stream=0):
             raise NotImplementedError("operation requires copying")
 
     @devices.require_context
-    @deprecated_array_api
     def __getitem__(self, item):
         return self._do_getitem(item)
 
     @devices.require_context
-    @deprecated_array_api
     def getitem(self, item, stream=0):
         """Do `__getitem__(item)` with CUDA stream"""
         return self._do_getitem(item, stream)
@@ -779,7 +784,7 @@ def _do_setitem(self, key, value, stream=0):
         rhs_shape = np.ones(lhs.ndim, dtype=np.int64)
         # negative indices would not work if rhs.ndim == 0
         rhs_shape[lhs.ndim - rhs.ndim :] = rhs.shape
-        rhs = rhs._reshape(*rhs_shape)
+        rhs = rhs.reshape(*rhs_shape)
         for i, (l, r) in enumerate(zip(lhs.shape, rhs.shape)):
             if r != 1 and l != r:
                 raise ValueError(
@@ -794,7 +799,6 @@ def _do_setitem(self, key, value, stream=0):
         if synchronous:
             stream.synchronize()
 
-
 class IpcArrayHandle(object):
     """
     An IPC array handle that can be serialized and transfer to another process
@@ -826,7 +830,7 @@ def open(self):
         original process.  Must not be used on the original process.
         """
         dptr = self._ipc_handle.open(devices.get_context())
-        return _DeviceNDArray(gpu_data=dptr, **self._array_desc)
+        return DeviceNDArray._create_nowarn(gpu_data=dptr, **self._array_desc)
 
     def close(self):
         """
@@ -872,7 +876,7 @@ def from_array_like(ary, stream=0, gpu_data=None):
 
 
 def _from_array_like(ary, stream=0, gpu_data=None):
-    return _DeviceNDArray(
+    return DeviceNDArray._create_nowarn(
         ary.shape, ary.strides, ary.dtype, stream=stream, gpu_data=gpu_data
     )
 
@@ -897,11 +901,7 @@ def array_core(ary):
     core_index = []
     for stride in ary.strides:
         core_index.append(0 if stride == 0 else slice(None))
-
-    if isinstance(ary, _DeviceNDArray):
-        return ary._do_getitem(tuple(core_index))
-    else:
-        return ary[tuple(core_index)]
+    return ary[tuple(core_index)]
 
 
 def is_contiguous(ary):
@@ -981,14 +981,8 @@ def auto_device(obj, stream=0, copy=True, user_explicit=False):
 
 
 def check_array_compatibility(ary1, ary2):
-    if isinstance(ary1, _DeviceNDArray):
-        ary1sq = ary1._squeeze()
-    else:
-        ary1sq = ary1.squeeze()
-    if isinstance(ary2, _DeviceNDArray):
-        ary2sq = ary2._squeeze()
-    else:
-        ary2sq = ary2.squeeze()
+    ary1sq = ary1.squeeze()
+    ary2sq = ary2.squeeze()
     if ary1.dtype != ary2.dtype:
         raise TypeError(
             "incompatible dtype: %s vs. %s" % (ary1.dtype, ary2.dtype)
@@ -1003,33 +997,3 @@ def check_array_compatibility(ary1, ary2):
         raise ValueError(
             "incompatible strides: %s vs. %s" % (ary1.strides, ary2.strides)
         )
-
-
-class DeviceNDArray(_DeviceNDArray):
-    """
-    Deprecated public wrapper around the implementation class _DeviceNDArray.
-
-    Instantiating this class will emit a DeprecatedDeviceArrayApiWarning indicating that the
-    public name DeviceNDArray is deprecated. The implementation class is now
-    named _DeviceNDArray; code should migrate to that name.
-    """
-
-    def __init__(self, *args, **kwargs):
-        breakpoint()
-        warnings.warn(
-            "DeviceNDArray api is deprecated. Please prefer cupy for array functions",
-            DeprecatedDeviceArrayApiWarning,
-        )
-        super().__init__(*args, **kwargs)
-
-    @classmethod
-    def _legacy_ctor(cls, *args, **kwargs):
-        """
-        Legacy constructor that does not emit a deprecation warning.
-        Useful for APIs like vectorize and guvectorize that need to
-        continue to return the deprecated class right now for backwards
-        compatibility.
-        """
-        instance = cls.__new__(cls)
-        _DeviceNDArray.__init__(instance, *args, **kwargs)
-        return instance 
diff --git a/numba_cuda/numba/cuda/cudadrv/driver.py b/numba_cuda/numba/cuda/cudadrv/driver.py
index ad6248b5c..c5f924fed 100644
--- a/numba_cuda/numba/cuda/cudadrv/driver.py
+++ b/numba_cuda/numba/cuda/cudadrv/driver.py
@@ -1716,7 +1716,7 @@ def open_array(self, context, shape, dtype, strides=None):
             strides = dtype.itemsize
         dptr = self.open(context)
         # read the device pointer as an array
-        return devicearray._DeviceNDArray(
+        return devicearray.DeviceNDArray._create_nowarn(
             shape=shape, strides=strides, dtype=dtype, gpu_data=dptr
         )
 
diff --git a/numba_cuda/numba/cuda/deviceufunc.py b/numba_cuda/numba/cuda/deviceufunc.py
index 44e887254..55b1c7338 100644
--- a/numba_cuda/numba/cuda/deviceufunc.py
+++ b/numba_cuda/numba/cuda/deviceufunc.py
@@ -369,7 +369,6 @@ def attempt_ravel(a):
         if out is None:
             # No output is provided
             devout = cr.allocate_device_array(shape, resty, stream=stream)
-            breakpoint()
             devarys.extend([devout])
             cr.launch(func, shape[0], stream, devarys)
 
diff --git a/numba_cuda/numba/cuda/kernels/reduction.py b/numba_cuda/numba/cuda/kernels/reduction.py
index 463db8846..0cb609585 100644
--- a/numba_cuda/numba/cuda/kernels/reduction.py
+++ b/numba_cuda/numba/cuda/kernels/reduction.py
@@ -239,7 +239,7 @@ def __call__(self, arr, size=None, res=None, init=0, stream=0):
         partials_size = full_blockct
         if size_partial:
             partials_size += 1
-        partials = cuda.device_array(shape=partials_size, dtype=arr.dtype)
+        partials = cuda._api._device_array(shape=partials_size, dtype=arr.dtype)
 
         if size_full:
             # kernel for the fully populated threadblocks
diff --git a/numba_cuda/numba/cuda/kernels/transpose.py b/numba_cuda/numba/cuda/kernels/transpose.py
index 33a0f9bc6..b97ec70f2 100644
--- a/numba_cuda/numba/cuda/kernels/transpose.py
+++ b/numba_cuda/numba/cuda/kernels/transpose.py
@@ -5,6 +5,7 @@
 from numba.cuda.cudadrv.driver import driver
 import math
 from numba.cuda.np import numpy_support as nps
+from numba.cuda.cudadrv.devicearray import DeprecatedDeviceArrayApiWarning
 
 
 def transpose(a, b=None):
@@ -19,7 +20,13 @@ def transpose(a, b=None):
         the device its stream will be used to perform the transpose (and to copy
         `b` to the device if necessary).
     """
+    warnings.warn(
+        "The DeviceNDArray class and its transpose method are deprecated. "
+        "Please prefer cupy for device array operations."
+    )
+    return _transpose(a, b=b)
 
+def _transpose(a, b=None):
     # prefer `a`'s stream if
     stream = getattr(a, "stream", 0)
 
diff --git a/numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py b/numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py
index 44aee8f7c..dc760d538 100644
--- a/numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py
+++ b/numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py
@@ -6,7 +6,7 @@
 
 from numba import cuda
 from numba.cuda.core import config
-from numba.cuda.testing import unittest, DeprecatedDeviceArrayApiTest, skip_on_cudasim
+from numba.cuda.testing import unittest, DeprecatedDeviceArrayApiTest, skip_on_cudasim, CUDATestCase
 from numba.cuda.tests.support import linux_only
 
 if not config.ENABLE_CUDASIM:
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_ipc.py b/numba_cuda/numba/cuda/tests/cudapy/test_ipc.py
index f22eec2e0..bff2b84dd 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_ipc.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_ipc.py
@@ -16,7 +16,7 @@
     skip_on_cudasim,
     skip_under_cuda_memcheck,
     skip_on_wsl2,
-    CUDATestCase,
+    DeprecatedDeviceArrayApiTest,
     ForeignArray,
 )
 from numba.cuda.tests.support import linux_only, windows_only
@@ -64,7 +64,7 @@ def ipc_array_test(ipcarr, parent_pid):
     return arr
 
 
-class CUDAIpcTestCase(CUDATestCase):
+class CUDAIpcTestCase(DeprecatedDeviceArrayApiTest):
     @classmethod
     def setUpClass(cls) -> None:
         cls.exe = concurrent.futures.ProcessPoolExecutor(
@@ -249,7 +249,7 @@ def test_ipc_array(self):
 
 @windows_only
 @skip_on_cudasim("Ipc not available in CUDASIM")
-class TestIpcNotSupported(CUDATestCase):
+class TestIpcNotSupported(DeprecatedDeviceArrayApiTest):
     def test_unsupported(self):
         arr = np.arange(10, dtype=np.intp)
         devarr = cuda.to_device(arr)

From ec5c17586144d749e939424bd74d7cbca5a04c1f Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Thu, 4 Dec 2025 04:41:25 -0800
Subject: [PATCH 09/60] partial: route internal callers through non-warning _api helpers

---
 numba_cuda/numba/cuda/_api.py                             | 4 ++--
 numba_cuda/numba/cuda/api.py                              | 8 ++++++++
 numba_cuda/numba/cuda/cudadrv/devicearray.py              | 8 ++++----
 numba_cuda/numba/cuda/deviceufunc.py                      | 2 +-
 numba_cuda/numba/cuda/dispatcher.py                       | 2 +-
 numba_cuda/numba/cuda/kernels/transpose.py                | 6 +++---
 .../numba/cuda/tests/cudapy/test_cuda_array_interface.py  | 2 +-
 numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py      | 4 ++--
 numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py | 6 +++---
 numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py   | 2 +-
 numba_cuda/numba/cuda/vectorizers.py                      | 7 ++-----
 11 files changed, 28 insertions(+), 23 deletions(-)

diff --git a/numba_cuda/numba/cuda/_api.py b/numba_cuda/numba/cuda/_api.py
index 83c244e6d..85adc6d65 100644
--- a/numba_cuda/numba/cuda/_api.py
+++ b/numba_cuda/numba/cuda/_api.py
@@ -65,7 +65,7 @@ def _from_cuda_array_interface(desc, owner=None, sync=True):
             stream.synchronize()
     else:
         stream = 0  # No "Numba default stream", not the CUDA default stream
-    da = devicearray.DeviceNDArray(
+    da = devicearray.DeviceNDArray._create_nowarn(
         shape=shape, strides=strides, dtype=dtype, gpu_data=data, stream=stream
     )
     return da
@@ -84,7 +84,7 @@ def _as_cuda_array(obj, sync=True):
     if (
         interface := getattr(obj, "__cuda_array_interface__", None)
     ) is not None:
-        return from_cuda_array_interface(interface, owner=obj, sync=sync)
+        return _from_cuda_array_interface(interface, owner=obj, sync=sync)
     raise TypeError("*obj* doesn't implement the cuda array interface.")
 
 
diff --git a/numba_cuda/numba/cuda/api.py b/numba_cuda/numba/cuda/api.py
index c6c8251a9..64cd50f4d 100644
--- a/numba_cuda/numba/cuda/api.py
+++ b/numba_cuda/numba/cuda/api.py
@@ -31,6 +31,7 @@ def from_cuda_array_interface(desc, owner=None, sync=True):
     If ``sync`` is ``True``, then the imported stream (if present) will be
     synchronized.
     """
+    assert False
     warnings.warn(
         "Constructing DeviceNDArray objects via the __cuda_array_interface__ "
         "is now deprecated. Please prefer cupy for constructing device arrays."
@@ -61,6 +62,11 @@ def is_cuda_array(obj):
 
     Does not verify the validity of the interface.
     """
+    warnings.warn(
+        "is_cuda_array is deprecated. Please prefer cupy for device array operations.",
+        DeprecatedDeviceArrayApiWarning,
+    )
+    return _api._is_cuda_array(obj)
     return hasattr(obj, "__cuda_array_interface__")
 
 
@@ -111,6 +117,7 @@ def to_device(obj, stream=0, copy=True, to=None):
 
         hary = d_ary.copy_to_host(stream=stream)
     """
+    assert False
     warnings.warn(
         "to_device is deprecated. Please prefer cupy for moving numpy arrays to the device.",
         DeprecatedDeviceArrayApiWarning,
@@ -131,6 +138,7 @@ def device_array(shape, dtype=np.float64, strides=None, order="C", stream=0):
 
     Allocate an empty device ndarray. Similar to :meth:`numpy.empty`.
     """
+    breakpoint()
     warnings.warn(
         "device_array is deprecated. Please prefer cupy for moving numpy arrays to the device.",
         DeprecatedDeviceArrayApiWarning,
diff --git a/numba_cuda/numba/cuda/cudadrv/devicearray.py b/numba_cuda/numba/cuda/cudadrv/devicearray.py
index 8f12643f4..6709916a3 100644
--- a/numba_cuda/numba/cuda/cudadrv/devicearray.py
+++ b/numba_cuda/numba/cuda/cudadrv/devicearray.py
@@ -643,7 +643,7 @@ def reshape(self, *newshape, **kws):
         cls = type(self)
         if newshape == self.shape:
             # nothing to do
-            return cls(
+            return cls._create_nowarn(
                 shape=self.shape,
                 strides=self.strides,
                 dtype=self.dtype,
@@ -653,7 +653,7 @@ def reshape(self, *newshape, **kws):
         newarr, extents = self._dummy.reshape(*newshape, **kws)
 
         if extents == [self._dummy.extent]:
-            return cls(
+            return cls._create_nowarn(
                 shape=newarr.shape,
                 strides=newarr.strides,
                 dtype=self.dtype,
@@ -949,9 +949,9 @@ def auto_device(obj, stream=0, copy=True, user_explicit=False):
     elif (
         interface := getattr(obj, "__cuda_array_interface__", None)
     ) is not None:
-        from numba.cuda.api import from_cuda_array_interface
+        from numba.cuda._api import _from_cuda_array_interface
 
-        return from_cuda_array_interface(interface, owner=obj), False
+        return _from_cuda_array_interface(interface, owner=obj), False
     else:
         if isinstance(obj, np.void):
             devobj = from_record_like(obj, stream=stream)
diff --git a/numba_cuda/numba/cuda/deviceufunc.py b/numba_cuda/numba/cuda/deviceufunc.py
index 55b1c7338..588547e3a 100644
--- a/numba_cuda/numba/cuda/deviceufunc.py
+++ b/numba_cuda/numba/cuda/deviceufunc.py
@@ -805,7 +805,7 @@ def _broadcast(self, schedule, params, retvals):
 
         newretvals = []
         for retval, oshape in zip(retvals, schedule.oshapes):
-            newretvals.append(retval._reshape(odim, *oshape))
+            newretvals.append(retval.reshape(odim, *oshape))
         return tuple(newparams) + tuple(newretvals)
 
     def _broadcast_array(self, ary, newdim, innerdim):
diff --git a/numba_cuda/numba/cuda/dispatcher.py b/numba_cuda/numba/cuda/dispatcher.py
index 3ae092904..214d82952 100644
--- a/numba_cuda/numba/cuda/dispatcher.py
+++ b/numba_cuda/numba/cuda/dispatcher.py
@@ -1632,7 +1632,7 @@ def typeof_pyval(self, val):
                 # stream - this is done when the kernel is launched.
 
                 return typeof(
-                    cuda.from_cuda_array_interface(interface, sync=False),
+                    cuda._api._from_cuda_array_interface(interface, sync=False),
                     Purpose.argument,
                 )
             else:
diff --git a/numba_cuda/numba/cuda/kernels/transpose.py b/numba_cuda/numba/cuda/kernels/transpose.py
index b97ec70f2..48a5a4175 100644
--- a/numba_cuda/numba/cuda/kernels/transpose.py
+++ b/numba_cuda/numba/cuda/kernels/transpose.py
@@ -6,7 +6,7 @@
 import math
 from numba.cuda.np import numpy_support as nps
 from numba.cuda.cudadrv.devicearray import DeprecatedDeviceArrayApiWarning
-
+import warnings
 
 def transpose(a, b=None):
     """Compute the transpose of 'a' and store it into 'b', if given,
@@ -22,7 +22,7 @@ def transpose(a, b=None):
     """
     warnings.warn(
         "The DeviceNDArray class and its transpose method are deprecated. "
-        "Please prefer cupy for device array operations."
+        "Please prefer cupy for device array operations.", DeprecatedDeviceArrayApiWarning
     )
     return _transpose(a, b=b)
 
@@ -33,7 +33,7 @@ def _transpose(a, b=None):
     if not b:
         cols, rows = a.shape
         strides = a.dtype.itemsize * cols, a.dtype.itemsize
-        b = cuda.cudadrv.devicearray._DeviceNDArray(
+        b = cuda.cudadrv.devicearray.DeviceNDArray._create_nowarn(
             (rows, cols), strides, dtype=a.dtype, stream=stream
         )
 
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py b/numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py
index 60e0ff460..9d1b921d0 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py
@@ -228,7 +228,7 @@ def test_masked_array(self):
 
     def test_zero_size_array(self):
         # for #4175
-        c_arr = cuda.device_array(0)
+        c_arr = cp.asarray(0)
         self.assertEqual(c_arr.__cuda_array_interface__["data"][0], 0)
 
         @cuda.jit
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py b/numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py
index 909adbaf7..9f85c3290 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py
@@ -461,7 +461,7 @@ def a_union_use_case(arg, results):
         with captured_stdout() as out:
             results = cp.zeros(16, dtype=np.int64)
             a_union_use_case[1, 1](100, results)
-            print(results.copy_to_host())
+            print(results.get())
         expected = "[1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]"
         self.assertIn(expected, out.getvalue())
 
@@ -685,7 +685,7 @@ def foo(x, y):
         with override_config("DEBUGINFO_DEFAULT", 1):
             result = cp.ones(1, dtype=np.float32)
             foo[1, 1](result, np.pi)
-            result.copy_to_host()
+            result = result.get()
 
         result_host = math.sin(np.pi) + math.cos(np.pi)
         self.assertPreciseEqual(result[0], result_host)
diff --git a/numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py b/numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py
index 2d85c1241..4626cc76f 100644
--- a/numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py
+++ b/numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py
@@ -47,10 +47,10 @@ def add_one(x, n):
         # ex_cpointer.kernel.end
 
         # ex_cpointer.launch.begin
-        x = cuda.to_device(np.arange(10, dtype=np.uint8))
+        x = cp.arange(10, dtype=np.uint8)
 
         # Print initial values of x
-        print(x.copy_to_host())  # [0 1 2 3 4 5 6 7 8 9]
+        print(x.get())  # [0 1 2 3 4 5 6 7 8 9]
 
         # Obtain a pointer to the data from from the CUDA Array Interface
         x_ptr = x.__cuda_array_interface__["data"][0]
@@ -60,7 +60,7 @@ def add_one(x, n):
         add_one[1, 32](x_ptr, x_len)
 
         # Demonstrate that the data was updated by the kernel
-        print(x.copy_to_host())  # [ 1  2  3  4  5  6  7  8  9 10]
+        print(x.get())  # [ 1  2  3  4  5  6  7  8  9 10]
         # ex_cpointer.launch.end
 
 
diff --git a/numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py b/numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py
index 6f8e55aaa..1dc13d660 100644
--- a/numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py
+++ b/numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py
@@ -51,7 +51,7 @@ def f(a, b, c):
         N = 100000
         a = cp.random.random(N)
         b = cp.random.random(N)
-        c = cuda.device_array_like(a)
+        c = cp.asarray(a)
         # ex_vecadd.allocate.end
 
         # ex_vecadd.forall.begin
diff --git a/numba_cuda/numba/cuda/vectorizers.py b/numba_cuda/numba/cuda/vectorizers.py
index 81c20c84c..851f08387 100644
--- a/numba_cuda/numba/cuda/vectorizers.py
+++ b/numba_cuda/numba/cuda/vectorizers.py
@@ -108,7 +108,7 @@ def as_device_array(self, obj):
         # Producer then importing it as a Consumer, which causes a
         # synchronization on the array's stream (if it has one) by default.
         # When we have a Numba device array, we can simply return it.
-        if _api.is_cuda_ndarray(obj):
+        if _api._is_cuda_array(obj):
             return obj
         return _api._as_cuda_array(obj)
 
@@ -120,10 +120,7 @@ def to_host(self, devary, hostary):
         return out
 
     def allocate_device_array(self, shape, dtype):
-        shape, strides, dtype = prepare_shape_strides_dtype(
-            shape, strides, dtype, "C"
-        )
-        return DeviceNDArray._legacy_ctor(shape, strides, dtype, stream=self._stream)
+        return cuda._api._device_array(shape=shape, dtype=dtype, stream=self._stream)
 
     def launch_kernel(self, kernel, nelem, args):
         kernel.forall(nelem, stream=self._stream)(*args)

From e06ce499dc4e0caaf756d6ee7f1a53c296993b37 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Fri, 5 Dec 2025 11:32:20 -0800
Subject: [PATCH 10/60] more progress: drop debug stops in api.py, port more tests to cupy

---
 numba_cuda/numba/cuda/api.py                  |  2 -
 numba_cuda/numba/cuda/tests/cudapy/test_sm.py |  4 +-
 .../cuda/tests/doc_examples/test_random.py    |  1 +
 .../tests/doc_examples/test_sessionize.py     | 70 ++++++++++---------
 numba_cuda/numba/cuda/vectorizers.py          | 10 +--
 5 files changed, 44 insertions(+), 43 deletions(-)

diff --git a/numba_cuda/numba/cuda/api.py b/numba_cuda/numba/cuda/api.py
index 64cd50f4d..78ae5f3f4 100644
--- a/numba_cuda/numba/cuda/api.py
+++ b/numba_cuda/numba/cuda/api.py
@@ -117,7 +117,6 @@ def to_device(obj, stream=0, copy=True, to=None):
 
         hary = d_ary.copy_to_host(stream=stream)
     """
-    assert False
     warnings.warn(
         "to_device is deprecated. Please prefer cupy for moving numpy arrays to the device.",
         DeprecatedDeviceArrayApiWarning,
@@ -138,7 +137,6 @@ def device_array(shape, dtype=np.float64, strides=None, order="C", stream=0):
 
     Allocate an empty device ndarray. Similar to :meth:`numpy.empty`.
     """
-    breakpoint()
     warnings.warn(
         "device_array is deprecated. Please prefer cupy for moving numpy arrays to the device.",
         DeprecatedDeviceArrayApiWarning,
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_sm.py b/numba_cuda/numba/cuda/tests/cudapy/test_sm.py
index 0cf4aa7f0..6922757b3 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_sm.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_sm.py
@@ -394,9 +394,9 @@ def sm_slice_copy(x, y, chunksize):
                     y[bd * bx + j] = sm1[j]
                     y[bd * bx + j + chunksize] = sm2[j]
 
-        d_result = cuda.device_array_like(arr)
+        d_result = cp.asarray(arr)
         sm_slice_copy[nblocks, nthreads, 0, nshared](arr, d_result, chunksize)
-        host_result = d_result.copy_to_host()
+        host_result = d_result.get()
         np.testing.assert_array_equal(arr, host_result)
 
     @skip_on_cudasim("Can't check typing in simulator")
diff --git a/numba_cuda/numba/cuda/tests/doc_examples/test_random.py b/numba_cuda/numba/cuda/tests/doc_examples/test_random.py
index 6edf52c1d..8c2ce352b 100644
--- a/numba_cuda/numba/cuda/tests/doc_examples/test_random.py
+++ b/numba_cuda/numba/cuda/tests/doc_examples/test_random.py
@@ -6,6 +6,7 @@
 
 import unittest
 from numba.cuda.testing import CUDATestCase, skip_on_cudasim
+import cupy as cp
 
 
 @skip_on_cudasim("cudasim doesn't support cuda import at non-top-level")
diff --git a/numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py b/numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py
index 5c8321088..61e211f32 100644
--- a/numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py
+++ b/numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py
@@ -43,39 +43,40 @@ def test_ex_sessionize(self):
 
         # ex_sessionize.allocate.begin
         # Generate data
-        cp.array(
-                [
-                    1,
-                    1,
-                    1,
-                    1,
-                    1,
-                    1,
-                    2,
-                    2,
-                    2,
-                    3,
-                    3,
-                    3,
-                    3,
-                    3,
-                    3,
-                    3,
-                    3,
-                    3,
-                    3,
-                    4,
-                    4,
-                    4,
-                    4,
-                    4,
-                    4,
-                    4,
-                    4,
-                    4,
-                ]
-            )
-        sec = cp.array(
+        ids = cp.array(
+            [
+                1,
+                1,
+                1,
+                1,
+                1,
+                1,
+                2,
+                2,
+                2,
+                3,
+                3,
+                3,
+                3,
+                3,
+                3,
+                3,
+                3,
+                3,
+                3,
+                4,
+                4,
+                4,
+                4,
+                4,
+                4,
+                4,
+                4,
+                4,
+            ]
+        )
+        sec = cp.asarray(
+            np.array(
                 [
                     1,
                     2,
@@ -107,7 +108,8 @@ def test_ex_sessionize(self):
                     25003,
                 ],
                 dtype="datetime64[ns]",
-            ).astype("int64")  # Cast to int64 for compatibility
+            ).astype("int64")
+        )
 
         # Create a vector to hold the results
         results = cp.zeros(len(ids))
diff --git a/numba_cuda/numba/cuda/vectorizers.py b/numba_cuda/numba/cuda/vectorizers.py
index 851f08387..2151cb320 100644
--- a/numba_cuda/numba/cuda/vectorizers.py
+++ b/numba_cuda/numba/cuda/vectorizers.py
@@ -10,8 +10,6 @@
     GUFuncCallSteps,
 )
 from numba.cuda import _api
-from numba.cuda.cudadrv.devicearray import DeviceNDArray
-from numba.cuda.api_util import prepare_shape_strides_dtype
 
 
 class CUDAUFuncDispatcher(object):
@@ -120,7 +118,9 @@ def to_host(self, devary, hostary):
         return out
 
     def allocate_device_array(self, shape, dtype):
-        return cuda._api._device_array(shape=shape, dtype=dtype, stream=self._stream)
+        return cuda._api._device_array(
+            shape=shape, dtype=dtype, stream=self._stream
+        )
 
     def launch_kernel(self, kernel, nelem, args):
         kernel.forall(nelem, stream=self._stream)(*args)
@@ -163,7 +163,7 @@ def launch(self, func, count, stream, args):
         func.forall(count, stream=stream)(*args)
 
     def is_device_array(self, obj):
-        return cuda.is_cuda_array(obj)
+        return cuda._api._is_cuda_array(obj)
 
     def as_device_array(self, obj):
         # We don't want to call as_cuda_array on objects that are already Numba
@@ -183,7 +183,7 @@ def to_host(self, devary, stream):
 
     def allocate_device_array(self, shape, dtype, stream):
         # want to return a deprecated DeviceNDArray without warning
-        # 
+        #
         return _api._device_array(shape=shape, dtype=dtype, stream=stream)
 
     def broadcast_device(self, ary, shape):

From ef6860ac64f1de4e494b7283b68d0338e117fb2b Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Mon, 8 Dec 2025 07:22:30 -0800
Subject: [PATCH 11/60] fix a few more tests

---
 .../numba/cuda/tests/cudapy/test_dispatcher.py     |  4 ++--
 numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py  | 14 +++++++++-----
 numba_cuda/numba/cuda/tests/cudapy/test_ipc.py     | 10 +++++++++-
 .../numba/cuda/tests/cudapy/test_multigpu.py       |  5 +++--
 .../numba/cuda/tests/cudapy/test_multithreads.py   |  5 +++--
 .../numba/cuda/tests/nocuda/test_dummyarray.py     |  7 ++++---
 6 files changed, 30 insertions(+), 15 deletions(-)

diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py b/numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py
index 9be03425a..814719ed6 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py
@@ -503,7 +503,7 @@ def axpy(r, a, x, y, n):
         hy = np.arange(10, dtype=np.int32) * 2
         dx = cp.array(hx)
         dy = cp.array(hy)
-        dr = cuda.device_array_like(dx)
+        dr = cp.asarray(dx)
 
         r_ptr = dr.__cuda_array_interface__["data"][0]
         x_ptr = dx.__cuda_array_interface__["data"][0]
@@ -512,7 +512,7 @@ def axpy(r, a, x, y, n):
         axpy[1, 32](r_ptr, a, x_ptr, y_ptr, N)
 
         expected = a * hx + hy
-        actual = dr.copy_to_host()
+        actual = dr.get()
         np.testing.assert_equal(expected, actual)
 
 
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py b/numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py
index 237542330..099d9115a 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py
@@ -8,11 +8,13 @@
 from numba.cuda import void, int32, float32, float64
 from numba.cuda import guvectorize
 from numba import cuda
-from numba.cuda.testing import skip_on_cudasim, CUDATestCase
+from numba.cuda.testing import skip_on_cudasim, CUDATestCase, DeprecatedDeviceArrayApiWarning
 import unittest
 from numba.cuda.core.errors import NumbaPerformanceWarning, TypingError
 from numba.cuda.tests.support import override_config
 import cupy as cp
+import warnings
+
 
 
 def _get_matmulcore_gufunc(dtype=float32):
@@ -61,7 +63,7 @@ def test_gufunc_auto_transfer(self):
             matrix_ct, 4, 5
         )
 
-        dB = cuda.to_device(B)
+        dB = cp.asarray(B)
 
         C = gufunc(A, dB).copy_to_host()
         Gold = np.matmul(A, B)
@@ -120,10 +122,12 @@ def test_gufunc_stream(self):
         )
 
         stream = cuda.stream()
-        dA = cuda.to_device(A, stream)
-        dB = cuda.to_device(B, stream)
+        with warnings.catch_warnings():
+            warnings.simplefilter('ignore', DeprecatedDeviceArrayApiWarning)
+            dA = cuda.to_device(A, stream)
+            dB = cuda.to_device(B, stream)
 
-        dC = cuda.device_array(shape=(1001, 2, 5), dtype=A.dtype, stream=stream)
+            dC = cuda.device_array(shape=(1001, 2, 5), dtype=A.dtype, stream=stream)
         dC = gufunc(dA, dB, out=dC, stream=stream)
         C = dC.copy_to_host(stream=stream)
         stream.synchronize()
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_ipc.py b/numba_cuda/numba/cuda/tests/cudapy/test_ipc.py
index bff2b84dd..249ec3c6d 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_ipc.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_ipc.py
@@ -16,6 +16,7 @@
     skip_on_cudasim,
     skip_under_cuda_memcheck,
     skip_on_wsl2,
+    CUDATestCase,
     DeprecatedDeviceArrayApiTest,
     ForeignArray,
 )
@@ -64,11 +65,18 @@ def ipc_array_test(ipcarr, parent_pid):
     return arr
 
 
+def _suppress_deprecated_warnings():
+    import warnings
+    # Pool initializer: spawned workers start without the parent's warning filters
+    from numba.cuda import DeprecatedDeviceArrayApiWarning
+    warnings.filterwarnings("ignore", category=DeprecatedDeviceArrayApiWarning)
+
 class CUDAIpcTestCase(DeprecatedDeviceArrayApiTest):
     @classmethod
     def setUpClass(cls) -> None:
         cls.exe = concurrent.futures.ProcessPoolExecutor(
-            mp_context=mp.get_context("spawn")
+            mp_context=mp.get_context("spawn"),
+            initializer=_suppress_deprecated_warnings
         )
 
     @classmethod
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py b/numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py
index 10df01fc0..ec15e5ef6 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py
@@ -6,6 +6,7 @@
 from numba.cuda.testing import skip_on_cudasim, CUDATestCase
 import threading
 import unittest
+import cupy as cp
 
 
 class TestMultiGPUContext(CUDATestCase):
@@ -55,7 +56,7 @@ def test_multithreaded(self):
         def work(gpu, dA, results, ridx):
             try:
                 with gpu:
-                    arr = dA.copy_to_host()
+                    arr = dA.get()
 
             except Exception as e:
                 results[ridx] = e
@@ -63,7 +64,7 @@ def work(gpu, dA, results, ridx):
             else:
                 results[ridx] = np.all(arr == np.arange(10))
 
-        dA = cuda.to_device(np.arange(10))
+        dA = cp.asarray(np.arange(10))
 
         nthreads = 10
         results = [None] * nthreads
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py b/numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py
index d432d2939..6e491a137 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py
@@ -11,6 +11,7 @@
     CUDATestCase,
 )
 import unittest
+import cupy as cp
 
 from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor
 
@@ -27,12 +28,12 @@ def use_foo(x):
         foo[1, 1](x)
         return x
 
-    arrays = [cuda.to_device(np.arange(10)) for i in range(10)]
+    arrays = [cp.arange(10) for i in range(10)]
     expected = np.arange(10)
     expected[0] += 1
     with ThreadPoolExecutor(max_workers=4) as e:
         for ary in e.map(use_foo, arrays):
-            np.testing.assert_equal(ary, expected)
+            np.testing.assert_equal(ary.get(), expected)
 
 
 @skip_under_cuda_memcheck("Hangs cuda-memcheck")
diff --git a/numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py b/numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py
index 5a27f2b87..bd6af0f93 100644
--- a/numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py
+++ b/numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py
@@ -5,7 +5,8 @@
 import itertools
 import numpy as np
 from numba.cuda.cudadrv.dummyarray import Array
-from numba.cuda.testing import skip_on_cudasim
+from numba.cuda.testing import skip_on_cudasim, DeprecatedDeviceArrayApiTest
+import cupy as cp
 
 
 @skip_on_cudasim("Tests internals of the CUDA driver device array")
@@ -420,9 +421,9 @@ def test_empty_array_flags(self):
                 self.assertTrue(arr.flags["C_CONTIGUOUS"])
                 self.assertTrue(arr.flags["F_CONTIGUOUS"])
 
-
+# Typing of DeviceNDArray is deprecated
 @skip_on_cudasim("Tests CUDA device array type inference")
-class TestEmptyArrayTypeInference(unittest.TestCase):
+class TestEmptyArrayTypeInference(DeprecatedDeviceArrayApiTest):
     def test_empty_array_typeof(self):
         from numba import cuda, typeof
 

From 238052b31b8bdac00ec81d2999e989f0bfb29762 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Tue, 9 Dec 2025 07:50:36 -0800
Subject: [PATCH 12/60] even more tests

---
 numba_cuda/numba/cuda/cudadrv/devicearray.py   |  3 +++
 .../tests/cudapy/test_cuda_array_interface.py  |  6 +++---
 .../cuda/tests/cudapy/test_gufunc_scalar.py    | 18 ++++++++++++------
 .../cuda/tests/cudapy/test_multithreads.py     |  3 ++-
 .../numba/cuda/tests/cudapy/test_vectorize.py  | 17 ++++++++++++-----
 numba_cuda/numba/cuda/vectorizers.py           | 14 ++++++++++----
 6 files changed, 42 insertions(+), 19 deletions(-)

diff --git a/numba_cuda/numba/cuda/cudadrv/devicearray.py b/numba_cuda/numba/cuda/cudadrv/devicearray.py
index 6709916a3..0381baa05 100644
--- a/numba_cuda/numba/cuda/cudadrv/devicearray.py
+++ b/numba_cuda/numba/cuda/cudadrv/devicearray.py
@@ -312,6 +312,9 @@ def split(self, section, stream=0):
         If the array cannot be equally divided, the last section will be
         smaller.
         """
+        return self._split(section, stream)
+
+    def _split(self, section, stream=0):
         stream = self._default_stream(stream)
         if self.ndim != 1:
             raise ValueError("only support 1d array")
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py b/numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py
index 9d1b921d0..6c1eecfde 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py
@@ -5,7 +5,7 @@
 
 from numba.cuda import vectorize, guvectorize
 from numba import cuda
-from numba.cuda.testing import unittest, CUDATestCase, ForeignArray
+from numba.cuda.testing import unittest, CUDATestCase, ForeignArray, DeprecatedDeviceArrayApiTest
 from numba.cuda.testing import skip_on_cudasim, skip_if_external_memmgr
 from numba.cuda.tests.support import linux_only, override_config
 from unittest.mock import call, patch
@@ -13,7 +13,7 @@
 
 
 @skip_on_cudasim("CUDA Array Interface is not supported in the simulator")
-class TestCudaArrayInterface(CUDATestCase):
+class TestCudaArrayInterface(DeprecatedDeviceArrayApiTest):
     def assertPointersEqual(self, a, b):
         self.assertEqual(
             a.device_ctypes_pointer.value, b.device_ctypes_pointer.value
@@ -228,7 +228,7 @@ def test_masked_array(self):
 
     def test_zero_size_array(self):
         # for #4175
-        c_arr = cp.asarray(0)
+        c_arr = cuda.device_array(0)
         self.assertEqual(c_arr.__cuda_array_interface__["data"][0], 0)
 
         @cuda.jit
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py b/numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py
index 64769f585..1fb2d7427 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py
@@ -9,9 +9,10 @@
 
 import numpy as np
 from numba import cuda, guvectorize
-from numba.cuda.testing import skip_on_cudasim, CUDATestCase
+from numba.cuda.testing import skip_on_cudasim, CUDATestCase, DeprecatedDeviceArrayApiWarning
 import unittest
-
+import cupy as cp
+import warnings
 
 @skip_on_cudasim("ufunc API unsupported in the simulator")
 class TestGUFuncScalar(CUDATestCase):
@@ -42,13 +43,13 @@ def sum_row(inp, out):
         out1 = np.empty(100, dtype=inp.dtype)
         out2 = np.empty(100, dtype=inp.dtype)
 
-        dev_inp = cuda.to_device(inp)  # alloc and copy input data
-        dev_out1 = cuda.to_device(out1, copy=False)  # alloc only
+        dev_inp = cp.asarray(inp)  # alloc and copy input data
+        dev_out1 = cp.empty(out1.shape, dtype=out1.dtype)  # alloc only
 
         sum_row(dev_inp, out=dev_out1)  # invoke the gufunc
         dev_out2 = sum_row(dev_inp)  # invoke the gufunc
 
-        dev_out1.copy_to_host(out1)  # retrieve the result
+        out1 = dev_out1.get()  # retrieve the result
         dev_out2.copy_to_host(out2)  # retrieve the result
 
         # verify result
@@ -119,7 +120,12 @@ def foo(a, b, out):
 
         # test error
         a = np.array(a)
-        da = cuda.to_device(a)
+
+        # As this test specifically tests the behavior of passing a DeviceNDArray,
+        # we suppress the expected deprecation warning explicitly here.
+        with warnings.catch_warnings():
+            warnings.simplefilter('ignore', DeprecatedDeviceArrayApiWarning)
+            da = cuda.to_device(a)
         self.assertEqual(da.dtype, np.int64)
         with self.assertRaises(TypeError) as raises:
             foo(da, b)
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py b/numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py
index 6e491a137..13254c1f3 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py
@@ -9,6 +9,7 @@
     skip_on_cudasim,
     skip_under_cuda_memcheck,
     CUDATestCase,
+    DeprecatedDeviceArrayApiTest,
 )
 import unittest
 import cupy as cp
@@ -38,7 +39,7 @@ def use_foo(x):
 
 @skip_under_cuda_memcheck("Hangs cuda-memcheck")
 @skip_on_cudasim("disabled for cudasim")
-class TestMultiThreadCompiling(CUDATestCase):
+class TestMultiThreadCompiling(DeprecatedDeviceArrayApiTest):
     def test_concurrent_compiling(self):
         check_concurrent_compiling()
 
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py b/numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py
index 3a41b1234..3ea864843 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py
@@ -13,6 +13,7 @@
 from numba.cuda.testing import skip_on_cudasim
 from numba.cuda.testing import CUDATestCase
 import unittest
+import cupy as cp
 
 
 # Signatures to test with - these are all homogeneous in dtype, so the output
@@ -89,9 +90,13 @@ def vector_add(a, b):
 
             for ty in dtypes:
                 data = np.array(np.random.random(self.N), dtype=ty)
-                device_data = cuda.to_device(data, stream)
 
-                dresult = vector_add(device_data, device_data, stream=stream)
+                stream = cp.cuda.Stream()
+                nb_stream = cuda.api.external_stream(stream.ptr)
+                with stream:
+                    device_data = cp.asarray(data)
+
+                dresult = vector_add(device_data, device_data, stream=nb_stream)
                 actual = dresult.copy_to_host()
 
                 expected = np.add(data, data)
@@ -160,14 +165,16 @@ def test_reduce_async(self):
             def vector_add(a, b):
                 return a + b
 
-            stream = cuda.stream()
+            stream = cp.cuda.Stream()
+            nb_stream = cuda.api.external_stream(stream.ptr)
             dtype = np.int32
 
             for n in input_sizes:
                 x = np.arange(n, dtype=dtype)
                 expected = np.add.reduce(x)
-                dx = cuda.to_device(x, stream)
-                actual = vector_add.reduce(dx, stream=stream)
+                with stream:
+                    dx = cp.asarray(x)
+                actual = vector_add.reduce(dx, stream=nb_stream)
                 np.testing.assert_allclose(expected, actual)
                 # Compare against the input dtype as in test_reduce().
                 self.assertEqual(dtype, actual.dtype)
diff --git a/numba_cuda/numba/cuda/vectorizers.py b/numba_cuda/numba/cuda/vectorizers.py
index 2151cb320..edbee9326 100644
--- a/numba_cuda/numba/cuda/vectorizers.py
+++ b/numba_cuda/numba/cuda/vectorizers.py
@@ -10,6 +10,8 @@
     GUFuncCallSteps,
 )
 from numba.cuda import _api
+import warnings
+from numba.cuda.cudadrv.devicearray import DeprecatedDeviceArrayApiWarning
 
 
 class CUDAUFuncDispatcher(object):
@@ -55,7 +57,7 @@ def reduce(self, arg, stream=0):
             if cuda.cudadrv.devicearray.is_cuda_ndarray(arg):
                 mem = arg
             else:
-                mem = cuda.to_device(arg, stream)
+                mem = cuda._api._to_device(arg, stream)
                 # do reduction
             out = self.__reduce(mem, gpu_mems, stream)
             # use a small buffer to store the result element
@@ -67,7 +69,9 @@ def reduce(self, arg, stream=0):
     def __reduce(self, mem, gpu_mems, stream):
         n = mem.shape[0]
         if n % 2 != 0:  # odd?
-            fatcut, thincut = mem.split(n - 1)
+            with warnings.catch_warnings():
+                warnings.filterwarnings("ignore", category=DeprecatedDeviceArrayApiWarning)
+                fatcut, thincut = mem.split(n - 1)
             # prevent freeing during async mode
             gpu_mems.append(fatcut)
             gpu_mems.append(thincut)
@@ -76,7 +80,9 @@ def __reduce(self, mem, gpu_mems, stream):
             gpu_mems.append(out)
             return self(out, thincut, out=out, stream=stream)
         else:  # even?
-            left, right = mem.split(n // 2)
+            with warnings.catch_warnings():
+                warnings.filterwarnings("ignore", category=DeprecatedDeviceArrayApiWarning)
+                left, right = mem.split(n // 2)
             # prevent freeing during async mode
             gpu_mems.append(left)
             gpu_mems.append(right)
@@ -173,7 +179,7 @@ def as_device_array(self, obj):
         # When we have a Numba device array, we can simply return it.
         if cuda.cudadrv.devicearray.is_cuda_ndarray(obj):
             return obj
-        return cuda.as_cuda_array(obj)
+        return _api._as_cuda_array(obj)
 
     def to_device(self, hostary, stream):
         return _api._to_device(hostary, stream=stream)

From 1c6977601964e6758d83a13e637a67acefdc345f Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Wed, 10 Dec 2025 08:41:31 -0800
Subject: [PATCH 13/60] fix blackscholes test

---
 numba_cuda/numba/cuda/deviceufunc.py          |  1 +
 .../cuda/tests/cudapy/test_blackscholes.py    | 25 +++++++++++--------
 2 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/numba_cuda/numba/cuda/deviceufunc.py b/numba_cuda/numba/cuda/deviceufunc.py
index 588547e3a..f8e9ad328 100644
--- a/numba_cuda/numba/cuda/deviceufunc.py
+++ b/numba_cuda/numba/cuda/deviceufunc.py
@@ -751,6 +751,7 @@ def __call__(self, *args, **kws):
         return callsteps.post_process_outputs(outputs)
 
     def _schedule(self, inputs, outs):
+        breakpoint()
         input_shapes = [a.shape for a in inputs]
         schedule = self.engine.schedule(input_shapes)
 
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py b/numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py
index 61388c469..7ad2a873d 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py
@@ -6,7 +6,6 @@
 from numba import cuda
 from numba.cuda import double, void
 from numba.cuda.testing import unittest, CUDATestCase
-import cupy as cp
 
 
 RISKFREE = 0.02
@@ -128,15 +127,18 @@ def black_scholes_cuda(callResult, putResult, S, X, T, R, V):
         # numba
         blockdim = 512, 1
         griddim = int(math.ceil(float(OPT_N) / blockdim[0])), 1
-        stream = cuda.stream()
-        d_callResult = cp.asarray(callResultNumba, stream)
-        d_putResult = cp.asarray(putResultNumba, stream)
-        d_stockPrice = cp.asarray(stockPrice, stream)
-        d_optionStrike = cp.asarray(optionStrike, stream)
-        d_optionYears = cp.asarray(optionYears, stream)
+        stream = cp.cuda.Stream()
+        nb_stream = cuda.api.external_stream(stream.ptr)
+
+        with stream:
+            d_callResult = cp.asarray(callResultNumba)
+            d_putResult = cp.asarray(putResultNumba)
+            d_stockPrice = cp.asarray(stockPrice)
+            d_optionStrike = cp.asarray(optionStrike)
+            d_optionYears = cp.asarray(optionYears)
 
         for i in range(iterations):
-            black_scholes_cuda[griddim, blockdim, stream](
+            black_scholes_cuda[griddim, blockdim, nb_stream](
                 d_callResult,
                 d_putResult,
                 d_stockPrice,
@@ -145,8 +147,11 @@ def black_scholes_cuda(callResult, putResult, S, X, T, R, V):
                 RISKFREE,
                 VOLATILITY,
             )
-        d_callResult.get(callResultNumba, stream)
-        d_putResult.get(putResultNumba, stream)
+
+        with stream:
+            callResultNumba = d_callResult.get()
+            putResultNumba = d_putResult.get()
+
         stream.synchronize()
 
         delta = np.abs(callResultNumpy - callResultNumba)

From ec67eb5430decfd881e45e816daf65a3d8ff9931 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Wed, 10 Dec 2025 08:50:06 -0800
Subject: [PATCH 14/60] fix test_gufunc_arg

---
 numba_cuda/numba/cuda/deviceufunc.py |  1 -
 numba_cuda/numba/cuda/vectorizers.py | 12 ++++++++----
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/numba_cuda/numba/cuda/deviceufunc.py b/numba_cuda/numba/cuda/deviceufunc.py
index f8e9ad328..588547e3a 100644
--- a/numba_cuda/numba/cuda/deviceufunc.py
+++ b/numba_cuda/numba/cuda/deviceufunc.py
@@ -751,7 +751,6 @@ def __call__(self, *args, **kws):
         return callsteps.post_process_outputs(outputs)
 
     def _schedule(self, inputs, outs):
-        breakpoint()
         input_shapes = [a.shape for a in inputs]
         schedule = self.engine.schedule(input_shapes)
 
diff --git a/numba_cuda/numba/cuda/vectorizers.py b/numba_cuda/numba/cuda/vectorizers.py
index edbee9326..9766ead9e 100644
--- a/numba_cuda/numba/cuda/vectorizers.py
+++ b/numba_cuda/numba/cuda/vectorizers.py
@@ -70,7 +70,9 @@ def __reduce(self, mem, gpu_mems, stream):
         n = mem.shape[0]
         if n % 2 != 0:  # odd?
             with warnings.catch_warnings():
-                warnings.filterwarnings("ignore", category=DeprecatedDeviceArrayApiWarning)
+                warnings.filterwarnings(
+                    "ignore", category=DeprecatedDeviceArrayApiWarning
+                )
                 fatcut, thincut = mem.split(n - 1)
             # prevent freeing during async mode
             gpu_mems.append(fatcut)
@@ -81,7 +83,9 @@ def __reduce(self, mem, gpu_mems, stream):
             return self(out, thincut, out=out, stream=stream)
         else:  # even?
             with warnings.catch_warnings():
-                warnings.filterwarnings("ignore", category=DeprecatedDeviceArrayApiWarning)
+                warnings.filterwarnings(
+                    "ignore", category=DeprecatedDeviceArrayApiWarning
+                )
                 left, right = mem.split(n // 2)
             # prevent freeing during async mode
             gpu_mems.append(left)
@@ -112,9 +116,9 @@ def as_device_array(self, obj):
         # Producer then importing it as a Consumer, which causes a
         # synchronization on the array's stream (if it has one) by default.
         # When we have a Numba device array, we can simply return it.
-        if _api._is_cuda_array(obj):
+        if cuda.cudadrv.devicearray.is_cuda_ndarray(obj):
             return obj
-        return _api._as_cuda_array(obj)
+        return cuda.as_cuda_array(obj)
 
     def to_device(self, hostary):
         return _api._to_device(hostary, stream=self._stream)

From 40a89a8d319aa395b7e5a7edb319a839a7d6503c Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Wed, 10 Dec 2025 10:37:47 -0800
Subject: [PATCH 15/60] tests

---
 numba_cuda/numba/cuda/api.py                  |  3 +--
 numba_cuda/numba/cuda/kernels/reduction.py    |  4 +++-
 .../tests/cudapy/test_cuda_jit_no_types.py    |  7 ++++---
 .../numba/cuda/tests/cudapy/test_datetime.py  | 19 ++++++-------------
 .../cuda/tests/doc_examples/test_cpointer.py  |  1 +
 .../cuda/tests/doc_examples/test_matmul.py    |  2 +-
 6 files changed, 16 insertions(+), 20 deletions(-)

diff --git a/numba_cuda/numba/cuda/api.py b/numba_cuda/numba/cuda/api.py
index 78ae5f3f4..d9966890f 100644
--- a/numba_cuda/numba/cuda/api.py
+++ b/numba_cuda/numba/cuda/api.py
@@ -31,7 +31,6 @@ def from_cuda_array_interface(desc, owner=None, sync=True):
     If ``sync`` is ``True``, then the imported stream (if present) will be
     synchronized.
     """
-    assert False
     warnings.warn(
         "Constructing DeviceNDArray objects via the __cuda_array_interface__ "
         "is now deprecated. Please prefer cupy for constructing device arrays."
@@ -453,7 +452,7 @@ def mapped(*arylist, **kws):
             mapped=True,
         )
         pmlist.append(pm)
-        devary = devicearray.from_array_like(ary, gpu_data=pm, stream=stream)
+        devary = devicearray._from_array_like(ary, gpu_data=pm, stream=stream)
         devarylist.append(devary)
     try:
         if len(devarylist) == 1:
diff --git a/numba_cuda/numba/cuda/kernels/reduction.py b/numba_cuda/numba/cuda/kernels/reduction.py
index 0cb609585..c06c8063d 100644
--- a/numba_cuda/numba/cuda/kernels/reduction.py
+++ b/numba_cuda/numba/cuda/kernels/reduction.py
@@ -259,7 +259,9 @@ def __call__(self, arr, size=None, res=None, init=0, stream=0):
 
         # handle return value
         if res is not None:
-            res[:1].copy_to_device(partials[:1], stream=stream)
+            cuda._api._from_cuda_array_interface(res.__cuda_array_interface__)[
+                :1
+            ].copy_to_device(partials[:1], stream=stream)
             return
         else:
             return partials[0]
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py b/numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py
index 32fea035b..20adf1bae 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py
@@ -71,13 +71,14 @@ def outer(argin, argout):
         a = np.zeros(1)
         b = np.zeros(1)
 
-        stream = cp.cuda.stream()
+        stream = cp.cuda.Stream()
+        nb_stream = cuda.api.external_stream(stream.ptr)
         with stream:
             d_a = cp.asarray(a)
             d_b = cp.asarray(b)
 
-            outer[1, 1, stream](d_a, d_b)
-        
+            outer[1, 1, nb_stream](d_a, d_b)
+
             b = d_b.get()
 
         self.assertEqual(b[0], (a[0] + 1) + (2 + 1))
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_datetime.py b/numba_cuda/numba/cuda/tests/cudapy/test_datetime.py
index 3a5e16b1a..f2301b3d3 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_datetime.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_datetime.py
@@ -5,12 +5,11 @@
 
 from numba import cuda, vectorize, guvectorize
 from numba.cuda.np.numpy_support import from_dtype
-from numba.cuda.testing import CUDATestCase, skip_on_cudasim
+from numba.cuda.testing import skip_on_cudasim, DeprecatedDeviceArrayApiTest
 import unittest
-import cupy as cp
 
 
-class TestCudaDateTime(CUDATestCase):
+class TestCudaDateTime(DeprecatedDeviceArrayApiTest):
     def test_basic_datetime_kernel(self):
         @cuda.jit
         def foo(start, end, delta):
@@ -81,26 +80,20 @@ def timediff(start, end, out):
     @skip_on_cudasim("no .copy_to_host() in the simulator")
     def test_datetime_view_as_int64(self):
         arr = np.arange("2005-02", "2006-02", dtype="datetime64[D]")
-        darr = cp.asarray(arr)
+        darr = cuda.to_device(arr)
         viewed = darr.view(np.int64)
         self.assertPreciseEqual(arr.view(np.int64), viewed.copy_to_host())
-        self.assertEqual(
-            viewed.__cuda_array_interface__["descr"][0],
-            darr.__cuda_array_interface__["descr"][0],
-        )
+        self.assertEqual(viewed.gpu_data, darr.gpu_data)
 
     @skip_on_cudasim("no .copy_to_host() in the simulator")
     def test_timedelta_view_as_int64(self):
         arr = np.arange("2005-02", "2006-02", dtype="datetime64[D]")
         arr = arr - (arr - 1)
         self.assertEqual(arr.dtype, np.dtype("timedelta64[D]"))
-        darr = cp.asarray(arr)
+        darr = cuda.to_device(arr)
         viewed = darr.view(np.int64)
         self.assertPreciseEqual(arr.view(np.int64), viewed.copy_to_host())
-        self.assertEqual(
-            viewed.__cuda_array_interface__["descr"][0],
-            darr.__cuda_array_interface__["descr"][0],
-        )
+        self.assertEqual(viewed.gpu_data, darr.gpu_data)
 
 
 if __name__ == "__main__":
diff --git a/numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py b/numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py
index 4626cc76f..565e93b09 100644
--- a/numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py
+++ b/numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py
@@ -5,6 +5,7 @@
 
 from numba.cuda.testing import CUDATestCase, skip_on_cudasim
 from numba.cuda.tests.support import captured_stdout
+import cupy as cp
 
 
 @skip_on_cudasim("cudasim doesn't support cuda import at non-top-level")
diff --git a/numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py b/numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py
index 1ab6047c8..ee7450be7 100644
--- a/numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py
+++ b/numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py
@@ -141,7 +141,7 @@ def fast_matmul(A, B, C):
         blockspergrid = (blockspergrid_x, blockspergrid_y)
 
         fast_matmul[blockspergrid, threadsperblock](x_d, y_d, z_d)
-        z_h = z_d.copy_to_host()
+        z_h = z_d.get()
         print(z_h)
         print(x_h @ y_h)
         # magictoken.ex_run_fast_matmul.end

From bec33c4e55903061d151c286e6ab7fb7cfc1945d Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Thu, 11 Dec 2025 07:59:22 -0800
Subject: [PATCH 16/60] fix remaining tests

---
 .../cuda/tests/cudapy/test_blackscholes.py    |  2 +-
 .../numba/cuda/tests/cudapy/test_laplace.py   | 28 +++++++++----------
 numba_cuda/numba/cuda/tests/cudapy/test_sm.py |  5 ++--
 .../cuda/tests/doc_examples/test_random.py    |  2 +-
 .../cuda/tests/doc_examples/test_vecadd.py    | 10 +++----
 5 files changed, 21 insertions(+), 26 deletions(-)

diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py b/numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py
index 7ad2a873d..5af622a2a 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py
@@ -6,7 +6,7 @@
 from numba import cuda
 from numba.cuda import double, void
 from numba.cuda.testing import unittest, CUDATestCase
-
+import cupy as cp
 
 RISKFREE = 0.02
 VOLATILITY = 0.30
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_laplace.py b/numba_cuda/numba/cuda/tests/cudapy/test_laplace.py
index 6a423dc97..4673cc2a9 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_laplace.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_laplace.py
@@ -94,32 +94,30 @@ def jocabi_relax_core(A, Anew, error):
 
         error_grid = np.zeros(griddim)
 
-        stream = cupy.cuda.stream()
+        cp_stream = cp.cuda.Stream()
+        stream = cuda.api.external_stream(cp_stream.ptr)
 
-        with stream:
+        with cp_stream:
             dA = cp.asarray(A)  # to device and don't come back
             dAnew = cp.asarray(Anew)  # to device and don't come back
-        
             derror_grid = cp.asarray(error_grid)
 
-            while error > tol and iter < iter_max:
-                self.assertTrue(error_grid.dtype == np.float64)
+        while error > tol and iter < iter_max:
+            self.assertTrue(error_grid.dtype == np.float64)
 
-                jocabi_relax_core[griddim, blockdim, stream](dA, dAnew, derror_grid)
+            jocabi_relax_core[griddim, blockdim, stream](dA, dAnew, derror_grid)
 
+            with cp_stream:
                 error_grid = derror_grid.get()
 
-                # error_grid is available on host
-                stream.synchronize()
+            error = np.abs(error_grid).max()
 
-                error = np.abs(error_grid).max()
+            # swap dA and dAnew
+            tmp = dA
+            dA = dAnew
+            dAnew = tmp
 
-                # swap dA and dAnew
-                tmp = dA
-                dA = dAnew
-                dAnew = tmp
-
-                iter += 1
+            iter += 1
 
 
 if __name__ == "__main__":
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_sm.py b/numba_cuda/numba/cuda/tests/cudapy/test_sm.py
index 6922757b3..35a58bc8c 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_sm.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_sm.py
@@ -4,7 +4,6 @@
 from numba import cuda
 from numba.cuda import int32, float64, void
 from numba.cuda import HAS_NUMBA
-import cupy as cp
 
 if HAS_NUMBA:
     from numba.core.errors import TypingError as NumbaTypingError
@@ -14,7 +13,7 @@
 
 import numpy as np
 from numba.cuda.np import numpy_support as nps
-
+import cupy as cp
 from .extensions_usecases import struct_model_type, MyStruct
 
 recordwith2darray = np.dtype([("i", np.int32), ("j", np.float32, (3, 2))])
@@ -92,7 +91,7 @@ def costs_func(d_block_costs):
             d_block_costs[0] = s_initialcost[0] + prediction
 
         block_costs = np.zeros(num_blocks, dtype=np.float64)
-        d_block_costs = cp.asarray(block_costs)
+        d_block_costs = cuda.to_device(block_costs)
 
         costs_func[num_blocks, threads_per_block](d_block_costs)
 
diff --git a/numba_cuda/numba/cuda/tests/doc_examples/test_random.py b/numba_cuda/numba/cuda/tests/doc_examples/test_random.py
index 8c2ce352b..3a1ca4643 100644
--- a/numba_cuda/numba/cuda/tests/doc_examples/test_random.py
+++ b/numba_cuda/numba/cuda/tests/doc_examples/test_random.py
@@ -51,7 +51,7 @@ def random_3d(arr, rng_states):
         rng_states = create_xoroshiro128p_states(nthreads, seed=1)
 
         # Generate random numbers
-        arr = cp.asarray((X, Y, Z), dtype=np.float32)
+        arr = cp.zeros((X, Y, Z), dtype=np.float32)
         random_3d[(gx, gy, gz), (bx, by, bz)](arr, rng_states)
         # magictoken.ex_3d_grid.end
 
diff --git a/numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py b/numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py
index 1dc13d660..ddfa49336 100644
--- a/numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py
+++ b/numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py
@@ -49,9 +49,9 @@ def f(a, b, c):
 
         # ex_vecadd.allocate.begin
         N = 100000
-        a = cp.random.random(N)
-        b = cp.random.random(N)
-        c = cp.asarray(a)
+        a = cp.asarray(np.random.random(N))
+        b = cp.asarray(np.random.random(N))
+        c = cp.empty(a.shape)
         # ex_vecadd.allocate.end
 
         # ex_vecadd.forall.begin
@@ -68,9 +68,7 @@ def f(a, b, c):
         print(c.get())
         # ex_vecadd.launch.end
 
-        np.testing.assert_equal(
-            c.get(), a.get() + b.get()
-        )
+        np.testing.assert_equal(c.get(), a.get() + b.get())
 
 
 if __name__ == "__main__":

From b8f37909850451a7e6cf049d6150830e507e0627 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Mon, 5 Jan 2026 08:20:02 -0800
Subject: [PATCH 17/60] fix new test failures

---
 numba_cuda/numba/cuda/api.py                  |  4 +-
 numba_cuda/numba/cuda/cudadrv/devicearray.py  |  2 +-
 .../cuda/tests/cudadrv/test_nvjitlink.py      |  7 +-
 .../tests/cudapy/test_device_array_capture.py | 37 +++++----
 numba_cuda/numba/cuda/tests/cudapy/test_sm.py |  2 +-
 .../numba/cuda/tests/cudapy/test_vectorize.py | 83 ++++++++++---------
 .../cuda/tests/cudapy/test_vectorize_decor.py |  4 +-
 .../tests/cudapy/test_vectorize_scalar_arg.py |  3 +-
 .../tests/doc_examples/test_cpu_gpu_compat.py | 13 +--
 .../cuda/tests/doc_examples/test_globals.py   | 28 +++----
 .../cuda/tests/doc_examples/test_reduction.py |  6 +-
 numba_cuda/numba/cuda/tests/nrt/test_nrt.py   |  3 +-
 12 files changed, 99 insertions(+), 93 deletions(-)

diff --git a/numba_cuda/numba/cuda/api.py b/numba_cuda/numba/cuda/api.py
index 2e3c70155..aab24580d 100644
--- a/numba_cuda/numba/cuda/api.py
+++ b/numba_cuda/numba/cuda/api.py
@@ -33,7 +33,8 @@ def from_cuda_array_interface(desc, owner=None, sync=True):
     """
     warnings.warn(
         "Constructing DeviceNDArray objects via the __cuda_array_interface__ "
-        "is now deprecated. Please prefer cupy for constructing device arrays."
+        "is now deprecated. Please prefer cupy for constructing device arrays.",
+        DeprecatedDeviceArrayApiWarning,
     )
     return _api._from_cuda_array_interface(desc, owner=owner, sync=sync)
 
@@ -116,7 +117,6 @@ def to_device(obj, stream=0, copy=True, to=None):
 
         hary = d_ary.copy_to_host(stream=stream)
     """
-    breakpoint()
     warnings.warn(
         "to_device is deprecated. Please prefer cupy for moving numpy arrays to the device.",
         DeprecatedDeviceArrayApiWarning,
diff --git a/numba_cuda/numba/cuda/cudadrv/devicearray.py b/numba_cuda/numba/cuda/cudadrv/devicearray.py
index d25df5a7e..a3e5f79f6 100644
--- a/numba_cuda/numba/cuda/cudadrv/devicearray.py
+++ b/numba_cuda/numba/cuda/cudadrv/devicearray.py
@@ -674,7 +674,7 @@ def ravel(self, order="C", stream=0):
         newarr, extents = self._dummy.ravel(order=order)
 
         if extents == [self._dummy.extent]:
-            return cls(
+            return cls._create_nowarn(
                 shape=newarr.shape,
                 strides=newarr.strides,
                 dtype=self.dtype,
diff --git a/numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py b/numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py
index ff51db4f1..de5b4d649 100644
--- a/numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py
+++ b/numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py
@@ -10,6 +10,7 @@
 
 from numba import cuda
 from numba.cuda import config
+import cupy as cp
 
 import os
 import io
@@ -101,7 +102,7 @@ def test_nvjitlink_jit_with_linkable_code(self):
                     def kernel(result):
                         result[0] = add_from_numba(1, 2)
 
-                    result = cuda.device_array(1)
+                    result = cp.zeros(1)
                     kernel[1, 1](result)
                     assert result[0] == 3
 
@@ -158,7 +159,7 @@ def test_nvjitlink_jit_with_linkable_code_lto_dump_assembly(self):
                     def kernel(result):
                         result[0] = add_from_numba(1, 2)
 
-                    result = cuda.device_array(1)
+                    result = cp.zeros(1)
                     kernel[1, 1](result)
                     assert result[0] == 3
 
@@ -190,7 +191,7 @@ def test_nvjitlink_jit_with_linkable_code_lto_dump_assembly_warn(self):
                 def kernel(result):
                     result[0] = add_from_numba(1, 2)
 
-                result = cuda.device_array(1)
+                result = cp.zeros(1)
                 func = kernel[1, 1]
                 with pytest.warns(
                     UserWarning,
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_device_array_capture.py b/numba_cuda/numba/cuda/tests/cudapy/test_device_array_capture.py
index f0899475c..bd92a07a1 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_device_array_capture.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_device_array_capture.py
@@ -15,23 +15,24 @@
 from numba import cuda
 from numba.cuda.testing import unittest, CUDATestCase, ForeignArray
 from numba.cuda.testing import skip_on_cudasim
+import cupy as cp
 
 
 def make_numba_array(host_arr):
     """Create a Numba device array from host array."""
-    return cuda.to_device(host_arr)
+    return cp.asarray(host_arr)
 
 
 def make_foreign_array(host_arr):
     """Create a ForeignArray wrapping a Numba device array."""
-    return ForeignArray(cuda.to_device(host_arr))
+    return ForeignArray(cp.asarray(host_arr))
 
 
 def get_host_data(arr):
     """Copy array data back to host."""
     if isinstance(arr, ForeignArray):
-        return arr._arr.copy_to_host()
-    return arr.copy_to_host()
+        return arr._arr.get()
+    return arr.get()
 
 
 # Array factories to test: (name, factory)
@@ -65,10 +66,10 @@ def kernel(output):
                         output[i] = read_global(i)
 
                 n = len(host_data)
-                output = cuda.device_array(n, dtype=np.float32)
+                output = cp.zeros(n, dtype=np.float32)
                 kernel[1, n](output)
 
-                result = output.copy_to_host()
+                result = output.get()
                 np.testing.assert_array_equal(result, host_data)
 
     def test_computation(self):
@@ -91,10 +92,10 @@ def kernel(output):
                         output[i] = double_global_value(i)
 
                 n = len(host_data)
-                output = cuda.device_array(n, dtype=np.float32)
+                output = cp.zeros(n, dtype=np.float32)
                 kernel[1, n](output)
 
-                result = output.copy_to_host()
+                result = output.get()
                 expected = host_data * 2.0
                 np.testing.assert_array_equal(result, expected)
 
@@ -136,10 +137,10 @@ def kernel(output):
                     if i < output.size:
                         output[i] = add_globals(i)
 
-                output = cuda.device_array(3, dtype=np.float32)
+                output = cp.zeros(3, dtype=np.float32)
                 kernel[1, 3](output)
 
-                result = output.copy_to_host()
+                result = output.get()
                 expected = np.array([11.0, 22.0, 33.0], dtype=np.float32)
                 np.testing.assert_array_equal(result, expected)
 
@@ -164,10 +165,10 @@ def kernel(output):
                         col = i % 2
                         output[i] = read_2d(row, col)
 
-                output = cuda.device_array(6, dtype=np.float32)
+                output = cp.zeros(6, dtype=np.float32)
                 kernel[1, 6](output)
 
-                result = output.copy_to_host()
+                result = output.get()
                 expected = host_2d.flatten()
                 np.testing.assert_array_equal(result, expected)
 
@@ -194,10 +195,10 @@ def kernel(output):
                         if i < output.size:
                             output[i] = read_arr(i)
 
-                    output = cuda.device_array(len(host_data), dtype=dtype)
+                    output = cp.zeros(len(host_data), dtype=dtype)
                     kernel[1, len(host_data)](output)
                     np.testing.assert_array_equal(
-                        output.copy_to_host(), host_data
+                        output.get(), host_data
                     )
 
     def test_direct_kernel_access(self):
@@ -213,10 +214,10 @@ def direct_access_kernel(output):
                     if i < output.size:
                         output[i] = global_direct[i] + 1.0
 
-                output = cuda.device_array(3, dtype=np.float32)
+                output = cp.zeros(3, dtype=np.float32)
                 direct_access_kernel[1, 3](output)
 
-                result = output.copy_to_host()
+                result = output.get()
                 expected = np.array([8.0, 9.0, 10.0], dtype=np.float32)
                 np.testing.assert_array_equal(result, expected)
 
@@ -231,10 +232,10 @@ def test_zero_dimensional(self):
                 def kernel_0d(output):
                     output[()] = global_0d[()] * 2.0
 
-                output = cuda.device_array((), dtype=np.float32)
+                output = cp.zeros((), dtype=np.float32)
                 kernel_0d[1, 1](output)
 
-                result = output.copy_to_host()
+                result = output.get()
                 expected = 84.0
                 self.assertEqual(result, expected)
 
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_sm.py b/numba_cuda/numba/cuda/tests/cudapy/test_sm.py
index 35a58bc8c..6d2cd2f89 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_sm.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_sm.py
@@ -91,7 +91,7 @@ def costs_func(d_block_costs):
             d_block_costs[0] = s_initialcost[0] + prediction
 
         block_costs = np.zeros(num_blocks, dtype=np.float64)
-        d_block_costs = cuda.to_device(block_costs)
+        d_block_costs = cp.asarray(block_costs)
 
         costs_func[num_blocks, threads_per_block](d_block_costs)
 
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py b/numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py
index 3ea864843..1013974d3 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py
@@ -11,7 +11,7 @@
 from numba.cuda.types import int32, float32, float64
 from numba.cuda.cudadrv.driver import CudaAPIError, driver
 from numba.cuda.testing import skip_on_cudasim
-from numba.cuda.testing import CUDATestCase
+from numba.cuda.testing import CUDATestCase, DeprecatedDeviceArrayApiTest
 import unittest
 import cupy as cp
 
@@ -188,7 +188,7 @@ def vector_add(a, b):
 
             n = 10
             x = np.arange(n, dtype=np.int32)
-            dx = cuda.to_device(x)
+            dx = cp.asarray(x)
             expected = x + x
             actual = vector_add(x, dx).copy_to_host()
             np.testing.assert_equal(expected, actual)
@@ -203,11 +203,11 @@ def vector_add(a, b):
 
             n = 10
             x = np.arange(n, dtype=np.int32).reshape(2, 5)
-            dx = cuda.to_device(x)
+            dx = cp.asarray(x)
             vector_add(dx, dx, out=dx)
 
             expected = x + x
-            actual = dx.copy_to_host()
+            actual = dx.get()
             np.testing.assert_equal(expected, actual)
             self.assertEqual(expected.dtype, actual.dtype)
 
@@ -263,57 +263,60 @@ def bar(x):
 
             self.assertEqual(bar.__name__, "bar")
 
-    def test_no_transfer_for_device_data(self):
-        for vectorize in vectorize_funcs:
-            # Initialize test data on the device prior to banning host <-> device
-            # transfer
+@skip_on_cudasim("ufunc API unsupported in the simulator")
+class TestCUDAVectorizeTransfers(DeprecatedDeviceArrayApiTest):
+
+    def setUp(self):    
+        noise = np.random.randn(1, 3, 64, 64).astype(np.float32)
+        self.noise = cuda._api._to_device(noise)
+
+        # A mock of a CUDA function that always raises a CudaAPIError
+        def raising_transfer(*args, **kwargs):
+            raise CudaAPIError(999, "Transfer not allowed")
+
+        self.old_HtoD = getattr(driver, "cuMemcpyHtoD", None)
+        self.old_DtoH = getattr(driver, "cuMemcpyDtoH", None)
 
-            noise = np.random.randn(1, 3, 64, 64).astype(np.float32)
-            noise = cuda.to_device(noise)
+        setattr(driver, "cuMemcpyHtoD", raising_transfer)
+        setattr(driver, "cuMemcpyDtoH", raising_transfer)
 
-            # A mock of a CUDA function that always raises a CudaAPIError
 
-            def raising_transfer(*args, **kwargs):
-                raise CudaAPIError(999, "Transfer not allowed")
+        super().setUp()
 
-            # Use the mock for transfers between the host and device
+    def tearDown(self):
+        if self.old_HtoD is not None:
+            setattr(driver, "cuMemcpyHtoD", self.old_HtoD)
+        else:
+            del driver.cuMemcpyHtoD
+        if self.old_DtoH is not None:
+            setattr(driver, "cuMemcpyDtoH", self.old_DtoH)
+        else:
+            del driver.cuMemcpyDtoH
 
-            old_HtoD = getattr(driver, "cuMemcpyHtoD", None)
-            old_DtoH = getattr(driver, "cuMemcpyDtoH", None)
+        super().tearDown()
+
+    def test_no_transfer_for_device_data(self):
+        for vectorize in vectorize_funcs:
+            # Initialize test data on the device prior to banning host <-> device
+            # transfer
 
-            setattr(driver, "cuMemcpyHtoD", raising_transfer)
-            setattr(driver, "cuMemcpyDtoH", raising_transfer)
 
             # Ensure that the mock functions are working as expected
 
             with self.assertRaisesRegex(CudaAPIError, "Transfer not allowed"):
-                noise.copy_to_host()
+                self.noise.copy_to_host()
 
             with self.assertRaisesRegex(CudaAPIError, "Transfer not allowed"):
                 cuda.to_device([1])
 
-            try:
-                # Check that defining and calling a ufunc with data on the device
-                # induces no transfers
-
-                @vectorize(["float32(float32)"])
-                def func(noise):
-                    return noise + 1.0
-
-                func(noise)
-            finally:
-                # Replace our mocks with the original implementations. If there was
-                # no original implementation, simply remove ours.
-
-                if old_HtoD is not None:
-                    setattr(driver, "cuMemcpyHtoD", old_HtoD)
-                else:
-                    del driver.cuMemcpyHtoD
-                if old_DtoH is not None:
-                    setattr(driver, "cuMemcpyDtoH", old_DtoH)
-                else:
-                    del driver.cuMemcpyDtoH
+            # Check that defining and calling a ufunc with data on the device
+            # induces no transfers
 
+            @vectorize(["float32(float32)"])
+            def func(noise):
+                return noise + 1.0
 
+            func(self.noise)
+        
 if __name__ == "__main__":
     unittest.main()
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py b/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py
index d9d7c78ec..dc95765b7 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py
@@ -8,7 +8,7 @@
 from numba.cuda import vectorize, int32, uint32, float32, float64
 from numba.cuda.testing import skip_on_cudasim, CUDATestCase
 from numba.cuda.tests.support import CheckWarningsMixin
-
+import cupy as cp
 import unittest
 
 
@@ -162,7 +162,7 @@ def fngpu(a, b):
             return a - b
 
         expect = fn(a, b)
-        got = fngpu(cuda.to_device(a), cuda.to_device(b))
+        got = fngpu(cp.asarray(a), cp.asarray(b))
         np.testing.assert_almost_equal(expect, got.copy_to_host())
 
 
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py b/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py
index 169e2c6b8..74b268202 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py
@@ -7,6 +7,7 @@
 from numba.cuda import float64
 from numba.cuda.testing import skip_on_cudasim, CUDATestCase
 import unittest
+import cupy as cp
 
 sig = [float64(float64, float64)]
 
@@ -19,7 +20,7 @@ def vector_add(a, b):
             return a + b
 
         A = np.arange(10, dtype=np.float64)
-        dA = cuda.to_device(A)
+        dA = cp.asarray(A)
         v = vector_add(1.0, dA)
 
         np.testing.assert_array_almost_equal(
diff --git a/numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py b/numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py
index 1a578a9af..810b17230 100644
--- a/numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py
+++ b/numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py
@@ -10,6 +10,7 @@
 )
 from numba.cuda.tests.support import captured_stdout
 import numpy as np
+import cupy as cp 
 
 
 @skip_on_cudasim("cudasim doesn't support cuda import at non-top-level")
@@ -39,10 +40,10 @@ def test_ex_cpu_gpu_compat(self):
         # ex_cpu_gpu_compat.import.end
 
         # ex_cpu_gpu_compat.allocate.begin
-        X = cuda.to_device([1, 10, 234])
-        Y = cuda.to_device([2, 2, 4014])
-        Z = cuda.to_device([3, 14, 2211])
-        results = cuda.to_device([0.0, 0.0, 0.0])
+        X = cp.asarray([1, 10, 234])
+        Y = cp.asarray([2, 2, 4014])
+        Z = cp.asarray([3, 14, 2211])
+        results = cp.asarray([0.0, 0.0, 0.0])
         # ex_cpu_gpu_compat.allocate.end
 
         # ex_cpu_gpu_compat.define.begin
@@ -72,9 +73,9 @@ def f(res, xarr, yarr, zarr):
         # [-126.79644737231007, 416.28324559588634, -218912930.2987788]
         # ex_cpu_gpu_compat.launch.end
 
-        expect = [business_logic(x, y, z) for x, y, z in zip(X, Y, Z)]
+        expect = [business_logic(x, y, z) for x, y, z in zip(X.get(), Y.get(), Z.get())]
 
-        np.testing.assert_equal(expect, results.copy_to_host())
+        np.testing.assert_equal(expect, results.get())
 
 
 if __name__ == "__main__":
diff --git a/numba_cuda/numba/cuda/tests/doc_examples/test_globals.py b/numba_cuda/numba/cuda/tests/doc_examples/test_globals.py
index 40913a150..a07ad1c5a 100644
--- a/numba_cuda/numba/cuda/tests/doc_examples/test_globals.py
+++ b/numba_cuda/numba/cuda/tests/doc_examples/test_globals.py
@@ -5,7 +5,7 @@
 
 from numba.cuda.testing import CUDATestCase, skip_on_cudasim
 from numba.cuda.tests.support import captured_stdout
-
+import cupy as cp
 
 @skip_on_cudasim("cudasim doesn't support cuda import at non-top-level")
 class TestGlobals(CUDATestCase):
@@ -42,14 +42,14 @@ def compute_totals(quantities, totals):
             if i < totals.size:
                 totals[i] = quantities[i] * PRICES[i] * (1 + TAX_RATE)
 
-        d_quantities = cuda.to_device(
+        d_quantities = cp.asarray(
             np.array([1, 2, 3, 4, 5], dtype=np.float64)
         )
-        d_totals = cuda.device_array(5, dtype=np.float64)
+        d_totals = cp.asarray(5, dtype=np.float64)
 
         # First kernel call - compiles and captures values
         compute_totals[1, 32](d_quantities, d_totals)
-        print("Value of d_totals:", d_totals.copy_to_host())
+        print("Value of d_totals:", d_totals.get())
 
         # These modifications have no effect on subsequent kernel calls
         TAX_RATE = 0.10  # noqa: F841
@@ -57,12 +57,12 @@ def compute_totals(quantities, totals):
 
         # Second kernel call still uses the original values
         compute_totals[1, 32](d_quantities, d_totals)
-        print("Value of d_totals:", d_totals.copy_to_host())
+        print("Value of d_totals:", d_totals.get())
         # magictoken.ex_globals_constant_capture.end
 
         # Verify the values are the same (original values were captured)
         expected = np.array([10.8, 54.0, 16.2, 64.8, 162.0])
-        np.testing.assert_allclose(d_totals.copy_to_host(), expected)
+        np.testing.assert_allclose(d_totals.get(), expected)
 
     def test_ex_globals_device_array_capture(self):
         """
@@ -73,7 +73,7 @@ def test_ex_globals_device_array_capture(self):
         from numba import cuda
 
         # Global device array - pointer is captured, not data
-        PRICES = cuda.to_device(
+        PRICES = cp.asarray(
             np.array([10.0, 25.0, 5.0, 15.0, 30.0], dtype=np.float32)
         )
 
@@ -83,28 +83,26 @@ def compute_totals(quantities, totals):
             if i < totals.size:
                 totals[i] = quantities[i] * PRICES[i]
 
-        d_quantities = cuda.to_device(
+        d_quantities = cp.asarray(
             np.array([1.0, 1.0, 1.0, 1.0, 1.0], dtype=np.float32)
         )
-        d_totals = cuda.device_array(5, dtype=np.float32)
+        d_totals = cp.zeros(5, dtype=np.float32)
 
         # First kernel call
         compute_totals[1, 32](d_quantities, d_totals)
-        print(d_totals.copy_to_host())  # [10. 25.  5. 15. 30.]
+        print(d_totals.get())  # [10. 25.  5. 15. 30.]
 
         # Mutate the device array in-place
-        PRICES.copy_to_device(
-            np.array([20.0, 50.0, 10.0, 30.0, 60.0], dtype=np.float32)
-        )
+        PRICES[:] = cp.array([20.0, 50.0, 10.0, 30.0, 60.0], dtype=np.float32)
 
         # Second kernel call sees the updated values
         compute_totals[1, 32](d_quantities, d_totals)
-        print(d_totals.copy_to_host())  # [20. 50. 10. 30. 60.]
+        print(d_totals.get())  # [20. 50. 10. 30. 60.]
         # magictoken.ex_globals_device_array_capture.end
 
         # Verify the second call sees updated values
         expected = np.array([20.0, 50.0, 10.0, 30.0, 60.0], dtype=np.float32)
-        np.testing.assert_allclose(d_totals.copy_to_host(), expected)
+        np.testing.assert_allclose(d_totals.get(), expected)
 
 
 if __name__ == "__main__":
diff --git a/numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py b/numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py
index dbb9dc079..1f6280bb6 100644
--- a/numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py
+++ b/numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py
@@ -5,7 +5,7 @@
 
 from numba.cuda.testing import CUDATestCase, skip_on_cudasim
 from numba.cuda.tests.support import captured_stdout
-
+import cupy as cp
 
 @skip_on_cudasim("cudasim doesn't support cuda import at non-top-level")
 class TestReduction(CUDATestCase):
@@ -33,7 +33,7 @@ def test_ex_reduction(self):
 
         # ex_reduction.allocate.begin
         # generate data
-        a = cuda.to_device(np.arange(1024))
+        a = cp.asarray(np.arange(1024))
         nelem = len(a)
         # ex_reduction.allocate.end
 
@@ -73,7 +73,7 @@ def array_sum(data):
         print(sum(np.arange(1024)))  # 523776
         # ex_reduction.launch.end
 
-        np.testing.assert_equal(a[0], sum(np.arange(1024)))
+        np.testing.assert_equal(a.get()[0], sum(np.arange(1024)))
 
 
 if __name__ == "__main__":
diff --git a/numba_cuda/numba/cuda/tests/nrt/test_nrt.py b/numba_cuda/numba/cuda/tests/nrt/test_nrt.py
index 459032d71..77f04afe2 100644
--- a/numba_cuda/numba/cuda/tests/nrt/test_nrt.py
+++ b/numba_cuda/numba/cuda/tests/nrt/test_nrt.py
@@ -23,6 +23,7 @@
     Archive,
     Object,
 )
+import cupy as cp
 
 TEST_BIN_DIR = os.getenv("NUMBA_CUDA_TEST_BIN_DIR")
 
@@ -398,7 +399,7 @@ def kernel(out):
                 out = out.reshape(out.shape)
                 out[0] = 1
 
-            out = cuda.to_device(np.zeros(1, dtype=np.float64))
+            out = cp.zeros(1, dtype=np.float64)
             kernel[1, 1](out)
 
         with override_config("CUDA_ENABLE_NRT", False):

From 0e9fc8e7982d1dc03fceca1166b5801ba9ea05a2 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Mon, 5 Jan 2026 08:20:14 -0800
Subject: [PATCH 18/60] style

---
 numba_cuda/numba/cuda/cudadrv/devicearray.py             | 4 +++-
 .../numba/cuda/tests/cudapy/test_device_array_capture.py | 4 +---
 numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py     | 9 ++++-----
 .../numba/cuda/tests/cudapy/test_vectorize_decor.py      | 1 -
 .../numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py | 1 -
 .../numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py | 7 +++++--
 numba_cuda/numba/cuda/tests/doc_examples/test_globals.py | 5 ++---
 .../numba/cuda/tests/doc_examples/test_reduction.py      | 1 +
 8 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/numba_cuda/numba/cuda/cudadrv/devicearray.py b/numba_cuda/numba/cuda/cudadrv/devicearray.py
index a3e5f79f6..e88281077 100644
--- a/numba_cuda/numba/cuda/cudadrv/devicearray.py
+++ b/numba_cuda/numba/cuda/cudadrv/devicearray.py
@@ -987,7 +987,9 @@ def auto_device(obj, stream=0, copy=True, user_explicit=False):
 
 def check_array_compatibility(ary1, ary2):
     with warnings.catch_warnings():
-        warnings.filterwarnings("ignore", category=DeprecatedDeviceArrayApiWarning)
+        warnings.filterwarnings(
+            "ignore", category=DeprecatedDeviceArrayApiWarning
+        )
         ary1sq = ary1.squeeze()
         ary2sq = ary2.squeeze()
 
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_device_array_capture.py b/numba_cuda/numba/cuda/tests/cudapy/test_device_array_capture.py
index bd92a07a1..6ecf8a26c 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_device_array_capture.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_device_array_capture.py
@@ -197,9 +197,7 @@ def kernel(output):
 
                     output = cp.zeros(len(host_data), dtype=dtype)
                     kernel[1, len(host_data)](output)
-                    np.testing.assert_array_equal(
-                        output.get(), host_data
-                    )
+                    np.testing.assert_array_equal(output.get(), host_data)
 
     def test_direct_kernel_access(self):
         """Test direct kernel access (not via device function)."""
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py b/numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py
index 1013974d3..0df523dcc 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py
@@ -263,10 +263,10 @@ def bar(x):
 
             self.assertEqual(bar.__name__, "bar")
 
+
 @skip_on_cudasim("ufunc API unsupported in the simulator")
 class TestCUDAVectorizeTransfers(DeprecatedDeviceArrayApiTest):
-
-    def setUp(self):    
+    def setUp(self):
         noise = np.random.randn(1, 3, 64, 64).astype(np.float32)
         self.noise = cuda._api._to_device(noise)
 
@@ -280,7 +280,6 @@ def raising_transfer(*args, **kwargs):
         setattr(driver, "cuMemcpyHtoD", raising_transfer)
         setattr(driver, "cuMemcpyDtoH", raising_transfer)
 
-
         super().setUp()
 
     def tearDown(self):
@@ -300,7 +299,6 @@ def test_no_transfer_for_device_data(self):
             # Initialize test data on the device prior to banning host <-> device
             # transfer
 
-
             # Ensure that the mock functions are working as expected
 
             with self.assertRaisesRegex(CudaAPIError, "Transfer not allowed"):
@@ -317,6 +315,7 @@ def func(noise):
                 return noise + 1.0
 
             func(self.noise)
-        
+
+
 if __name__ == "__main__":
     unittest.main()
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py b/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py
index dc95765b7..2d757d6fe 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py
@@ -4,7 +4,6 @@
 import numpy as np
 import math
 
-from numba import cuda
 from numba.cuda import vectorize, int32, uint32, float32, float64
 from numba.cuda.testing import skip_on_cudasim, CUDATestCase
 from numba.cuda.tests.support import CheckWarningsMixin
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py b/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py
index 74b268202..6091ad454 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py
@@ -3,7 +3,6 @@
 
 import numpy as np
 from numba.cuda import vectorize
-from numba import cuda
 from numba.cuda import float64
 from numba.cuda.testing import skip_on_cudasim, CUDATestCase
 import unittest
diff --git a/numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py b/numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py
index 810b17230..e253c84fa 100644
--- a/numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py
+++ b/numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py
@@ -10,7 +10,7 @@
 )
 from numba.cuda.tests.support import captured_stdout
 import numpy as np
-import cupy as cp 
+import cupy as cp
 
 
 @skip_on_cudasim("cudasim doesn't support cuda import at non-top-level")
@@ -73,7 +73,10 @@ def f(res, xarr, yarr, zarr):
         # [-126.79644737231007, 416.28324559588634, -218912930.2987788]
         # ex_cpu_gpu_compat.launch.end
 
-        expect = [business_logic(x, y, z) for x, y, z in zip(X.get(), Y.get(), Z.get())]
+        expect = [
+            business_logic(x, y, z)
+            for x, y, z in zip(X.get(), Y.get(), Z.get())
+        ]
 
         np.testing.assert_equal(expect, results.get())
 
diff --git a/numba_cuda/numba/cuda/tests/doc_examples/test_globals.py b/numba_cuda/numba/cuda/tests/doc_examples/test_globals.py
index a07ad1c5a..1624d676c 100644
--- a/numba_cuda/numba/cuda/tests/doc_examples/test_globals.py
+++ b/numba_cuda/numba/cuda/tests/doc_examples/test_globals.py
@@ -7,6 +7,7 @@
 from numba.cuda.tests.support import captured_stdout
 import cupy as cp
 
+
 @skip_on_cudasim("cudasim doesn't support cuda import at non-top-level")
 class TestGlobals(CUDATestCase):
     """
@@ -42,9 +43,7 @@ def compute_totals(quantities, totals):
             if i < totals.size:
                 totals[i] = quantities[i] * PRICES[i] * (1 + TAX_RATE)
 
-        d_quantities = cp.asarray(
-            np.array([1, 2, 3, 4, 5], dtype=np.float64)
-        )
+        d_quantities = cp.asarray(np.array([1, 2, 3, 4, 5], dtype=np.float64))
         d_totals = cp.asarray(5, dtype=np.float64)
 
         # First kernel call - compiles and captures values
diff --git a/numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py b/numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py
index 1f6280bb6..7ae7fc786 100644
--- a/numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py
+++ b/numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py
@@ -7,6 +7,7 @@
 from numba.cuda.tests.support import captured_stdout
 import cupy as cp
 
+
 @skip_on_cudasim("cudasim doesn't support cuda import at non-top-level")
 class TestReduction(CUDATestCase):
     """

From 185da928a2ef2fbc4a0d7d36b97e272a20378d3e Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Mon, 5 Jan 2026 08:59:35 -0800
Subject: [PATCH 19/60] address some greptile reviews

---
 numba_cuda/numba/cuda/_api.py                       | 13 +++++++++----
 numba_cuda/numba/cuda/api.py                        |  1 -
 numba_cuda/numba/cuda/kernels/transpose.py          |  7 +++++--
 numba_cuda/numba/cuda/testing.py                    |  4 +++-
 .../numba/cuda/tests/cudadrv/test_profiler.py       |  2 +-
 .../numba/cuda/tests/cudapy/test_vectorize.py       |  2 --
 .../numba/cuda/tests/doc_examples/test_globals.py   |  2 +-
 .../numba/cuda/tests/doc_examples/test_reduction.py |  2 +-
 numba_cuda/numba/cuda/vectorizers.py                |  2 +-
 9 files changed, 21 insertions(+), 14 deletions(-)

diff --git a/numba_cuda/numba/cuda/_api.py b/numba_cuda/numba/cuda/_api.py
index 85adc6d65..ad5613070 100644
--- a/numba_cuda/numba/cuda/_api.py
+++ b/numba_cuda/numba/cuda/_api.py
@@ -12,7 +12,10 @@
 from .cudadrv import devicearray, devices, driver
 from numba.cuda.core import config
 from numba.cuda.api_util import prepare_shape_strides_dtype
-from numba.cuda.cudadrv.devicearray import DeprecatedDeviceArrayApiWarning, DeviceNDArray
+from numba.cuda.cudadrv.devicearray import (
+    DeprecatedDeviceArrayApiWarning,
+    DeviceNDArray,
+)
 
 # NDarray device helper
 
@@ -20,6 +23,7 @@
 current_context = devices.get_context
 gpus = devices.gpus
 
+
 @require_context
 def external_stream(ptr):
     """Create a Numba stream object for a stream allocated outside Numba.
@@ -29,6 +33,7 @@ def external_stream(ptr):
     """
     return current_context().create_external_stream(ptr)
 
+
 def _from_cuda_array_interface(desc, owner=None, sync=True):
     """Create a _DeviceNDArray from a cuda-array-interface description.
     The ``owner`` is the owner of the underlying memory.
@@ -321,7 +326,7 @@ def _device_array_like(ary, stream=0):
     """
     strides = _contiguous_strides_like_array(ary)
     order = _order_like_array(ary)
-    return device_array(
+    return _device_array(
         shape=ary.shape,
         dtype=ary.dtype,
         strides=strides,
@@ -337,7 +342,7 @@ def _mapped_array_like(ary, stream=0, portable=False, wc=False):
     """
     strides = _contiguous_strides_like_array(ary)
     order = _order_like_array(ary)
-    return mapped_array(
+    return _mapped_array(
         shape=ary.shape,
         dtype=ary.dtype,
         strides=strides,
@@ -355,6 +360,6 @@ def _pinned_array_like(ary):
     """
     strides = _contiguous_strides_like_array(ary)
     order = _order_like_array(ary)
-    return pinned_array(
+    return _pinned_array(
         shape=ary.shape, dtype=ary.dtype, strides=strides, order=order
     )
diff --git a/numba_cuda/numba/cuda/api.py b/numba_cuda/numba/cuda/api.py
index aab24580d..b105e94f7 100644
--- a/numba_cuda/numba/cuda/api.py
+++ b/numba_cuda/numba/cuda/api.py
@@ -67,7 +67,6 @@ def is_cuda_array(obj):
         DeprecatedDeviceArrayApiWarning,
     )
     return _api._is_cuda_array(obj)
-    return hasattr(obj, "__cuda_array_interface__")
 
 
 def is_float16_supported():
diff --git a/numba_cuda/numba/cuda/kernels/transpose.py b/numba_cuda/numba/cuda/kernels/transpose.py
index 48a5a4175..e23820547 100644
--- a/numba_cuda/numba/cuda/kernels/transpose.py
+++ b/numba_cuda/numba/cuda/kernels/transpose.py
@@ -8,6 +8,7 @@
 from numba.cuda.cudadrv.devicearray import DeprecatedDeviceArrayApiWarning
 import warnings
 
+
 def transpose(a, b=None):
     """Compute the transpose of 'a' and store it into 'b', if given,
     and return it. If 'b' is not given, allocate a new array
@@ -21,11 +22,13 @@ def transpose(a, b=None):
         `b` to the device if necessary).
     """
     warnings.warn(
-        "The DeviceNDArray class and its transpose method are deprecated. "
-        "Please prefer cupy for device array operations.", DeprecatedDeviceArrayApiWarning
+        "The DeviceNDArray API for transposing device arrays is deprecated. "
+        "Please prefer cupy for device array operations.",
+        DeprecatedDeviceArrayApiWarning,
     )
     return _transpose(a, b=b)
 
+
 def _transpose(a, b=None):
     # prefer `a`'s stream if
     stream = getattr(a, "stream", 0)
diff --git a/numba_cuda/numba/cuda/testing.py b/numba_cuda/numba/cuda/testing.py
index 8c93a9375..729cab307 100644
--- a/numba_cuda/numba/cuda/testing.py
+++ b/numba_cuda/numba/cuda/testing.py
@@ -188,13 +188,15 @@ def assertFileCheckMatches(
 
 class DeprecatedDeviceArrayApiTest(CUDATestCase):
     def setUp(self):
+        self._warnings_filters = warnings.filters[:]
+
         warnings.filterwarnings(
             "ignore", category=DeprecatedDeviceArrayApiWarning
         )
         super().setUp()
 
     def tearDown(self):
-        warnings.resetwarnings()
+        warnings.filters = self._warnings_filters
         super().tearDown()
 
 
diff --git a/numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py b/numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py
index 62525af0a..26a5548cb 100644
--- a/numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py
+++ b/numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py
@@ -16,7 +16,7 @@ def test_profiling(self):
             del a
 
         with cuda.profiling():
-            a = cp.zeros(10)
+            a = cp.zeros(100)
             del a
 
 
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py b/numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py
index 0df523dcc..fae710bb4 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py
@@ -86,8 +86,6 @@ def test_1d_async(self):
             def vector_add(a, b):
                 return a + b
 
-            stream = cuda.stream()
-
             for ty in dtypes:
                 data = np.array(np.random.random(self.N), dtype=ty)
 
diff --git a/numba_cuda/numba/cuda/tests/doc_examples/test_globals.py b/numba_cuda/numba/cuda/tests/doc_examples/test_globals.py
index 1624d676c..96e2a6bc9 100644
--- a/numba_cuda/numba/cuda/tests/doc_examples/test_globals.py
+++ b/numba_cuda/numba/cuda/tests/doc_examples/test_globals.py
@@ -44,7 +44,7 @@ def compute_totals(quantities, totals):
                 totals[i] = quantities[i] * PRICES[i] * (1 + TAX_RATE)
 
         d_quantities = cp.asarray(np.array([1, 2, 3, 4, 5], dtype=np.float64))
-        d_totals = cp.asarray(5, dtype=np.float64)
+        d_totals = cp.zeros(5, dtype=np.float64)
 
         # First kernel call - compiles and captures values
         compute_totals[1, 32](d_quantities, d_totals)
diff --git a/numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py b/numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py
index 7ae7fc786..79f686ee9 100644
--- a/numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py
+++ b/numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py
@@ -70,7 +70,7 @@ def array_sum(data):
 
         # ex_reduction.launch.begin
         array_sum[1, nelem](a)
-        print(a[0])  # 523776
+        print(a.get()[0])  # 523776
         print(sum(np.arange(1024)))  # 523776
         # ex_reduction.launch.end
 
diff --git a/numba_cuda/numba/cuda/vectorizers.py b/numba_cuda/numba/cuda/vectorizers.py
index 9766ead9e..f247f7e8c 100644
--- a/numba_cuda/numba/cuda/vectorizers.py
+++ b/numba_cuda/numba/cuda/vectorizers.py
@@ -116,7 +116,7 @@ def as_device_array(self, obj):
         # Producer then importing it as a Consumer, which causes a
         # synchronization on the array's stream (if it has one) by default.
         # When we have a Numba device array, we can simply return it.
-        if cuda.cudadrv.devicearray.is_cuda_ndarray(obj):
+        if cuda._api._is_cuda_ndarray(obj):
             return obj
         return cuda.as_cuda_array(obj)
 

From 18bdae3e7d21cf9f8e1d9309ab01aa35aa116b01 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <53796099+brandon-b-miller@users.noreply.github.com>
Date: Mon, 5 Jan 2026 13:52:31 -0600
Subject: [PATCH 20/60] Apply suggestions from code review

Co-authored-by: Phillip Cloud <417981+cpcloud@users.noreply.github.com>
---
 numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py        | 4 ++--
 numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py | 3 +--
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py b/numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py
index 099d9115a..c6e9dbb60 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py
@@ -122,9 +122,9 @@ def test_gufunc_stream(self):
         )
 
         stream = cuda.stream()
-        with warnings.catch_warnings():
-            warnings.simplefilter('ignore', DeprecatedDeviceArrayApiWarning)
+        with pytest.warns(DeprecatedDeviceArrayApiWarning):
             dA = cuda.to_device(A, stream)
+        with pytest.warns(DeprecatedDeviceArrayApiWarning):
             dB = cuda.to_device(B, stream)
 
             dC = cuda.device_array(shape=(1001, 2, 5), dtype=A.dtype, stream=stream)
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py b/numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py
index 1fb2d7427..ba1463530 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py
@@ -123,8 +123,7 @@ def foo(a, b, out):
 
         # As this test specifically tests the behavior of passing a DeviceNDArray,
         # we'll catch the expected warning explicitly here. 
-        with warnings.catch_warnings():
-            warnings.simplefilter('ignore', DeprecatedDeviceArrayApiWarning)
+        with pytest.warns(DeprecatedDeviceArrayApiWarning):
             da = cuda.to_device(a)
         self.assertEqual(da.dtype, np.int64)
         with self.assertRaises(TypeError) as raises:

From d9ba682cc92266f37da6b5bbf9743906f227c4a8 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Mon, 5 Jan 2026 11:56:53 -0800
Subject: [PATCH 21/60] address reviews

---
 numba_cuda/numba/cuda/_api.py                        | 10 ----------
 numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py |  8 ++++----
 2 files changed, 4 insertions(+), 14 deletions(-)

diff --git a/numba_cuda/numba/cuda/_api.py b/numba_cuda/numba/cuda/_api.py
index ad5613070..24284f56e 100644
--- a/numba_cuda/numba/cuda/_api.py
+++ b/numba_cuda/numba/cuda/_api.py
@@ -8,12 +8,10 @@
 import contextlib
 
 import numpy as np
-import warnings
 from .cudadrv import devicearray, devices, driver
 from numba.cuda.core import config
 from numba.cuda.api_util import prepare_shape_strides_dtype
 from numba.cuda.cudadrv.devicearray import (
-    DeprecatedDeviceArrayApiWarning,
     DeviceNDArray,
 )
 
@@ -202,10 +200,6 @@ def _pinned_array(shape, dtype=np.float64, strides=None, order="C"):
     Allocate an :class:`ndarray <numpy.ndarray>` with a buffer that is pinned
     (pagelocked).  Similar to :func:`np.empty() <numpy.empty>`.
     """
-    warnings.warn(
-        "pinned_array is deprecated. Please prefer cupy for moving numpy arrays to the device.",
-        DeprecatedDeviceArrayApiWarning,
-    )
     shape, strides, dtype = prepare_shape_strides_dtype(
         shape, strides, dtype, order
     )
@@ -238,10 +232,6 @@ def _mapped_array(
         to write by the host and to read by the device, but slower to
         write by the host and slower to write by the device.
     """
-    warnings.warn(
-        "mapped_array is deprecated. Please prefer cupy for moving numpy arrays to the device.",
-        DeprecatedDeviceArrayApiWarning,
-    )
     shape, strides, dtype = prepare_shape_strides_dtype(
         shape, strides, dtype, order
     )
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py b/numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py
index fae710bb4..ad50ee202 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py
@@ -275,18 +275,18 @@ def raising_transfer(*args, **kwargs):
         self.old_HtoD = getattr(driver, "cuMemcpyHtoD", None)
         self.old_DtoH = getattr(driver, "cuMemcpyDtoH", None)
 
-        setattr(driver, "cuMemcpyHtoD", raising_transfer)
-        setattr(driver, "cuMemcpyDtoH", raising_transfer)
+        driver.cuMemcpyHtoD = raising_transfer
+        driver.cuMemcpyDtoH = raising_transfer
 
         super().setUp()
 
     def tearDown(self):
         if self.old_HtoD is not None:
-            setattr(driver, "cuMemcpyHtoD", self.old_HtoD)
+            driver.cuMemcpyHtoD = self.old_HtoD
         else:
             del driver.cuMemcpyHtoD
         if self.old_DtoH is not None:
-            setattr(driver, "cuMemcpyDtoH", self.old_DtoH)
+            driver.cuMemcpyDtoH = self.old_DtoH
         else:
             del driver.cuMemcpyDtoH
 

From a2de074d5c33968e2624058ef39c7b2047183d52 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Mon, 5 Jan 2026 11:58:48 -0800
Subject: [PATCH 22/60] import

---
 numba_cuda/numba/cuda/tests/cudapy/test_random.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_random.py b/numba_cuda/numba/cuda/tests/cudapy/test_random.py
index b1882346c..4c3faeb99 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_random.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_random.py
@@ -19,7 +19,6 @@
     xoroshiro128p_uniform_float64,
     xoroshiro128p_normal_float64,
 )
-import cupy as cp
 
 # Distributions
 UNIFORM = 1

From de510c2a6a6bebf4006fcea47a31e1553ecf53ab Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Mon, 5 Jan 2026 12:48:22 -0800
Subject: [PATCH 23/60] address reviews, greptile

---
 numba_cuda/numba/cuda/cudadrv/devicearray.py     |  2 +-
 .../numba/cuda/tests/cudapy/test_gufunc.py       | 16 ++++++++++------
 .../cuda/tests/cudapy/test_gufunc_scalar.py      | 11 ++++++++---
 .../cuda/tests/doc_examples/test_laplace.py      |  2 +-
 numba_cuda/numba/cuda/vectorizers.py             |  2 +-
 5 files changed, 21 insertions(+), 12 deletions(-)

diff --git a/numba_cuda/numba/cuda/cudadrv/devicearray.py b/numba_cuda/numba/cuda/cudadrv/devicearray.py
index e88281077..e162fa0dd 100644
--- a/numba_cuda/numba/cuda/cudadrv/devicearray.py
+++ b/numba_cuda/numba/cuda/cudadrv/devicearray.py
@@ -770,7 +770,7 @@ def _do_setitem(self, key, value, stream=0):
             shape = arr.shape
             strides = arr.strides
 
-        lhs = type(self)(
+        lhs = type(self)._create_nowarn(
             shape=shape,
             strides=strides,
             dtype=self.dtype,
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py b/numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py
index c6e9dbb60..235090893 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py
@@ -8,13 +8,15 @@
 from numba.cuda import void, int32, float32, float64
 from numba.cuda import guvectorize
 from numba import cuda
-from numba.cuda.testing import skip_on_cudasim, CUDATestCase, DeprecatedDeviceArrayApiWarning
+from numba.cuda.testing import (
+    skip_on_cudasim,
+    CUDATestCase,
+    DeprecatedDeviceArrayApiWarning,
+)
 import unittest
 from numba.cuda.core.errors import NumbaPerformanceWarning, TypingError
 from numba.cuda.tests.support import override_config
 import cupy as cp
-import warnings
-
 
 
 def _get_matmulcore_gufunc(dtype=float32):
@@ -127,9 +129,11 @@ def test_gufunc_stream(self):
         with pytest.warns(DeprecatedDeviceArrayApiWarning):
             dB = cuda.to_device(B, stream)
 
-            dC = cuda.device_array(shape=(1001, 2, 5), dtype=A.dtype, stream=stream)
-        dC = gufunc(dA, dB, out=dC, stream=stream)
-        C = dC.copy_to_host(stream=stream)
+            dC = cuda.device_array(
+                shape=(1001, 2, 5), dtype=A.dtype, stream=stream
+            )
+            dC = gufunc(dA, dB, out=dC, stream=stream)
+            C = dC.copy_to_host(stream=stream)
         stream.synchronize()
 
         Gold = np.matmul(A, B)
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py b/numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py
index ba1463530..5bd39a002 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py
@@ -9,10 +9,15 @@
 
 import numpy as np
 from numba import cuda, guvectorize
-from numba.cuda.testing import skip_on_cudasim, CUDATestCase, DeprecatedDeviceArrayApiWarning
+from numba.cuda.testing import (
+    skip_on_cudasim,
+    CUDATestCase,
+    DeprecatedDeviceArrayApiWarning,
+)
 import unittest
 import cupy as cp
-import warnings
+import pytest
+
 
 @skip_on_cudasim("ufunc API unsupported in the simulator")
 class TestGUFuncScalar(CUDATestCase):
@@ -122,7 +127,7 @@ def foo(a, b, out):
         a = np.array(a)
 
         # As this test specifically tests the behavior of passing a DeviceNDArray,
-        # we'll catch the expected warning explicitly here. 
+        # we'll catch the expected warning explicitly here.
         with pytest.warns(DeprecatedDeviceArrayApiWarning):
             da = cuda.to_device(a)
         self.assertEqual(da.dtype, np.int64)
diff --git a/numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py b/numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py
index 2c17cceea..73c963a6a 100644
--- a/numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py
+++ b/numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py
@@ -64,7 +64,7 @@ def test_ex_laplace(self):
             fig, ax = plt.subplots(figsize=(16 * 0.66, 9 * 0.66))
             plt.plot(
                 np.arange(len(buf_0)),
-                buf_0.copy_to_host(),
+                buf_0.get(),
                 lw=3,
                 marker="*",
                 color="black",
diff --git a/numba_cuda/numba/cuda/vectorizers.py b/numba_cuda/numba/cuda/vectorizers.py
index f247f7e8c..87ac27d70 100644
--- a/numba_cuda/numba/cuda/vectorizers.py
+++ b/numba_cuda/numba/cuda/vectorizers.py
@@ -118,7 +118,7 @@ def as_device_array(self, obj):
         # When we have a Numba device array, we can simply return it.
         if cuda._api._is_cuda_ndarray(obj):
             return obj
-        return cuda.as_cuda_array(obj)
+        return cuda._api._as_cuda_array(obj)
 
     def to_device(self, hostary):
         return _api._to_device(hostary, stream=self._stream)

From 5a08fd45b51436fbc2190e35f688625a05e02348 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Mon, 5 Jan 2026 12:53:07 -0800
Subject: [PATCH 24/60] lockfile

---
 pixi.lock | 60 -------------------------------------------------------
 1 file changed, 60 deletions(-)

diff --git a/pixi.lock b/pixi.lock
index 2d3375112..738c1fb62 100644
--- a/pixi.lock
+++ b/pixi.lock
@@ -18053,10 +18053,6 @@ packages:
   - python_abi 3.10.* *_cp310
   - numpy >=1.21,<3
   license: BSD-2-Clause
-  input:
-    hash: 47a4db81d699e6ce6c04456c3a0ddeb7a00b34d45a6f7d2458d6f9e98ab582f0
-    globs:
-    - pyproject.toml
 - conda: .
   name: numba-cuda
   version: 0.22.1
@@ -18077,10 +18073,6 @@ packages:
   - python_abi 3.10.* *_cp310
   - numpy >=1.21,<3
   license: BSD-2-Clause
-  input:
-    hash: 47a4db81d699e6ce6c04456c3a0ddeb7a00b34d45a6f7d2458d6f9e98ab582f0
-    globs:
-    - pyproject.toml
 - conda: .
   name: numba-cuda
   version: 0.22.1
@@ -18101,10 +18093,6 @@ packages:
   - python_abi 3.10.* *_cp310
   - numpy >=1.21,<3
   license: BSD-2-Clause
-  input:
-    hash: 47a4db81d699e6ce6c04456c3a0ddeb7a00b34d45a6f7d2458d6f9e98ab582f0
-    globs:
-    - pyproject.toml
 - conda: .
   name: numba-cuda
   version: 0.22.1
@@ -18125,10 +18113,6 @@ packages:
   - python_abi 3.11.* *_cp311
   - numpy >=1.23,<3
   license: BSD-2-Clause
-  input:
-    hash: 47a4db81d699e6ce6c04456c3a0ddeb7a00b34d45a6f7d2458d6f9e98ab582f0
-    globs:
-    - pyproject.toml
 - conda: .
   name: numba-cuda
   version: 0.22.1
@@ -18149,10 +18133,6 @@ packages:
   - python_abi 3.11.* *_cp311
   - numpy >=1.23,<3
   license: BSD-2-Clause
-  input:
-    hash: 47a4db81d699e6ce6c04456c3a0ddeb7a00b34d45a6f7d2458d6f9e98ab582f0
-    globs:
-    - pyproject.toml
 - conda: .
   name: numba-cuda
   version: 0.22.1
@@ -18173,10 +18153,6 @@ packages:
   - python_abi 3.11.* *_cp311
   - numpy >=1.23,<3
   license: BSD-2-Clause
-  input:
-    hash: 47a4db81d699e6ce6c04456c3a0ddeb7a00b34d45a6f7d2458d6f9e98ab582f0
-    globs:
-    - pyproject.toml
 - conda: .
   name: numba-cuda
   version: 0.22.1
@@ -18197,10 +18173,6 @@ packages:
   - python_abi 3.12.* *_cp312
   - numpy >=1.23,<3
   license: BSD-2-Clause
-  input:
-    hash: 47a4db81d699e6ce6c04456c3a0ddeb7a00b34d45a6f7d2458d6f9e98ab582f0
-    globs:
-    - pyproject.toml
 - conda: .
   name: numba-cuda
   version: 0.22.1
@@ -18221,10 +18193,6 @@ packages:
   - python_abi 3.12.* *_cp312
   - numpy >=1.23,<3
   license: BSD-2-Clause
-  input:
-    hash: 47a4db81d699e6ce6c04456c3a0ddeb7a00b34d45a6f7d2458d6f9e98ab582f0
-    globs:
-    - pyproject.toml
 - conda: .
   name: numba-cuda
   version: 0.22.1
@@ -18245,10 +18213,6 @@ packages:
   - python_abi 3.12.* *_cp312
   - numpy >=1.23,<3
   license: BSD-2-Clause
-  input:
-    hash: 47a4db81d699e6ce6c04456c3a0ddeb7a00b34d45a6f7d2458d6f9e98ab582f0
-    globs:
-    - pyproject.toml
 - conda: .
   name: numba-cuda
   version: 0.22.1
@@ -18269,10 +18233,6 @@ packages:
   - python_abi 3.13.* *_cp313
   - numpy >=1.23,<3
   license: BSD-2-Clause
-  input:
-    hash: 47a4db81d699e6ce6c04456c3a0ddeb7a00b34d45a6f7d2458d6f9e98ab582f0
-    globs:
-    - pyproject.toml
 - conda: .
   name: numba-cuda
   version: 0.22.1
@@ -18293,10 +18253,6 @@ packages:
   - python_abi 3.13.* *_cp313
   - numpy >=1.23,<3
   license: BSD-2-Clause
-  input:
-    hash: 47a4db81d699e6ce6c04456c3a0ddeb7a00b34d45a6f7d2458d6f9e98ab582f0
-    globs:
-    - pyproject.toml
 - conda: .
   name: numba-cuda
   version: 0.22.1
@@ -18317,10 +18273,6 @@ packages:
   - python_abi 3.13.* *_cp313
   - numpy >=1.23,<3
   license: BSD-2-Clause
-  input:
-    hash: 47a4db81d699e6ce6c04456c3a0ddeb7a00b34d45a6f7d2458d6f9e98ab582f0
-    globs:
-    - pyproject.toml
 - conda: .
   name: numba-cuda
   version: 0.22.1
@@ -18341,10 +18293,6 @@ packages:
   - python_abi 3.14.* *_cp314
   - numpy >=1.23,<3
   license: BSD-2-Clause
-  input:
-    hash: 47a4db81d699e6ce6c04456c3a0ddeb7a00b34d45a6f7d2458d6f9e98ab582f0
-    globs:
-    - pyproject.toml
 - conda: .
   name: numba-cuda
   version: 0.22.1
@@ -18365,10 +18313,6 @@ packages:
   - python_abi 3.14.* *_cp314
   - numpy >=1.23,<3
   license: BSD-2-Clause
-  input:
-    hash: 47a4db81d699e6ce6c04456c3a0ddeb7a00b34d45a6f7d2458d6f9e98ab582f0
-    globs:
-    - pyproject.toml
 - conda: .
   name: numba-cuda
   version: 0.22.1
@@ -18389,10 +18333,6 @@ packages:
   - python_abi 3.14.* *_cp314
   - numpy >=1.23,<3
   license: BSD-2-Clause
-  input:
-    hash: 47a4db81d699e6ce6c04456c3a0ddeb7a00b34d45a6f7d2458d6f9e98ab582f0
-    globs:
-    - pyproject.toml
 - pypi: https://files.pythonhosted.org/packages/5e/a6/9ca0eecc489640615642a6cbc0ca9e10df70df38c4d43f5a928ff18d8827/numpy-2.3.5-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl
   name: numpy
   version: 2.3.5

From e3355253e8f224039f7be833c31de1ad28b727e0 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Mon, 5 Jan 2026 12:59:52 -0800
Subject: [PATCH 25/60] run precommit

---
 numba_cuda/numba/cuda/random.py               |  1 -
 .../numba/cuda/tests/cudadrv/test_events.py   |  3 +-
 .../numba/cuda/tests/cudadrv/test_pinned.py   |  1 +
 .../tests/cudapy/test_array_reductions.py     | 60 ++++++++++---------
 .../tests/cudapy/test_cuda_array_interface.py |  6 +-
 .../numba/cuda/tests/cudapy/test_ipc.py       |  6 +-
 .../numba/cuda/tests/cudapy/test_matmul.py    |  1 -
 .../cuda/tests/cudapy/test_multithreads.py    |  1 -
 .../numba/cuda/tests/cudapy/test_transpose.py |  1 -
 .../cuda/tests/nocuda/test_dummyarray.py      |  2 +-
 10 files changed, 44 insertions(+), 38 deletions(-)

diff --git a/numba_cuda/numba/cuda/random.py b/numba_cuda/numba/cuda/random.py
index f0d6c595f..d51df6106 100644
--- a/numba_cuda/numba/cuda/random.py
+++ b/numba_cuda/numba/cuda/random.py
@@ -3,7 +3,6 @@
 
 import math
 
-from numba import cuda
 from numba.cuda import (
     float32,
     float64,
diff --git a/numba_cuda/numba/cuda/tests/cudadrv/test_events.py b/numba_cuda/numba/cuda/tests/cudadrv/test_events.py
index 33a033cae..eb832819d 100644
--- a/numba_cuda/numba/cuda/tests/cudadrv/test_events.py
+++ b/numba_cuda/numba/cuda/tests/cudadrv/test_events.py
@@ -16,7 +16,7 @@ def test_event_elapsed(self):
         evtend = cuda.event()
 
         evtstart.record()
-        dary = cp.array(np.arange(N, dtype=np.double))
+        dary = cp.array(np.arange(N, dtype=np.double))  # noqa: F841
         evtend.record()
         evtend.wait()
         evtend.synchronize()
@@ -39,7 +39,6 @@ def event_elapsed_inner(self, stream):
         def kernel():
             pass
 
-        N = 32
         evtstart = cuda.event()
         evtend = cuda.event()
 
diff --git a/numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py b/numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py
index 795f1417b..94222066b 100644
--- a/numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py
+++ b/numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py
@@ -7,6 +7,7 @@
 from numba import cuda
 from numba.cuda.testing import unittest, DeprecatedDeviceArrayApiTest
 
+
 # TODO
 class TestPinned(DeprecatedDeviceArrayApiTest):
     def _run_copies(self, A):
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py b/numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py
index 02fd31471..79053c6df 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py
@@ -46,9 +46,9 @@ def kernel(out):
                 i += 1
 
         expected = np.array([np.all(a) for a in cases], dtype=np.bool_)
-        out = cp.zeros(len(cases), dtype=cp.bool_)        
+        out = cp.zeros(len(cases), dtype=cp.bool_)
         kernel[1, 1](out)
-        got = out.get()                                    
+        got = out.get()
         self.assertPreciseEqual(expected, got)
 
     def test_any_basic(self):
@@ -69,9 +69,9 @@ def kernel(out):
                 i += 1
 
         expected = np.array([np.any(a) for a in cases], dtype=np.bool_)
-        out = cp.zeros(len(cases), dtype=cp.bool_)         
+        out = cp.zeros(len(cases), dtype=cp.bool_)
         kernel[1, 1](out)
-        self.assertPreciseEqual(expected, out.get())       
+        self.assertPreciseEqual(expected, out.get())
 
     def test_sum_basic(self):
         arrays = (
@@ -81,7 +81,9 @@ def test_sum_basic(self):
             np.float64([-1.5, 2.5, -float("inf")]),
             np.float64([-1.5, 2.5, float("inf"), -float("inf")]),
             np.float64([np.nan, -1.5, 2.5, np.nan, 3.0]),
-            np.float64([np.nan, -1.5, 2.5, np.nan, float("inf"), -float("inf"), 3.0]),
+            np.float64(
+                [np.nan, -1.5, 2.5, np.nan, float("inf"), -float("inf"), 3.0]
+            ),
             np.float64([5.0, np.nan, -1.5, np.nan]),
             np.float64([np.nan, np.nan]),
         )
@@ -94,9 +96,9 @@ def kernel(out):
                 i += 1
 
         expected = np.array([np.sum(a) for a in arrays], dtype=np.float64)
-        out = cp.zeros(len(arrays), dtype=cp.float64)      
+        out = cp.zeros(len(arrays), dtype=cp.float64)
         kernel[1, 1](out)
-        self.assertPreciseEqual(expected, out.get())       
+        self.assertPreciseEqual(expected, out.get())
 
     def test_mean_basic(self):
         arrays = (
@@ -106,7 +108,9 @@ def test_mean_basic(self):
             np.float64([-1.5, 2.5, -float("inf")]),
             np.float64([-1.5, 2.5, float("inf"), -float("inf")]),
             np.float64([np.nan, -1.5, 2.5, np.nan, 3.0]),
-            np.float64([np.nan, -1.5, 2.5, np.nan, float("inf"), -float("inf"), 3.0]),
+            np.float64(
+                [np.nan, -1.5, 2.5, np.nan, float("inf"), -float("inf"), 3.0]
+            ),
             np.float64([5.0, np.nan, -1.5, np.nan]),
             np.float64([np.nan, np.nan]),
         )
@@ -119,9 +123,9 @@ def kernel(out):
                 i += 1
 
         expected = np.array([np.mean(a) for a in arrays], dtype=np.float64)
-        out = cp.zeros(len(arrays), dtype=cp.float64)      
+        out = cp.zeros(len(arrays), dtype=cp.float64)
         kernel[1, 1](out)
-        self.assertPreciseEqual(expected, out.get())       
+        self.assertPreciseEqual(expected, out.get())
 
     def test_var_basic(self):
         arrays = (
@@ -142,9 +146,9 @@ def kernel(out):
                 i += 1
 
         expected = np.array([np.var(a) for a in arrays], dtype=np.float64)
-        out = cp.zeros(len(arrays), dtype=cp.float64)      
+        out = cp.zeros(len(arrays), dtype=cp.float64)
         kernel[1, 1](out)
-        self.assertPreciseEqual(expected, out.get(), prec="double")   
+        self.assertPreciseEqual(expected, out.get(), prec="double")
 
     def test_std_basic(self):
         arrays = (
@@ -165,9 +169,9 @@ def kernel(out):
                 i += 1
 
         expected = np.array([np.std(a) for a in arrays], dtype=np.float64)
-        out = cp.zeros(len(arrays), dtype=cp.float64)      
+        out = cp.zeros(len(arrays), dtype=cp.float64)
         kernel[1, 1](out)
-        self.assertPreciseEqual(expected, out.get())       
+        self.assertPreciseEqual(expected, out.get())
 
     def test_min_basic(self):
         arrays = (
@@ -188,9 +192,9 @@ def kernel(out):
                 i += 1
 
         expected = np.array([np.min(a) for a in arrays], dtype=np.float64)
-        out = cp.zeros(len(arrays), dtype=cp.float64)      
+        out = cp.zeros(len(arrays), dtype=cp.float64)
         kernel[1, 1](out)
-        self.assertPreciseEqual(expected, out.get())       
+        self.assertPreciseEqual(expected, out.get())
 
     def test_max_basic(self):
         arrays = (
@@ -211,9 +215,9 @@ def kernel(out):
                 i += 1
 
         expected = np.array([np.max(a) for a in arrays], dtype=np.float64)
-        out = cp.zeros(len(arrays), dtype=cp.float64)      
+        out = cp.zeros(len(arrays), dtype=cp.float64)
         kernel[1, 1](out)
-        self.assertPreciseEqual(expected, out.get())       
+        self.assertPreciseEqual(expected, out.get())
 
     def test_nanmin_basic(self):
         arrays = (
@@ -235,9 +239,9 @@ def kernel(out):
                 i += 1
 
         expected = np.array([np.nanmin(a) for a in arrays], dtype=np.float64)
-        out = cp.zeros(len(arrays), dtype=cp.float64)      
+        out = cp.zeros(len(arrays), dtype=cp.float64)
         kernel[1, 1](out)
-        self.assertPreciseEqual(expected, out.get())       
+        self.assertPreciseEqual(expected, out.get())
 
     def test_nanmax_basic(self):
         arrays = (
@@ -259,9 +263,9 @@ def kernel(out):
                 i += 1
 
         expected = np.array([np.nanmax(a) for a in arrays], dtype=np.float64)
-        out = cp.zeros(len(arrays), dtype=cp.float64)      
+        out = cp.zeros(len(arrays), dtype=cp.float64)
         kernel[1, 1](out)
-        self.assertPreciseEqual(expected, out.get())       
+        self.assertPreciseEqual(expected, out.get())
 
     def test_nanmean_basic(self):
         arrays = (
@@ -280,9 +284,9 @@ def kernel(out):
                 i += 1
 
         expected = np.array([np.nanmean(a) for a in arrays], dtype=np.float64)
-        out = cp.zeros(len(arrays), dtype=cp.float64)      
+        out = cp.zeros(len(arrays), dtype=cp.float64)
         kernel[1, 1](out)
-        self.assertPreciseEqual(expected, out.get())       
+        self.assertPreciseEqual(expected, out.get())
 
     def test_nansum_basic(self):
         arrays = (
@@ -304,9 +308,9 @@ def kernel(out):
                 i += 1
 
         expected = np.array([np.nansum(a) for a in arrays], dtype=np.float64)
-        out = cp.zeros(len(arrays), dtype=cp.float64)      
+        out = cp.zeros(len(arrays), dtype=cp.float64)
         kernel[1, 1](out)
-        self.assertPreciseEqual(expected, out.get())       
+        self.assertPreciseEqual(expected, out.get())
 
     def test_nanprod_basic(self):
         arrays = (
@@ -328,6 +332,6 @@ def kernel(out):
                 i += 1
 
         expected = np.array([np.nanprod(a) for a in arrays], dtype=np.float64)
-        out = cp.zeros(len(arrays), dtype=cp.float64)      
+        out = cp.zeros(len(arrays), dtype=cp.float64)
         kernel[1, 1](out)
-        self.assertPreciseEqual(expected, out.get())       
+        self.assertPreciseEqual(expected, out.get())
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py b/numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py
index 6c1eecfde..f0f1d9e22 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py
@@ -5,7 +5,11 @@
 
 from numba.cuda import vectorize, guvectorize
 from numba import cuda
-from numba.cuda.testing import unittest, CUDATestCase, ForeignArray, DeprecatedDeviceArrayApiTest
+from numba.cuda.testing import (
+    unittest,
+    ForeignArray,
+    DeprecatedDeviceArrayApiTest,
+)
 from numba.cuda.testing import skip_on_cudasim, skip_if_external_memmgr
 from numba.cuda.tests.support import linux_only, override_config
 from unittest.mock import call, patch
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_ipc.py b/numba_cuda/numba/cuda/tests/cudapy/test_ipc.py
index 249ec3c6d..126a1d93a 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_ipc.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_ipc.py
@@ -16,7 +16,6 @@
     skip_on_cudasim,
     skip_under_cuda_memcheck,
     skip_on_wsl2,
-    CUDATestCase,
     DeprecatedDeviceArrayApiTest,
     ForeignArray,
 )
@@ -67,16 +66,19 @@ def ipc_array_test(ipcarr, parent_pid):
 
 def _suppress_deprecated_warnings():
     import warnings
+
     # adjust the import path to the actual warning class
     from numba.cuda import DeprecatedDeviceArrayApiWarning
+
     warnings.filterwarnings("ignore", category=DeprecatedDeviceArrayApiWarning)
 
+
 class CUDAIpcTestCase(DeprecatedDeviceArrayApiTest):
     @classmethod
     def setUpClass(cls) -> None:
         cls.exe = concurrent.futures.ProcessPoolExecutor(
             mp_context=mp.get_context("spawn"),
-            initializer=_suppress_deprecated_warnings
+            initializer=_suppress_deprecated_warnings,
         )
 
     @classmethod
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_matmul.py b/numba_cuda/numba/cuda/tests/cudapy/test_matmul.py
index a2a242273..5742bbad8 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_matmul.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_matmul.py
@@ -58,7 +58,6 @@ def cu_square_matrix_mul(A, B, C):
         dB = cp.array(np.random.random((n, n)), dtype=np.float32)
         dC = cp.empty_like(dA)
 
-
         cu_square_matrix_mul[(bpg, bpg), (tpb, tpb)](dA, dB, dC)
         C = dC.get()
 
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py b/numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py
index 13254c1f3..73573daaf 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py
@@ -8,7 +8,6 @@
 from numba.cuda.testing import (
     skip_on_cudasim,
     skip_under_cuda_memcheck,
-    CUDATestCase,
     DeprecatedDeviceArrayApiTest,
 )
 import unittest
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_transpose.py b/numba_cuda/numba/cuda/tests/cudapy/test_transpose.py
index 1bc84a2a1..63130fe83 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_transpose.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_transpose.py
@@ -6,7 +6,6 @@
 from numba.cuda.kernels.transpose import transpose
 from numba.cuda.testing import unittest
 from numba.cuda.testing import skip_on_cudasim, DeprecatedDeviceArrayApiTest
-import cupy as cp
 
 
 recordwith2darray = np.dtype([("i", np.int32), ("j", np.float32, (3, 2))])
diff --git a/numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py b/numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py
index bd6af0f93..05f926a36 100644
--- a/numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py
+++ b/numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py
@@ -6,7 +6,6 @@
 import numpy as np
 from numba.cuda.cudadrv.dummyarray import Array
 from numba.cuda.testing import skip_on_cudasim, DeprecatedDeviceArrayApiTest
-import cupy as cp
 
 
 @skip_on_cudasim("Tests internals of the CUDA driver device array")
@@ -421,6 +420,7 @@ def test_empty_array_flags(self):
                 self.assertTrue(arr.flags["C_CONTIGUOUS"])
                 self.assertTrue(arr.flags["F_CONTIGUOUS"])
 
+
 # Typing of DeviceNDarray is deprecated
 @skip_on_cudasim("Tests CUDA device array type inference")
 class TestEmptyArrayTypeInference(DeprecatedDeviceArrayApiTest):

From 66648654b5e8ee7316dd4d49eecbf4d497600eb7 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Mon, 5 Jan 2026 14:42:18 -0800
Subject: [PATCH 26/60] bifurcate cupy

---
 pyproject.toml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 0815b40d8..d4523f28a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -47,13 +47,14 @@ test = [
     "filecheck",
     "ml_dtypes",
     "statistics",
-    "cupy"
 ]
 test-cu12 = [
     "nvidia-curand-cu12",
+    "cupy-cuda12x",
     { include-group = "test" }
 ]
 test-cu13 = [
+    "cupy-cuda13x",
     "nvidia-curand==10.4.*",
     { include-group = "test" }
 ]

From b27e0a2aee93d72b934425dfc612e4e4d41968e3 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Fri, 9 Jan 2026 05:31:25 -0800
Subject: [PATCH 27/60] fixi

---
 pixi.lock | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/pixi.lock b/pixi.lock
index 738c1fb62..890f7690a 100644
--- a/pixi.lock
+++ b/pixi.lock
@@ -18035,7 +18035,7 @@ packages:
   timestamp: 1765466860567
 - conda: .
   name: numba-cuda
-  version: 0.22.1
+  version: 0.23.0
   build: py310h04c9772_0
   subdir: linux-64
   variants:
@@ -18055,7 +18055,7 @@ packages:
   license: BSD-2-Clause
 - conda: .
   name: numba-cuda
-  version: 0.22.1
+  version: 0.23.0
   build: py310h3ca6f64_0
   subdir: linux-aarch64
   variants:
@@ -18075,7 +18075,7 @@ packages:
   license: BSD-2-Clause
 - conda: .
   name: numba-cuda
-  version: 0.22.1
+  version: 0.23.0
   build: py310h5d23e43_0
   subdir: win-64
   variants:
@@ -18095,7 +18095,7 @@ packages:
   license: BSD-2-Clause
 - conda: .
   name: numba-cuda
-  version: 0.22.1
+  version: 0.23.0
   build: py311h2894be0_0
   subdir: linux-aarch64
   variants:
@@ -18115,7 +18115,7 @@ packages:
   license: BSD-2-Clause
 - conda: .
   name: numba-cuda
-  version: 0.22.1
+  version: 0.23.0
   build: py311hb9e802a_0
   subdir: win-64
   variants:
@@ -18135,7 +18135,7 @@ packages:
   license: BSD-2-Clause
 - conda: .
   name: numba-cuda
-  version: 0.22.1
+  version: 0.23.0
   build: py311he8c1319_0
   subdir: linux-64
   variants:
@@ -18155,7 +18155,7 @@ packages:
   license: BSD-2-Clause
 - conda: .
   name: numba-cuda
-  version: 0.22.1
+  version: 0.23.0
   build: py312h3eebbd5_0
   subdir: linux-64
   variants:
@@ -18175,7 +18175,7 @@ packages:
   license: BSD-2-Clause
 - conda: .
   name: numba-cuda
-  version: 0.22.1
+  version: 0.23.0
   build: py312h8e85db0_0
   subdir: linux-aarch64
   variants:
@@ -18195,7 +18195,7 @@ packages:
   license: BSD-2-Clause
 - conda: .
   name: numba-cuda
-  version: 0.22.1
+  version: 0.23.0
   build: py312ha067a5a_0
   subdir: win-64
   variants:
@@ -18215,7 +18215,7 @@ packages:
   license: BSD-2-Clause
 - conda: .
   name: numba-cuda
-  version: 0.22.1
+  version: 0.23.0
   build: py313h66129c8_0
   subdir: linux-aarch64
   variants:
@@ -18235,7 +18235,7 @@ packages:
   license: BSD-2-Clause
 - conda: .
   name: numba-cuda
-  version: 0.22.1
+  version: 0.23.0
   build: py313he80dd91_0
   subdir: win-64
   variants:
@@ -18255,7 +18255,7 @@ packages:
   license: BSD-2-Clause
 - conda: .
   name: numba-cuda
-  version: 0.22.1
+  version: 0.23.0
   build: py313hf75ce08_0
   subdir: linux-64
   variants:
@@ -18275,7 +18275,7 @@ packages:
   license: BSD-2-Clause
 - conda: .
   name: numba-cuda
-  version: 0.22.1
+  version: 0.23.0
   build: py314h59f3c06_0
   subdir: linux-64
   variants:
@@ -18295,7 +18295,7 @@ packages:
   license: BSD-2-Clause
 - conda: .
   name: numba-cuda
-  version: 0.22.1
+  version: 0.23.0
   build: py314h625260f_0
   subdir: win-64
   variants:
@@ -18315,7 +18315,7 @@ packages:
   license: BSD-2-Clause
 - conda: .
   name: numba-cuda
-  version: 0.22.1
+  version: 0.23.0
   build: py314ha479ada_0
   subdir: linux-aarch64
   variants:

From 89720dd49e45de88fd1c80c86391dc1250faf5f5 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Fri, 9 Jan 2026 07:01:18 -0800
Subject: [PATCH 28/60] fix tests

---
 numba_cuda/numba/cuda/tests/cudapy/test_sm.py  | 15 ++++++++++++---
 numba_cuda/numba/cuda/tests/cudapy/test_ssa.py |  3 +--
 numba_cuda/numba/cuda/vectorizers.py           |  2 +-
 3 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_sm.py b/numba_cuda/numba/cuda/tests/cudapy/test_sm.py
index 6d2cd2f89..4d31f71ec 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_sm.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_sm.py
@@ -9,12 +9,18 @@
     from numba.core.errors import TypingError as NumbaTypingError
 from numba.cuda.core.errors import TypingError
 from numba.cuda import types
-from numba.cuda.testing import unittest, CUDATestCase, skip_on_cudasim
+from numba.cuda.testing import (
+    unittest,
+    CUDATestCase,
+    skip_on_cudasim,
+    DeprecatedDeviceArrayApiWarning,
+)
 
 import numpy as np
 from numba.cuda.np import numpy_support as nps
 import cupy as cp
 from .extensions_usecases import struct_model_type, MyStruct
+import pytest
 
 recordwith2darray = np.dtype([("i", np.int32), ("j", np.float32, (3, 2))])
 
@@ -130,9 +136,12 @@ def use_sm_chunk_copy(x, y):
                 for j in range(nthreads):
                     y[bd * bx + j] = sm[j]
 
-        d_result = cp.asarray(arr)
+        with pytest.warns(DeprecatedDeviceArrayApiWarning):
+            # waiting on cupy support for record dtypes
+            d_result = cuda.to_device(arr)
+
         use_sm_chunk_copy[nblocks, nthreads](arr, d_result)
-        host_result = d_result.get()
+        host_result = d_result.copy_to_host()
         np.testing.assert_array_equal(arr, host_result)
 
     def test_shared_recarray(self):
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_ssa.py b/numba_cuda/numba/cuda/tests/cudapy/test_ssa.py
index 1508104cf..0f1378c58 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_ssa.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_ssa.py
@@ -20,7 +20,6 @@
 from numba.cuda.testing import CUDATestCase, skip_on_cudasim
 import cupy as cp
 
-
 _DEBUG = False
 
 if _DEBUG:
@@ -39,7 +38,7 @@ class SSABaseTest(CUDATestCase):
     def check_func(self, func, result_array, *args):
         # For CUDA kernels, we need to create output arrays and call with [1,1] launch config
         # Create GPU array with same shape as expected result array
-        gpu_result_array = cp.zeros(len(result_array), dtype=result_array.dtype)
+        gpu_result_array = cp.asarray(np.zeros_like(result_array))
 
         # Call the CUDA kernel
         func[1, 1](gpu_result_array, *copy.deepcopy(args))
diff --git a/numba_cuda/numba/cuda/vectorizers.py b/numba_cuda/numba/cuda/vectorizers.py
index 87ac27d70..0466d1f7a 100644
--- a/numba_cuda/numba/cuda/vectorizers.py
+++ b/numba_cuda/numba/cuda/vectorizers.py
@@ -116,7 +116,7 @@ def as_device_array(self, obj):
         # Producer then importing it as a Consumer, which causes a
         # synchronization on the array's stream (if it has one) by default.
         # When we have a Numba device array, we can simply return it.
-        if cuda._api._is_cuda_ndarray(obj):
+        if cuda._api._is_cuda_array(obj):
             return obj
         return cuda._api._as_cuda_array(obj)
 

From c13e007384038d20590f046617194d7ab40a33c1 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Fri, 9 Jan 2026 07:08:00 -0800
Subject: [PATCH 29/60] fix ndarray check

---
 numba_cuda/numba/cuda/vectorizers.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/numba_cuda/numba/cuda/vectorizers.py b/numba_cuda/numba/cuda/vectorizers.py
index 0466d1f7a..cfabac42f 100644
--- a/numba_cuda/numba/cuda/vectorizers.py
+++ b/numba_cuda/numba/cuda/vectorizers.py
@@ -116,7 +116,7 @@ def as_device_array(self, obj):
         # Producer then importing it as a Consumer, which causes a
         # synchronization on the array's stream (if it has one) by default.
         # When we have a Numba device array, we can simply return it.
-        if cuda._api._is_cuda_array(obj):
+        if cuda.cudadrv.devicearray.is_cuda_ndarray(obj):
             return obj
         return cuda._api._as_cuda_array(obj)
 

From 2db2d21e2153ba9d4c06677aedfc489242c5d9ce Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Mon, 12 Jan 2026 07:51:04 -0800
Subject: [PATCH 30/60] fix simulator

---
 numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py b/numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py
index 60103ea43..dcf4e6cc9 100644
--- a/numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py
+++ b/numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py
@@ -272,6 +272,13 @@ def split(self, section, stream=0):
         ]
 
 
+DeviceNDArray = FakeCUDAArray
+
+
+class DeprecatedDeviceArrayApiWarning(Warning):
+    pass
+
+
 def array_core(ary):
     """
     Extract the repeated core of a broadcast array.

From ec053d18ed85a6ae6fa70c129b792953314abf42 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Mon, 12 Jan 2026 08:00:05 -0800
Subject: [PATCH 31/60] update bfloat16 tests

---
 .../numba/cuda/tests/cudapy/test_bfloat16.py  | 45 ++++++++++---------
 1 file changed, 23 insertions(+), 22 deletions(-)

diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py b/numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py
index f309c9531..169bae9d3 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py
@@ -16,6 +16,7 @@
     uint64,
 )
 from numba.cuda import config
+import cupy as cp
 
 if not config.ENABLE_CUDASIM:
     from numba.cuda.bf16 import (
@@ -159,7 +160,7 @@ def kernel(arr):
                     y = f(x)
                     arr[0] = float32(y)
 
-                arr = cuda.device_array((1,), dtype="float32")
+                arr = cp.zeros((1,), dtype="float32")
                 kernel[1, 1](arr)
 
                 if f in exp_functions:
@@ -187,7 +188,7 @@ def kernel(out):
             out[8] = float32(hsub_rn(a, b))
             out[9] = float32(hmul_rn(a, b))
 
-        out = cuda.device_array((10,), dtype="float32")
+        out = cp.zeros((10,), dtype="float32")
         kernel[1, 1](out)
 
         a = 1.25
@@ -220,7 +221,7 @@ def kernel(out):
             out[2] = float32(hmul_sat(a, b))  # 1.125 -> 1.0
             out[3] = float32(hfma_sat(a, b, a))  # 1.125 + 1.5 -> 1.0
 
-        out = cuda.device_array((4,), dtype="float32")
+        out = cp.zeros((4,), dtype="float32")
         kernel[1, 1](out)
 
         self.assertAlmostEqual(out[0], 1.0, delta=1e-3)
@@ -244,7 +245,7 @@ def kernel(out):
 
             out[0] = float32(hfma_relu(a, b, c))  # -3.0 -> relu -> 0.0
 
-        out = cuda.device_array((1,), dtype="float32")
+        out = cp.zeros((1,), dtype="float32")
         kernel[1, 1](out)
 
         self.assertAlmostEqual(out[0], 0.0, delta=1e-3)
@@ -274,7 +275,7 @@ def kernel(out, a, b):
         for cmpfn, op in zip(comparisons, ops):
             with self.subTest(cmpfn=cmpfn):
                 kernel = make_kernel(cmpfn)
-                out = cuda.device_array((1,), dtype="bool")
+                out = cp.zeros((1,), dtype="bool")
 
                 a = 3.0
                 b = 3.0
@@ -301,7 +302,7 @@ def kernel(out):
             out[0] = float32(hmax(a, b))
             out[1] = float32(hmin(a, b))
 
-        out = cuda.device_array((2,), dtype="float32")
+        out = cp.zeros((2,), dtype="float32")
         kernel[1, 1](out)
         self.assertAlmostEqual(out[0], 4.0, delta=1e-3)
         self.assertAlmostEqual(out[1], 3.0, delta=1e-3)
@@ -316,8 +317,8 @@ def kernel(out_bool, out_int):
             out_bool[0] = hisnan(nanv)
             out_int[0] = hisinf(infv)
 
-        out_bool = cuda.device_array((1,), dtype="bool")
-        out_int = cuda.device_array((1,), dtype="int32")
+        out_bool = cp.zeros((1,), dtype="bool")
+        out_int = cp.zeros((1,), dtype="int32")
         kernel[1, 1](out_bool, out_int)
         self.assertTrue(bool(out_bool[0]))
         self.assertNotEqual(int(out_int[0]), 0)
@@ -334,7 +335,7 @@ def kernel(out):
             out[2] = float32(hmax(a, b))
             out[3] = float32(hmin(a, b))
 
-        out = cuda.device_array((4,), dtype="float32")
+        out = cp.zeros((4,), dtype="float32")
         kernel[1, 1](out)
         # NaN-propagating variants should produce NaN
         self.assertTrue(math.isnan(out[0]))
@@ -352,8 +353,8 @@ def roundtrip_kernel(test_val, i2, u2):
             u2[0] = uint16_as_bfloat16(bfloat16_as_uint16(test_val))
 
         test_val = np.int16(0x3FC0)  # 1.5 in bfloat16
-        i2 = cuda.device_array((1,), dtype="int16")
-        u2 = cuda.device_array((1,), dtype="uint16")
+        i2 = cp.zeros((1,), dtype="int16")
+        u2 = cp.zeros((1,), dtype="uint16")
         roundtrip_kernel[1, 1](test_val, i2, u2)
 
         self.assertEqual(i2[0], test_val)
@@ -394,17 +395,17 @@ def kernel(test_val, i1, i2, i3, i4, u1, u2, u3, u4):
             u4[3] = bfloat16_to_uint64_ru(a)
 
         # rz
-        i1 = cuda.device_array((1,), dtype="int8")
+        i1 = cp.zeros((1,), dtype="int8")
         # rn, rz, rd, ru
-        i2 = cuda.device_array((4,), dtype="int16")
-        i3 = cuda.device_array((4,), dtype="int32")
-        i4 = cuda.device_array((4,), dtype="int64")
+        i2 = cp.zeros((4,), dtype="int16")
+        i3 = cp.zeros((4,), dtype="int32")
+        i4 = cp.zeros((4,), dtype="int64")
         # rz
-        u1 = cuda.device_array((1,), dtype="uint8")
+        u1 = cp.zeros((1,), dtype="uint8")
         # rn, rz, rd, ru
-        u2 = cuda.device_array((4,), dtype="uint16")
-        u3 = cuda.device_array((4,), dtype="uint32")
-        u4 = cuda.device_array((4,), dtype="uint64")
+        u2 = cp.zeros((4,), dtype="uint16")
+        u3 = cp.zeros((4,), dtype="uint32")
+        u4 = cp.zeros((4,), dtype="uint64")
 
         test_val = np.int16(0x3FC0)  # 1.5 in bfloat16
 
@@ -489,7 +490,7 @@ def kernel(out):
             out[22] = bfloat16_as_int16(u4rd)
             out[23] = bfloat16_as_int16(u4ru)
 
-        out = cuda.device_array((24,), dtype="int16")
+        out = cp.zeros((24,), dtype="int16")
         kernel[1, 1](out)
-        res = out.copy_to_host()
+        res = out.get()  # CuPy arrays have no copy_to_host()
 
@@ -523,7 +524,7 @@ def kernel(out):
             a = bfloat16(1.5)
             out[0] = bfloat16_to_float32(a)
 
-        out = cuda.device_array((1,), dtype="float32")
+        out = cp.zeros((1,), dtype="float32")
         kernel[1, 1](out)
 
         self.assertAlmostEqual(out[0], 1.5, delta=1e-7)  # conversion is exact
@@ -553,7 +554,7 @@ def kernel(out):
             out[4] = bfloat16_as_int16(f4_default)
             out[5] = bfloat16_as_int16(f8_default)
 
-        out = cuda.device_array((6,), dtype="int16")
+        out = cp.zeros((6,), dtype="int16")
         kernel[1, 1](out)
-        raw = out.copy_to_host()
+        raw = out.get()  # CuPy arrays have no copy_to_host()
 

From 0ae990d2a918c63617d683c04d890346a5bf47a7 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Tue, 20 Jan 2026 11:53:33 -0800
Subject: [PATCH 32/60] test cu-CUDA_MAJOR in run-tests

---
 ci/tools/run-tests | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ci/tools/run-tests b/ci/tools/run-tests
index 38a980546..aecde5d36 100755
--- a/ci/tools/run-tests
+++ b/ci/tools/run-tests
@@ -10,7 +10,7 @@ set -euo pipefail
 
 echo "Installing numba-cuda wheel with test dependencies"
 if [[ "${LOCAL_CTK}" == 1 ]]; then
-  pip install "${NUMBA_CUDA_ARTIFACTS_DIR}"/*.whl "cuda-bindings==${TEST_CUDA_MAJOR}.*" --group test
+  pip install "${NUMBA_CUDA_ARTIFACTS_DIR}"/*.whl "cuda-bindings==${TEST_CUDA_MAJOR}.*" --group test-cu${TEST_CUDA_MAJOR}
 else
   pip install $(ls "${NUMBA_CUDA_ARTIFACTS_DIR}"/*.whl)["cu${TEST_CUDA_MAJOR}"] "cuda-toolkit==${TEST_CUDA_MAJOR}.${TEST_CUDA_MINOR}.*" --group "test-cu${TEST_CUDA_MAJOR}"
 fi

From b4c03ad58c2e5dd177d9bbe2daad432f1ff8a671 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Tue, 20 Jan 2026 12:15:17 -0800
Subject: [PATCH 33/60] small fixes

---
 ci/tools/run-tests                                    |  2 +-
 .../numba/cuda/tests/benchmarks/test_kernel_launch.py | 11 ++++++-----
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/ci/tools/run-tests b/ci/tools/run-tests
index aecde5d36..fe7188a87 100755
--- a/ci/tools/run-tests
+++ b/ci/tools/run-tests
@@ -10,7 +10,7 @@ set -euo pipefail
 
 echo "Installing numba-cuda wheel with test dependencies"
 if [[ "${LOCAL_CTK}" == 1 ]]; then
-  pip install "${NUMBA_CUDA_ARTIFACTS_DIR}"/*.whl "cuda-bindings==${TEST_CUDA_MAJOR}.*" --group test-cu${TEST_CUDA_MAJOR}
+  pip install "${NUMBA_CUDA_ARTIFACTS_DIR}"/*.whl "cuda-bindings==${TEST_CUDA_MAJOR}.*" "cupy-cuda${TEST_CUDA_MAJOR}x" --group test
 else
   pip install $(ls "${NUMBA_CUDA_ARTIFACTS_DIR}"/*.whl)["cu${TEST_CUDA_MAJOR}"] "cuda-toolkit==${TEST_CUDA_MAJOR}.${TEST_CUDA_MINOR}.*" --group "test-cu${TEST_CUDA_MAJOR}"
 fi
diff --git a/numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py b/numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py
index 2c4badbbc..b2738a03a 100644
--- a/numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py
+++ b/numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py
@@ -7,6 +7,7 @@
 import numpy as np
 import pytest
 from pytest import param
+from numba.cuda.testing import DeprecatedDeviceArrayApiWarning
 
 
 pytestmark = pytest.mark.skipif(
@@ -14,12 +15,15 @@
     reason="no reason to run benchmarks in the simulator",
 )
 
+with pytest.warns(DeprecatedDeviceArrayApiWarning):
+    devary_arg = cuda.device_array(128, dtype=np.float32)
+
 
 @pytest.mark.parametrize(
     "array_func",
     [
         param(
-            lambda: cuda.device_array(128, dtype=np.float32),
+            devary_arg,
             id="device_array",
         ),
         param(
@@ -57,10 +61,7 @@ def bench(func, arr):
     "array_func",
     [
         param(
-            lambda: [
-                cuda.device_array(128, dtype=np.float32)
-                for _ in range(len(string.ascii_lowercase))
-            ],
+            lambda: [devary_arg for _ in range(len(string.ascii_lowercase))],
             id="device_array",
         ),
         param(

From 733f8b76a6b766b5ee615c59d0db7648bf5a995d Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Tue, 20 Jan 2026 13:00:51 -0800
Subject: [PATCH 34/60] more fixes

---
 ci/coverage_report.sh                              |  2 ++
 .../numba/cuda/simulator/cudadrv/devicearray.py    |  4 ++++
 .../cuda/tests/benchmarks/test_kernel_launch.py    |  2 +-
 numba_cuda/numba/cuda/tests/cudapy/test_warning.py | 14 ++++++++++----
 4 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/ci/coverage_report.sh b/ci/coverage_report.sh
index dce352e36..cad6ce976 100755
--- a/ci/coverage_report.sh
+++ b/ci/coverage_report.sh
@@ -5,6 +5,7 @@
 set -euo pipefail
 
 CUDA_VER_MAJOR_MINOR=${CUDA_VER%.*}
+CUDA_VER_MAJOR=${CUDA_VER%%.*}  # text before the first dot: X.Y.Z, X.Y -> X
 
 rapids-logger "Install wheel with test dependencies and coverage tools"
 package=$(realpath "${NUMBA_CUDA_ARTIFACTS_DIR}"/*.whl)
@@ -13,6 +14,7 @@ python -m pip install \
     "${package}" \
     "cuda-python==${CUDA_VER_MAJOR_MINOR%.*}.*" \
     "cuda-core" \
+    "cupy-cuda${CUDA_VER_MAJOR}x" \
     pytest-cov \
     coverage \
     --group test
diff --git a/numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py b/numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py
index dcf4e6cc9..ecf505f52 100644
--- a/numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py
+++ b/numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py
@@ -111,6 +111,11 @@ def __init__(self, ary, stream=0):
         self._ary = ary
         self.stream = stream
 
+    @classmethod
+    def _create_nowarn(cls, ary, stream):
+        """Build an instance of ``cls`` from ``ary`` and ``stream``."""
+        return cls(ary, stream)
+
     @property
     def _numba_type_(self):
         """
diff --git a/numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py b/numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py
index b2738a03a..3950a3d08 100644
--- a/numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py
+++ b/numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py
@@ -23,7 +23,7 @@
     "array_func",
     [
         param(
-            devary_arg,
+            lambda: devary_arg,
             id="device_array",
         ),
         param(
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_warning.py b/numba_cuda/numba/cuda/tests/cudapy/test_warning.py
index 8d96ae965..7680981cd 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_warning.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_warning.py
@@ -21,6 +21,7 @@
 )
 from numba.cuda.core import config
 import warnings
+from numba.cuda.testing import DeprecatedDeviceArrayApiWarning
 
 
 @skip_on_cudasim("cudasim does not raise performance warnings")
@@ -89,7 +90,8 @@ def foo(r, x):
             r[0] = x + 1
 
         N = 10
-        ary = cuda.pinned_array(N, dtype=np.float32)
+        with pytest.warns(DeprecatedDeviceArrayApiWarning):
+            ary = cuda.pinned_array(N, dtype=np.float32)
 
         func = foo[1, N]
         with override_config("CUDA_WARN_ON_IMPLICIT_COPY", 1):
@@ -105,7 +107,8 @@ def foo(r, x):
             r[0] = x + 1
 
         N = 10
-        ary = cuda.mapped_array(N, dtype=np.float32)
+        with pytest.warns(DeprecatedDeviceArrayApiWarning):
+            ary = cuda.mapped_array(N, dtype=np.float32)
 
         with override_config("CUDA_WARN_ON_IMPLICIT_COPY", 1):
             with warnings.catch_warnings(record=True) as w:
@@ -120,7 +123,8 @@ def foo(r, x):
             r[0] = x + 1
 
         N = 10
-        ary = cuda.managed_array(N, dtype=np.float32)
+        with pytest.warns(DeprecatedDeviceArrayApiWarning):
+            ary = cuda.managed_array(N, dtype=np.float32)
 
         with override_config("CUDA_WARN_ON_IMPLICIT_COPY", 1):
             with warnings.catch_warnings(record=True) as w:
@@ -134,7 +138,9 @@ def foo(r, x):
             r[0] = x + 1
 
         N = 10
-        ary = cuda.device_array(N, dtype=np.float32)
+
+        with pytest.warns(DeprecatedDeviceArrayApiWarning):
+            ary = cuda.device_array(N, dtype=np.float32)
 
         with override_config("CUDA_WARN_ON_IMPLICIT_COPY", 1):
             with warnings.catch_warnings(record=True) as w:

From b70bbd97763d066e5ca747f2680ab8606370d133 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Fri, 23 Jan 2026 10:15:53 -0800
Subject: [PATCH 35/60] remove tests

---
 numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py b/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py
index 5774784fd..be5aa6796 100644
--- a/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py
+++ b/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py
@@ -263,15 +263,6 @@ def test_cuda_driver_occupancy(self):
         )
         self.assertTrue(value > 0)
 
-        def b2d(bs):
-            return bs
-
-        grid, block = self.context.get_max_potential_block_size(
-            function, b2d, 128, 128
-        )
-        self.assertTrue(grid > 0)
-        self.assertTrue(block > 0)
-
     def test_cuda_cache_config(self):
         from numba import types
         import numpy as np

From 0278757ff232e9b9fc982ecf38212d4fa670ada8 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Tue, 27 Jan 2026 11:53:33 -0800
Subject: [PATCH 36/60] add cupy to test environments

---
 pixi.lock | 2223 +++++++++++++++++++++++++++++++++++++++++++++++++----
 pixi.toml |    1 +
 2 files changed, 2072 insertions(+), 152 deletions(-)

diff --git a/pixi.lock b/pixi.lock
index 490543069..9f7caa738 100644
--- a/pixi.lock
+++ b/pixi.lock
@@ -13,23 +13,44 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-2_gnu.tar.bz2
       - conda: https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-hda65f42_8.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/ca-certificates-2026.1.4-hbd8a1cb_0.conda
-      - conda: https://conda.anaconda.org/conda-forge/linux-64/cffi-2.0.0-py314h6fefde3_1.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/cffi-2.0.0-py314h4a8dc5f_1.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cfgv-3.5.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_1.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-cccl_linux-64-13.1.115-ha770c72_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-cudart-dev_linux-64-13.1.80-h376f20c_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-cudart-static_linux-64-13.1.80-h376f20c_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-cudart_linux-64-13.1.80-h376f20c_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/cuda-nvrtc-13.1.115-hecca717_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-13.1-h2ff5cdb_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-13.6.0-py314h972ecce_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-core-13.6.0-py314h3ed1f13_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/fastrlock-0.8.3-py314h8c728da_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/icu-78.2-h33c6efd_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/identify-1.2.2-py_0.tar.bz2
       - conda: https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.3.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.45-default_hbd61a6d_105.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/libblas-3.11.0-5_h4a7cf45_openblas.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/libcblas-3.11.0-5_h0358290_openblas.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/libcublas-13.2.1.1-h676940d_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/libcufft-12.1.0.78-hecca717_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/libcurand-10.4.1.81-h676940d_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/libcusolver-12.0.9.81-h676940d_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/libcusparse-12.7.3.1-hecca717_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.7.3-hecca717_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/libffi-3.5.2-h9ec8514_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/libgcc-15.2.0-he0feb66_16.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/libgfortran-15.2.0-h69a702a_16.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/libgfortran5-15.2.0-h68bc16d_16.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/libgomp-15.2.0-he0feb66_16.conda
-      - conda: https://conda.anaconda.org/conda-forge/linux-64/liblzma-5.8.1-hb9d3cd8_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/liblapack-3.11.0-5_h47877c9_openblas.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/liblzma-5.8.2-hb03c661_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/libmpdec-4.0.0-hb9d3cd8_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/libnvjitlink-13.1.115-hecca717_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/libopenblas-0.3.30-pthreads_h94d23a6_4.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.51.2-hf4e2dac_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-15.2.0-h934c35e_16.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/libuuid-2.41.3-h5347b49_0.conda
@@ -37,20 +58,21 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/linux-64/make-4.4.1-hb9d3cd8_2.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.5-h2d0b736_3.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/nodeenv-1.10.0-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/numpy-2.4.1-py314h2b28147_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/openssl-3.6.0-h26f9b46_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/packaging-25.0-pyh29332c3_1.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/platformdirs-4.5.1-pyhcf101f3_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/pluggy-1.6.0-pyhf9edf01_1.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/pre-commit-4.5.1-pyha770c72_0.conda
-      - conda: https://conda.anaconda.org/conda-forge/linux-64/psutil-7.2.1-py314h3f2afee_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/psutil-7.2.1-py314h0f05182_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/py-cpuinfo-9.0.0-pyhd8ed1ab_1.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/pycparser-2.22-pyh29332c3_1.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/pygments-2.19.2-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/pytest-8.4.2-pyhcf101f3_1.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/pytest-benchmark-5.2.3-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/pytest-xdist-3.8.0-pyhd8ed1ab_0.conda
-      - conda: https://conda.anaconda.org/conda-forge/linux-64/python-3.14.2-he1279bd_0_cp314t.conda
-      - conda: https://conda.anaconda.org/conda-forge/noarch/python_abi-3.14-8_cp314t.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/python-3.14.2-h32b2ec7_101_cp314.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/python_abi-3.14-8_cp314.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/pyyaml-6.0.3-pyh7db6752_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/readline-8.3-h853b02a_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/setuptools-80.9.0-pyhff2d567_0.conda
@@ -62,29 +84,50 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/linux-64/yaml-0.2.5-h280c20c_3.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.7-hb78ec9c_6.conda
       - pypi: https://files.pythonhosted.org/packages/5c/40/69ca9ea803303e14301fff9d4931b6d080b9603e134df0419c55e9764df4/filecheck-1.0.3-py3-none-any.whl
-      - pypi: https://files.pythonhosted.org/packages/79/2b/a826ba18d2179a56e144aef69e57fb2ab7c464ef0b2111940ee8a3a223a2/ml_dtypes-0.5.4-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl
-      - pypi: https://files.pythonhosted.org/packages/10/a7/cfbe475c35371cae1358e61f20c5f075badc18c4797ab4354140e1d283cf/numpy-2.4.1-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl
+      - pypi: https://files.pythonhosted.org/packages/c6/bb/82c7dcf38070b46172a517e2334e665c5bf374a262f99a283ea454bece7c/ml_dtypes-0.5.4-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl
       linux-aarch64:
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/_openmp_mutex-4.5-2_gnu.tar.bz2
+      - conda: https://conda.anaconda.org/conda-forge/noarch/arm-variant-1.2.0-sbsa.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/bzip2-1.0.8-h4777abc_8.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/ca-certificates-2026.1.4-hbd8a1cb_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cffi-2.0.0-py314h0bd77cf_1.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cfgv-3.5.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_1.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-cccl_linux-aarch64-13.1.115-h579c4fd_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-cudart-dev_linux-aarch64-13.1.80-h8f3c8d4_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-cudart-static_linux-aarch64-13.1.80-h8f3c8d4_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-cudart_linux-aarch64-13.1.80-h8f3c8d4_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cuda-nvrtc-13.1.115-h8f3c8d4_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-13.1-h2ff5cdb_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-13.6.0-py314h3ec1dcb_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-core-13.6.0-py314heaf0aa5_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/fastrlock-0.8.3-py314h3642cf7_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/icu-78.2-hb1525cb_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/identify-2.6.15-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.3.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/ld_impl_linux-aarch64-2.45-default_h1979696_105.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libblas-3.11.0-5_haddc8a3_openblas.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcblas-3.11.0-5_hd72aa62_openblas.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcublas-13.2.1.1-he38c790_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcufft-12.1.0.78-h8f3c8d4_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcurand-10.4.1.81-he38c790_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcusolver-12.0.9.81-he38c790_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcusparse-12.7.3.1-h8f3c8d4_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libexpat-2.7.3-hfae3067_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libffi-3.5.2-hd65408f_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libgcc-15.2.0-h8acb6b2_16.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libgfortran-15.2.0-he9431aa_16.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libgfortran5-15.2.0-h1b7bec0_16.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libgomp-15.2.0-h8acb6b2_16.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/liblapack-3.11.0-5_h88aeb00_openblas.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/liblzma-5.8.1-h86ecc28_2.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libmpdec-4.0.0-h86ecc28_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libnvjitlink-13.1.115-h8f3c8d4_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libopenblas-0.3.30-pthreads_h9d3fd7e_4.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libsqlite-3.51.2-h10b116e_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libstdcxx-15.2.0-hef695bb_16.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libuuid-2.41.3-h1022ec0_0.conda
@@ -92,6 +135,7 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/make-4.4.1-h2a6d0cb_2.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/ncurses-6.5-ha32ae93_3.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/nodeenv-1.10.0-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/numpy-2.4.1-py314haac167e_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/openssl-3.6.0-h8e36d6e_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/packaging-25.0-pyh29332c3_1.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/platformdirs-4.5.1-pyhcf101f3_0.conda
@@ -119,7 +163,6 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/zstd-1.5.7-h85ac4a6_6.conda
       - pypi: https://files.pythonhosted.org/packages/5c/40/69ca9ea803303e14301fff9d4931b6d080b9603e134df0419c55e9764df4/filecheck-1.0.3-py3-none-any.whl
       - pypi: https://files.pythonhosted.org/packages/04/f9/067b84365c7e83bda15bba2b06c6ca250ce27b20630b1128c435fb7a09aa/ml_dtypes-0.5.4-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl
-      - pypi: https://files.pythonhosted.org/packages/c0/c4/2e7908915c0e32ca636b92e4e4a3bdec4cb1e7eb0f8aedf1ed3c68a0d8cd/numpy-2.4.1-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl
       win-64:
       - conda: https://conda.anaconda.org/conda-forge/win-64/_openmp_mutex-4.5-2_gnu.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/bzip2-1.0.8-h0ad9c76_8.conda
@@ -127,23 +170,49 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/win-64/cffi-2.0.0-py314h5a2d7ad_1.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cfgv-3.5.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_1.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-cccl_win-64-13.1.115-h57928b3_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-cudart-dev_win-64-13.1.80-hac47afa_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-cudart-static_win-64-13.1.80-hac47afa_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-cudart_win-64-13.1.80-hac47afa_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/cuda-nvrtc-13.1.115-hac47afa_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-13.1-h2ff5cdb_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/cupy-13.6.0-py314h59d4d8c_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/cupy-core-13.6.0-py314hc101868_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/fastrlock-0.8.3-py314h8b4fd5f_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/icu-78.2-h637d24d_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/identify-2.6.15-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.3.0-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/libblas-3.11.0-5_hf2e6a31_mkl.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/libcblas-3.11.0-5_h2a3cdd5_mkl.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/libcublas-13.2.1.1-hac47afa_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/libcufft-12.1.0.78-hac47afa_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/libcurand-10.4.1.81-hac47afa_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/libcusolver-12.0.9.81-hac47afa_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/libcusparse-12.7.3.1-hac47afa_0.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/libexpat-2.7.3-hac47afa_0.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/libffi-3.5.2-h52bdfb6_0.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/libgcc-15.2.0-h8ee18e1_16.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/libgomp-15.2.0-h8ee18e1_16.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/libhwloc-2.12.2-default_h4379cf1_1000.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/libiconv-1.18-hc1393d2_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/liblapack-3.11.0-5_hf9ab0e9_mkl.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/liblzma-5.8.1-h2466b09_2.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/libmpdec-4.0.0-h2466b09_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/libnvjitlink-13.1.115-hac47afa_0.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/libsqlite-3.51.2-hf5d6505_0.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/libwinpthread-12.0.0.r4.gg4f2fc60ca-h57928b3_10.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/libxml2-16-2.15.1-h3cfd58e_1.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/libxml2-2.15.1-h779ef1b_1.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/libzlib-1.3.1-h2466b09_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/llvm-openmp-21.1.8-h4fa8253_0.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/make-4.4.1-h0e40799_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/mkl-2025.3.0-hac47afa_455.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/nodeenv-1.10.0-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/numpy-2.4.1-py314h06c3c77_0.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/openssl-3.6.0-h725018a_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/packaging-25.0-pyh29332c3_1.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/platformdirs-4.5.1-pyhcf101f3_0.conda
@@ -160,6 +229,7 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/noarch/python_abi-3.14-8_cp314.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/pyyaml-6.0.3-pyh7db6752_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/setuptools-80.9.0-pyhff2d567_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/tbb-2022.3.0-h3155e25_2.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/tk-8.6.13-h2c6b04d_3.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/tomli-2.4.0-pyhcf101f3_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.15.0-pyhcf101f3_0.conda
@@ -174,7 +244,6 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/win-64/zstd-1.5.7-h534d264_6.conda
       - pypi: https://files.pythonhosted.org/packages/5c/40/69ca9ea803303e14301fff9d4931b6d080b9603e134df0419c55e9764df4/filecheck-1.0.3-py3-none-any.whl
       - pypi: https://files.pythonhosted.org/packages/e9/93/2bfed22d2498c468f6bcd0d9f56b033eaa19f33320389314c19ef6766413/ml_dtypes-0.5.4-cp314-cp314-win_amd64.whl
-      - pypi: https://files.pythonhosted.org/packages/7e/bb/c6513edcce5a831810e2dddc0d3452ce84d208af92405a0c2e58fd8e7881/numpy-2.4.1-cp314-cp314-win_amd64.whl
   cu-12-0-py310:
     channels:
     - url: https://conda.anaconda.org/conda-forge/
@@ -215,9 +284,12 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-pathfinder-1.3.3-pyhcf101f3_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-12.9.5-pyh698daf1_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-12.0-hffde075_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-13.6.0-py310h8c3aed4_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-core-13.6.0-py310hbc0d89f_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/fastrlock-0.8.3-py310h25320af_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/gcc_impl_linux-64-12.4.0-h26ba24d_2.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/gcc_linux-64-12.4.0-h6b7512a_10.conda
@@ -230,8 +302,12 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.45-default_hbd61a6d_105.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/libblas-3.11.0-5_h4a7cf45_openblas.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/libcblas-3.11.0-5_h0358290_openblas.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/libcublas-12.0.1.189-hd3aeb46_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/libcufft-11.0.0.21-hd3aeb46_2.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/libcufile-1.5.0.59-hd3aeb46_1.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/libcurand-10.3.1.50-hd3aeb46_1.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/libcusolver-11.4.2.57-hd3aeb46_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/libcusparse-12.0.0.76-hd3aeb46_2.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.7.3-hecca717_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/libffi-3.5.2-h9ec8514_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/libgcc-15.2.0-he0feb66_16.conda
@@ -321,9 +397,12 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-pathfinder-1.3.3-pyhcf101f3_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-12.9.5-pyh698daf1_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-12.0-hffde075_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-13.6.0-py310h556c47b_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-core-13.6.0-py310h967c7ba_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/fastrlock-0.8.3-py310heccc163_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/gcc_impl_linux-aarch64-12.4.0-h628656a_2.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/gcc_linux-aarch64-12.4.0-heb3b579_10.conda
@@ -336,7 +415,11 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/ld_impl_linux-aarch64-2.45-default_h1979696_105.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libblas-3.11.0-5_haddc8a3_openblas.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcblas-3.11.0-5_hd72aa62_openblas.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcublas-12.0.1.189-hac28a21_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcufft-11.0.0.21-hac28a21_2.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcurand-10.3.1.50-hac28a21_1.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcusolver-11.4.2.57-hac28a21_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcusparse-12.0.0.76-hac28a21_2.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libexpat-2.7.3-hfae3067_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libffi-3.5.2-hd65408f_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libgcc-15.2.0-h8acb6b2_16.conda
@@ -422,16 +505,23 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-pathfinder-1.3.3-pyhcf101f3_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-12.9.5-pyh698daf1_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-12.0-hffde075_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/cupy-13.6.0-py310h9349102_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/cupy-core-13.6.0-py310h867cfc4_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/fastrlock-0.8.3-py310h699e580_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/icu-78.2-h637d24d_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/identify-2.6.15-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.3.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/libblas-3.11.0-5_hf2e6a31_mkl.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/libcblas-3.11.0-5_h2a3cdd5_mkl.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/libcublas-12.0.1.189-h63175ca_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/libcufft-11.0.0.21-h63175ca_2.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/libcurand-10.3.1.50-h63175ca_1.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/libcusolver-11.4.2.57-h63175ca_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/libcusparse-12.0.0.76-h63175ca_2.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/libexpat-2.7.3-hac47afa_0.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/libffi-3.5.2-h52bdfb6_0.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/libgcc-15.2.0-h8ee18e1_16.conda
@@ -528,9 +618,12 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-pathfinder-1.3.3-pyhcf101f3_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-12.9.5-pyh698daf1_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-12.0-hffde075_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-13.6.0-py311h72da3fd_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-core-13.6.0-py311he30c881_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/fastrlock-0.8.3-py311hc665b79_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/gcc_impl_linux-64-12.4.0-h26ba24d_2.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/gcc_linux-64-12.4.0-h6b7512a_10.conda
@@ -543,8 +636,12 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.45-default_hbd61a6d_105.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/libblas-3.11.0-5_h4a7cf45_openblas.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/libcblas-3.11.0-5_h0358290_openblas.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/libcublas-12.0.1.189-hd3aeb46_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/libcufft-11.0.0.21-hd3aeb46_2.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/libcufile-1.5.0.59-hd3aeb46_1.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/libcurand-10.3.1.50-hd3aeb46_1.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/libcusolver-11.4.2.57-hd3aeb46_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/libcusparse-12.0.0.76-hd3aeb46_2.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.7.3-hecca717_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/libffi-3.5.2-h9ec8514_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/libgcc-15.2.0-he0feb66_16.conda
@@ -634,9 +731,12 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-pathfinder-1.3.3-pyhcf101f3_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-12.9.5-pyh698daf1_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-12.0-hffde075_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-13.6.0-py311h1f68eda_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-core-13.6.0-py311h6a7bbfe_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/fastrlock-0.8.3-py311h8e4e6a5_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/gcc_impl_linux-aarch64-12.4.0-h628656a_2.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/gcc_linux-aarch64-12.4.0-heb3b579_10.conda
@@ -649,7 +749,11 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/ld_impl_linux-aarch64-2.45-default_h1979696_105.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libblas-3.11.0-5_haddc8a3_openblas.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcblas-3.11.0-5_hd72aa62_openblas.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcublas-12.0.1.189-hac28a21_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcufft-11.0.0.21-hac28a21_2.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcurand-10.3.1.50-hac28a21_1.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcusolver-11.4.2.57-hac28a21_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcusparse-12.0.0.76-hac28a21_2.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libexpat-2.7.3-hfae3067_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libffi-3.5.2-hd65408f_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libgcc-15.2.0-h8acb6b2_16.conda
@@ -735,16 +839,23 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-pathfinder-1.3.3-pyhcf101f3_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-12.9.5-pyh698daf1_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-12.0-hffde075_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/cupy-13.6.0-py311h3856ebc_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/cupy-core-13.6.0-py311h3f47771_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/fastrlock-0.8.3-py311h5dfdfe8_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/icu-78.2-h637d24d_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/identify-2.6.15-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.3.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/libblas-3.11.0-5_hf2e6a31_mkl.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/libcblas-3.11.0-5_h2a3cdd5_mkl.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/libcublas-12.0.1.189-h63175ca_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/libcufft-11.0.0.21-h63175ca_2.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/libcurand-10.3.1.50-h63175ca_1.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/libcusolver-11.4.2.57-h63175ca_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/libcusparse-12.0.0.76-h63175ca_2.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/libexpat-2.7.3-hac47afa_0.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/libffi-3.5.2-h52bdfb6_0.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/libgcc-15.2.0-h8ee18e1_16.conda
@@ -848,9 +959,12 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-pathfinder-1.3.3-pyhcf101f3_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-12.9.5-pyh698daf1_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-12.2-he2b69de_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-13.6.0-py311h72da3fd_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-core-13.6.0-py311he30c881_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/fastrlock-0.8.3-py311hc665b79_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/gcc_impl_linux-64-12.4.0-h26ba24d_2.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/gcc_linux-64-12.4.0-h6b7512a_10.conda
@@ -863,8 +977,12 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.45-default_hbd61a6d_105.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/libblas-3.11.0-5_h4a7cf45_openblas.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/libcblas-3.11.0-5_h0358290_openblas.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/libcublas-12.2.5.6-hd3aeb46_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/libcufft-11.0.8.103-hd3aeb46_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/libcufile-1.7.2.10-hd3aeb46_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/libcurand-10.3.3.141-hd3aeb46_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/libcusolver-11.5.2.141-hd3aeb46_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/libcusparse-12.1.2.141-hd3aeb46_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.7.3-hecca717_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/libffi-3.5.2-h9ec8514_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/libgcc-15.2.0-he0feb66_16.conda
@@ -961,9 +1079,12 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-pathfinder-1.3.3-pyhcf101f3_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-12.9.5-pyh698daf1_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-12.2-he2b69de_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-13.6.0-py311h1f68eda_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-core-13.6.0-py311h6a7bbfe_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/fastrlock-0.8.3-py311h8e4e6a5_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/gcc_impl_linux-aarch64-12.4.0-h628656a_2.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/gcc_linux-aarch64-12.4.0-heb3b579_10.conda
@@ -976,8 +1097,12 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/ld_impl_linux-aarch64-2.45-default_h1979696_105.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libblas-3.11.0-5_haddc8a3_openblas.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcblas-3.11.0-5_hd72aa62_openblas.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcublas-12.2.5.6-hac28a21_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcufft-11.0.8.103-hac28a21_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcufile-1.7.2.10-hac28a21_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcurand-10.3.3.141-hac28a21_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcusolver-11.5.2.141-hac28a21_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcusparse-12.1.2.141-hac28a21_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libexpat-2.7.3-hfae3067_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libffi-3.5.2-hd65408f_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libgcc-15.2.0-h8acb6b2_16.conda
@@ -1070,16 +1195,23 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-pathfinder-1.3.3-pyhcf101f3_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-12.9.5-pyh698daf1_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-12.2-he2b69de_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/cupy-13.6.0-py311h3856ebc_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/cupy-core-13.6.0-py311h3f47771_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/fastrlock-0.8.3-py311h5dfdfe8_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/icu-78.2-h637d24d_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/identify-2.6.15-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.3.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/libblas-3.11.0-5_hf2e6a31_mkl.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/libcblas-3.11.0-5_h2a3cdd5_mkl.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/libcublas-12.2.5.6-h63175ca_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/libcufft-11.0.8.103-h63175ca_0.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/libcurand-10.3.3.141-h63175ca_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/libcusolver-11.5.2.141-h63175ca_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/libcusparse-12.1.2.141-h63175ca_0.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/libexpat-2.7.3-hac47afa_0.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/libffi-3.5.2-h52bdfb6_0.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/libgcc-15.2.0-h8ee18e1_16.conda
@@ -1187,9 +1319,12 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-12.9.5-pyh698daf1_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-runtime-12.8.1-ha804496_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-12.8-h5d125a7_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-13.6.0-py310h8c3aed4_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-core-13.6.0-py310hbc0d89f_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/fastrlock-0.8.3-py310h25320af_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/gcc_impl_linux-64-14.3.0-he8b2097_16.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/gcc_linux-64-14.3.0-h298d278_17.conda
@@ -1316,9 +1451,12 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-12.9.5-pyh698daf1_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-runtime-12.8.1-ha804496_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-12.8-h5d125a7_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-13.6.0-py310h556c47b_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-core-13.6.0-py310h967c7ba_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/fastrlock-0.8.3-py310heccc163_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/gcc_impl_linux-aarch64-14.3.0-hda29b82_16.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/gcc_linux-aarch64-14.3.0-h118592a_17.conda
@@ -1439,9 +1577,12 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-12.9.5-pyh698daf1_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-runtime-12.8.1-h7428d3b_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-12.8-h5d125a7_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/cupy-13.6.0-py310h9349102_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/cupy-core-13.6.0-py310h867cfc4_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/fastrlock-0.8.3-py310h699e580_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/icu-78.2-h637d24d_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/identify-2.6.15-pyhd8ed1ab_0.conda
@@ -1565,9 +1706,12 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-12.9.5-pyh698daf1_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-runtime-12.8.1-ha804496_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-12.8-h5d125a7_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-13.6.0-py311h72da3fd_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-core-13.6.0-py311he30c881_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/fastrlock-0.8.3-py311hc665b79_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/gcc_impl_linux-64-14.3.0-he8b2097_16.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/gcc_linux-64-14.3.0-h298d278_17.conda
@@ -1694,9 +1838,12 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-12.9.5-pyh698daf1_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-runtime-12.8.1-ha804496_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-12.8-h5d125a7_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-13.6.0-py311h1f68eda_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-core-13.6.0-py311h6a7bbfe_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/fastrlock-0.8.3-py311h8e4e6a5_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/gcc_impl_linux-aarch64-14.3.0-hda29b82_16.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/gcc_linux-aarch64-14.3.0-h118592a_17.conda
@@ -1817,9 +1964,12 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-12.9.5-pyh698daf1_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-runtime-12.8.1-h7428d3b_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-12.8-h5d125a7_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/cupy-13.6.0-py311h3856ebc_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/cupy-core-13.6.0-py311h3f47771_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/fastrlock-0.8.3-py311h5dfdfe8_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/icu-78.2-h637d24d_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/identify-2.6.15-pyhd8ed1ab_0.conda
@@ -1943,9 +2093,12 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-12.9.5-pyh698daf1_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-runtime-12.8.1-ha804496_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-12.8-h5d125a7_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-13.6.0-py312h0317cef_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-core-13.6.0-py312h16a6543_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/fastrlock-0.8.3-py312h8285ef7_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/gcc_impl_linux-64-14.3.0-he8b2097_16.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/gcc_linux-64-14.3.0-h298d278_17.conda
@@ -2072,9 +2225,12 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-12.9.5-pyh698daf1_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-runtime-12.8.1-ha804496_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-12.8-h5d125a7_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-13.6.0-py312h500e0d2_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-core-13.6.0-py312hdcd7d0a_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/fastrlock-0.8.3-py312hf55c4e8_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/gcc_impl_linux-aarch64-14.3.0-hda29b82_16.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/gcc_linux-aarch64-14.3.0-h118592a_17.conda
@@ -2195,9 +2351,12 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-12.9.5-pyh698daf1_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-runtime-12.8.1-h7428d3b_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-12.8-h5d125a7_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/cupy-13.6.0-py312hf676df9_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/cupy-core-13.6.0-py312hc3434b0_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/fastrlock-0.8.3-py312ha1a9051_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/icu-78.2-h637d24d_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/identify-2.6.15-pyhd8ed1ab_0.conda
@@ -2321,9 +2480,12 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-12.9.5-pyh698daf1_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-runtime-12.8.1-ha804496_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-12.8-h5d125a7_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-13.6.0-py313h586c94b_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-core-13.6.0-py313h28b6081_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/fastrlock-0.8.3-py313h5d5ffb9_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/gcc_impl_linux-64-14.3.0-he8b2097_16.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/gcc_linux-64-14.3.0-h298d278_17.conda
@@ -2448,9 +2610,12 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-12.9.5-pyh698daf1_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-runtime-12.8.1-ha804496_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-12.8-h5d125a7_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-13.6.0-py313h7988abe_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-core-13.6.0-py313h6b3a76b_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/fastrlock-0.8.3-py313h59403f9_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/gcc_impl_linux-aarch64-14.3.0-hda29b82_16.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/gcc_linux-aarch64-14.3.0-h118592a_17.conda
@@ -2570,9 +2735,12 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-12.9.5-pyh698daf1_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-runtime-12.8.1-h7428d3b_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-12.8-h5d125a7_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/cupy-13.6.0-py313h5dfe2c3_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/cupy-core-13.6.0-py313ha16128a_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/fastrlock-0.8.3-py313h927ade5_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/icu-78.2-h637d24d_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/identify-2.6.15-pyhd8ed1ab_0.conda
@@ -3188,9 +3356,12 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-13.1.1-pyhc455866_1.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-runtime-13.0.2-ha804496_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-13.0-hc7b4dd1_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-13.6.0-py312h045ee1a_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-core-13.6.0-py312h1a70bb2_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/fastrlock-0.8.3-py312h8285ef7_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/gcc_impl_linux-64-15.2.0-hc5723f1_16.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/gcc_linux-64-15.2.0-h862fb80_17.conda
@@ -3319,9 +3490,12 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-13.1.1-pyhc455866_1.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-runtime-13.0.2-ha804496_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-13.0-hc7b4dd1_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-13.6.0-py312h63ce5a7_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-core-13.6.0-py312hc495b10_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/fastrlock-0.8.3-py312hf55c4e8_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/gcc_impl_linux-aarch64-15.2.0-habb1d5c_16.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/gcc_linux-aarch64-15.2.0-h0139441_17.conda
@@ -3444,9 +3618,12 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-13.1.1-pyhc455866_1.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-runtime-13.0.2-h7428d3b_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-13.0-hc7b4dd1_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/cupy-13.6.0-py312h050d4bf_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/cupy-core-13.6.0-py312h7babc83_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/fastrlock-0.8.3-py312ha1a9051_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/icu-78.2-h637d24d_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/identify-2.6.15-pyhd8ed1ab_0.conda
@@ -3572,9 +3749,12 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-13.1.1-pyhc455866_1.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-runtime-13.0.2-ha804496_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-13.0-hc7b4dd1_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-13.6.0-py313h727d180_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-core-13.6.0-py313h0630d88_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/fastrlock-0.8.3-py313h5d5ffb9_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/gcc_impl_linux-64-15.2.0-hc5723f1_16.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/gcc_linux-64-15.2.0-h862fb80_17.conda
@@ -3701,9 +3881,12 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-13.1.1-pyhc455866_1.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-runtime-13.0.2-ha804496_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-13.0-hc7b4dd1_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-13.6.0-py313h1bad292_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-core-13.6.0-py313h407dc6c_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/fastrlock-0.8.3-py313h59403f9_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/gcc_impl_linux-aarch64-15.2.0-habb1d5c_16.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/gcc_linux-aarch64-15.2.0-h0139441_17.conda
@@ -3825,9 +4008,12 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-13.1.1-pyhc455866_1.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-runtime-13.0.2-h7428d3b_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-13.0-hc7b4dd1_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/cupy-13.6.0-py313h670e13b_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/cupy-core-13.6.0-py313haef2af9_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/fastrlock-0.8.3-py313h927ade5_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/icu-78.2-h637d24d_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/identify-2.6.15-pyhd8ed1ab_0.conda
@@ -3954,9 +4140,12 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-13.1.1-pyhc455866_1.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-runtime-13.0.2-ha804496_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-13.0-hc7b4dd1_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-13.6.0-py314h972ecce_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-core-13.6.0-py314h3ed1f13_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/fastrlock-0.8.3-py314h8c728da_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/gcc_impl_linux-64-15.2.0-hc5723f1_16.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/gcc_linux-64-15.2.0-h862fb80_17.conda
@@ -4083,9 +4272,12 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-13.1.1-pyhc455866_1.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-runtime-13.0.2-ha804496_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-13.0-hc7b4dd1_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-13.6.0-py314h3ec1dcb_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-core-13.6.0-py314heaf0aa5_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/fastrlock-0.8.3-py314h3642cf7_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/gcc_impl_linux-aarch64-15.2.0-habb1d5c_16.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/gcc_linux-aarch64-15.2.0-h0139441_17.conda
@@ -4207,9 +4399,12 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-13.1.1-pyhc455866_1.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-runtime-13.0.2-h7428d3b_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-13.0-hc7b4dd1_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/cupy-13.6.0-py314h59d4d8c_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/cupy-core-13.6.0-py314hc101868_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/fastrlock-0.8.3-py314h8b4fd5f_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/icu-78.2-h637d24d_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/identify-2.6.15-pyhd8ed1ab_0.conda
@@ -4336,9 +4531,12 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-13.1.1-pyhc455866_1.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-runtime-13.1.0-ha804496_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-13.1-h2ff5cdb_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-13.6.0-py314h972ecce_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-core-13.6.0-py314h3ed1f13_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/fastrlock-0.8.3-py314h8c728da_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/gcc_impl_linux-64-15.2.0-hc5723f1_16.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/gcc_linux-64-15.2.0-h862fb80_17.conda
@@ -4465,9 +4663,12 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-13.1.1-pyhc455866_1.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-runtime-13.1.0-ha804496_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-13.1-h2ff5cdb_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-13.6.0-py314h3ec1dcb_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-core-13.6.0-py314heaf0aa5_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/fastrlock-0.8.3-py314h3642cf7_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/gcc_impl_linux-aarch64-15.2.0-habb1d5c_16.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/gcc_linux-aarch64-15.2.0-h0139441_17.conda
@@ -4589,9 +4790,12 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-13.1.1-pyhc455866_1.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-runtime-13.1.0-h7428d3b_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-13.1-h2ff5cdb_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/cupy-13.6.0-py314h59d4d8c_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/cupy-core-13.6.0-py314hc101868_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/fastrlock-0.8.3-py314h8b4fd5f_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/icu-78.2-h637d24d_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/identify-2.6.15-pyhd8ed1ab_0.conda
@@ -4718,9 +4922,12 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-13.1.1-pyhc455866_1.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-runtime-13.1.0-ha804496_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-13.1-h2ff5cdb_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-13.6.0-py314h972ecce_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-core-13.6.0-py314h3ed1f13_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/fastrlock-0.8.3-py314h8c728da_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/gcc_impl_linux-64-15.2.0-hc5723f1_16.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/gcc_linux-64-15.2.0-h862fb80_17.conda
@@ -4847,9 +5054,12 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-13.1.1-pyhc455866_1.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-runtime-13.1.0-ha804496_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-13.1-h2ff5cdb_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-13.6.0-py314h3ec1dcb_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-core-13.6.0-py314heaf0aa5_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/fastrlock-0.8.3-py314h3642cf7_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/gcc_impl_linux-aarch64-15.2.0-habb1d5c_16.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/gcc_linux-aarch64-15.2.0-h0139441_17.conda
@@ -4971,9 +5181,12 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-13.1.1-pyhc455866_1.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-runtime-13.1.0-h7428d3b_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-13.1-h2ff5cdb_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/cupy-13.6.0-py314h59d4d8c_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/cupy-core-13.6.0-py314hc101868_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/fastrlock-0.8.3-py314h8b4fd5f_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/icu-78.2-h637d24d_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/identify-2.6.16-pyhd8ed1ab_0.conda
@@ -5947,22 +6160,6 @@ packages:
   - pkg:pypi/cffi?source=hash-mapping
   size: 300271
   timestamp: 1761203085220
-- conda: https://conda.anaconda.org/conda-forge/linux-64/cffi-2.0.0-py314h6fefde3_1.conda
-  sha256: b5214aa3e0853240f41c79b6cbdb3b3c6b6bfc384713ffad1b41b1442d44737a
-  md5: 1598bfc06ced45b100bca5117c9b3a4b
-  depends:
-  - __glibc >=2.17,<3.0.a0
-  - libffi >=3.5.2,<3.6.0a0
-  - libgcc >=14
-  - pycparser
-  - python >=3.14,<3.15.0a0
-  - python_abi 3.14.* *_cp314t
-  license: MIT
-  license_family: MIT
-  purls:
-  - pkg:pypi/cffi?source=hash-mapping
-  size: 304412
-  timestamp: 1761202966547
 - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cffi-2.0.0-py310h0826a50_1.conda
   sha256: 63458040026be843a189e319190a0622486017c92ef251d4dff7ec847f9a8418
   md5: 152a5ba791642d8a81fe02d134ab3839
@@ -11588,6 +11785,18 @@ packages:
   purls: []
   size: 68354405
   timestamp: 1757018387981
+- conda: https://conda.anaconda.org/conda-forge/linux-64/cuda-nvrtc-13.1.115-hecca717_0.conda
+  sha256: 9cc4f9df70c02eea5121cdb0e865207b04cd52591f57ebcac2ba44fada10eb5b
+  md5: df16c9049d882cdaf4f83a5b90079589
+  depends:
+  - __glibc >=2.17,<3.0.a0
+  - cuda-version >=13.1,<13.2.0a0
+  - libgcc >=14
+  - libstdcxx >=14
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 35339417
+  timestamp: 1768272955912
 - conda: https://conda.anaconda.org/conda-forge/linux-64/cuda-nvrtc-13.1.80-hecca717_0.conda
   sha256: d6b326bdbf6fa7bfa0fa617dda547dc585159816b8f130f2535740c4e53fd12c
   md5: 7ef874b2dc4ca388ecef3b3893305459
@@ -11663,6 +11872,18 @@ packages:
   purls: []
   size: 32555050
   timestamp: 1757018424779
+- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cuda-nvrtc-13.1.115-h8f3c8d4_0.conda
+  sha256: a1ec61512cecb093797e00590ad381ecd5852d2a32440ff22b34f78c743f3d5a
+  md5: 34da2ff2c64054d65eb8f04d76c40cca
+  depends:
+  - arm-variant * sbsa
+  - cuda-version >=13.1,<13.2.0a0
+  - libgcc >=14
+  - libstdcxx >=14
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 33616576
+  timestamp: 1768272976976
 - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cuda-nvrtc-13.1.80-h8f3c8d4_0.conda
   sha256: 5e10ce4dd84c22c73e58a9f8359fb1e5ef4596afd3a0bc12b9fbde73b388ec0d
   md5: 0473ebdb01f2f4024177b024fc19fa72
@@ -11735,6 +11956,18 @@ packages:
   purls: []
   size: 59235886
   timestamp: 1757018672897
+- conda: https://conda.anaconda.org/conda-forge/win-64/cuda-nvrtc-13.1.115-hac47afa_0.conda
+  sha256: a8869b7d997722f90b9f8a602dc0b1d0d497f2a6f3561dc89383aeb2cd379a66
+  md5: 372d3c612a832d5f87d8dd9702d487b2
+  depends:
+  - cuda-version >=13.1,<13.2.0a0
+  - ucrt >=10.0.20348.0
+  - vc >=14.3,<15
+  - vc14_runtime >=14.44.35208
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 31006920
+  timestamp: 1768273107962
 - conda: https://conda.anaconda.org/conda-forge/win-64/cuda-nvrtc-13.1.80-hac47afa_0.conda
   sha256: 3f67de8a9eb182fa20bbc80bda7185afb676cfe8894f6a0549173bd752a7d2f4
   md5: 7b42337a35cd887ec3eed254b5ed606f
@@ -12881,6 +13114,46 @@ packages:
   purls: []
   size: 19915
   timestamp: 1762823943653
+- conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-13.6.0-py310h8c3aed4_2.conda
+  sha256: bab72866e713729c4824323aa4ff9346a48d0c74dff21d2cebb49331c9c58f57
+  md5: 9e5f2f1fc83026ad80f0660895ea3994
+  depends:
+  - cuda-cudart-dev_linux-64
+  - cuda-nvrtc
+  - cuda-version >=12,<13.0a0
+  - cupy-core 13.6.0 py310hbc0d89f_2
+  - libcublas
+  - libcufft
+  - libcurand
+  - libcusolver
+  - libcusparse
+  - python >=3.10,<3.11.0a0
+  - python_abi 3.10.* *_cp310
+  license: MIT
+  license_family: MIT
+  purls: []
+  size: 359719
+  timestamp: 1757733038131
+- conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-13.6.0-py311h72da3fd_2.conda
+  sha256: 01f0f69dbc66ca8fe7182678258915425573f5ae5aef338efb963aceb444ef1f
+  md5: 7ff80f6526ae96cff25f226544e72baa
+  depends:
+  - cuda-cudart-dev_linux-64
+  - cuda-nvrtc
+  - cuda-version >=12,<13.0a0
+  - cupy-core 13.6.0 py311he30c881_2
+  - libcublas
+  - libcufft
+  - libcurand
+  - libcusolver
+  - libcusparse
+  - python >=3.11,<3.12.0a0
+  - python_abi 3.11.* *_cp311
+  license: MIT
+  license_family: MIT
+  purls: []
+  size: 359669
+  timestamp: 1757732902729
 - conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-13.6.0-py312h0317cef_2.conda
   sha256: 078e83045e252b7c616c4e6b580acc1c12b4ade24b4ecd71be4d5dc767387bca
   md5: 8cee37f4bad743e108f904e902f65df1
@@ -12901,6 +13174,126 @@ packages:
   purls: []
   size: 359152
   timestamp: 1757733115653
+- conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-13.6.0-py312h045ee1a_2.conda
+  sha256: 806110d9c5c6802006eec55d012e2e82dddadf8f7c9743297a25eef5800d6a25
+  md5: 2be1fbddb4658b3325d531e3e8f62abe
+  depends:
+  - cuda-cudart-dev_linux-64
+  - cuda-nvrtc
+  - cuda-version >=13,<14.0a0
+  - cupy-core 13.6.0 py312h1a70bb2_2
+  - libcublas
+  - libcufft
+  - libcurand
+  - libcusolver
+  - libcusparse
+  - python >=3.12,<3.13.0a0
+  - python_abi 3.12.* *_cp312
+  license: MIT
+  license_family: MIT
+  purls: []
+  size: 359604
+  timestamp: 1757731606512
+- conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-13.6.0-py313h586c94b_2.conda
+  sha256: 8e1b0bf555b5ac78d620ccfd20d70c45b717eb6f074631b1a9e962c5d8f0e484
+  md5: 0685ae3980f823b2ca78552f7d8d4033
+  depends:
+  - cuda-cudart-dev_linux-64
+  - cuda-nvrtc
+  - cuda-version >=12,<13.0a0
+  - cupy-core 13.6.0 py313h28b6081_2
+  - libcublas
+  - libcufft
+  - libcurand
+  - libcusolver
+  - libcusparse
+  - python >=3.13,<3.14.0a0
+  - python_abi 3.13.* *_cp313
+  license: MIT
+  license_family: MIT
+  purls: []
+  size: 359537
+  timestamp: 1757732883343
+- conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-13.6.0-py313h727d180_2.conda
+  sha256: 0cf7e5f9461b144320ff2d30f1e7d74c7990e69aa15ec8211cc117f1214a9985
+  md5: 9a9af89f20555cbb1892f81d096b937d
+  depends:
+  - cuda-cudart-dev_linux-64
+  - cuda-nvrtc
+  - cuda-version >=13,<14.0a0
+  - cupy-core 13.6.0 py313h0630d88_2
+  - libcublas
+  - libcufft
+  - libcurand
+  - libcusolver
+  - libcusparse
+  - python >=3.13,<3.14.0a0
+  - python_abi 3.13.* *_cp313
+  license: MIT
+  license_family: MIT
+  purls: []
+  size: 359195
+  timestamp: 1757731600945
+- conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-13.6.0-py314h972ecce_2.conda
+  sha256: fc4c9e4286c943f8ce6e3f5f29e4ac750939b46cecd06ff70b00d6ba0472af02
+  md5: 5efa78fb77f5f07b02dde55a66bbff24
+  depends:
+  - cuda-cudart-dev_linux-64
+  - cuda-nvrtc
+  - cuda-version >=13,<14.0a0
+  - cupy-core 13.6.0 py314h3ed1f13_2
+  - libcublas
+  - libcufft
+  - libcurand
+  - libcusolver
+  - libcusparse
+  - python >=3.14.0rc2,<3.15.0a0
+  - python_abi 3.14.* *_cp314
+  license: MIT
+  license_family: MIT
+  purls: []
+  size: 359816
+  timestamp: 1757731942829
+- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-13.6.0-py310h556c47b_2.conda
+  sha256: 3595e84792c1e36fa79348a404d71b94ad7fd2db8d0ca2551377661dbe40a9ea
+  md5: 6a547864445662481528190824613fef
+  depends:
+  - cuda-cudart-dev_linux-aarch64
+  - cuda-nvrtc
+  - cuda-version >=12,<13.0a0
+  - cupy-core 13.6.0 py310h967c7ba_2
+  - libcublas
+  - libcufft
+  - libcurand
+  - libcusolver
+  - libcusparse
+  - python >=3.10,<3.11.0a0
+  - python_abi 3.10.* *_cp310
+  license: MIT
+  license_family: MIT
+  purls: []
+  size: 359399
+  timestamp: 1757733587754
+- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-13.6.0-py311h1f68eda_2.conda
+  sha256: 9bfa8bbc0a630e331a04359675c3a728bc9a856284807b5042e24bab4cb16f28
+  md5: 0c76272fc6fa05ff39c53ea5ea5d1154
+  depends:
+  - cuda-cudart-dev_linux-aarch64
+  - cuda-nvrtc
+  - cuda-version >=12,<13.0a0
+  - cupy-core 13.6.0 py311h6a7bbfe_2
+  - libcublas
+  - libcufft
+  - libcurand
+  - libcusolver
+  - libcusparse
+  - python >=3.11,<3.12.0a0
+  - python_abi 3.11.* *_cp311
+  license: MIT
+  license_family: MIT
+  purls: []
+  size: 359395
+  timestamp: 1757733506707
 - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-13.6.0-py312h500e0d2_2.conda
   sha256: 05992a9fddee5bded2f68aeaaba937901ef3b5b246132f7f25478579cc99d48a
   md5: 73a45823cac7c3926192682b7a71ed94
@@ -12921,14 +13314,14 @@ packages:
   purls: []
   size: 359411
   timestamp: 1757733170501
-- conda: https://conda.anaconda.org/conda-forge/win-64/cupy-13.6.0-py312hf676df9_2.conda
-  sha256: 6636ac902b44dbc8f8e14d8d2593d057af2f7b722b704edbe01600a2c90c752c
-  md5: 270e90ae04455f4f85b8763ec1755373
+- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-13.6.0-py312h63ce5a7_2.conda
+  sha256: b58e3b72197504103175bfccce853f71de94716e832faa3fb69a22508242185a
+  md5: 6a3767487d9c694dee98bced05c7d048
   depends:
-  - cuda-cudart-dev_win-64
+  - cuda-cudart-dev_linux-aarch64
   - cuda-nvrtc
-  - cuda-version >=12,<13.0a0
-  - cupy-core 13.6.0 py312hc3434b0_2
+  - cuda-version >=13,<14.0a0
+  - cupy-core 13.6.0 py312hc495b10_2
   - libcublas
   - libcufft
   - libcurand
@@ -12939,70 +13332,735 @@ packages:
   license: MIT
   license_family: MIT
   purls: []
-  size: 361552
-  timestamp: 1757734756770
-- conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-core-13.6.0-py312h16a6543_2.conda
-  sha256: ebe205ad39f19067898f4513816d8c44ac8036d0c4b9f1ee5aa0233e0f5dc1d7
-  md5: e0667d2bf17e4ff3bd50861f245ed961
+  size: 359083
+  timestamp: 1757732404821
+- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-13.6.0-py313h1bad292_2.conda
+  sha256: f41cf3dae5a43376eb47a172ebc684337f5ed623a16f165dc5ae1444598b5910
+  md5: 38504562c74c201725045cfbf54699ee
   depends:
-  - __glibc >=2.17,<3.0.a0
-  - fastrlock >=0.8.3,<0.9.0a0
-  - libgcc >=14
-  - libstdcxx >=14
-  - numpy >=1.22
-  - python >=3.12,<3.13.0a0
-  - python_abi 3.12.* *_cp312
-  constrains:
-  - cuda-nvrtc >=12,<13.0a0
-  - libcufft >=11,<12.0a0
-  - libcurand >=10,<11.0a0
-  - scipy >=1.7,<1.17
-  - optuna ~=3.0
-  - cutensor >=2.3.1.0,<3.0a0
-  - cuda-version >=12,<13.0a0
-  - cupy >=13.6.0,<13.7.0a0
-  - libcusparse >=12,<13.0a0
-  - libcusolver >=11,<12.0a0
-  - nccl >=2.27.7.1,<3.0a0
-  - libcublas >=12,<13.0a0
-  - __cuda >=12.0
+  - cuda-cudart-dev_linux-aarch64
+  - cuda-nvrtc
+  - cuda-version >=13,<14.0a0
+  - cupy-core 13.6.0 py313h407dc6c_2
+  - libcublas
+  - libcufft
+  - libcurand
+  - libcusolver
+  - libcusparse
+  - python >=3.13,<3.14.0a0
+  - python_abi 3.13.* *_cp313
   license: MIT
   license_family: MIT
-  purls:
-  - pkg:pypi/cupy?source=hash-mapping
-  size: 56720768
-  timestamp: 1757733006716
-- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-core-13.6.0-py312hdcd7d0a_2.conda
-  sha256: bc3cf5f1f0b0b4653d573507087ee56bfa04900232133e87c9baebfe6a128612
-  md5: 07720f931f710f3d2061b0bdcb808b82
+  purls: []
+  size: 359766
+  timestamp: 1757732380354
+- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-13.6.0-py313h7988abe_2.conda
+  sha256: 3263457e1415b2695cbba24e45b6d200b05f98120169ce56ac266ef9b29f38b7
+  md5: d378f8038cb5acfb9e24650b7b581f48
   depends:
-  - fastrlock >=0.8.3,<0.9.0a0
-  - libgcc >=14
-  - libstdcxx >=14
-  - numpy >=1.22
-  - python >=3.12,<3.13.0a0
+  - cuda-cudart-dev_linux-aarch64
+  - cuda-nvrtc
+  - cuda-version >=12,<13.0a0
+  - cupy-core 13.6.0 py313h6b3a76b_2
+  - libcublas
+  - libcufft
+  - libcurand
+  - libcusolver
+  - libcusparse
+  - python >=3.13,<3.14.0a0
+  - python_abi 3.13.* *_cp313
+  license: MIT
+  license_family: MIT
+  purls: []
+  size: 359576
+  timestamp: 1757733613485
+- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-13.6.0-py314h3ec1dcb_2.conda
+  sha256: 9b78e1d7c9f42ad09dcad9e8784bfe95aec35ff30c10bc0a8f7cc92033e4c11f
+  md5: ef63ce910ca3d9278fa7b411740e6064
+  depends:
+  - cuda-cudart-dev_linux-aarch64
+  - cuda-nvrtc
+  - cuda-version >=13,<14.0a0
+  - cupy-core 13.6.0 py314heaf0aa5_2
+  - libcublas
+  - libcufft
+  - libcurand
+  - libcusolver
+  - libcusparse
+  - python >=3.14.0rc2,<3.15.0a0
+  - python_abi 3.14.* *_cp314
+  license: MIT
+  license_family: MIT
+  purls: []
+  size: 359844
+  timestamp: 1757732501296
+- conda: https://conda.anaconda.org/conda-forge/win-64/cupy-13.6.0-py310h9349102_2.conda
+  sha256: a9de522e66ff07d1567b4011f7a6e6c858f573053c989bf8a3a91276cf211bdc
+  md5: 3f610f7dce9af31ba31ff4bc8e4cc0ef
+  depends:
+  - cuda-cudart-dev_win-64
+  - cuda-nvrtc
+  - cuda-version >=12,<13.0a0
+  - cupy-core 13.6.0 py310h867cfc4_2
+  - libcublas
+  - libcufft
+  - libcurand
+  - libcusolver
+  - libcusparse
+  - python >=3.10,<3.11.0a0
+  - python_abi 3.10.* *_cp310
+  license: MIT
+  license_family: MIT
+  purls: []
+  size: 361800
+  timestamp: 1757734323240
+- conda: https://conda.anaconda.org/conda-forge/win-64/cupy-13.6.0-py311h3856ebc_2.conda
+  sha256: 7fff0c303355730c2e29386159fab97f31b3423bb5fd856e7e449ec735ef8e07
+  md5: 8e5df8d8969bf8dbf85740207e354e4c
+  depends:
+  - cuda-cudart-dev_win-64
+  - cuda-nvrtc
+  - cuda-version >=12,<13.0a0
+  - cupy-core 13.6.0 py311h3f47771_2
+  - libcublas
+  - libcufft
+  - libcurand
+  - libcusolver
+  - libcusparse
+  - python >=3.11,<3.12.0a0
+  - python_abi 3.11.* *_cp311
+  license: MIT
+  license_family: MIT
+  purls: []
+  size: 361007
+  timestamp: 1757734548861
+- conda: https://conda.anaconda.org/conda-forge/win-64/cupy-13.6.0-py312h050d4bf_2.conda
+  sha256: 2f80b492e9bd02d36583caabc9933db381aa4313b25ff9b98e4386f39e2d6244
+  md5: 083c371b7832142e6ea9842088a96f55
+  depends:
+  - cuda-cudart-dev_win-64
+  - cuda-nvrtc
+  - cuda-version >=13,<14.0a0
+  - cupy-core 13.6.0 py312h7babc83_2
+  - libcublas
+  - libcufft
+  - libcurand
+  - libcusolver
+  - libcusparse
+  - python >=3.12,<3.13.0a0
+  - python_abi 3.12.* *_cp312
+  license: MIT
+  license_family: MIT
+  purls: []
+  size: 361055
+  timestamp: 1757732736235
+- conda: https://conda.anaconda.org/conda-forge/win-64/cupy-13.6.0-py312hf676df9_2.conda
+  sha256: 6636ac902b44dbc8f8e14d8d2593d057af2f7b722b704edbe01600a2c90c752c
+  md5: 270e90ae04455f4f85b8763ec1755373
+  depends:
+  - cuda-cudart-dev_win-64
+  - cuda-nvrtc
+  - cuda-version >=12,<13.0a0
+  - cupy-core 13.6.0 py312hc3434b0_2
+  - libcublas
+  - libcufft
+  - libcurand
+  - libcusolver
+  - libcusparse
+  - python >=3.12,<3.13.0a0
+  - python_abi 3.12.* *_cp312
+  license: MIT
+  license_family: MIT
+  purls: []
+  size: 361552
+  timestamp: 1757734756770
+- conda: https://conda.anaconda.org/conda-forge/win-64/cupy-13.6.0-py313h5dfe2c3_2.conda
+  sha256: b36285a74901926ddab1b49e86936957715c1db476207c6e524338867eef9683
+  md5: 01e63e587cf8c7477d53a3e98782e81d
+  depends:
+  - cuda-cudart-dev_win-64
+  - cuda-nvrtc
+  - cuda-version >=12,<13.0a0
+  - cupy-core 13.6.0 py313ha16128a_2
+  - libcublas
+  - libcufft
+  - libcurand
+  - libcusolver
+  - libcusparse
+  - python >=3.13,<3.14.0a0
+  - python_abi 3.13.* *_cp313
+  license: MIT
+  license_family: MIT
+  purls: []
+  size: 361341
+  timestamp: 1757734712476
+- conda: https://conda.anaconda.org/conda-forge/win-64/cupy-13.6.0-py313h670e13b_2.conda
+  sha256: 13b870d34d8df1cd72a60892cc95f150d01e8915f4f11f92a7622602fbe847dc
+  md5: 1c75580206c0367647f7b23bfabb8a93
+  depends:
+  - cuda-cudart-dev_win-64
+  - cuda-nvrtc
+  - cuda-version >=13,<14.0a0
+  - cupy-core 13.6.0 py313haef2af9_2
+  - libcublas
+  - libcufft
+  - libcurand
+  - libcusolver
+  - libcusparse
+  - python >=3.13,<3.14.0a0
+  - python_abi 3.13.* *_cp313
+  license: MIT
+  license_family: MIT
+  purls: []
+  size: 361420
+  timestamp: 1757731939881
+- conda: https://conda.anaconda.org/conda-forge/win-64/cupy-13.6.0-py314h59d4d8c_2.conda
+  sha256: b0755d67f3e501cdfbf6ac6ed5a8a3a37adbb41df25bb6e2922e9b6c59919bd0
+  md5: f60dd8b8db34ab07021459d9a0ad4a8a
+  depends:
+  - cuda-cudart-dev_win-64
+  - cuda-nvrtc
+  - cuda-version >=13,<14.0a0
+  - cupy-core 13.6.0 py314hc101868_2
+  - libcublas
+  - libcufft
+  - libcurand
+  - libcusolver
+  - libcusparse
+  - python >=3.14.0rc2,<3.15.0a0
+  - python_abi 3.14.* *_cp314
+  license: MIT
+  license_family: MIT
+  purls: []
+  size: 361792
+  timestamp: 1757732239805
+- conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-core-13.6.0-py310hbc0d89f_2.conda
+  sha256: 59ee4ca6f4166e575b0f0174941bef5759035e1098abf8f3c6816cc497206c6e
+  md5: 54e7f3bcf179555759acc4341921f3db
+  depends:
+  - __glibc >=2.17,<3.0.a0
+  - fastrlock >=0.8.3,<0.9.0a0
+  - libgcc >=14
+  - libstdcxx >=14
+  - numpy >=1.22
+  - python >=3.10,<3.11.0a0
+  - python_abi 3.10.* *_cp310
+  constrains:
+  - nccl >=2.27.7.1,<3.0a0
+  - cutensor >=2.3.1.0,<3.0a0
+  - cupy >=13.6.0,<13.7.0a0
+  - scipy >=1.7,<1.17
+  - libcusolver >=11,<12.0a0
+  - libcufft >=11,<12.0a0
+  - libcusparse >=12,<13.0a0
+  - cuda-version >=12,<13.0a0
+  - cuda-nvrtc >=12,<13.0a0
+  - libcublas >=12,<13.0a0
+  - optuna ~=3.0
+  - libcurand >=10,<11.0a0
+  - __cuda >=12.0
+  license: MIT
+  license_family: MIT
+  purls:
+  - pkg:pypi/cupy?source=hash-mapping
+  size: 56537348
+  timestamp: 1757732911282
+- conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-core-13.6.0-py311he30c881_2.conda
+  sha256: 45e67d3a56d36935e4189b17e707bf6b887d21df6411fab9d835455a10250db8
+  md5: c9ca2bae852b83675f256aec6c518396
+  depends:
+  - __glibc >=2.17,<3.0.a0
+  - fastrlock >=0.8.3,<0.9.0a0
+  - libgcc >=14
+  - libstdcxx >=14
+  - numpy >=1.22
+  - python >=3.11,<3.12.0a0
+  - python_abi 3.11.* *_cp311
+  constrains:
+  - __cuda >=12.0
+  - cuda-version >=12,<13.0a0
+  - cupy >=13.6.0,<13.7.0a0
+  - nccl >=2.27.7.1,<3.0a0
+  - cuda-nvrtc >=12,<13.0a0
+  - cutensor >=2.3.1.0,<3.0a0
+  - libcusparse >=12,<13.0a0
+  - scipy >=1.7,<1.17
+  - libcufft >=11,<12.0a0
+  - libcurand >=10,<11.0a0
+  - optuna ~=3.0
+  - libcusolver >=11,<12.0a0
+  - libcublas >=12,<13.0a0
+  license: MIT
+  license_family: MIT
+  purls:
+  - pkg:pypi/cupy?source=hash-mapping
+  size: 56743670
+  timestamp: 1757732786905
+- conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-core-13.6.0-py312h16a6543_2.conda
+  sha256: ebe205ad39f19067898f4513816d8c44ac8036d0c4b9f1ee5aa0233e0f5dc1d7
+  md5: e0667d2bf17e4ff3bd50861f245ed961
+  depends:
+  - __glibc >=2.17,<3.0.a0
+  - fastrlock >=0.8.3,<0.9.0a0
+  - libgcc >=14
+  - libstdcxx >=14
+  - numpy >=1.22
+  - python >=3.12,<3.13.0a0
+  - python_abi 3.12.* *_cp312
+  constrains:
+  - cuda-nvrtc >=12,<13.0a0
+  - libcufft >=11,<12.0a0
+  - libcurand >=10,<11.0a0
+  - scipy >=1.7,<1.17
+  - optuna ~=3.0
+  - cutensor >=2.3.1.0,<3.0a0
+  - cuda-version >=12,<13.0a0
+  - cupy >=13.6.0,<13.7.0a0
+  - libcusparse >=12,<13.0a0
+  - libcusolver >=11,<12.0a0
+  - nccl >=2.27.7.1,<3.0a0
+  - libcublas >=12,<13.0a0
+  - __cuda >=12.0
+  license: MIT
+  license_family: MIT
+  purls:
+  - pkg:pypi/cupy?source=hash-mapping
+  size: 56720768
+  timestamp: 1757733006716
+- conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-core-13.6.0-py312h1a70bb2_2.conda
+  sha256: 955e08c61145c77fcafe91d88bded3fe7bfe87e46a08db2f1345980d56a5444d
+  md5: b7613be94326f391c4b6edd7f114d3ee
+  depends:
+  - __glibc >=2.28,<3.0.a0
+  - fastrlock >=0.8.3,<0.9.0a0
+  - libgcc >=14
+  - libstdcxx >=14
+  - numpy >=1.22
+  - python >=3.12,<3.13.0a0
+  - python_abi 3.12.* *_cp312
+  constrains:
+  - optuna ~=3.0
+  - libcurand >=10,<11.0a0
+  - cuda-nvrtc >=13,<14.0a0
+  - __cuda >=13.0
+  - nccl >=2.27.7.1,<3.0a0
+  - cuda-version >=13,<14.0a0
+  - libcublas >=13,<14.0a0
+  - libcusolver >=12,<13.0a0
+  - cupy >=13.6.0,<13.7.0a0
+  - cutensor >=2.3.1.0,<3.0a0
+  - libcufft >=12,<13.0a0
+  - scipy >=1.7,<1.17
+  - libcusparse >=12,<13.0a0
+  license: MIT
+  license_family: MIT
+  purls:
+  - pkg:pypi/cupy?source=hash-mapping
+  size: 31539281
+  timestamp: 1757731547163
+- conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-core-13.6.0-py313h0630d88_2.conda
+  sha256: 82c950c3118d81368ad0dee224ab946c963b57ccad34911cacdcc52fc046d792
+  md5: a2a6a0df7ef6e9ae482bae698cfd7476
+  depends:
+  - __glibc >=2.28,<3.0.a0
+  - fastrlock >=0.8.3,<0.9.0a0
+  - libgcc >=14
+  - libstdcxx >=14
+  - numpy >=1.22
+  - python >=3.13,<3.14.0a0
+  - python_abi 3.13.* *_cp313
+  constrains:
+  - libcusolver >=12,<13.0a0
+  - cuda-nvrtc >=13,<14.0a0
+  - cupy >=13.6.0,<13.7.0a0
+  - libcublas >=13,<14.0a0
+  - __cuda >=13.0
+  - libcufft >=12,<13.0a0
+  - optuna ~=3.0
+  - nccl >=2.27.7.1,<3.0a0
+  - cutensor >=2.3.1.0,<3.0a0
+  - libcurand >=10,<11.0a0
+  - scipy >=1.7,<1.17
+  - cuda-version >=13,<14.0a0
+  - libcusparse >=12,<13.0a0
+  license: MIT
+  license_family: MIT
+  purls:
+  - pkg:pypi/cupy?source=hash-mapping
+  size: 31734692
+  timestamp: 1757731531047
+- conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-core-13.6.0-py313h28b6081_2.conda
+  sha256: 56b2ebb09c8a74746f1dbf660bb7a50af562e9416a2f9733d8e8715503cca81a
+  md5: 388fb72307f756f7f2c7f5928647bc6b
+  depends:
+  - __glibc >=2.17,<3.0.a0
+  - fastrlock >=0.8.3,<0.9.0a0
+  - libgcc >=14
+  - libstdcxx >=14
+  - numpy >=1.22
+  - python >=3.13,<3.14.0a0
+  - python_abi 3.13.* *_cp313
+  constrains:
+  - libcusolver >=11,<12.0a0
+  - cuda-nvrtc >=12,<13.0a0
+  - libcufft >=11,<12.0a0
+  - libcurand >=10,<11.0a0
+  - cupy >=13.6.0,<13.7.0a0
+  - libcusparse >=12,<13.0a0
+  - scipy >=1.7,<1.17
+  - optuna ~=3.0
+  - cutensor >=2.3.1.0,<3.0a0
+  - __cuda >=12.0
+  - cuda-version >=12,<13.0a0
+  - nccl >=2.27.7.1,<3.0a0
+  - libcublas >=12,<13.0a0
+  license: MIT
+  license_family: MIT
+  purls:
+  - pkg:pypi/cupy?source=hash-mapping
+  size: 56733316
+  timestamp: 1757732780713
+- conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-core-13.6.0-py314h3ed1f13_2.conda
+  sha256: f377afaddccdaead2963bdabc3fa550e8d3e5d6aa6fc632cc01eadfd11442ef8
+  md5: d80c89a6489cb472feb8b009c34d3c11
+  depends:
+  - __glibc >=2.28,<3.0.a0
+  - fastrlock >=0.8.3,<0.9.0a0
+  - libgcc >=14
+  - libstdcxx >=14
+  - numpy >=1.22
+  - python >=3.14.0rc2,<3.15.0a0
+  - python_abi 3.14.* *_cp314
+  constrains:
+  - libcusparse >=12,<13.0a0
+  - scipy >=1.7,<1.17
+  - __cuda >=13.0
+  - cuda-version >=13,<14.0a0
+  - cutensor >=2.3.1.0,<3.0a0
+  - cupy >=13.6.0,<13.7.0a0
+  - libcusolver >=12,<13.0a0
+  - nccl >=2.27.7.1,<3.0a0
+  - optuna ~=3.0
+  - libcufft >=12,<13.0a0
+  - cuda-nvrtc >=13,<14.0a0
+  - libcublas >=13,<14.0a0
+  - libcurand >=10,<11.0a0
+  license: MIT
+  license_family: MIT
+  purls:
+  - pkg:pypi/cupy?source=hash-mapping
+  size: 31824862
+  timestamp: 1757731889554
+- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-core-13.6.0-py310h967c7ba_2.conda
+  sha256: ecc9ed490591577e7a0d9a994a1c4ae3a2f0b8605cdb3c67548fd8c1aeb48c95
+  md5: f77bbe8edf8f4c9e1be06aebb99bebec
+  depends:
+  - fastrlock >=0.8.3,<0.9.0a0
+  - libgcc >=14
+  - libstdcxx >=14
+  - numpy >=1.22
+  - python >=3.10,<3.11.0a0
+  - python >=3.10,<3.11.0a0 *_cpython
+  - python_abi 3.10.* *_cp310
+  constrains:
+  - scipy >=1.7,<1.17
+  - __cuda >=12.0
+  - libcufft >=11,<12.0a0
+  - libcurand >=10,<11.0a0
+  - libcusolver >=11,<12.0a0
+  - cuda-version >=12,<13.0a0
+  - libcusparse >=12,<13.0a0
+  - cupy >=13.6.0,<13.7.0a0
+  - cuda-nvrtc >=12,<13.0a0
+  - nccl >=2.27.7.1,<3.0a0
+  - libcublas >=12,<13.0a0
+  - optuna ~=3.0
+  - cutensor >=2.3.1.0,<3.0a0
+  license: MIT
+  license_family: MIT
+  purls:
+  - pkg:pypi/cupy?source=hash-mapping
+  size: 63896600
+  timestamp: 1757733496346
+- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-core-13.6.0-py311h6a7bbfe_2.conda
+  sha256: 8ebdcc75e0cf89d5f73b34dde93dad9387b76b883b197fe3e41cee7b116376fb
+  md5: 5f61a21425c550d4badcdbf96c8723f9
+  depends:
+  - fastrlock >=0.8.3,<0.9.0a0
+  - libgcc >=14
+  - libstdcxx >=14
+  - numpy >=1.22
+  - python >=3.11,<3.12.0a0
+  - python >=3.11,<3.12.0a0 *_cpython
+  - python_abi 3.11.* *_cp311
+  constrains:
+  - cupy >=13.6.0,<13.7.0a0
+  - libcufft >=11,<12.0a0
+  - cuda-version >=12,<13.0a0
+  - libcusolver >=11,<12.0a0
+  - __cuda >=12.0
+  - libcublas >=12,<13.0a0
+  - libcurand >=10,<11.0a0
+  - optuna ~=3.0
+  - cuda-nvrtc >=12,<13.0a0
+  - cutensor >=2.3.1.0,<3.0a0
+  - scipy >=1.7,<1.17
+  - libcusparse >=12,<13.0a0
+  - nccl >=2.27.7.1,<3.0a0
+  license: MIT
+  license_family: MIT
+  purls:
+  - pkg:pypi/cupy?source=hash-mapping
+  size: 64073249
+  timestamp: 1757733413707
+- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-core-13.6.0-py312hc495b10_2.conda
+  sha256: f862a404f82ca39e331802d3373d4b75dc4e53e885d8c5e6e222dfa59feab962
+  md5: 363b56bd0a936dc789f017ef904d4c75
+  depends:
+  - __glibc >=2.28,<3.0.a0
+  - fastrlock >=0.8.3,<0.9.0a0
+  - libgcc >=14
+  - libstdcxx >=14
+  - numpy >=1.22
+  - python >=3.12,<3.13.0a0
   - python >=3.12,<3.13.0a0 *_cpython
   - python_abi 3.12.* *_cp312
   constrains:
-  - cuda-nvrtc >=12,<13.0a0
+  - cuda-nvrtc >=13,<14.0a0
+  - __cuda >=13.0
+  - libcufft >=12,<13.0a0
+  - nccl >=2.27.7.1,<3.0a0
+  - cutensor >=2.3.1.0,<3.0a0
+  - libcusolver >=12,<13.0a0
+  - libcublas >=13,<14.0a0
+  - libcusparse >=12,<13.0a0
+  - cupy >=13.6.0,<13.7.0a0
+  - cuda-version >=13,<14.0a0
+  - scipy >=1.7,<1.17
+  - libcurand >=10,<11.0a0
+  - optuna ~=3.0
+  license: MIT
+  license_family: MIT
+  purls:
+  - pkg:pypi/cupy?source=hash-mapping
+  size: 36623787
+  timestamp: 1757732346566
+- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-core-13.6.0-py312hdcd7d0a_2.conda
+  sha256: bc3cf5f1f0b0b4653d573507087ee56bfa04900232133e87c9baebfe6a128612
+  md5: 07720f931f710f3d2061b0bdcb808b82
+  depends:
+  - fastrlock >=0.8.3,<0.9.0a0
+  - libgcc >=14
+  - libstdcxx >=14
+  - numpy >=1.22
+  - python >=3.12,<3.13.0a0
+  - python >=3.12,<3.13.0a0 *_cpython
+  - python_abi 3.12.* *_cp312
+  constrains:
+  - cuda-nvrtc >=12,<13.0a0
+  - cupy >=13.6.0,<13.7.0a0
+  - libcusparse >=12,<13.0a0
+  - cuda-version >=12,<13.0a0
+  - libcurand >=10,<11.0a0
+  - libcufft >=11,<12.0a0
+  - nccl >=2.27.7.1,<3.0a0
+  - libcusolver >=11,<12.0a0
+  - cutensor >=2.3.1.0,<3.0a0
+  - __cuda >=12.0
+  - libcublas >=12,<13.0a0
+  - optuna ~=3.0
+  - scipy >=1.7,<1.17
+  license: MIT
+  license_family: MIT
+  purls:
+  - pkg:pypi/cupy?source=hash-mapping
+  size: 63865734
+  timestamp: 1757733078190
+- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-core-13.6.0-py313h407dc6c_2.conda
+  sha256: 69a3cd03e492b76d29d66d83a533d132ffcb99f8f6831191ecb99e8372e8f76e
+  md5: ff191fa08a0238048035b0638e21220b
+  depends:
+  - __glibc >=2.28,<3.0.a0
+  - fastrlock >=0.8.3,<0.9.0a0
+  - libgcc >=14
+  - libstdcxx >=14
+  - numpy >=1.22
+  - python >=3.13,<3.14.0a0
+  - python >=3.13,<3.14.0a0 *_cp313
+  - python_abi 3.13.* *_cp313
+  constrains:
+  - cuda-nvrtc >=13,<14.0a0
+  - scipy >=1.7,<1.17
+  - nccl >=2.27.7.1,<3.0a0
+  - libcurand >=10,<11.0a0
+  - cuda-version >=13,<14.0a0
+  - libcusparse >=12,<13.0a0
+  - __cuda >=13.0
+  - libcufft >=12,<13.0a0
+  - optuna ~=3.0
+  - libcublas >=13,<14.0a0
+  - cupy >=13.6.0,<13.7.0a0
+  - libcusolver >=12,<13.0a0
+  - cutensor >=2.3.1.0,<3.0a0
+  license: MIT
+  license_family: MIT
+  purls:
+  - pkg:pypi/cupy?source=hash-mapping
+  size: 36458540
+  timestamp: 1757732319930
+- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-core-13.6.0-py313h6b3a76b_2.conda
+  sha256: 004036b2bd95684b64ae4a56663988da4026136c57be580de080e137b4e48b43
+  md5: 0fb0ebad3a2eb9f4c860465c47955131
+  depends:
+  - fastrlock >=0.8.3,<0.9.0a0
+  - libgcc >=14
+  - libstdcxx >=14
+  - numpy >=1.22
+  - python >=3.13,<3.14.0a0
+  - python >=3.13,<3.14.0a0 *_cp313
+  - python_abi 3.13.* *_cp313
+  constrains:
+  - __cuda >=12.0
+  - nccl >=2.27.7.1,<3.0a0
   - cupy >=13.6.0,<13.7.0a0
+  - scipy >=1.7,<1.17
+  - libcurand >=10,<11.0a0
   - libcusparse >=12,<13.0a0
+  - cuda-nvrtc >=12,<13.0a0
   - cuda-version >=12,<13.0a0
-  - libcurand >=10,<11.0a0
+  - libcublas >=12,<13.0a0
   - libcufft >=11,<12.0a0
-  - nccl >=2.27.7.1,<3.0a0
+  - cutensor >=2.3.1.0,<3.0a0
   - libcusolver >=11,<12.0a0
+  - optuna ~=3.0
+  license: MIT
+  license_family: MIT
+  purls:
+  - pkg:pypi/cupy?source=hash-mapping
+  size: 63810519
+  timestamp: 1757733528386
+- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-core-13.6.0-py314heaf0aa5_2.conda
+  sha256: 354bc9f675b433909ff8854371e4b3606817dc304c0cd95c63d67d480ab80462
+  md5: c37b0c4ab40f2b23de6ab31042b2476a
+  depends:
+  - __glibc >=2.28,<3.0.a0
+  - fastrlock >=0.8.3,<0.9.0a0
+  - libgcc >=14
+  - libstdcxx >=14
+  - numpy >=1.22
+  - python >=3.14.0rc2,<3.15.0a0
+  - python >=3.14.0rc2,<3.15.0a0 *_cp314
+  - python_abi 3.14.* *_cp314
+  constrains:
+  - scipy >=1.7,<1.17
+  - libcusparse >=12,<13.0a0
+  - libcusolver >=12,<13.0a0
+  - nccl >=2.27.7.1,<3.0a0
+  - cupy >=13.6.0,<13.7.0a0
+  - cuda-version >=13,<14.0a0
   - cutensor >=2.3.1.0,<3.0a0
+  - cuda-nvrtc >=13,<14.0a0
+  - optuna ~=3.0
+  - libcufft >=12,<13.0a0
+  - __cuda >=13.0
+  - libcublas >=13,<14.0a0
+  - libcurand >=10,<11.0a0
+  license: MIT
+  license_family: MIT
+  purls:
+  - pkg:pypi/cupy?source=hash-mapping
+  size: 36619576
+  timestamp: 1757732433081
+- conda: https://conda.anaconda.org/conda-forge/win-64/cupy-core-13.6.0-py310h867cfc4_2.conda
+  sha256: 115131c370be8c410fba05e78d83f197b4c48a8b30fd8a32c3bd8d9a3ad80215
+  md5: 2b22c1eb70d5f8f86c4babac37703437
+  depends:
+  - fastrlock >=0.8.3,<0.9.0a0
+  - numpy >=1.22
+  - python >=3.10,<3.11.0a0
+  - python_abi 3.10.* *_cp310
+  - ucrt >=10.0.20348.0
+  - vc >=14.2,<15
+  - vc14_runtime >=14.29.30139
+  constrains:
+  - cuda-version >=12,<13.0a0
+  - optuna ~=3.0
+  - scipy >=1.7,<1.17
   - __cuda >=12.0
+  - cupy >=13.6.0,<13.7.0a0
+  - libcublas >=12,<13.0a0
+  - libcurand >=10,<11.0a0
+  - libcusolver >=11,<12.0a0
+  - cutensor >=2.3.1.0,<3.0a0
+  - libcusparse >=12,<13.0a0
+  - libcufft >=11,<12.0a0
+  - cuda-nvrtc >=12,<13.0a0
+  license: MIT
+  license_family: MIT
+  purls:
+  - pkg:pypi/cupy?source=hash-mapping
+  size: 54351742
+  timestamp: 1757734211315
+- conda: https://conda.anaconda.org/conda-forge/win-64/cupy-core-13.6.0-py311h3f47771_2.conda
+  sha256: c874fd562f9750b468f009dfcc4ed6de6bb41ccc3bb6d65957ab3c1498613675
+  md5: 16602f6836e4e866c52a78a21feb1560
+  depends:
+  - fastrlock >=0.8.3,<0.9.0a0
+  - numpy >=1.22
+  - python >=3.11,<3.12.0a0
+  - python_abi 3.11.* *_cp311
+  - ucrt >=10.0.20348.0
+  - vc >=14.2,<15
+  - vc14_runtime >=14.29.30139
+  constrains:
+  - libcusolver >=11,<12.0a0
+  - libcurand >=10,<11.0a0
+  - libcufft >=11,<12.0a0
+  - libcusparse >=12,<13.0a0
+  - cuda-version >=12,<13.0a0
   - libcublas >=12,<13.0a0
   - optuna ~=3.0
+  - cutensor >=2.3.1.0,<3.0a0
+  - cuda-nvrtc >=12,<13.0a0
   - scipy >=1.7,<1.17
+  - __cuda >=12.0
+  - cupy >=13.6.0,<13.7.0a0
   license: MIT
   license_family: MIT
   purls:
   - pkg:pypi/cupy?source=hash-mapping
-  size: 63865734
-  timestamp: 1757733078190
+  size: 54764492
+  timestamp: 1757734470749
+- conda: https://conda.anaconda.org/conda-forge/win-64/cupy-core-13.6.0-py312h7babc83_2.conda
+  sha256: 18582d52c9abd1e5008af3e4bd38552b8410713777066ac8024415c99d7a83e8
+  md5: 33050f8f5af87ae8cbfbb4e40de61fbf
+  depends:
+  - fastrlock >=0.8.3,<0.9.0a0
+  - numpy >=1.22
+  - python >=3.12,<3.13.0a0
+  - python_abi 3.12.* *_cp312
+  - ucrt >=10.0.20348.0
+  - vc >=14.2,<15
+  - vc14_runtime >=14.29.30139
+  constrains:
+  - cupy >=13.6.0,<13.7.0a0
+  - libcurand >=10,<11.0a0
+  - cuda-version >=13,<14.0a0
+  - libcufft >=12,<13.0a0
+  - libcusolver >=12,<13.0a0
+  - cutensor >=2.3.1.0,<3.0a0
+  - __cuda >=13.0
+  - scipy >=1.7,<1.17
+  - libcusparse >=12,<13.0a0
+  - cuda-nvrtc >=13,<14.0a0
+  - optuna ~=3.0
+  - libcublas >=13,<14.0a0
+  license: MIT
+  license_family: MIT
+  purls:
+  - pkg:pypi/cupy?source=hash-mapping
+  size: 29913800
+  timestamp: 1757732657370
 - conda: https://conda.anaconda.org/conda-forge/win-64/cupy-core-13.6.0-py312hc3434b0_2.conda
   sha256: 9aae135cb29962786adafa0b3bae094f9fce0b4ca386aaaa7d038ae518efcba6
   md5: 9e1c32b5b8172ae6666850b583355257
@@ -13033,6 +14091,96 @@ packages:
   - pkg:pypi/cupy?source=hash-mapping
   size: 54685402
   timestamp: 1757734676711
+- conda: https://conda.anaconda.org/conda-forge/win-64/cupy-core-13.6.0-py313ha16128a_2.conda
+  sha256: 7df3f437c45ba61754643a2c61f4e6c7c5b4be3bf58fa029d39e4fc8ddb7e54b
+  md5: 5a270c8af5e377ff40932ce8ec8472e3
+  depends:
+  - fastrlock >=0.8.3,<0.9.0a0
+  - numpy >=1.22
+  - python >=3.13,<3.14.0a0
+  - python_abi 3.13.* *_cp313
+  - ucrt >=10.0.20348.0
+  - vc >=14.2,<15
+  - vc14_runtime >=14.29.30139
+  constrains:
+  - libcusolver >=11,<12.0a0
+  - cuda-version >=12,<13.0a0
+  - __cuda >=12.0
+  - scipy >=1.7,<1.17
+  - libcublas >=12,<13.0a0
+  - optuna ~=3.0
+  - cuda-nvrtc >=12,<13.0a0
+  - cutensor >=2.3.1.0,<3.0a0
+  - libcusparse >=12,<13.0a0
+  - cupy >=13.6.0,<13.7.0a0
+  - libcufft >=11,<12.0a0
+  - libcurand >=10,<11.0a0
+  license: MIT
+  license_family: MIT
+  purls:
+  - pkg:pypi/cupy?source=hash-mapping
+  size: 54751648
+  timestamp: 1757734626461
+- conda: https://conda.anaconda.org/conda-forge/win-64/cupy-core-13.6.0-py313haef2af9_2.conda
+  sha256: b207060087be5dcb79c533d4d160730f3a7de23d5e96253fe0770b1dc03cc124
+  md5: 60df31229f6e6084a0c8a7ee07976133
+  depends:
+  - fastrlock >=0.8.3,<0.9.0a0
+  - numpy >=1.22
+  - python >=3.13,<3.14.0a0
+  - python_abi 3.13.* *_cp313
+  - ucrt >=10.0.20348.0
+  - vc >=14.2,<15
+  - vc14_runtime >=14.29.30139
+  constrains:
+  - __cuda >=13.0
+  - cuda-nvrtc >=13,<14.0a0
+  - scipy >=1.7,<1.17
+  - libcusolver >=12,<13.0a0
+  - libcublas >=13,<14.0a0
+  - libcusparse >=12,<13.0a0
+  - libcufft >=12,<13.0a0
+  - cuda-version >=13,<14.0a0
+  - cutensor >=2.3.1.0,<3.0a0
+  - libcurand >=10,<11.0a0
+  - cupy >=13.6.0,<13.7.0a0
+  - optuna ~=3.0
+  license: MIT
+  license_family: MIT
+  purls:
+  - pkg:pypi/cupy?source=hash-mapping
+  size: 29792078
+  timestamp: 1757731883397
+- conda: https://conda.anaconda.org/conda-forge/win-64/cupy-core-13.6.0-py314hc101868_2.conda
+  sha256: f6533a698dd95c9d18efea957d3b524906f9fed4d69ba67b51b158be8aa51a64
+  md5: 06b1af7b5254c0864e82e1105f9f0f2e
+  depends:
+  - fastrlock >=0.8.3,<0.9.0a0
+  - numpy >=1.22
+  - python >=3.14.0rc2,<3.15.0a0
+  - python_abi 3.14.* *_cp314
+  - ucrt >=10.0.20348.0
+  - vc >=14.2,<15
+  - vc14_runtime >=14.29.30139
+  constrains:
+  - cutensor >=2.3.1.0,<3.0a0
+  - libcublas >=13,<14.0a0
+  - cuda-version >=13,<14.0a0
+  - scipy >=1.7,<1.17
+  - cuda-nvrtc >=13,<14.0a0
+  - cupy >=13.6.0,<13.7.0a0
+  - libcurand >=10,<11.0a0
+  - libcusparse >=12,<13.0a0
+  - optuna ~=3.0
+  - libcufft >=12,<13.0a0
+  - __cuda >=13.0
+  - libcusolver >=12,<13.0a0
+  license: MIT
+  license_family: MIT
+  purls:
+  - pkg:pypi/cupy?source=hash-mapping
+  size: 29836019
+  timestamp: 1757732178441
 - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda
   sha256: 6d977f0b2fc24fee21a9554389ab83070db341af6d6f09285360b2e09ef8b26e
   md5: 003b8ba0a94e2f1e117d0bd46aebc901
@@ -13073,25 +14221,120 @@ packages:
   license: MIT
   license_family: MIT
   purls:
-  - pkg:pypi/execnet?source=hash-mapping
-  size: 39499
-  timestamp: 1762974150770
-- conda: https://conda.anaconda.org/conda-forge/linux-64/fastrlock-0.8.3-py312h8285ef7_2.conda
-  sha256: b0e5b19d2148816914920fe5c3148d5b5bf7c46bc34a2cac5124883bd1b83d05
-  md5: 94fb93ec1751a3614d3a6f184832fd87
+  - pkg:pypi/execnet?source=hash-mapping
+  size: 39499
+  timestamp: 1762974150770
+- conda: https://conda.anaconda.org/conda-forge/linux-64/fastrlock-0.8.3-py310h25320af_2.conda
+  sha256: 95eea806cb216036e4d0446fcff724c334c8899d02be2368a430ec5361ed29a4
+  md5: 8dbd4fc06661c78fdc2daedf23824bfe
+  depends:
+  - python
+  - libgcc >=14
+  - libstdcxx >=14
+  - libgcc >=14
+  - __glibc >=2.17,<3.0.a0
+  - python_abi 3.10.* *_cp310
+  license: MIT
+  license_family: MIT
+  purls:
+  - pkg:pypi/fastrlock?source=hash-mapping
+  size: 40665
+  timestamp: 1756729198132
+- conda: https://conda.anaconda.org/conda-forge/linux-64/fastrlock-0.8.3-py311hc665b79_2.conda
+  sha256: 5299a4aeaf04fbc2f8f46e707ae16c1f4e594905e6df18457f18ba002a886110
+  md5: ac18884886449ce97b76f8906462ff27
+  depends:
+  - python
+  - libgcc >=14
+  - libstdcxx >=14
+  - libgcc >=14
+  - __glibc >=2.17,<3.0.a0
+  - python_abi 3.11.* *_cp311
+  license: MIT
+  license_family: MIT
+  purls:
+  - pkg:pypi/fastrlock?source=hash-mapping
+  size: 41082
+  timestamp: 1756729161435
+- conda: https://conda.anaconda.org/conda-forge/linux-64/fastrlock-0.8.3-py312h8285ef7_2.conda
+  sha256: b0e5b19d2148816914920fe5c3148d5b5bf7c46bc34a2cac5124883bd1b83d05
+  md5: 94fb93ec1751a3614d3a6f184832fd87
+  depends:
+  - python
+  - __glibc >=2.17,<3.0.a0
+  - libgcc >=14
+  - libstdcxx >=14
+  - libgcc >=14
+  - python_abi 3.12.* *_cp312
+  license: MIT
+  license_family: MIT
+  purls:
+  - pkg:pypi/fastrlock?source=hash-mapping
+  size: 41672
+  timestamp: 1756729175159
+- conda: https://conda.anaconda.org/conda-forge/linux-64/fastrlock-0.8.3-py313h5d5ffb9_2.conda
+  sha256: 30498ed45133f457fd9ed14d5fac6512347f05d11fe1ed89842c7dfdb516f78f
+  md5: 9bcbd351966dc56a24fc0c368da5ad99
+  depends:
+  - python
+  - __glibc >=2.17,<3.0.a0
+  - libstdcxx >=14
+  - libgcc >=14
+  - python_abi 3.13.* *_cp313
+  license: MIT
+  license_family: MIT
+  purls:
+  - pkg:pypi/fastrlock?source=hash-mapping
+  size: 41201
+  timestamp: 1756729160955
+- conda: https://conda.anaconda.org/conda-forge/linux-64/fastrlock-0.8.3-py314h8c728da_2.conda
+  sha256: 1ea4fd24e37d27692b04b57fa51f14fd2217ea251087ce1c0701af234c1452d9
+  md5: f1f936bb0ff435f3190ca1c17fa327e7
   depends:
   - python
+  - libstdcxx >=14
+  - libgcc >=14
   - __glibc >=2.17,<3.0.a0
   - libgcc >=14
+  - python_abi 3.14.* *_cp314
+  license: MIT
+  license_family: MIT
+  purls:
+  - pkg:pypi/fastrlock?source=hash-mapping
+  size: 41496
+  timestamp: 1756729160091
+- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/fastrlock-0.8.3-py310heccc163_2.conda
+  sha256: aad519c924568a72bd4dcab74c793d4b09e339dce6bd3c5c027bd498eef7ccc4
+  md5: caafa6b88cc2cff22a72280c8f083a31
+  depends:
+  - python
+  - python 3.10.* *_cpython
+  - libgcc >=14
   - libstdcxx >=14
   - libgcc >=14
-  - python_abi 3.12.* *_cp312
+  - python_abi 3.10.* *_cp310
   license: MIT
   license_family: MIT
   purls:
   - pkg:pypi/fastrlock?source=hash-mapping
-  size: 41672
-  timestamp: 1756729175159
+  size: 44918
+  timestamp: 1756729193056
+- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/fastrlock-0.8.3-py311h8e4e6a5_2.conda
+  sha256: f186881661b83be9fb8d47c71340997b929fa5e0673ead9070082b8e390d6a73
+  md5: 9251413f2e3ea6eb586b21423f849536
+  depends:
+  - python
+  - libstdcxx >=14
+  - libgcc >=14
+  - python 3.11.* *_cpython
+  - libgcc >=14
+  - python_abi 3.11.* *_cp311
+  license: MIT
+  license_family: MIT
+  purls:
+  - pkg:pypi/fastrlock?source=hash-mapping
+  size: 45171
+  timestamp: 1756729186510
 - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/fastrlock-0.8.3-py312hf55c4e8_2.conda
   sha256: 5c5cfaf55a0165c45ee63beb92abf4aa2ae1ef28d8064f7c884749ec4bd00a22
   md5: 7ec9d6889be02f9bf66cfb9dd3112c8b
@@ -13108,6 +14351,73 @@ packages:
   - pkg:pypi/fastrlock?source=hash-mapping
   size: 45432
   timestamp: 1756729166837
+- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/fastrlock-0.8.3-py313h59403f9_2.conda
+  sha256: e28da81b99c8970e19e4f3ef7758a7a695263e0d3ff7d9fbdf232690bef6519d
+  md5: 59043167df894cee605e4cf470302bda
+  depends:
+  - python
+  - python 3.13.* *_cp313
+  - libgcc >=14
+  - libstdcxx >=14
+  - libgcc >=14
+  - python_abi 3.13.* *_cp313
+  license: MIT
+  license_family: MIT
+  purls:
+  - pkg:pypi/fastrlock?source=hash-mapping
+  size: 44449
+  timestamp: 1756729165562
+- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/fastrlock-0.8.3-py314h3642cf7_2.conda
+  sha256: 512662de1d9d4231feaf6f818014317dad4f2a60d8ef0d859f72116a69062583
+  md5: 685382bf317bd1d7f174e763c91d98a1
+  depends:
+  - python
+  - python 3.14.* *_cp314
+  - libstdcxx >=14
+  - libgcc >=14
+  - python_abi 3.14.* *_cp314
+  license: MIT
+  license_family: MIT
+  purls:
+  - pkg:pypi/fastrlock?source=hash-mapping
+  size: 44705
+  timestamp: 1756729193250
+- conda: https://conda.anaconda.org/conda-forge/win-64/fastrlock-0.8.3-py310h699e580_2.conda
+  sha256: 57deb00090c09edc841a43499f23396bb35d51aa5aaa6886d4ae1d0ff969b3dd
+  md5: 3207527dea58c115e7e97856709465db
+  depends:
+  - python
+  - vc >=14.3,<15
+  - vc14_runtime >=14.44.35208
+  - ucrt >=10.0.20348.0
+  - vc >=14.3,<15
+  - vc14_runtime >=14.44.35208
+  - ucrt >=10.0.20348.0
+  - python_abi 3.10.* *_cp310
+  license: MIT
+  license_family: MIT
+  purls:
+  - pkg:pypi/fastrlock?source=hash-mapping
+  size: 36960
+  timestamp: 1756729187087
+- conda: https://conda.anaconda.org/conda-forge/win-64/fastrlock-0.8.3-py311h5dfdfe8_2.conda
+  sha256: dd0a2552a36565545aedc65739ffc11574167c263340b32ff6314ce998168e08
+  md5: 4fb7d2650ac4a3967e8e57d68e801db3
+  depends:
+  - python
+  - vc >=14.3,<15
+  - vc14_runtime >=14.44.35208
+  - ucrt >=10.0.20348.0
+  - vc >=14.3,<15
+  - vc14_runtime >=14.44.35208
+  - ucrt >=10.0.20348.0
+  - python_abi 3.11.* *_cp311
+  license: MIT
+  license_family: MIT
+  purls:
+  - pkg:pypi/fastrlock?source=hash-mapping
+  size: 37145
+  timestamp: 1756729198099
 - conda: https://conda.anaconda.org/conda-forge/win-64/fastrlock-0.8.3-py312ha1a9051_2.conda
   sha256: bbcc17eb4acf110032fe8092d4e54a6d262b72d504597103e72a958fb248579f
   md5: b6ff9e7af087d51a24353f16d1a3ed06
@@ -13126,6 +14436,42 @@ packages:
   - pkg:pypi/fastrlock?source=hash-mapping
   size: 37498
   timestamp: 1756729168844
+- conda: https://conda.anaconda.org/conda-forge/win-64/fastrlock-0.8.3-py313h927ade5_2.conda
+  sha256: 2a23cce182f04de8e522d47a9e41f9f9a85eb25a2d67d52356ce1d6522bbbe79
+  md5: 1fc8d6295c7ebff653118d2ba22cf226
+  depends:
+  - python
+  - vc >=14.3,<15
+  - vc14_runtime >=14.44.35208
+  - ucrt >=10.0.20348.0
+  - vc >=14.3,<15
+  - vc14_runtime >=14.44.35208
+  - ucrt >=10.0.20348.0
+  - python_abi 3.13.* *_cp313
+  license: MIT
+  license_family: MIT
+  purls:
+  - pkg:pypi/fastrlock?source=hash-mapping
+  size: 36385
+  timestamp: 1756729186432
+- conda: https://conda.anaconda.org/conda-forge/win-64/fastrlock-0.8.3-py314h8b4fd5f_2.conda
+  sha256: 1d341146022014b0f0d4b33630ba1757246dd6b5ecefdada0d49e6db774a18a9
+  md5: ac8c973aff08071df98933eccd5a7fa5
+  depends:
+  - python
+  - vc >=14.3,<15
+  - vc14_runtime >=14.44.35208
+  - ucrt >=10.0.20348.0
+  - vc >=14.3,<15
+  - vc14_runtime >=14.44.35208
+  - ucrt >=10.0.20348.0
+  - python_abi 3.14.* *_cp314
+  license: MIT
+  license_family: MIT
+  purls:
+  - pkg:pypi/fastrlock?source=hash-mapping
+  size: 36661
+  timestamp: 1756729190828
 - pypi: https://files.pythonhosted.org/packages/5c/40/69ca9ea803303e14301fff9d4931b6d080b9603e134df0419c55e9764df4/filecheck-1.0.3-py3-none-any.whl
   name: filecheck
   version: 1.0.3
@@ -13975,6 +15321,32 @@ packages:
   purls: []
   size: 68079
   timestamp: 1765819124349
+- conda: https://conda.anaconda.org/conda-forge/linux-64/libcublas-12.0.1.189-hd3aeb46_3.conda
+  sha256: a3c89c1b6018d16c22fc583887f728b3065a1f50a82d8a40a793a973aac606c5
+  md5: 626745031f369cf70670283436cc6742
+  depends:
+  - __glibc >=2.17,<3.0.a0
+  - cuda-nvrtc
+  - cuda-version >=12.0,<12.1.0a0
+  - libgcc-ng >=12
+  - libstdcxx-ng >=12
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 233989011
+  timestamp: 1701931830910
+- conda: https://conda.anaconda.org/conda-forge/linux-64/libcublas-12.2.5.6-hd3aeb46_0.conda
+  sha256: 7af6a21b53736b5a53c1044808ffd781a6ee1f0a66b618bf3c834a71bdb706aa
+  md5: c216c28589360a5acee904b480911c14
+  depends:
+  - __glibc >=2.17,<3.0.a0
+  - cuda-nvrtc
+  - cuda-version >=12.2,<12.3.0a0
+  - libgcc-ng >=12
+  - libstdcxx-ng >=12
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 258710189
+  timestamp: 1702976169266
 - conda: https://conda.anaconda.org/conda-forge/linux-64/libcublas-12.8.4.1-h9ab20c4_1.conda
   sha256: 3d3f7344db000feced2f9154cf0b3f3d245a1d317a1981e43b8b15f7baaaf6f1
   md5: 3ba4fd8bef181c020173d29ac67cae68
@@ -14027,6 +15399,47 @@ packages:
   purls: []
   size: 393920044
   timestamp: 1764897195935
+- conda: https://conda.anaconda.org/conda-forge/linux-64/libcublas-13.2.1.1-h676940d_0.conda
+  sha256: c38f5041d0a99d94cee17f26029e4c02f3247bfb39cbe12d8f2c3dcf5f656eaa
+  md5: f904a04f3e173de15d3c31bd3dfc21c7
+  depends:
+  - __glibc >=2.28,<3.0.a0
+  - cuda-nvrtc
+  - cuda-version >=13.1,<13.2.0a0
+  - libgcc >=14
+  - libstdcxx >=14
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 376501341
+  timestamp: 1768276465220
+- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcublas-12.0.1.189-hac28a21_3.conda
+  sha256: befa2389febbff1541fa2bf542c98b3b32f2c569c53fca95c439796224c0dae3
+  md5: b16ccbf9d633bdce9cf5b3363a468c41
+  depends:
+  - cuda-nvrtc
+  - cuda-version >=12.0,<12.1.0a0
+  - libgcc-ng >=12
+  - libstdcxx-ng >=12
+  constrains:
+  - arm-variant * sbsa
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 233886126
+  timestamp: 1701931743428
+- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcublas-12.2.5.6-hac28a21_0.conda
+  sha256: 52ae33e756f22f7a82038a409d58ce52fa8a9c45896417662f101d83c36139a6
+  md5: b8f9003432a6b58e2bbd174910f9df84
+  depends:
+  - cuda-nvrtc
+  - cuda-version >=12.2,<12.3.0a0
+  - libgcc-ng >=12
+  - libstdcxx-ng >=12
+  constrains:
+  - arm-variant * sbsa
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 259896450
+  timestamp: 1702976080471
 - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcublas-12.8.4.1-hd55a8e4_1.conda
   sha256: 7d10a5b2750faccc39dd66d28ca5b74cb618d3445ed8c933d51736dba2b7bcc4
   md5: 8d6b39fb6f62e3e1b278774c00b115ac
@@ -14088,6 +15501,48 @@ packages:
   purls: []
   size: 516220026
   timestamp: 1764897082131
+- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcublas-13.2.1.1-he38c790_0.conda
+  sha256: ba0e73bc783f6eb34770dbd2296c437b1b4c8ea888ac76beb2fe30643eb62883
+  md5: 295ab160a641ff6f42b9ba50669f7e1a
+  depends:
+  - __glibc >=2.28,<3.0.a0
+  - arm-variant * sbsa
+  - cuda-nvrtc
+  - cuda-version >=13.1,<13.2.0a0
+  - libgcc >=14
+  - libstdcxx >=14
+  constrains:
+  - arm-variant * sbsa
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 481495336
+  timestamp: 1768276502914
+- conda: https://conda.anaconda.org/conda-forge/win-64/libcublas-12.0.1.189-h63175ca_3.conda
+  sha256: d39c6d2e01dad4e9b06707f7343150e423042fd2c65cc5772333ab82d4132bb1
+  md5: c69ce5f6ea90ad064df6960636acaf15
+  depends:
+  - cuda-nvrtc
+  - cuda-version >=12.0,<12.1.0a0
+  - ucrt >=10.0.20348.0
+  - vc >=14.2,<15
+  - vc14_runtime >=14.29.30139
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 247048799
+  timestamp: 1701932385460
+- conda: https://conda.anaconda.org/conda-forge/win-64/libcublas-12.2.5.6-h63175ca_0.conda
+  sha256: 2e0abbb96a9aefd0e6c284df7ca6223e48ee55304cb0fea72cd7db59489eac8e
+  md5: d695bf389c6314948a130aa6334c58c2
+  depends:
+  - cuda-nvrtc
+  - cuda-version >=12.2,<12.3.0a0
+  - ucrt >=10.0.20348.0
+  - vc >=14.2,<15
+  - vc14_runtime >=14.29.30139
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 288422075
+  timestamp: 1702976743208
 - conda: https://conda.anaconda.org/conda-forge/win-64/libcublas-12.8.4.1-he0c23c2_1.conda
   sha256: 7a4c53bbcf77c37033777acd1ff60b4664615ae67fff245718d43db422feac59
   md5: 626453d0b7f7b9f3c3a92e4398314714
@@ -14140,6 +15595,19 @@ packages:
   purls: []
   size: 388564116
   timestamp: 1764897124611
+- conda: https://conda.anaconda.org/conda-forge/win-64/libcublas-13.2.1.1-hac47afa_0.conda
+  sha256: 0e7180aed3a41eff2c5a3df079abb3ea86612eea18f833febe858cebac0a3e96
+  md5: d56da2a29117df5d879594b5e58fc3a5
+  depends:
+  - cuda-nvrtc
+  - cuda-version >=13.1,<13.2.0a0
+  - ucrt >=10.0.20348.0
+  - vc >=14.3,<15
+  - vc14_runtime >=14.44.35208
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 371899218
+  timestamp: 1768276556597
 - conda: https://conda.anaconda.org/conda-forge/linux-64/libcudnn-9.10.2.21-hf7e9902_0.conda
   sha256: dc6b89e874867b2cdf08224059bd1543cbb72ed646da177c1454596469c9a4bb
   md5: a178a1f3642521f104ecceeefa138d01
@@ -14293,6 +15761,30 @@ packages:
   purls: []
   size: 61127411
   timestamp: 1761105599209
+- conda: https://conda.anaconda.org/conda-forge/linux-64/libcufft-11.0.0.21-hd3aeb46_2.conda
+  sha256: ed62279e20761c033525a550dc753327103f53aa37bf441c40db2f37950b7b50
+  md5: 5dbf17a732e01fed414a22bdf89aaaad
+  depends:
+  - __glibc >=2.17,<3.0.a0
+  - cuda-version >=12.0,<12.1.0a0
+  - libgcc-ng >=12
+  - libstdcxx-ng >=12
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 44795345
+  timestamp: 1701904310549
+- conda: https://conda.anaconda.org/conda-forge/linux-64/libcufft-11.0.8.103-hd3aeb46_0.conda
+  sha256: af72a643d81c2401be7e5ccb8f2eb033e8254531ccd521101e9af8609817b5bf
+  md5: e6ca97f313721442e41e725ce7b3b75a
+  depends:
+  - __glibc >=2.17,<3.0.a0
+  - cuda-version >=12.2,<12.3.0a0
+  - libgcc-ng >=12
+  - libstdcxx-ng >=12
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 62856006
+  timestamp: 1702938780985
 - conda: https://conda.anaconda.org/conda-forge/linux-64/libcufft-11.3.3.83-h5888daf_1.conda
   sha256: 1a38727a9666b7020ad844fd5074693b2c378d0161f58401d9f8488bdeb920a1
   md5: d0d12b6842be47267e3214e7ab2b1b02
@@ -14341,6 +15833,44 @@ packages:
   purls: []
   size: 192378644
   timestamp: 1764880073980
+- conda: https://conda.anaconda.org/conda-forge/linux-64/libcufft-12.1.0.78-hecca717_0.conda
+  sha256: 4f8951e942210116ee6e1548c25774009afddc59e494b5eac0e5ca539196d1b5
+  md5: 58a7aa38206ea03a9eb6ccbcc012901e
+  depends:
+  - __glibc >=2.17,<3.0.a0
+  - cuda-version >=13.1,<13.2.0a0
+  - libgcc >=14
+  - libstdcxx >=14
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 192379210
+  timestamp: 1768273636415
+- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcufft-11.0.0.21-hac28a21_2.conda
+  sha256: c9647dedc5da9a60ca1d88e8f82a42e7b1837f3d2bccd294bb46b218795d498e
+  md5: cbd87df968670b2d4d752b22657591fe
+  depends:
+  - cuda-version >=12.0,<12.1.0a0
+  - libgcc-ng >=12
+  - libstdcxx-ng >=12
+  constrains:
+  - arm-variant * sbsa
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 44814329
+  timestamp: 1701904278310
+- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcufft-11.0.8.103-hac28a21_0.conda
+  sha256: ba19464e72391d1f7b45b862fa93c8e87cb0821148ae36b91cadcb3833f35b57
+  md5: b7a1c44db1312dd191ff21ecd82076c5
+  depends:
+  - cuda-version >=12.2,<12.3.0a0
+  - libgcc-ng >=12
+  - libstdcxx-ng >=12
+  constrains:
+  - arm-variant * sbsa
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 62915617
+  timestamp: 1702938781901
 - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcufft-11.3.3.83-h3ae8b8a_1.conda
   sha256: d5cb9df683d7ea22184714b5c0569a5decf0a332d81c241b60ff68599a5ccc06
   md5: 093577dd6d3b9be7d3f7a6ecb01dcf01
@@ -14394,6 +15924,44 @@ packages:
   purls: []
   size: 192843651
   timestamp: 1764880098927
+- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcufft-12.1.0.78-h8f3c8d4_0.conda
+  sha256: 82f4715e0c6aa59080531d816bb847e3096635625645fdd8046fa6c1d248ef2e
+  md5: 1bd80ebee861a876bdf7860d559f4866
+  depends:
+  - arm-variant * sbsa
+  - cuda-version >=13.1,<13.2.0a0
+  - libgcc >=14
+  - libstdcxx >=14
+  constrains:
+  - arm-variant * sbsa
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 192700443
+  timestamp: 1768273669731
+- conda: https://conda.anaconda.org/conda-forge/win-64/libcufft-11.0.0.21-h63175ca_2.conda
+  sha256: b06554c3106338de6dd85b6b697dfd27d823067adcf0e7236110fa0ea49cc6b9
+  md5: 403b53342b3588579e16772a18722739
+  depends:
+  - cuda-version >=12.0,<12.1.0a0
+  - ucrt >=10.0.20348.0
+  - vc >=14.2,<15
+  - vc14_runtime >=14.29.30139
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 44262157
+  timestamp: 1701904877029
+- conda: https://conda.anaconda.org/conda-forge/win-64/libcufft-11.0.8.103-h63175ca_0.conda
+  sha256: af4f043218b7584fe9c1d4f0cf40edfdfd01637fedbfaf100273a8ba131dafc0
+  md5: 3e0d3168dcaea961f6ffa665b0c27c40
+  depends:
+  - cuda-version >=12.2,<12.3.0a0
+  - ucrt >=10.0.20348.0
+  - vc >=14.2,<15
+  - vc14_runtime >=14.29.30139
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 63117431
+  timestamp: 1702939178613
 - conda: https://conda.anaconda.org/conda-forge/win-64/libcufft-11.3.3.83-he0c23c2_1.conda
   sha256: 083ba1d13f5512dae13fd7e3785336d578bc66f01c88917bbf1f53923339a5e4
   md5: 6e4c0fa04966e643cbe847321bdeee54
@@ -14442,6 +16010,18 @@ packages:
   purls: []
   size: 192328577
   timestamp: 1764880153393
+- conda: https://conda.anaconda.org/conda-forge/win-64/libcufft-12.1.0.78-hac47afa_0.conda
+  sha256: 7bf34c7298350325e0f23b2483f53e015fff446c03dd8d75c500cc5dbb5cee62
+  md5: a8ce534392102f2b3109dcee4702468a
+  depends:
+  - cuda-version >=13.1,<13.2.0a0
+  - ucrt >=10.0.20348.0
+  - vc >=14.3,<15
+  - vc14_runtime >=14.44.35208
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 192328586
+  timestamp: 1768273720164
 - conda: https://conda.anaconda.org/conda-forge/linux-64/libcufile-1.13.1.3-h628e99a_1.conda
   sha256: 213f5df6ed25d19c4390666708a32ea457b1dcda64aca121f861b94671e2ed63
   md5: 9a97a35e7e63910013d638c389fa3514
@@ -14666,6 +16246,18 @@ packages:
   purls: []
   size: 43737577
   timestamp: 1764879942081
+- conda: https://conda.anaconda.org/conda-forge/linux-64/libcurand-10.4.1.81-h676940d_0.conda
+  sha256: bba28a650b35f221eaad9537df4a6f1d86b2fa617e52f56194ad2a959f84736c
+  md5: 5926fbc6df184a110130a310608cb5e8
+  depends:
+  - __glibc >=2.28,<3.0.a0
+  - cuda-version >=13.1,<13.2.0a0
+  - libgcc >=14
+  - libstdcxx >=14
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 43775293
+  timestamp: 1768273736749
 - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcurand-10.3.1.50-hac28a21_1.conda
   sha256: 6c749658411c13e639977cce1da74dfacb693c4348fadffe09780c04fa4809b5
   md5: 72936062b7c649fc03b0a52e2ba54275
@@ -14749,6 +16341,21 @@ packages:
   purls: []
   size: 44154661
   timestamp: 1764879984766
+- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcurand-10.4.1.81-he38c790_0.conda
+  sha256: ef4300b83ea202e459e917a4f159478074fdc10c51f3061374361e9b89b6ba04
+  md5: b02eb8fbb430bd99f7a870382a91c24d
+  depends:
+  - __glibc >=2.28,<3.0.a0
+  - arm-variant * sbsa
+  - cuda-version >=13.1,<13.2.0a0
+  - libgcc >=14
+  - libstdcxx >=14
+  constrains:
+  - arm-variant * sbsa
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 44099763
+  timestamp: 1768273767993
 - conda: https://conda.anaconda.org/conda-forge/win-64/libcurand-10.3.1.50-h63175ca_1.conda
   sha256: 3030074dcf96f4e397e4ba778d802900249a61388876cde06dc97257b2a2bc16
   md5: af9c9c9ae729b884dcc5dc48b3bb205a
@@ -14821,6 +16428,47 @@ packages:
   purls: []
   size: 46140551
   timestamp: 1764880079531
+- conda: https://conda.anaconda.org/conda-forge/win-64/libcurand-10.4.1.81-hac47afa_0.conda
+  sha256: 807515b768161a684b097a6959fabd013fad813ca595b3fd25e9b53b0c796487
+  md5: 753cb0f8717a35b53215a18c009953b2
+  depends:
+  - cuda-version >=13.1,<13.2.0a0
+  - ucrt >=10.0.20348.0
+  - vc >=14.3,<15
+  - vc14_runtime >=14.44.35208
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 46201230
+  timestamp: 1768273862521
+- conda: https://conda.anaconda.org/conda-forge/linux-64/libcusolver-11.4.2.57-hd3aeb46_2.conda
+  sha256: 65e4acdce5c358c57f0d263c87c39346695d0954855868bff60cb066043c7632
+  md5: a684e4ff8d2a6a100249377aa9d37a5c
+  depends:
+  - __glibc >=2.17,<3.0.a0
+  - cuda-version >=12.0,<12.1.0a0
+  - libcublas >=12.0.1.189,<12.1.0a0
+  - libgcc-ng >=12
+  - libnvjitlink >=12.0.76,<13.0.0a0
+  - libstdcxx-ng >=12
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 58748256
+  timestamp: 1701944344928
+- conda: https://conda.anaconda.org/conda-forge/linux-64/libcusolver-11.5.2.141-hd3aeb46_0.conda
+  sha256: a83322a1ede77e652acc3330d68f0428e28b198c3f7517bd3f1aeaf577232363
+  md5: 4ee6abbff18849a3036a1678771e4800
+  depends:
+  - __glibc >=2.17,<3.0.a0
+  - cuda-version >=12.2,<12.3.0a0
+  - libcublas >=12.2.5.6,<12.3.0a0
+  - libcusparse >=12.1.2.141,<12.2.0a0
+  - libgcc-ng >=12
+  - libnvjitlink >=12.2.140,<13.0.0a0
+  - libstdcxx-ng >=12
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 79957553
+  timestamp: 1703004799401
 - conda: https://conda.anaconda.org/conda-forge/linux-64/libcusolver-11.7.3.90-h9ab20c4_1.conda
   sha256: 868ba1b0b0ae15f7621ee960a459a74b9a17b69ba629c510a11bb37480e7b6df
   md5: 2d58a7eb9150525ea89195cf1bcfbc4c
@@ -14881,6 +16529,52 @@ packages:
   purls: []
   size: 161086488
   timestamp: 1764943396933
+- conda: https://conda.anaconda.org/conda-forge/linux-64/libcusolver-12.0.9.81-h676940d_0.conda
+  sha256: d6181d5fe7fbc36304577fbb50add02382ae9e7c6b1b598d310945bd12272f0b
+  md5: 17a342e69a0821ecf76a0e79a2044288
+  depends:
+  - __glibc >=2.28,<3.0.a0
+  - cuda-version >=13.1,<13.2.0a0
+  - libcublas >=13.2.1.1,<13.3.0a0
+  - libcusparse >=12.7.3.1,<12.8.0a0
+  - libgcc >=14
+  - libnvjitlink >=13.1.115,<14.0a0
+  - libstdcxx >=14
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 161188241
+  timestamp: 1768286542683
+- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcusolver-11.4.2.57-hac28a21_2.conda
+  sha256: 83e01fddb31617623fc7475aa84db9efc0498cc76aca88e42e86f71442872f6c
+  md5: 7fbef3231f572b4b7c3bfe8efd6fcb5c
+  depends:
+  - cuda-version >=12.0,<12.1.0a0
+  - libcublas >=12.0.1.189,<12.1.0a0
+  - libgcc-ng >=12
+  - libnvjitlink >=12.0.76,<13.0.0a0
+  - libstdcxx-ng >=12
+  constrains:
+  - arm-variant * sbsa
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 58531702
+  timestamp: 1701944296106
+- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcusolver-11.5.2.141-hac28a21_0.conda
+  sha256: 8b84ec1fcee407676bc5dee930747ee6fd4b887d8a3f9ad69d98705260c7ae2d
+  md5: 4b628857805683900422fea3a166cd6f
+  depends:
+  - cuda-version >=12.2,<12.3.0a0
+  - libcublas >=12.2.5.6,<12.3.0a0
+  - libcusparse >=12.1.2.141,<12.2.0a0
+  - libgcc-ng >=12
+  - libnvjitlink >=12.2.140,<13.0.0a0
+  - libstdcxx-ng >=12
+  constrains:
+  - arm-variant * sbsa
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 79719976
+  timestamp: 1703004749317
 - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcusolver-11.7.3.90-hd55a8e4_1.conda
   sha256: 5016ad770146b3eb3739ee4213f82d3afed125626dbb77f0ee4b421cb9ab6d63
   md5: 7b044a3b61ea805e90e91f750c0e70dd
@@ -14950,6 +16644,53 @@ packages:
   purls: []
   size: 177727995
   timestamp: 1764943428002
+- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcusolver-12.0.9.81-he38c790_0.conda
+  sha256: ce671884833cfed45128a7be1d6102242c394524a654b4ba3921ec49a856a6e7
+  md5: c1aa3d742409b794d096fcaf6aaf3c1a
+  depends:
+  - __glibc >=2.28,<3.0.a0
+  - arm-variant * sbsa
+  - cuda-version >=13.1,<13.2.0a0
+  - libcublas >=13.2.1.1,<13.3.0a0
+  - libcusparse >=12.7.3.1,<12.8.0a0
+  - libgcc >=14
+  - libnvjitlink >=13.1.115,<14.0a0
+  - libstdcxx >=14
+  constrains:
+  - arm-variant * sbsa
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 177825017
+  timestamp: 1768286571769
+- conda: https://conda.anaconda.org/conda-forge/win-64/libcusolver-11.4.2.57-h63175ca_2.conda
+  sha256: 1486f5ced304b13ec1e8fb2af3e4134aeb8c1bc98d5c13c864c48c2f9e42cfa6
+  md5: 11f11b1971bd9a2e39eade3206c6e63a
+  depends:
+  - cuda-version >=12.0,<12.1.0a0
+  - libcublas >=12.0.1.189,<12.1.0a0
+  - libnvjitlink >=12.0.76,<13.0.0a0
+  - ucrt >=10.0.20348.0
+  - vc >=14.2,<15
+  - vc14_runtime >=14.29.30139
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 56552114
+  timestamp: 1701944947700
+- conda: https://conda.anaconda.org/conda-forge/win-64/libcusolver-11.5.2.141-h63175ca_0.conda
+  sha256: 7073d934f6d2dd607a4f987efa2c2d16b0e68340db7637b8e98ff4a1004d3ca3
+  md5: 0ff5423da121b524f887e8f24c6a55df
+  depends:
+  - cuda-version >=12.2,<12.3.0a0
+  - libcublas >=12.2.5.6,<12.3.0a0
+  - libcusparse >=12.1.2.141,<12.2.0a0
+  - libnvjitlink >=12.2.140,<13.0.0a0
+  - ucrt >=10.0.20348.0
+  - vc >=14.2,<15
+  - vc14_runtime >=14.29.30139
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 77792716
+  timestamp: 1703005402425
 - conda: https://conda.anaconda.org/conda-forge/win-64/libcusolver-11.7.3.90-he0c23c2_1.conda
   sha256: c967651aab88a4a9a761be0b027b460c36850a9cd9df03890ce5bf833cef8c9f
   md5: 830a8909cfd5427f57b93ca6e468c1dd
@@ -15010,6 +16751,47 @@ packages:
   purls: []
   size: 156777611
   timestamp: 1764943590003
+- conda: https://conda.anaconda.org/conda-forge/win-64/libcusolver-12.0.9.81-hac47afa_0.conda
+  sha256: 660e6b88a56b9b125e9f3e44975baf75249bee32505960b7906c1e8ba84bc9e3
+  md5: 79dca8cbbf9f76e1b298f3538c6c3bb8
+  depends:
+  - cuda-version >=13.1,<13.2.0a0
+  - libcublas >=13.2.1.1,<13.3.0a0
+  - libcusparse >=12.7.3.1,<12.8.0a0
+  - libnvjitlink >=13.1.115,<14.0a0
+  - ucrt >=10.0.20348.0
+  - vc >=14.3,<15
+  - vc14_runtime >=14.44.35208
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 156887400
+  timestamp: 1768286696520
+- conda: https://conda.anaconda.org/conda-forge/linux-64/libcusparse-12.0.0.76-hd3aeb46_2.conda
+  sha256: def44b0e57a59bc060bc69fb1c79c39cf281efe8980cd78840cb092ada5eda19
+  md5: 91072eaa64ea11a9f804547806dbacf0
+  depends:
+  - __glibc >=2.17,<3.0.a0
+  - cuda-version >=12.0,<12.1.0a0
+  - libgcc-ng >=12
+  - libnvjitlink >=12.0.76,<13.0.0a0
+  - libstdcxx-ng >=12
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 98176542
+  timestamp: 1701931152417
+- conda: https://conda.anaconda.org/conda-forge/linux-64/libcusparse-12.1.2.141-hd3aeb46_0.conda
+  sha256: 48ab25898ae3315a9dce7f5a5ad2c1d5bce84c78c757f54dce4a43c65d436af4
+  md5: 3b4528c647c041ec53a883023ef4f054
+  depends:
+  - __glibc >=2.17,<3.0.a0
+  - cuda-version >=12.2,<12.3.0a0
+  - libgcc-ng >=12
+  - libnvjitlink >=12.2.140,<13.0.0a0
+  - libstdcxx-ng >=12
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 112121082
+  timestamp: 1702970684025
 - conda: https://conda.anaconda.org/conda-forge/linux-64/libcusparse-12.5.10.65-hecca717_2.conda
   sha256: 7b511549a22df408d36dadbeabdfd9c35b124d9d6f000b29ffcbe4b38b7faeb7
   md5: 890ebfaad48c887d3d82847ec9d6bc79
@@ -15062,6 +16844,47 @@ packages:
   purls: []
   size: 144184696
   timestamp: 1764886592758
+- conda: https://conda.anaconda.org/conda-forge/linux-64/libcusparse-12.7.3.1-hecca717_0.conda
+  sha256: 86b31339206cb44c2cddeea4684de748d39ecc89c45c884a92e653f0af2986c6
+  md5: 915b747d67493ba94a0d9b79095cc06d
+  depends:
+  - __glibc >=2.17,<3.0.a0
+  - cuda-version >=13.1,<13.2.0a0
+  - libgcc >=14
+  - libnvjitlink >=13.1.115,<14.0a0
+  - libstdcxx >=14
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 145513192
+  timestamp: 1768280223267
+- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcusparse-12.0.0.76-hac28a21_2.conda
+  sha256: fa7b204b0b25ab4a61db98ef8c0d8ccc7d5fc158bcc89f95eedd4286af67ba9b
+  md5: 2d5bbfce1a53628178df9d711445cd60
+  depends:
+  - cuda-version >=12.0,<12.1.0a0
+  - libgcc-ng >=12
+  - libnvjitlink >=12.0.76,<13.0.0a0
+  - libstdcxx-ng >=12
+  constrains:
+  - arm-variant * sbsa
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 98122549
+  timestamp: 1701931113993
+- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcusparse-12.1.2.141-hac28a21_0.conda
+  sha256: 50b0c4c09aa576dce40ae62fe45253b244fd01c4024b0efbc34bec3532db8ded
+  md5: 4cfd7e21691a81d22e483b08f384b594
+  depends:
+  - cuda-version >=12.2,<12.3.0a0
+  - libgcc-ng >=12
+  - libnvjitlink >=12.2.140,<13.0.0a0
+  - libstdcxx-ng >=12
+  constrains:
+  - arm-variant * sbsa
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 112124005
+  timestamp: 1702970635167
 - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcusparse-12.5.10.65-h8f3c8d4_2.conda
   sha256: 9dbee8f1bfa9a876d24b12a34d4a022f33e584669c59bf93368b79d0bf55cd2f
   md5: 1e0731f3e9f303e6106a8fdd359a272e
@@ -15120,6 +16943,47 @@ packages:
   purls: []
   size: 160004278
   timestamp: 1764886666561
+- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcusparse-12.7.3.1-h8f3c8d4_0.conda
+  sha256: 85f20536cc261bf285ca2d9730d2b27669d862a38fa70a54a236d574be913f7b
+  md5: 73816ec8be4d675a1933cd0dc382382a
+  depends:
+  - arm-variant * sbsa
+  - cuda-version >=13.1,<13.2.0a0
+  - libgcc >=14
+  - libnvjitlink >=13.1.115,<14.0a0
+  - libstdcxx >=14
+  constrains:
+  - arm-variant * sbsa
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 161564363
+  timestamp: 1768280242337
+- conda: https://conda.anaconda.org/conda-forge/win-64/libcusparse-12.0.0.76-h63175ca_2.conda
+  sha256: 7ac8438172e0712ae6e2ebe790f4a9117b1764a6a30f29513b0b4c6a36ae9211
+  md5: 18a3190fb1e98ce0765dca19a880997a
+  depends:
+  - cuda-version >=12.0,<12.1.0a0
+  - libnvjitlink >=12.0.76,<13.0.0a0
+  - ucrt >=10.0.20348.0
+  - vc >=14.2,<15
+  - vc14_runtime >=14.29.30139
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 97602376
+  timestamp: 1701931624725
+- conda: https://conda.anaconda.org/conda-forge/win-64/libcusparse-12.1.2.141-h63175ca_0.conda
+  sha256: d58adb5b76459c082c0c903ce798c9057b6c6e284b60117efc811b46b39abf96
+  md5: c689031410d83ceefe2c2299040f9de6
+  depends:
+  - cuda-version >=12.2,<12.3.0a0
+  - libnvjitlink >=12.2.140,<13.0.0a0
+  - ucrt >=10.0.20348.0
+  - vc >=14.2,<15
+  - vc14_runtime >=14.29.30139
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 111548772
+  timestamp: 1702971058166
 - conda: https://conda.anaconda.org/conda-forge/win-64/libcusparse-12.5.10.65-hac47afa_2.conda
   sha256: fc911af27ab28af77d4b7203c6c9ebb15f4ddf27af8e8331d9a9983f4dd96483
   md5: 4e84a8282a9c1802ec4f516090164228
@@ -15172,6 +17036,19 @@ packages:
   purls: []
   size: 142426523
   timestamp: 1764886657256
+- conda: https://conda.anaconda.org/conda-forge/win-64/libcusparse-12.7.3.1-hac47afa_0.conda
+  sha256: 1ac52f373db5c5e00c1978f0bc6b2c2c576c80fba8801086ccb142d46eff0a4e
+  md5: 36a861ab5d2c5fd0a63395bbd6bab7d2
+  depends:
+  - cuda-version >=13.1,<13.2.0a0
+  - libnvjitlink >=13.1.115,<14.0a0
+  - ucrt >=10.0.20348.0
+  - vc >=14.3,<15
+  - vc14_runtime >=14.44.35208
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 143956601
+  timestamp: 1768280260283
 - conda: https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.7.3-hecca717_0.conda
   sha256: 1e1b08f6211629cbc2efe7a5bca5953f8f6b3cae0eeb04ca4dacee1bd4e2db2f
   md5: 8b09ae86839581147ef2e5c5e229d164
@@ -15578,6 +17455,7 @@ packages:
   constrains:
   - xz 5.8.2.*
   license: 0BSD
+  purls: []
   size: 113207
   timestamp: 1768752626120
 - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/liblzma-5.8.1-h86ecc28_2.conda
@@ -16096,6 +17974,18 @@ packages:
   purls: []
   size: 31218311
   timestamp: 1757021832026
+- conda: https://conda.anaconda.org/conda-forge/linux-64/libnvjitlink-13.1.115-hecca717_0.conda
+  sha256: 1ce8ac2f6fb3aaab065599f74b1e1bc68affc0804a081da239ab2c727abdc1cb
+  md5: 6cd0aefa03c679824ee5047ed39b0a09
+  depends:
+  - __glibc >=2.17,<3.0.a0
+  - cuda-version >=13,<13.2.0a0
+  - libgcc >=14
+  - libstdcxx >=14
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 31331227
+  timestamp: 1768274146966
 - conda: https://conda.anaconda.org/conda-forge/linux-64/libnvjitlink-13.1.80-hecca717_0.conda
   sha256: 1ccfcadcd096e225a4d3a10c7d35363fa3ef02e97b54efb6ef50c8849aec4804
   md5: 12c045632ae898f40024b7a1d61fc100
@@ -16147,6 +18037,20 @@ packages:
   purls: []
   size: 29710724
   timestamp: 1757021907780
+- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libnvjitlink-13.1.115-h8f3c8d4_0.conda
+  sha256: 49ff65205602d2535586e646008ff0577a92bf6f16de9c4cc6a10473caf3d700
+  md5: e211b0e0846d538f23296214de1d35a6
+  depends:
+  - arm-variant * sbsa
+  - cuda-version >=13,<13.2.0a0
+  - libgcc >=14
+  - libstdcxx >=14
+  constrains:
+  - arm-variant * sbsa
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 29775481
+  timestamp: 1768274109937
 - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libnvjitlink-13.1.80-h8f3c8d4_0.conda
   sha256: 3ffb88544e6407cad2b82a9e6b405a28ba6d56d600f8f58c3b6cda62d844f94e
   md5: d69b83167de6fd594dcf3b93ef82cf90
@@ -16197,6 +18101,18 @@ packages:
   purls: []
   size: 27704690
   timestamp: 1757021910611
+- conda: https://conda.anaconda.org/conda-forge/win-64/libnvjitlink-13.1.115-hac47afa_0.conda
+  sha256: 7a07c089f3d58552caad6151a0aaa6366231078f4dec4c6b4bd15aa06490daf6
+  md5: 27d92a3cc46bebee72ad41931c8442f5
+  depends:
+  - cuda-version >=13,<13.2.0a0
+  - ucrt >=10.0.20348.0
+  - vc >=14.3,<15
+  - vc14_runtime >=14.44.35208
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 28186019
+  timestamp: 1768274186462
 - conda: https://conda.anaconda.org/conda-forge/win-64/libnvjitlink-13.1.80-hac47afa_0.conda
   sha256: e83551c06b6594ad5bc3eeeed09ead80607b422dee660657262e77fa26648d51
   md5: 792c82dd2a996b65970ec5789c43840f
@@ -17776,22 +19692,6 @@ packages:
   - pylint>=2.6.0 ; extra == 'dev'
   - pyink ; extra == 'dev'
   requires_python: '>=3.9'
-- pypi: https://files.pythonhosted.org/packages/79/2b/a826ba18d2179a56e144aef69e57fb2ab7c464ef0b2111940ee8a3a223a2/ml_dtypes-0.5.4-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl
-  name: ml-dtypes
-  version: 0.5.4
-  sha256: 0d2ffd05a2575b1519dc928c0b93c06339eb67173ff53acb00724502cda231cf
-  requires_dist:
-  - numpy>=1.21
-  - numpy>=1.21.2 ; python_full_version >= '3.10'
-  - numpy>=1.23.3 ; python_full_version >= '3.11'
-  - numpy>=1.26.0 ; python_full_version >= '3.12'
-  - numpy>=2.1.0 ; python_full_version >= '3.13'
-  - absl-py ; extra == 'dev'
-  - pytest ; extra == 'dev'
-  - pytest-xdist ; extra == 'dev'
-  - pylint>=2.6.0 ; extra == 'dev'
-  - pyink ; extra == 'dev'
-  requires_python: '>=3.9'
 - pypi: https://files.pythonhosted.org/packages/a9/80/19189ea605017473660e43762dc853d2797984b3c7bf30ce656099add30c/ml_dtypes-0.5.4-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl
   name: ml-dtypes
   version: 0.5.4
@@ -18790,21 +20690,6 @@ packages:
   - python_abi 3.14.* *_cp314
   - numpy >=1.23,<3
   license: BSD-2-Clause
-- pypi: https://files.pythonhosted.org/packages/10/a7/cfbe475c35371cae1358e61f20c5f075badc18c4797ab4354140e1d283cf/numpy-2.4.1-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl
-  name: numpy
-  version: 2.4.1
-  sha256: 52b5f61bdb323b566b528899cc7db2ba5d1015bda7ea811a8bcf3c89c331fa42
-  requires_python: '>=3.11'
-- pypi: https://files.pythonhosted.org/packages/7e/bb/c6513edcce5a831810e2dddc0d3452ce84d208af92405a0c2e58fd8e7881/numpy-2.4.1-cp314-cp314-win_amd64.whl
-  name: numpy
-  version: 2.4.1
-  sha256: 7d5d7999df434a038d75a748275cd6c0094b0ecdb0837342b332a82defc4dc4d
-  requires_python: '>=3.11'
-- pypi: https://files.pythonhosted.org/packages/c0/c4/2e7908915c0e32ca636b92e4e4a3bdec4cb1e7eb0f8aedf1ed3c68a0d8cd/numpy-2.4.1-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl
-  name: numpy
-  version: 2.4.1
-  sha256: 5d558123217a83b2d1ba316b986e9248a1ed1971ad495963d555ccd75dcb1556
-  requires_python: '>=3.11'
 - conda: https://conda.anaconda.org/conda-forge/linux-64/numpy-2.2.6-py310hefbff90_0.conda
   sha256: 0ba94a61f91d67413e60fa8daa85627a8f299b5054b0eff8f93d26da83ec755e
   md5: b0cea2c364bf65cd19e023040eeab05d
@@ -18905,6 +20790,26 @@ packages:
   - pkg:pypi/numpy?source=hash-mapping
   size: 8983076
   timestamp: 1766383421113
+- conda: https://conda.anaconda.org/conda-forge/linux-64/numpy-2.4.1-py314h2b28147_0.conda
+  sha256: 9af4bb8fef69f8b3c254b32da93bc63b7376b60b72c6ed9104fd3ad23a70891c
+  md5: 9536e29f857e5d0565e92fd1b54de16a
+  depends:
+  - python
+  - __glibc >=2.17,<3.0.a0
+  - libgcc >=14
+  - libstdcxx >=14
+  - libcblas >=3.9.0,<4.0a0
+  - liblapack >=3.9.0,<4.0a0
+  - python_abi 3.14.* *_cp314
+  - libblas >=3.9.0,<4.0a0
+  constrains:
+  - numpy-base <0a0
+  license: BSD-3-Clause
+  license_family: BSD
+  purls:
+  - pkg:pypi/numpy?source=compressed-mapping
+  size: 8926121
+  timestamp: 1768085696128
 - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/numpy-2.2.6-py310h6e5608f_0.conda
   sha256: d7234b9c45e4863c7d4c5221c1e91d69b0e0009464bf361c3fea47e64dc4adc2
   md5: 9e9f1f279eb02c41bda162a42861adc0
@@ -19005,6 +20910,26 @@ packages:
   - pkg:pypi/numpy?source=hash-mapping
   size: 7815157
   timestamp: 1766383452981
+- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/numpy-2.4.1-py314haac167e_0.conda
+  sha256: 807cbff20a80d6975e6b52eb2a4629e53954ae36d3cf77ebf6caa525a01e06e8
+  md5: b95e050a9b3267193cda87e65b85c75d
+  depends:
+  - python
+  - libstdcxx >=14
+  - libgcc >=14
+  - python 3.14.* *_cp314
+  - libblas >=3.9.0,<4.0a0
+  - libcblas >=3.9.0,<4.0a0
+  - python_abi 3.14.* *_cp314
+  - liblapack >=3.9.0,<4.0a0
+  constrains:
+  - numpy-base <0a0
+  license: BSD-3-Clause
+  license_family: BSD
+  purls:
+  - pkg:pypi/numpy?source=hash-mapping
+  size: 8004218
+  timestamp: 1768085695526
 - conda: https://conda.anaconda.org/conda-forge/win-64/numpy-2.2.6-py310h4987827_0.conda
   sha256: 6f628e51763b86a535a723664e3aa1e38cb7147a2697f80b75c1980c1ed52f3e
   md5: d2596785ac2cf5bab04e2ee9e5d04041
@@ -19105,6 +21030,26 @@ packages:
   - pkg:pypi/numpy?source=compressed-mapping
   size: 7584934
   timestamp: 1766383321713
+- conda: https://conda.anaconda.org/conda-forge/win-64/numpy-2.4.1-py314h06c3c77_0.conda
+  sha256: 4bcbbe320525c49f2ddf61123e4281ff76d2ba9a737dea90e14370534c6ec1f9
+  md5: 794ac87f08dcca30be8c6ebfa8a5b2d1
+  depends:
+  - python
+  - vc >=14.3,<15
+  - vc14_runtime >=14.44.35208
+  - ucrt >=10.0.20348.0
+  - python_abi 3.14.* *_cp314
+  - liblapack >=3.9.0,<4.0a0
+  - libcblas >=3.9.0,<4.0a0
+  - libblas >=3.9.0,<4.0a0
+  constrains:
+  - numpy-base <0a0
+  license: BSD-3-Clause
+  license_family: BSD
+  purls:
+  - pkg:pypi/numpy?source=hash-mapping
+  size: 7306379
+  timestamp: 1768085588568
 - conda: https://conda.anaconda.org/conda-forge/noarch/numpydoc-1.10.0-pyhcf101f3_0.conda
   sha256: 482d94fce136c4352b18c6397b9faf0a3149bfb12499ab1ffebad8db0cb6678f
   md5: 3aa4b625f20f55cf68e92df5e5bf3c39
@@ -19369,20 +21314,6 @@ packages:
   - pkg:pypi/psutil?source=hash-mapping
   size: 228170
   timestamp: 1767012382363
-- conda: https://conda.anaconda.org/conda-forge/linux-64/psutil-7.2.1-py314h3f2afee_0.conda
-  sha256: 3571148467c49837027099ec5c4bbb5473202917d66279a317f05896bd7586e7
-  md5: b2911c190fb8f5eb62be3a60adb105db
-  depends:
-  - python
-  - libgcc >=14
-  - __glibc >=2.17,<3.0.a0
-  - python_abi 3.14.* *_cp314t
-  license: BSD-3-Clause
-  license_family: BSD
-  purls:
-  - pkg:pypi/psutil?source=hash-mapping
-  size: 228971
-  timestamp: 1767012384426
 - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/psutil-7.2.1-py310hef25091_0.conda
   sha256: d6deeea23c1c40be15d8ac4171f00ebac2a2028bb09152151ec3d0a479018f31
   md5: 6d96240ee0dcba494ab8ed1b8517bdf5
@@ -19880,9 +21811,10 @@ packages:
   size: 36790521
   timestamp: 1765021515427
   python_site_packages_path: lib/python3.14/site-packages
-- conda: https://conda.anaconda.org/conda-forge/linux-64/python-3.14.2-he1279bd_0_cp314t.conda
-  sha256: 79a4be7901d977858bdf1b0024b30360d8448e30fe38bece903f855b21b88cf6
-  md5: 08a2a24f4e6907bea0ebfe22eecae6be
+- conda: https://conda.anaconda.org/conda-forge/linux-64/python-3.14.2-h32b2ec7_101_cp314.conda
+  build_number: 101
+  sha256: 24719868a471dd94041aa9873c6f87adf3b86c07878ad4e242ac97228f9e6460
+  md5: 051f60a9d1e3aae7160d173aeb7029f8
   depends:
   - __glibc >=2.17,<3.0.a0
   - bzip2 >=1.0.8,<2.0a0
@@ -19890,25 +21822,23 @@ packages:
   - libexpat >=2.7.3,<3.0a0
   - libffi >=3.5.2,<3.6.0a0
   - libgcc >=14
-  - liblzma >=5.8.1,<6.0a0
+  - liblzma >=5.8.2,<6.0a0
   - libmpdec >=4.0.0,<5.0a0
-  - libsqlite >=3.51.1,<4.0a0
-  - libuuid >=2.41.2,<3.0a0
+  - libsqlite >=3.51.2,<4.0a0
+  - libuuid >=2.41.3,<3.0a0
   - libzlib >=1.3.1,<2.0a0
   - ncurses >=6.5,<7.0a0
   - openssl >=3.5.4,<4.0a0
-  - python_abi 3.14.* *_cp314t
-  - readline >=8.2,<9.0a0
+  - python_abi 3.14.* *_cp314
+  - readline >=8.3,<9.0a0
   - tk >=8.6.13,<8.7.0a0
   - tzdata
   - zstd >=1.5.7,<1.6.0a0
-  track_features:
-  - py_freethreading
   license: Python-2.0
   purls: []
-  size: 47658766
-  timestamp: 1765021403755
-  python_site_packages_path: lib/python3.14t/site-packages
+  size: 36833080
+  timestamp: 1769458770373
+  python_site_packages_path: lib/python3.14/site-packages
 - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/python-3.10.19-h28be5d3_2_cpython.conda
   build_number: 2
   sha256: 9bdbc749cd9ee99ae4d72116aad5140e908fdf1215a417375f5e351f96372c77
@@ -20216,17 +22146,6 @@ packages:
   purls: []
   size: 6989
   timestamp: 1752805904792
-- conda: https://conda.anaconda.org/conda-forge/noarch/python_abi-3.14-8_cp314t.conda
-  build_number: 8
-  sha256: d9ed2538fba61265a330ee1b1afe99a4bb23ace706172b9464546c7e01259d63
-  md5: 3251796e09870c978e0f69fa05e38fb6
-  constrains:
-  - python 3.14.* *_cp314t
-  license: BSD-3-Clause
-  license_family: BSD
-  purls: []
-  size: 7020
-  timestamp: 1752805919426
 - conda: https://conda.anaconda.org/conda-forge/linux-64/pytorch-2.9.1-cuda129_mkl_py312_h2ff76c1_302.conda
   sha256: 3f27e7b4da22d6a6c61f16f3bf82b4766e128d8339380eca1ecb769a85a975de
   md5: 062a64a99d83ebf707bc5ee5fa32ff50
diff --git a/pixi.toml b/pixi.toml
index fa264f1bf..ac52447da 100644
--- a/pixi.toml
+++ b/pixi.toml
@@ -77,6 +77,7 @@ cffi = ">=1"
 pytest = ">=8,<9"
 pytest-xdist = ">=3.8"
 pytest-benchmark = ">=5.1"
+cupy = "*"
 
 [feature.test.pypi-dependencies]
 ml_dtypes = "*"

From 90213115d3b633bf6483c8300e4b7bb183e31fbb Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Tue, 27 Jan 2026 12:44:36 -0800
Subject: [PATCH 37/60] fix several tests

---
 .../numba/cuda/tests/cudapy/test_debuginfo.py |   2 +-
 .../numba/cuda/tests/cudapy/test_vectorize.py | 137 +++++++++++-------
 2 files changed, 83 insertions(+), 56 deletions(-)

diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py b/numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py
index c7ad945cd..cf36bf271 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py
@@ -669,7 +669,7 @@ def foo(x):
             with warnings.catch_warnings(record=True) as w:
                 warnings.simplefilter("always", NumbaDebugInfoWarning)
                 ignore_internal_warnings()
-                foo[1, 1](cuda.to_device(np.zeros(1, dtype=np.int32)))
+                foo[1, 1](cp.asarray(np.zeros(1, dtype=np.int32)))
 
             # Filter for NumbaDebugInfoWarning specifically
             debug_warnings = [
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py b/numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py
index 222dee7d0..99d8f786d 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py
@@ -13,6 +13,9 @@
 from numba.cuda.testing import skip_on_cudasim
 from numba.cuda.testing import CUDATestCase
 import unittest
+import cupy as cp
+import pytest
+from numba.cuda.testing import DeprecatedDeviceArrayApiWarning
 
 
 # Signatures to test with - these are all homogeneous in dtype, so the output
@@ -85,13 +88,15 @@ def test_1d_async(self):
             def vector_add(a, b):
                 return a + b
 
-            stream = cuda.stream()
+            nb_stream = cuda.stream()
+            stream = cp.cuda.Stream()
 
             for ty in dtypes:
                 data = np.array(np.random.random(self.N), dtype=ty)
-                device_data = cuda.to_device(data, stream)
+                with stream:
+                    device_data = cp.asarray(data)
 
-                dresult = vector_add(device_data, device_data, stream=stream)
+                dresult = vector_add(device_data, device_data, stream=nb_stream)
                 actual = dresult.copy_to_host()
 
                 expected = np.add(data, data)
@@ -160,14 +165,16 @@ def test_reduce_async(self):
             def vector_add(a, b):
                 return a + b
 
-            stream = cuda.stream()
+            nb_stream = cuda.stream()
+            stream = cp.cuda.Stream()
             dtype = np.int32
 
             for n in input_sizes:
                 x = np.arange(n, dtype=dtype)
                 expected = np.add.reduce(x)
-                dx = cuda.to_device(x, stream)
-                actual = vector_add.reduce(dx, stream=stream)
+                with stream:
+                    dx = cp.asarray(x)
+                actual = vector_add.reduce(dx, stream=nb_stream)
                 np.testing.assert_allclose(expected, actual)
                 # Compare against the input dtype as in test_reduce().
                 self.assertEqual(dtype, actual.dtype)
@@ -181,7 +188,7 @@ def vector_add(a, b):
 
             n = 10
             x = np.arange(n, dtype=np.int32)
-            dx = cuda.to_device(x)
+            dx = cp.asarray(x)
             expected = x + x
             actual = vector_add(x, dx).copy_to_host()
             np.testing.assert_equal(expected, actual)
@@ -196,11 +203,11 @@ def vector_add(a, b):
 
             n = 10
             x = np.arange(n, dtype=np.int32).reshape(2, 5)
-            dx = cuda.to_device(x)
+            dx = cp.asarray(x)
             vector_add(dx, dx, out=dx)
 
             expected = x + x
-            actual = dx.copy_to_host()
+            actual = dx.get()
             np.testing.assert_equal(expected, actual)
             self.assertEqual(expected.dtype, actual.dtype)
 
@@ -256,56 +263,76 @@ def bar(x):
 
             self.assertEqual(bar.__name__, "bar")
 
-    def test_no_transfer_for_device_data(self):
-        for vectorize in vectorize_funcs:
-            # Initialize test data on the device prior to banning host <-> device
-            # transfer
-
-            noise = np.random.randn(1, 3, 64, 64).astype(np.float32)
-            noise = cuda.to_device(noise)
-
-            # A mock of a CUDA function that always raises a CudaAPIError
-
-            def raising_transfer(*args, **kwargs):
-                raise CudaAPIError(999, "Transfer not allowed")
-
-            # Use the mock for transfers between the host and device
-
-            old_HtoD = getattr(driver, "cuMemcpyHtoD", None)
-            old_DtoH = getattr(driver, "cuMemcpyDtoH", None)
 
-            driver.cuMemcpyHtoD = raising_transfer
-            driver.cuMemcpyDtoH = raising_transfer
-
-            # Ensure that the mock functions are working as expected
+@skip_on_cudasim("ufunc API unsupported in the simulator")
+class TestCUDAVectorizeNoTransfer(CUDATestCase):
+    """Test that vectorize operations on device data don't induce transfers."""
+
+    def setUp(self):
+        """Set up mocks to block host <-> device transfers."""
+        super().setUp()
+
+        # Initialize test data on the device prior to banning host <-> device
+        # transfer
+        self.noise = np.random.randn(1, 3, 64, 64).astype(np.float32)
+        with pytest.warns(DeprecatedDeviceArrayApiWarning):
+            self.device_noise = cuda.to_device(self.noise)
+
+        # A mock of a CUDA function that always raises a CudaAPIError
+        def raising_transfer(*args, **kwargs):
+            raise CudaAPIError(999, "Transfer not allowed")
+
+        # Save the original implementations
+        self.old_HtoD = getattr(driver, "cuMemcpyHtoD", None)
+        self.old_DtoH = getattr(driver, "cuMemcpyDtoH", None)
+
+        # Replace with mocks that prevent transfers
+        driver.cuMemcpyHtoD = raising_transfer
+        driver.cuMemcpyDtoH = raising_transfer
+
+    def tearDown(self):
+        """Restore original transfer functions."""
+        # Replace our mocks with the original implementations. If there was
+        # no original implementation, simply remove ours.
+        if self.old_HtoD is not None:
+            driver.cuMemcpyHtoD = self.old_HtoD
+        else:
+            if hasattr(driver, "cuMemcpyHtoD"):
+                del driver.cuMemcpyHtoD
+
+        if self.old_DtoH is not None:
+            driver.cuMemcpyDtoH = self.old_DtoH
+        else:
+            if hasattr(driver, "cuMemcpyDtoH"):
+                del driver.cuMemcpyDtoH
+
+        super().tearDown()
+
+    def test_mock_blocks_device_to_host_transfer(self):
+        """Verify that the mock successfully blocks device-to-host transfers."""
+        with self.assertRaisesRegex(CudaAPIError, "Transfer not allowed"):
+            self.device_noise.copy_to_host()
+
+    def test_mock_blocks_host_to_device_transfer(self):
+        """Verify that the mock successfully blocks host-to-device transfers."""
+        with self.assertRaisesRegex(CudaAPIError, "Transfer not allowed"):
+            with pytest.warns(DeprecatedDeviceArrayApiWarning):
+                cuda.to_device([1])
 
-            with self.assertRaisesRegex(CudaAPIError, "Transfer not allowed"):
-                noise.copy_to_host()
+    def test_vectorize_with_device_data_no_transfer(self):
+        """Test that vectorize operations on device data don't induce transfers."""
+        for vectorize in vectorize_funcs:
+            # Define and call a ufunc with data on the device
+            # This should not induce any transfers (which would raise CudaAPIError)
+            @vectorize(["float32(float32)"])
+            def func(noise):
+                return noise + 1.0
 
-            with self.assertRaisesRegex(CudaAPIError, "Transfer not allowed"):
-                cuda.to_device([1])
+            # This should succeed without raising CudaAPIError
+            result = func(self.device_noise)
 
-            try:
-                # Check that defining and calling a ufunc with data on the device
-                # induces no transfers
-
-                @vectorize(["float32(float32)"])
-                def func(noise):
-                    return noise + 1.0
-
-                func(noise)
-            finally:
-                # Replace our mocks with the original implementations. If there was
-                # no original implementation, simply remove ours.
-
-                if old_HtoD is not None:
-                    driver.cuMemcpyHtoD = old_HtoD
-                else:
-                    del driver.cuMemcpyHtoD
-                if old_DtoH is not None:
-                    driver.cuMemcpyDtoH = old_DtoH
-                else:
-                    del driver.cuMemcpyDtoH
+            # Verify the result is still on the device and has the right shape
+            self.assertEqual(result.shape, self.device_noise.shape)
 
 
 if __name__ == "__main__":

From 23cafef8620b11f3a247a922048fe4c9ee8e62b0 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Tue, 27 Jan 2026 12:54:39 -0800
Subject: [PATCH 38/60] attempt to fix simulator

---
 .../cuda/simulator/cudadrv/devicearray.py     | 23 +++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py b/numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py
index ecf505f52..50d9b1ceb 100644
--- a/numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py
+++ b/numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py
@@ -112,8 +112,27 @@ def __init__(self, ary, stream=0):
         self.stream = stream
 
     @classmethod
-    def _create_nowarn(ary, stream):
-        return FakeCUDAArray(ary, stream)
+    def _create_nowarn(cls, shape, strides, dtype, stream=0, gpu_data=None):
+        """Create a FakeCUDAArray without the deprecation warning.
+
+        This matches the signature of DeviceNDArrayBase.__init__() but
+        creates a numpy array from the parameters since FakeCUDAArray
+        wraps numpy arrays.
+        """
+        if isinstance(shape, int):
+            shape = (shape,)
+        else:
+            shape = tuple(shape)
+
+        # Create dtype
+        dtype = np.dtype(dtype)
+
+        # For the simulator, we create a simple numpy array with the given shape
+        # and dtype. The strides parameter is typically standard C-order strides,
+        # so numpy's default behavior should work fine for most cases.
+        ary = np.empty(shape, dtype=dtype)
+
+        return FakeCUDAArray(ary, stream=stream)
 
     @property
     def _numba_type_(self):

From f317bb1c1fb2f1420962c96f972e7b380952ec41 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Tue, 27 Jan 2026 15:18:34 -0800
Subject: [PATCH 39/60] fix more tests

---
 .../numba/cuda/tests/cudapy/test_bfloat16.py     | 16 ++++++++--------
 .../numba/cuda/tests/cudapy/test_dispatcher.py   |  8 ++++----
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py b/numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py
index 169bae9d3..1f700d2f9 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py
@@ -414,12 +414,12 @@ def kernel(test_val, i1, i2, i3, i4, u1, u2, u3, u4):
         self.assertEqual(i1[0], 1)
         self.assertEqual(u1[0], 1)
 
-        np.testing.assert_equal(i2, np.array([2, 1, 1, 2], "int16"))
-        np.testing.assert_equal(i3, np.array([2, 1, 1, 2], "int32"))
-        np.testing.assert_equal(i4, np.array([2, 1, 1, 2], "int64"))
-        np.testing.assert_equal(u2, np.array([2, 1, 1, 2], "uint16"))
-        np.testing.assert_equal(u3, np.array([2, 1, 1, 2], "uint32"))
-        np.testing.assert_equal(u4, np.array([2, 1, 1, 2], "uint64"))
+        np.testing.assert_equal(i2.get(), np.array([2, 1, 1, 2], "int16"))
+        np.testing.assert_equal(i3.get(), np.array([2, 1, 1, 2], "int32"))
+        np.testing.assert_equal(i4.get(), np.array([2, 1, 1, 2], "int64"))
+        np.testing.assert_equal(u2.get(), np.array([2, 1, 1, 2], "uint16"))
+        np.testing.assert_equal(u3.get(), np.array([2, 1, 1, 2], "uint32"))
+        np.testing.assert_equal(u4.get(), np.array([2, 1, 1, 2], "uint64"))
 
     def test_from_integer_conversions(self):
         self.skip_unsupported()
@@ -492,7 +492,7 @@ def kernel(out):
 
         out = cp.zeros((24,), dtype="int16")
         kernel[1, 1](out)
-        res = out.copy_to_host()
+        res = out.get()
 
         i2 = np.int16(789).astype(mldtypes_bf16).view("int16")
         i3 = np.int32(789).astype(mldtypes_bf16).view("int16")
@@ -556,7 +556,7 @@ def kernel(out):
 
         out = cp.zeros((6,), dtype="int16")
         kernel[1, 1](out)
-        raw = out.copy_to_host()
+        raw = out.get()
 
         f4_expected = (
             np.array([test_val] * 4, "float32")
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py b/numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py
index 6d8bde719..798ef5d2d 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py
@@ -863,9 +863,9 @@ def add_one(x):
                     if i < x.size:
                         x[i] = i + 1
 
-                d_x = cuda.to_device(x)
+                d_x = cp.asarray(x)
                 add_one[1, 10](d_x)
-                np.testing.assert_array_equal(d_x.copy_to_host(), expected)
+                np.testing.assert_array_equal(d_x.get(), expected)
 
                 # with signature
                 @cuda.jit("void(int32[:])", shared_memory_carveout=carveout)
@@ -874,9 +874,9 @@ def add_one_sig(x):
                     if i < x.size:
                         x[i] = i + 1
 
-                d_x = cuda.to_device(x)
+                d_x = cp.asarray(x)
                 add_one_sig[1, 10](d_x)
-                np.testing.assert_array_equal(d_x.copy_to_host(), expected)
+                np.testing.assert_array_equal(d_x.get(), expected)
 
 
 if __name__ == "__main__":

From a6614a3b9001de7a084b7e83e705f87c59e15c44 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Wed, 28 Jan 2026 10:32:38 -0800
Subject: [PATCH 40/60] numpy under cudasim, else cupy

---
 .../numba/cuda/tests/cudadrv/test_context_stack.py    |  7 ++++++-
 numba_cuda/numba/cuda/tests/cudadrv/test_events.py    |  7 ++++++-
 .../numba/cuda/tests/cudapy/test_array_methods.py     |  6 +++++-
 numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py   |  6 +++++-
 .../numba/cuda/tests/cudapy/test_blackscholes.py      |  8 ++++++--
 numba_cuda/numba/cuda/tests/cudapy/test_caching.py    |  7 ++++++-
 .../numba/cuda/tests/cudapy/test_cuda_jit_no_types.py |  7 ++++++-
 .../numba/cuda/tests/cudapy/test_device_func.py       |  8 ++++++--
 numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py |  6 +++++-
 numba_cuda/numba/cuda/tests/cudapy/test_idiv.py       |  8 ++++++--
 numba_cuda/numba/cuda/tests/cudapy/test_laplace.py    |  6 +++++-
 numba_cuda/numba/cuda/tests/cudapy/test_matmul.py     |  6 +++++-
 numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py   |  7 ++++++-
 numba_cuda/numba/cuda/tests/cudapy/test_nondet.py     |  8 ++++++--
 numba_cuda/numba/cuda/tests/cudapy/test_reduction.py  | 11 ++++++++---
 numba_cuda/numba/cuda/tests/cudapy/test_sm.py         |  7 ++++++-
 numba_cuda/numba/cuda/tests/cudapy/test_ssa.py        |  8 ++++++--
 .../numba/cuda/tests/cudapy/test_vectorize_decor.py   |  7 ++++++-
 .../numba/cuda/tests/doc_examples/test_matmul.py      |  1 +
 .../numba/cuda/tests/doc_examples/test_random.py      |  1 +
 numba_cuda/numba/cuda/tests/nrt/test_nrt.py           |  6 +++++-
 21 files changed, 112 insertions(+), 26 deletions(-)

diff --git a/numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py b/numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py
index 0a82dd6db..b8f0808da 100644
--- a/numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py
+++ b/numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py
@@ -4,9 +4,14 @@
 import numbers
 
 from numba import cuda
+from numba.cuda import config
 from numba.cuda.testing import unittest, CUDATestCase, skip_on_cudasim
 from numba.cuda.cudadrv import driver
-import cupy as cp
+
+if config.ENABLE_CUDASIM:
+    import numpy as cp
+else:
+    import cupy as cp
 
 
 class TestContextStack(CUDATestCase):
diff --git a/numba_cuda/numba/cuda/tests/cudadrv/test_events.py b/numba_cuda/numba/cuda/tests/cudadrv/test_events.py
index eb832819d..36a2be623 100644
--- a/numba_cuda/numba/cuda/tests/cudadrv/test_events.py
+++ b/numba_cuda/numba/cuda/tests/cudadrv/test_events.py
@@ -3,10 +3,15 @@
 
 import numpy as np
 from numba import cuda
+from numba.cuda import config
 from numba.cuda.testing import unittest, CUDATestCase
 from numba.cuda._compat import Device
 from numba.cuda.testing import skip_on_cudasim
-import cupy as cp
+
+if config.ENABLE_CUDASIM:
+    import numpy as cp
+else:
+    import cupy as cp
 
 
 class TestCudaEvent(CUDATestCase):
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py b/numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py
index f4d951a43..8a6c7a8dd 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py
@@ -6,7 +6,11 @@
 from numba.cuda.testing import CUDATestCase
 import unittest
 from numba.cuda import config
-import cupy as cp
+
+if config.ENABLE_CUDASIM:
+    import numpy as cp
+else:
+    import cupy as cp
 
 
 def reinterpret_array_type(byte_arr, start, stop, output):
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py b/numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py
index 1f700d2f9..8db9af11d 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py
@@ -16,7 +16,11 @@
     uint64,
 )
 from numba.cuda import config
-import cupy as cp
+
+if config.ENABLE_CUDASIM:
+    import numpy as cp
+else:
+    import cupy as cp
 
 if not config.ENABLE_CUDASIM:
     from numba.cuda.bf16 import (
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py b/numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py
index 5af622a2a..61f2cd29b 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py
@@ -4,9 +4,13 @@
 import numpy as np
 import math
 from numba import cuda
-from numba.cuda import double, void
+from numba.cuda import config, double, void
 from numba.cuda.testing import unittest, CUDATestCase
-import cupy as cp
+
+if config.ENABLE_CUDASIM:
+    import numpy as cp
+else:
+    import cupy as cp
 
 RISKFREE = 0.02
 VOLATILITY = 0.30
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_caching.py b/numba_cuda/numba/cuda/tests/cudapy/test_caching.py
index 519f6ae1a..e554b6a6a 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_caching.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_caching.py
@@ -9,6 +9,7 @@
 import sys
 import stat
 import subprocess
+from numba.cuda import config
 
 from numba import cuda
 from numba.cuda.core.errors import NumbaWarning
@@ -26,7 +27,11 @@
     import_dynamic,
 )
 import numpy as np
-import cupy as cp
+
+if config.ENABLE_CUDASIM:
+    import numpy as cp
+else:
+    import cupy as cp
 from pickle import PicklingError
 
 # Module-level global for testing that caching rejects global device arrays
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py b/numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py
index 20adf1bae..1633cb1d3 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py
@@ -2,11 +2,16 @@
 # SPDX-License-Identifier: BSD-2-Clause
 
 from numba import cuda
+from numba.cuda import config
 import numpy as np
 from numba.cuda.testing import CUDATestCase
 from numba.cuda.tests.support import override_config
 import unittest
-import cupy as cp
+
+if config.ENABLE_CUDASIM:
+    import numpy as cp
+else:
+    import cupy as cp
 
 
 class TestCudaJitNoTypes(CUDATestCase):
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_device_func.py b/numba_cuda/numba/cuda/tests/cudapy/test_device_func.py
index b8744853c..002165013 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_device_func.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_device_func.py
@@ -19,11 +19,15 @@
 from numba.cuda.tests.support import skip_unless_cffi
 from numba.cuda.testing import skip_on_standalone_numba_cuda
 from types import ModuleType
-from numba.cuda import HAS_NUMBA
+from numba.cuda import HAS_NUMBA, config
 
 if HAS_NUMBA:
     from numba import jit
-import cupy as cp
+
+if config.ENABLE_CUDASIM:
+    import numpy as cp
+else:
+    import cupy as cp
 
 
 class TestDeviceFunc(CUDATestCase):
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py b/numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py
index 798ef5d2d..5c9d9ec1f 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py
@@ -24,7 +24,11 @@
     CUDATestCase,
 )
 import math
-import cupy as cp
+
+if config.ENABLE_CUDASIM:
+    import numpy as cp
+else:
+    import cupy as cp
 
 
 def add(x, y):
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_idiv.py b/numba_cuda/numba/cuda/tests/cudapy/test_idiv.py
index 21242e833..f131da9fa 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_idiv.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_idiv.py
@@ -3,9 +3,13 @@
 
 import numpy as np
 from numba import cuda
-from numba.cuda import float32, float64, int32, void
+from numba.cuda import config, float32, float64, int32, void
 from numba.cuda.testing import unittest, CUDATestCase
-import cupy as cp
+
+if config.ENABLE_CUDASIM:
+    import numpy as cp
+else:
+    import cupy as cp
 
 
 class TestCudaIDiv(CUDATestCase):
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_laplace.py b/numba_cuda/numba/cuda/tests/cudapy/test_laplace.py
index 4673cc2a9..186042a15 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_laplace.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_laplace.py
@@ -6,7 +6,11 @@
 from numba.cuda import float64, void
 from numba.cuda.testing import unittest, CUDATestCase
 from numba.cuda.core import config
-import cupy as cp
+
+if config.ENABLE_CUDASIM:
+    import numpy as cp
+else:
+    import cupy as cp
 
 # NOTE: CUDA kernel does not return any value
 
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_matmul.py b/numba_cuda/numba/cuda/tests/cudapy/test_matmul.py
index 5742bbad8..c0bdf3663 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_matmul.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_matmul.py
@@ -7,7 +7,11 @@
 from numba.cuda import float32, void
 from numba.cuda.testing import unittest, CUDATestCase
 from numba.cuda.core import config
-import cupy as cp
+
+if config.ENABLE_CUDASIM:
+    import numpy as cp
+else:
+    import cupy as cp
 
 # Ensure the test takes a reasonable amount of time in the simulator
 if config.ENABLE_CUDASIM:
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py b/numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py
index ec15e5ef6..21c9b429f 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py
@@ -2,11 +2,16 @@
 # SPDX-License-Identifier: BSD-2-Clause
 
 from numba import cuda
+from numba.cuda import config
 import numpy as np
 from numba.cuda.testing import skip_on_cudasim, CUDATestCase
 import threading
 import unittest
-import cupy as cp
+
+if config.ENABLE_CUDASIM:
+    import numpy as cp
+else:
+    import cupy as cp
 
 
 class TestMultiGPUContext(CUDATestCase):
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_nondet.py b/numba_cuda/numba/cuda/tests/cudapy/test_nondet.py
index 6835d6cd0..6ca3aa84e 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_nondet.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_nondet.py
@@ -3,9 +3,13 @@
 
 import numpy as np
 from numba import cuda
-from numba.cuda import float32, void
+from numba.cuda import config, float32, void
 from numba.cuda.testing import unittest, CUDATestCase
-import cupy as cp
+
+if config.ENABLE_CUDASIM:
+    import numpy as cp
+else:
+    import cupy as cp
 
 
 def generate_input(n):
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_reduction.py b/numba_cuda/numba/cuda/tests/cudapy/test_reduction.py
index 1c4ecbf17..70440a476 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_reduction.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_reduction.py
@@ -3,10 +3,15 @@
 
 import numpy as np
 from numba import cuda
-from numba.cuda.core.config import ENABLE_CUDASIM
+from numba.cuda import config
 from numba.cuda.testing import CUDATestCase
 import unittest
-import cupy as cp
+from numba.cuda import config
+
+if config.ENABLE_CUDASIM:
+    import numpy as cp
+else:
+    import cupy as cp
 
 # Avoid recompilation of the sum_reduce function by keeping it at global scope
 sum_reduce = cuda.Reduce(lambda a, b: a + b)
@@ -20,7 +25,7 @@ def _sum_reduce(self, n):
         self.assertEqual(expect, got)
 
     def test_sum_reduce(self):
-        if ENABLE_CUDASIM:
+        if config.ENABLE_CUDASIM:
             # Minimal test set for the simulator (which only wraps
             # functools.reduce)
             test_sizes = [1, 16]
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_sm.py b/numba_cuda/numba/cuda/tests/cudapy/test_sm.py
index 4d31f71ec..0b012e8a5 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_sm.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_sm.py
@@ -17,8 +17,13 @@
 )
 
 import numpy as np
+from numba.cuda import config
 from numba.cuda.np import numpy_support as nps
-import cupy as cp
+
+if config.ENABLE_CUDASIM:
+    import numpy as cp
+else:
+    import cupy as cp
 from .extensions_usecases import struct_model_type, MyStruct
 import pytest
 
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_ssa.py b/numba_cuda/numba/cuda/tests/cudapy/test_ssa.py
index 0f1378c58..1326334da 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_ssa.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_ssa.py
@@ -12,13 +12,17 @@
 
 from numba.cuda import types
 from numba import cuda
-from numba.cuda import jit
+from numba.cuda import config, jit
 from numba.cuda.core import errors
 
 from numba.cuda.extending import overload
 from numba.cuda.tests.support import override_config
 from numba.cuda.testing import CUDATestCase, skip_on_cudasim
-import cupy as cp
+
+if config.ENABLE_CUDASIM:
+    import numpy as cp
+else:
+    import cupy as cp
 
 _DEBUG = False
 
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py b/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py
index 2d757d6fe..c04396b71 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py
@@ -5,9 +5,14 @@
 import math
 
 from numba.cuda import vectorize, int32, uint32, float32, float64
+from numba.cuda import config
 from numba.cuda.testing import skip_on_cudasim, CUDATestCase
 from numba.cuda.tests.support import CheckWarningsMixin
-import cupy as cp
+
+if config.ENABLE_CUDASIM:
+    import numpy as cp
+else:
+    import cupy as cp
 import unittest
 
 
diff --git a/numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py b/numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py
index ee7450be7..64947a16e 100644
--- a/numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py
+++ b/numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py
@@ -11,6 +11,7 @@
 """
 
 import unittest
+
 from numba.cuda.testing import CUDATestCase, skip_on_cudasim
 from numba.cuda.tests.support import captured_stdout
 import cupy as cp
diff --git a/numba_cuda/numba/cuda/tests/doc_examples/test_random.py b/numba_cuda/numba/cuda/tests/doc_examples/test_random.py
index 3a1ca4643..13be2930f 100644
--- a/numba_cuda/numba/cuda/tests/doc_examples/test_random.py
+++ b/numba_cuda/numba/cuda/tests/doc_examples/test_random.py
@@ -5,6 +5,7 @@
 # "magictoken" is used for markers as beginning and ending of example text.
 
 import unittest
+
 from numba.cuda.testing import CUDATestCase, skip_on_cudasim
 import cupy as cp
 
diff --git a/numba_cuda/numba/cuda/tests/nrt/test_nrt.py b/numba_cuda/numba/cuda/tests/nrt/test_nrt.py
index 77f04afe2..bbeecbeb8 100644
--- a/numba_cuda/numba/cuda/tests/nrt/test_nrt.py
+++ b/numba_cuda/numba/cuda/tests/nrt/test_nrt.py
@@ -23,7 +23,11 @@
     Archive,
     Object,
 )
-import cupy as cp
+
+if config.ENABLE_CUDASIM:
+    import numpy as cp
+else:
+    import cupy as cp
 
 TEST_BIN_DIR = os.getenv("NUMBA_CUDA_TEST_BIN_DIR")
 

From cf5b38dbd84cd56a96b71d13eae2b4db7675b643 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Fri, 30 Jan 2026 08:48:36 -0800
Subject: [PATCH 41/60] don't require cupy to find optional deps

---
 numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py | 2 +-
 numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py         | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py b/numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py
index aa9e8575c..0ea89ef94 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py
@@ -96,7 +96,7 @@ def copy(arr, out):
                 for j in range(arr.shape[1]):
                     out[i, j] = arr[i, j]
 
-        arr = cp.asfortranarray(cp.random.random((10, 10)))
+        arr = cp.asarray(np.asfortranarray(np.random.random((10, 10))))
         out = cp.empty_like(arr)
         copy[1, 1](arr, out)
 
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py b/numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py
index 73573daaf..826f76d36 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py
@@ -28,7 +28,7 @@ def use_foo(x):
         foo[1, 1](x)
         return x
 
-    arrays = [cp.arange(10) for i in range(10)]
+    arrays = [cp.asarray(np.arange(10)) for i in range(10)]
     expected = np.arange(10)
     expected[0] += 1
     with ThreadPoolExecutor(max_workers=4) as e:

From 51a6349de7751e8fd8e4f786b243e616022f31e7 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Sat, 31 Jan 2026 13:38:39 -0800
Subject: [PATCH 42/60] get()

---
 .../cuda/tests/cudapy/test_array_methods.py     |  2 +-
 .../cuda/tests/cudapy/test_blackscholes.py      | 10 ++++++++--
 .../cuda/tests/cudapy/test_cuda_jit_no_types.py |  4 ++--
 numba_cuda/numba/cuda/tests/cudapy/test_idiv.py |  4 ++--
 .../numba/cuda/tests/cudapy/test_laplace.py     | 17 ++++++++++++++---
 .../numba/cuda/tests/cudapy/test_matmul.py      |  7 +++++--
 .../numba/cuda/tests/cudapy/test_nondet.py      |  9 +++++++--
 numba_cuda/numba/cuda/tests/cudapy/test_sm.py   |  2 +-
 numba_cuda/numba/cuda/tests/cudapy/test_ssa.py  |  6 +++++-
 9 files changed, 45 insertions(+), 16 deletions(-)

diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py b/numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py
index 8a6c7a8dd..cb8a42054 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py
@@ -60,7 +60,7 @@ def kernel(out):
         out = cp.asarray(np.zeros(len(val), dtype="float64"))
 
         kernel[1, 1](out)
-        for i, j in zip(out.get(), val):
+        for i, j in zip(out.get() if not config.ENABLE_CUDASIM else out, val):
             self.assertEqual(i, j)
 
 
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py b/numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py
index 61f2cd29b..eb9d503d1 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py
@@ -153,8 +153,14 @@ def black_scholes_cuda(callResult, putResult, S, X, T, R, V):
             )
 
         with stream:
-            callResultNumba = d_callResult.get()
-            putResultNumba = d_putResult.get()
+            callResultNumba = (
+                d_callResult.get()
+                if not config.ENABLE_CUDASIM
+                else d_callResult
+            )
+            putResultNumba = (
+                d_putResult.get() if not config.ENABLE_CUDASIM else d_putResult
+            )
 
         stream.synchronize()
 
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py b/numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py
index 1633cb1d3..1cd3796f3 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py
@@ -33,7 +33,7 @@ def foo(x, y):
 
         foo[10, 1](dx, dy)
 
-        y = dy.get()
+        y = dy.get() if not config.ENABLE_CUDASIM else dy
 
         self.assertTrue(np.all(x == y))
 
@@ -84,7 +84,7 @@ def outer(argin, argout):
 
             outer[1, 1, nb_stream](d_a, d_b)
 
-            b = d_b.get()
+            b = d_b.get() if not config.ENABLE_CUDASIM else d_b
 
         self.assertEqual(b[0], (a[0] + 1) + (2 + 1))
 
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_idiv.py b/numba_cuda/numba/cuda/tests/cudapy/test_idiv.py
index f131da9fa..47a8cbcc2 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_idiv.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_idiv.py
@@ -22,7 +22,7 @@ def div(grid, l_x, l_y):
 
         grid = cp.ones((2, 2), dtype=np.float32)
         div[1, 1](grid, 2, 2)
-        y = grid.get()
+        y = grid.get() if not config.ENABLE_CUDASIM else grid
         self.assertTrue(np.all(y == 0.5))
 
     def test_inplace_div_double(self):
@@ -34,7 +34,7 @@ def div_double(grid, l_x, l_y):
 
         grid = cp.ones((2, 2), dtype=np.float64)
         div_double[1, 1](grid, 2, 2)
-        y = grid.get()
+        y = grid.get() if not config.ENABLE_CUDASIM else grid
         self.assertTrue(np.all(y == 0.5))
 
 
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_laplace.py b/numba_cuda/numba/cuda/tests/cudapy/test_laplace.py
index 186042a15..9c1dff9d2 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_laplace.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_laplace.py
@@ -6,6 +6,7 @@
 from numba.cuda import float64, void
 from numba.cuda.testing import unittest, CUDATestCase
 from numba.cuda.core import config
+from contextlib import nullcontext
 
 if config.ENABLE_CUDASIM:
     import numpy as cp
@@ -98,8 +99,14 @@ def jocabi_relax_core(A, Anew, error):
 
         error_grid = np.zeros(griddim)
 
-        cp_stream = cp.cuda.Stream()
-        stream = cuda.api.external_stream(cp_stream.ptr)
+        cp_stream = (
+            cp.cuda.Stream() if not config.ENABLE_CUDASIM else nullcontext()
+        )
+        stream = (
+            cuda.api.external_stream(cp_stream.ptr)
+            if not config.ENABLE_CUDASIM
+            else cuda.stream()
+        )
 
         with cp_stream:
             dA = cp.asarray(A)  # to device and don't come back
@@ -112,7 +119,11 @@ def jocabi_relax_core(A, Anew, error):
             jocabi_relax_core[griddim, blockdim, stream](dA, dAnew, derror_grid)
 
             with cp_stream:
-                error_grid = derror_grid.get()
+                error_grid = (
+                    derror_grid.get()
+                    if not config.ENABLE_CUDASIM
+                    else derror_grid
+                )
 
             error = np.abs(error_grid).max()
 
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_matmul.py b/numba_cuda/numba/cuda/tests/cudapy/test_matmul.py
index c0bdf3663..0d1b9aeca 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_matmul.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_matmul.py
@@ -63,10 +63,13 @@ def cu_square_matrix_mul(A, B, C):
         dC = cp.empty_like(dA)
 
         cu_square_matrix_mul[(bpg, bpg), (tpb, tpb)](dA, dB, dC)
-        C = dC.get()
+        C = dC.get() if not config.ENABLE_CUDASIM else dC
 
         # Host compute
-        Cans = np.dot(dA.get(), dB.get())
+        Cans = np.dot(
+            dA.get() if not config.ENABLE_CUDASIM else dA,
+            dB.get() if not config.ENABLE_CUDASIM else dB,
+        )
 
         # Check result
         np.testing.assert_allclose(C, Cans, rtol=1e-5)
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_nondet.py b/numba_cuda/numba/cuda/tests/cudapy/test_nondet.py
index 6ca3aa84e..6222a543a 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_nondet.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_nondet.py
@@ -46,8 +46,13 @@ def diagproduct(c, a, b):
 
         diagproduct[griddim, blockdim](dF, dA, dB)
 
-        E = np.dot(dA.get(), np.diag(dB.get()))
-        np.testing.assert_array_almost_equal(dF.get(), E)
+        E = np.dot(
+            dA.get() if not config.ENABLE_CUDASIM else dA,
+            np.diag(dB.get() if not config.ENABLE_CUDASIM else dB),
+        )
+        np.testing.assert_array_almost_equal(
+            dF.get() if not config.ENABLE_CUDASIM else dF, E
+        )
 
 
 if __name__ == "__main__":
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_sm.py b/numba_cuda/numba/cuda/tests/cudapy/test_sm.py
index 0b012e8a5..6a3fa8e2b 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_sm.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_sm.py
@@ -409,7 +409,7 @@ def sm_slice_copy(x, y, chunksize):
 
         d_result = cp.asarray(arr)
         sm_slice_copy[nblocks, nthreads, 0, nshared](arr, d_result, chunksize)
-        host_result = d_result.get()
+        host_result = d_result.get() if not config.ENABLE_CUDASIM else d_result
         np.testing.assert_array_equal(arr, host_result)
 
     @skip_on_cudasim("Can't check typing in simulator")
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_ssa.py b/numba_cuda/numba/cuda/tests/cudapy/test_ssa.py
index 1326334da..62c3936eb 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_ssa.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_ssa.py
@@ -46,7 +46,11 @@ def check_func(self, func, result_array, *args):
 
         # Call the CUDA kernel
         func[1, 1](gpu_result_array, *copy.deepcopy(args))
-        gpu_result = gpu_result_array.get()
+        gpu_result = (
+            gpu_result_array.get()
+            if not config.ENABLE_CUDASIM
+            else gpu_result_array
+        )
 
         # Call the original Python function for expected result
         cpu_result = np.zeros_like(result_array)

From 0434364a8b750d5bc5a9778ecb4d9c4365f60323 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Sat, 31 Jan 2026 14:33:01 -0800
Subject: [PATCH 43/60] fix further tests

---
 numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py     | 4 ++++
 .../numba/cuda/tests/benchmarks/test_kernel_launch.py      | 5 ++++-
 numba_cuda/numba/cuda/tests/cudapy/test_sm.py              | 7 +++++--
 3 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py b/numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py
index 50d9b1ceb..8d294d67b 100644
--- a/numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py
+++ b/numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py
@@ -359,6 +359,10 @@ def check_array_compatibility(ary1, ary2):
 
 
 def to_device(ary, stream=0, copy=True, to=None):
+    warnings.warn(
+        "to_device is deprecated. Please prefer cupy for array functions",
+        DeprecatedDeviceArrayApiWarning,
+    )
     ary = np.array(
         ary, copy=False if numpy_version < (2, 0) else None, subok=True
     )
diff --git a/numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py b/numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py
index 3950a3d08..d922a21ea 100644
--- a/numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py
+++ b/numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py
@@ -15,7 +15,10 @@
     reason="no reason to run benchmarks in the simulator",
 )
 
-with pytest.warns(DeprecatedDeviceArrayApiWarning):
+if not config.ENABLE_CUDASIM:
+    with pytest.warns(DeprecatedDeviceArrayApiWarning):
+        devary_arg = cuda.device_array(128, dtype=np.float32)
+else:
     devary_arg = cuda.device_array(128, dtype=np.float32)
 
 
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_sm.py b/numba_cuda/numba/cuda/tests/cudapy/test_sm.py
index 6a3fa8e2b..a3e11f505 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_sm.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_sm.py
@@ -141,8 +141,11 @@ def use_sm_chunk_copy(x, y):
                 for j in range(nthreads):
                     y[bd * bx + j] = sm[j]
 
-        with pytest.warns(DeprecatedDeviceArrayApiWarning):
-            # waiting on cupy support for record dtypes
+        if not config.ENABLE_CUDASIM:
+            with pytest.warns(DeprecatedDeviceArrayApiWarning):
+                # waiting on cupy support for record dtypes
+                d_result = cuda.to_device(arr)
+        else:
             d_result = cuda.to_device(arr)
 
         use_sm_chunk_copy[nblocks, nthreads](arr, d_result)

From 6854031e2760b86325acb20162ff3dfcda18ebad Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Sat, 31 Jan 2026 14:58:35 -0800
Subject: [PATCH 44/60] cupy skip if not available

---
 numba_cuda/numba/cuda/testing.py              | 25 +++++++++++++++++++
 .../cuda/tests/cudadrv/test_context_stack.py  |  2 ++
 .../cuda/tests/cudadrv/test_nvjitlink.py      |  4 +++
 .../numba/cuda/tests/cudadrv/test_profiler.py |  2 ++
 .../cuda/tests/cudapy/test_array_methods.py   |  1 +
 .../tests/cudapy/test_array_reductions.py     | 19 +++++++++++++-
 .../numba/cuda/tests/cudapy/test_bfloat16.py  | 15 ++++++++++-
 .../cuda/tests/cudapy/test_blackscholes.py    |  3 ++-
 .../numba/cuda/tests/cudapy/test_caching.py   |  3 +++
 .../tests/cudapy/test_cuda_array_interface.py |  8 +++++-
 .../tests/cudapy/test_cuda_jit_no_types.py    |  4 ++-
 .../numba/cuda/tests/cudapy/test_datetime.py  |  7 +++++-
 .../numba/cuda/tests/cudapy/test_debuginfo.py | 11 ++++++--
 .../tests/cudapy/test_device_array_capture.py |  9 +++++++
 .../cuda/tests/cudapy/test_device_func.py     |  1 +
 .../cuda/tests/cudapy/test_dispatcher.py      |  3 +++
 .../numba/cuda/tests/cudapy/test_gufunc.py    |  2 ++
 .../cuda/tests/cudapy/test_gufunc_scalar.py   |  2 ++
 .../numba/cuda/tests/cudapy/test_idiv.py      |  3 ++-
 .../numba/cuda/tests/cudapy/test_laplace.py   |  3 ++-
 .../numba/cuda/tests/cudapy/test_matmul.py    |  1 +
 .../numba/cuda/tests/cudapy/test_multigpu.py  |  7 +++++-
 .../cuda/tests/cudapy/test_multithreads.py    |  2 ++
 .../numba/cuda/tests/cudapy/test_nondet.py    |  3 ++-
 .../numba/cuda/tests/cudapy/test_reduction.py |  4 ++-
 numba_cuda/numba/cuda/tests/cudapy/test_sm.py |  3 +++
 .../numba/cuda/tests/cudapy/test_ssa.py       | 23 ++++++++++++++++-
 .../numba/cuda/tests/cudapy/test_vectorize.py |  5 ++++
 .../cuda/tests/cudapy/test_vectorize_decor.py |  7 +++++-
 .../tests/cudapy/test_vectorize_scalar_arg.py |  7 +++++-
 .../cuda/tests/doc_examples/test_cpointer.py  |  7 +++++-
 .../cuda/tests/doc_examples/test_globals.py   |  8 +++++-
 .../cuda/tests/doc_examples/test_matmul.py    |  7 +++++-
 .../tests/doc_examples/test_montecarlo.py     |  2 ++
 .../cuda/tests/doc_examples/test_reduction.py |  6 ++++-
 .../tests/doc_examples/test_sessionize.py     |  2 ++
 numba_cuda/numba/cuda/tests/nrt/test_nrt.py   |  7 +++++-
 37 files changed, 207 insertions(+), 21 deletions(-)

diff --git a/numba_cuda/numba/cuda/testing.py b/numba_cuda/numba/cuda/testing.py
index 65aac73e5..b2d1b9e02 100644
--- a/numba_cuda/numba/cuda/testing.py
+++ b/numba_cuda/numba/cuda/testing.py
@@ -217,6 +217,31 @@ def skip_unless_cudasim(reason):
     return unittest.skipUnless(config.ENABLE_CUDASIM, reason)
 
 
+def skip_if_cupy_unavailable(fn):
+    """
+    Decorator: skip ``fn`` when CuPy cannot be imported on real hardware.
+
+    Under the simulator (``config.ENABLE_CUDASIM``) ``fn`` is returned
+    unchanged, because simulator tests substitute NumPy for CuPy through
+    the ``import numpy as cp`` pattern.
+
+    Otherwise ``import cupy`` is attempted once, at decoration time:
+    - on success ``fn`` is returned unchanged;
+    - on ``ImportError`` ``fn`` is wrapped with
+      ``unittest.skip("CuPy not available")``.
+    """
+    # The simulator never needs the real CuPy package.
+    needs_cupy = not config.ENABLE_CUDASIM
+
+    if needs_cupy:
+        try:
+            import cupy  # noqa: F401 - availability probe only
+        except ImportError:
+            return unittest.skip("CuPy not available")(fn)
+
+    return fn
+
+
 def skip_unless_conda_cudatoolkit(reason):
     """Skip test if the CUDA toolkit was not installed by Conda"""
     assert isinstance(reason, str)
diff --git a/numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py b/numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py
index b8f0808da..1bb41b6c8 100644
--- a/numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py
+++ b/numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py
@@ -6,6 +6,7 @@
 from numba import cuda
 from numba.cuda import config
 from numba.cuda.testing import unittest, CUDATestCase, skip_on_cudasim
+from numba.cuda.testing import skip_if_cupy_unavailable
 from numba.cuda.cudadrv import driver
 
 if config.ENABLE_CUDASIM:
@@ -146,6 +147,7 @@ def test_attached_non_primary(self):
         finally:
             the_driver.cuCtxDestroy(hctx)
 
+    @skip_if_cupy_unavailable
     def test_cudajit_in_attached_primary_context(self):
         def do():
             from numba import cuda
diff --git a/numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py b/numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py
index de5b4d649..3a2576d46 100644
--- a/numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py
+++ b/numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py
@@ -5,6 +5,7 @@
 from numba.cuda.testing import unittest
 from numba.cuda.testing import skip_on_cudasim
 from numba.cuda.testing import CUDATestCase
+from numba.cuda.testing import skip_if_cupy_unavailable
 from numba.cuda import get_current_device
 from numba.cuda.cudadrv.driver import _Linker, _have_nvjitlink
 
@@ -83,6 +84,7 @@ def test_nvjitlink_test_add_file_guess_ext_invalid_input(self):
             # because there's no way to know what kind of file to treat it as
             linker.add_file_guess_ext(content)
 
+    @skip_if_cupy_unavailable
     def test_nvjitlink_jit_with_linkable_code(self):
         files = (
             test_device_functions_a,
@@ -133,6 +135,7 @@ def tearDown(self):
         config.DUMP_ASSEMBLY = self._prev_dump_assembly
         super().tearDown()
 
+    @skip_if_cupy_unavailable
     def test_nvjitlink_jit_with_linkable_code_lto_dump_assembly(self):
         files = (
             test_device_functions_cu,
@@ -165,6 +168,7 @@ def kernel(result):
 
                 self.assertTrue("ASSEMBLY (AFTER LTO)" in f.getvalue())
 
+    @skip_if_cupy_unavailable
     def test_nvjitlink_jit_with_linkable_code_lto_dump_assembly_warn(self):
         files = (
             test_device_functions_a,
diff --git a/numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py b/numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py
index 26a5548cb..9a745cfdf 100644
--- a/numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py
+++ b/numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py
@@ -5,11 +5,13 @@
 from numba.cuda.testing import CUDATestCase
 from numba import cuda
 from numba.cuda.testing import skip_on_cudasim
+from numba.cuda.testing import skip_if_cupy_unavailable
 import cupy as cp
 
 
 @skip_on_cudasim("CUDA Profiler unsupported in the simulator")
 class TestProfiler(CUDATestCase):
+    @skip_if_cupy_unavailable
     def test_profiling(self):
         with cuda.profiling():
             a = cp.zeros(10)
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py b/numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py
index cb8a42054..70f10a4a9 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py
@@ -48,6 +48,7 @@ def test_reinterpret_array_type(self):
             got = output[0]
             self.assertEqual(expect, got)
 
+    @skip_if_cupy_unavailable
     def test_array_copy(self):
         val = np.array([1, 2, 3])[::-1]
 
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py b/numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py
index f2fc69aac..62c888cfe 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py
@@ -4,7 +4,11 @@
 
 from numba.cuda.tests.support import TestCase, MemoryLeakMixin
 from numba import cuda
-from numba.cuda.testing import skip_on_cudasim, skip_on_nvjitlink_13_1_sm_120
+from numba.cuda.testing import (
+    skip_on_cudasim,
+    skip_on_nvjitlink_13_1_sm_120,
+    skip_if_cupy_unavailable,
+)
 from numba.cuda.misc.special import literal_unroll
 from numba.cuda import config
 import cupy as cp
@@ -29,6 +33,7 @@ def tearDown(self):
         config.DISABLE_PERFORMANCE_WARNINGS = self.old_perf_warnings_setting
         super(TestArrayReductions, self).tearDown()
 
+    @skip_if_cupy_unavailable
     def test_all_basic(self):
         cases = (
             np.float64([1.0, 0.0, float("inf"), float("nan")]),
@@ -51,6 +56,7 @@ def kernel(out):
         got = out.get()
         self.assertPreciseEqual(expected, got)
 
+    @skip_if_cupy_unavailable
     def test_any_basic(self):
         cases = (
             np.float64([0.0, -0.0, 0.0, 0.0]),
@@ -73,6 +79,7 @@ def kernel(out):
         kernel[1, 1](out)
         self.assertPreciseEqual(expected, out.get())
 
+    @skip_if_cupy_unavailable
     @skip_on_nvjitlink_13_1_sm_120(
         "sum fails at link time on sm_120 + CUDA 13.1"
     )
@@ -103,6 +110,7 @@ def kernel(out):
         kernel[1, 1](out)
         self.assertPreciseEqual(expected, out.get())
 
+    @skip_if_cupy_unavailable
     @skip_on_nvjitlink_13_1_sm_120(
         "mean fails at link time on sm_120 + CUDA 13.1"
     )
@@ -133,6 +141,7 @@ def kernel(out):
         kernel[1, 1](out)
         self.assertPreciseEqual(expected, out.get())
 
+    @skip_if_cupy_unavailable
     def test_var_basic(self):
         arrays = (
             np.float64([1.0, 2.0, 0.0, -0.0, 1.0, -1.5]),
@@ -156,6 +165,7 @@ def kernel(out):
         kernel[1, 1](out)
         self.assertPreciseEqual(expected, out.get(), prec="double")
 
+    @skip_if_cupy_unavailable
     def test_std_basic(self):
         arrays = (
             np.float64([1.0, 2.0, 0.0, -0.0, 1.0, -1.5]),
@@ -179,6 +189,7 @@ def kernel(out):
         kernel[1, 1](out)
         self.assertPreciseEqual(expected, out.get())
 
+    @skip_if_cupy_unavailable
     def test_min_basic(self):
         arrays = (
             np.float64([1.0, 2.0, 0.0, -0.0, 1.0, -1.5]),
@@ -202,6 +213,7 @@ def kernel(out):
         kernel[1, 1](out)
         self.assertPreciseEqual(expected, out.get())
 
+    @skip_if_cupy_unavailable
     def test_max_basic(self):
         arrays = (
             np.float64([1.0, 2.0, 0.0, -0.0, 1.0, -1.5]),
@@ -225,6 +237,7 @@ def kernel(out):
         kernel[1, 1](out)
         self.assertPreciseEqual(expected, out.get())
 
+    @skip_if_cupy_unavailable
     def test_nanmin_basic(self):
         arrays = (
             np.float64([1.0, 2.0, 0.0, -0.0, 1.0, -1.5]),
@@ -249,6 +262,7 @@ def kernel(out):
         kernel[1, 1](out)
         self.assertPreciseEqual(expected, out.get())
 
+    @skip_if_cupy_unavailable
     def test_nanmax_basic(self):
         arrays = (
             np.float64([1.0, 2.0, 0.0, -0.0, 1.0, -1.5]),
@@ -273,6 +287,7 @@ def kernel(out):
         kernel[1, 1](out)
         self.assertPreciseEqual(expected, out.get())
 
+    @skip_if_cupy_unavailable
     @skip_on_nvjitlink_13_1_sm_120(
         "nanmean fails at link time on sm_120 + CUDA 13.1"
     )
@@ -297,6 +312,7 @@ def kernel(out):
         kernel[1, 1](out)
         self.assertPreciseEqual(expected, out.get())
 
+    @skip_if_cupy_unavailable
     @skip_on_nvjitlink_13_1_sm_120(
         "nansum fails at link time on sm_120 + CUDA 13.1"
     )
@@ -324,6 +340,7 @@ def kernel(out):
         kernel[1, 1](out)
         self.assertPreciseEqual(expected, out.get())
 
+    @skip_if_cupy_unavailable
     @skip_on_nvjitlink_13_1_sm_120(
         "nanprod fails at link time on sm_120 + CUDA 13.1"
     )
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py b/numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py
index 8db9af11d..8c1512594 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py
@@ -117,7 +117,7 @@
         float32_to_bfloat16_ru,
     )
 
-from numba.cuda.testing import CUDATestCase
+from numba.cuda.testing import CUDATestCase, skip_if_cupy_unavailable
 
 import math
 
@@ -136,6 +136,7 @@ def kernel():
 
         kernel[1, 1]()
 
+    @skip_if_cupy_unavailable
     def test_math_bindings(self):
         self.skip_unsupported()
 
@@ -172,6 +173,7 @@ def kernel(arr):
                 else:
                     self.assertAlmostEqual(arr[0], f(3.14), delta=1e-2)
 
+    @skip_if_cupy_unavailable
     def test_arithmetic_intrinsics_basic(self):
         self.skip_unsupported()
 
@@ -212,6 +214,7 @@ def kernel(out):
         for i, exp in enumerate(expected):
             self.assertAlmostEqual(out[i], exp, delta=1e-2)
 
+    @skip_if_cupy_unavailable
     def test_arithmetic_intrinsics_saturating(self):
         self.skip_unsupported()
 
@@ -238,6 +241,7 @@ def kernel(out):
             self.assertGreaterEqual(out[i], 0.0)
             self.assertLessEqual(out[i], 1.0)
 
+    @skip_if_cupy_unavailable
     def test_fma_relu_intrinsic(self):
         self.skip_unsupported()
 
@@ -254,6 +258,7 @@ def kernel(out):
 
         self.assertAlmostEqual(out[0], 0.0, delta=1e-3)
 
+    @skip_if_cupy_unavailable
     def test_comparison_intrinsics(self):
         self.skip_unsupported()
 
@@ -296,6 +301,7 @@ def kernel(out, a, b):
                 kernel[1, 1](out, a, b)
                 self.assertEqual(bool(out[0]), op(4.0, 3.0))
 
+    @skip_if_cupy_unavailable
     def test_hmax_hmin_intrinsics(self):
         self.skip_unsupported()
 
@@ -311,6 +317,7 @@ def kernel(out):
         self.assertAlmostEqual(out[0], 4.0, delta=1e-3)
         self.assertAlmostEqual(out[1], 3.0, delta=1e-3)
 
+    @skip_if_cupy_unavailable
     def test_nan_and_inf_intrinsics(self):
         self.skip_unsupported()
 
@@ -327,6 +334,7 @@ def kernel(out_bool, out_int):
         self.assertTrue(bool(out_bool[0]))
         self.assertNotEqual(int(out_int[0]), 0)
 
+    @skip_if_cupy_unavailable
     def test_hmax_nan_hmin_nan_intrinsics(self):
         self.skip_unsupported()
 
@@ -348,6 +356,7 @@ def kernel(out):
         self.assertAlmostEqual(out[2], 2.0, delta=1e-3)
         self.assertAlmostEqual(out[3], 2.0, delta=1e-3)
 
+    @skip_if_cupy_unavailable
     def test_bfloat16_as_bitcast(self):
         self.skip_unsupported()
 
@@ -364,6 +373,7 @@ def roundtrip_kernel(test_val, i2, u2):
         self.assertEqual(i2[0], test_val)
         self.assertEqual(u2[0], test_val)
 
+    @skip_if_cupy_unavailable
     def test_to_integer_conversions(self):
         self.skip_unsupported()
 
@@ -425,6 +435,7 @@ def kernel(test_val, i1, i2, i3, i4, u1, u2, u3, u4):
         np.testing.assert_equal(u3.get(), np.array([2, 1, 1, 2], "uint32"))
         np.testing.assert_equal(u4.get(), np.array([2, 1, 1, 2], "uint64"))
 
+    @skip_if_cupy_unavailable
     def test_from_integer_conversions(self):
         self.skip_unsupported()
 
@@ -520,6 +531,7 @@ def kernel(out):
         np.testing.assert_array_less(_bf16_ulp_distance(res[16:20], u3arr), two)
         np.testing.assert_array_less(_bf16_ulp_distance(res[20:24], u4arr), two)
 
+    @skip_if_cupy_unavailable
     def test_to_float_conversions(self):
         self.skip_unsupported()
 
@@ -533,6 +545,7 @@ def kernel(out):
 
         self.assertAlmostEqual(out[0], 1.5, delta=1e-7)  # conversion is exact
 
+    @skip_if_cupy_unavailable
     def test_from_float_conversions(self):
         self.skip_unsupported()
 
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py b/numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py
index eb9d503d1..5baa8b8c6 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py
@@ -5,7 +5,7 @@
 import math
 from numba import cuda
 from numba.cuda import config, double, void
-from numba.cuda.testing import unittest, CUDATestCase
+from numba.cuda.testing import unittest, CUDATestCase, skip_if_cupy_unavailable
 
 if config.ENABLE_CUDASIM:
     import numpy as cp
@@ -63,6 +63,7 @@ def randfloat(rand_var, low, high):
 
 
 class TestBlackScholes(CUDATestCase):
+    @skip_if_cupy_unavailable
     def test_blackscholes(self):
         OPT_N = 400
         iterations = 2
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_caching.py b/numba_cuda/numba/cuda/tests/cudapy/test_caching.py
index e554b6a6a..dc6a720a9 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_caching.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_caching.py
@@ -20,6 +20,7 @@
     skip_if_cudadevrt_missing,
     test_data_dir,
     skip_on_standalone_numba_cuda,
+    skip_if_cupy_unavailable,
 )
 from numba.cuda.tests.support import (
     TestCase,
@@ -378,6 +379,7 @@ def test_cannot_cache_linking_libraries(self):
             def f():
                 pass
 
+    @skip_if_cupy_unavailable
     def test_cannot_cache_captured_device_array(self):
         # Test that kernels capturing device arrays from closures cannot
         # be cached. The error can come from either NumbaPickler (for closure
@@ -397,6 +399,7 @@ def cached_kernel(output):
             output = cp.zeros(3, dtype=np.float32)
             cached_kernel[1, 3](output)
 
+    @skip_if_cupy_unavailable
     def test_cannot_cache_global_device_array(self):
         # Test that kernels referencing module-level global device arrays
         # cannot be cached.
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py b/numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py
index 0ea89ef94..2fbe2c161 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py
@@ -10,7 +10,11 @@
     ForeignArray,
     DeprecatedDeviceArrayApiTest,
 )
-from numba.cuda.testing import skip_on_cudasim, skip_if_external_memmgr
+from numba.cuda.testing import (
+    skip_on_cudasim,
+    skip_if_external_memmgr,
+    skip_if_cupy_unavailable,
+)
 from numba.cuda.tests.support import linux_only, override_config
 from unittest.mock import call, patch
 import cupy as cp
@@ -87,6 +91,7 @@ def mutate(arr, val):
         np.testing.assert_array_equal(wrapped.copy_to_host(), h_arr + val)
         np.testing.assert_array_equal(d_arr.copy_to_host(), h_arr + val)
 
+    @skip_if_cupy_unavailable
     def test_fortran_contiguous(self):
         cp = pytest.importorskip("cupy")
 
@@ -102,6 +107,7 @@ def copy(arr, out):
 
         np.testing.assert_array_equal(arr.get(), out.get())
 
+    @skip_if_cupy_unavailable
     def test_ufunc_arg(self):
         @vectorize(["f8(f8, f8)"], target="cuda")
         def vadd(a, b):
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py b/numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py
index 1cd3796f3..17fde5ca9 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py
@@ -4,7 +4,7 @@
 from numba import cuda
 from numba.cuda import config
 import numpy as np
-from numba.cuda.testing import CUDATestCase
+from numba.cuda.testing import CUDATestCase, skip_if_cupy_unavailable
 from numba.cuda.tests.support import override_config
 import unittest
 
@@ -19,6 +19,7 @@ class TestCudaJitNoTypes(CUDATestCase):
     Tests the jit decorator with no types provided.
     """
 
+    @skip_if_cupy_unavailable
     def test_device_array(self):
         @cuda.jit
         def foo(x, y):
@@ -64,6 +65,7 @@ def driver(A, B):
 
         np.testing.assert_allclose(Acopy + Acopy + Bcopy + Bcopy + 1, B)
 
+    @skip_if_cupy_unavailable
     def test_device_jit_2(self):
         @cuda.jit(device=True)
         def inner(arg):
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_datetime.py b/numba_cuda/numba/cuda/tests/cudapy/test_datetime.py
index 7837eeed1..87258f2d1 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_datetime.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_datetime.py
@@ -5,7 +5,11 @@
 
 from numba import cuda, vectorize, guvectorize
 from numba.cuda.np.numpy_support import from_dtype
-from numba.cuda.testing import skip_on_cudasim, DeprecatedDeviceArrayApiTest
+from numba.cuda.testing import (
+    skip_on_cudasim,
+    skip_if_cupy_unavailable,
+    DeprecatedDeviceArrayApiTest,
+)
 import unittest
 
 import pytest
@@ -61,6 +65,7 @@ def timediff(start, end):
         self.assertPreciseEqual(delta, arr2 - arr1)
 
     @skip_on_cudasim("API unsupported in the simulator")
+    @skip_if_cupy_unavailable
     def test_datetime_cupy_inputs(self):
         cp = pytest.importorskip("cupy")
         datetime_t = from_dtype(cp.dtype("datetime64[D]"))
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py b/numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py
index cf36bf271..d3aa168ee 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py
@@ -3,11 +3,14 @@
 
 from collections import namedtuple
 from numba.cuda.tests.support import override_config, captured_stdout
-from numba.cuda.testing import skip_on_cudasim
+from numba.cuda.testing import (
+    skip_on_cudasim,
+    skip_if_cupy_unavailable,
+    CUDATestCase,
+)
 from numba import cuda
 from numba.cuda import types
 from numba.cuda.np import numpy_support
-from numba.cuda.testing import CUDATestCase
 from numba.cuda.core import config
 from textwrap import dedent
 import math
@@ -449,6 +452,7 @@ def f(x, y):
         match = re.compile(pat6).search(llvm_ir)
         self.assertIsNotNone(match, msg=llvm_ir)
 
+    @skip_if_cupy_unavailable
     def test_union_debug(self):
         @cuda.jit("void(u8, int64[::1])", debug=True, opt=False)
         def a_union_use_case(arg, results):
@@ -628,6 +632,7 @@ def foo():
         # and refers to the offending function
         self.assertIn(str(foo.py_func), msg)
 
+    @skip_if_cupy_unavailable
     def test_linecache_source(self):
         """Test that source from linecache (like Jupyter notebooks) works.
 
@@ -712,6 +717,7 @@ def choice(cond1, cond2):
             if "llvm.dbg.declare" in line:
                 self.assertNotIn("bool", line)
 
+    @skip_if_cupy_unavailable
     def test_llvm_inliner_flag_conflict(self):
         # bar will be marked as 'alwaysinline', but when DEBUGINFO_DEFAULT is
         # set functions are not marked as 'alwaysinline' and this results in a
@@ -788,6 +794,7 @@ def foo(x, y):
         """,
         )
 
+    @skip_if_cupy_unavailable
     def test_DILocation_versioned_variables(self):
         """Tests that DILocation information for versions of variables matches
         up to their definition site."""
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_device_array_capture.py b/numba_cuda/numba/cuda/tests/cudapy/test_device_array_capture.py
index 6ecf8a26c..e85ab2a6e 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_device_array_capture.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_device_array_capture.py
@@ -15,6 +15,7 @@
 from numba import cuda
 from numba.cuda.testing import unittest, CUDATestCase, ForeignArray
 from numba.cuda.testing import skip_on_cudasim
+from numba.cuda.testing import skip_if_cupy_unavailable
 import cupy as cp
 
 
@@ -46,6 +47,7 @@ def get_host_data(arr):
 class TestDeviceArrayCapture(CUDATestCase):
     """Test capturing device arrays from global scope."""
 
+    @skip_if_cupy_unavailable
     def test_basic_capture(self):
         """Test basic global capture with different array types."""
         for name, make_array in ARRAY_FACTORIES:
@@ -72,6 +74,7 @@ def kernel(output):
                 result = output.get()
                 np.testing.assert_array_equal(result, host_data)
 
+    @skip_if_cupy_unavailable
     def test_computation(self):
         """Test captured global arrays used in computations."""
         for name, make_array in ARRAY_FACTORIES:
@@ -99,6 +102,7 @@ def kernel(output):
                 expected = host_data * 2.0
                 np.testing.assert_array_equal(result, expected)
 
+    @skip_if_cupy_unavailable
     def test_mutability(self):
         """Test that captured arrays can be written to (mutability)."""
         for name, make_array in ARRAY_FACTORIES:
@@ -118,6 +122,7 @@ def write_kernel():
                 expected = np.array([1.0, 2.0, 3.0, 4.0, 5.0], dtype=np.float32)
                 np.testing.assert_array_equal(result, expected)
 
+    @skip_if_cupy_unavailable
     def test_multiple_arrays(self):
         """Test capturing multiple arrays from globals."""
         for name, make_array in ARRAY_FACTORIES:
@@ -144,6 +149,7 @@ def kernel(output):
                 expected = np.array([11.0, 22.0, 33.0], dtype=np.float32)
                 np.testing.assert_array_equal(result, expected)
 
+    @skip_if_cupy_unavailable
     def test_multidimensional(self):
         """Test capturing multidimensional arrays."""
         for name, make_array in ARRAY_FACTORIES:
@@ -172,6 +178,7 @@ def kernel(output):
                 expected = host_2d.flatten()
                 np.testing.assert_array_equal(result, expected)
 
+    @skip_if_cupy_unavailable
     def test_dtypes(self):
         """Test capturing arrays with different dtypes."""
         dtypes = [
@@ -199,6 +206,7 @@ def kernel(output):
                     kernel[1, len(host_data)](output)
                     np.testing.assert_array_equal(output.get(), host_data)
 
+    @skip_if_cupy_unavailable
     def test_direct_kernel_access(self):
         """Test direct kernel access (not via device function)."""
         for name, make_array in ARRAY_FACTORIES:
@@ -219,6 +227,7 @@ def direct_access_kernel(output):
                 expected = np.array([8.0, 9.0, 10.0], dtype=np.float32)
                 np.testing.assert_array_equal(result, expected)
 
+    @skip_if_cupy_unavailable
     def test_zero_dimensional(self):
         """Test capturing 0-D (scalar) device arrays."""
         for name, make_array in ARRAY_FACTORIES:
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_device_func.py b/numba_cuda/numba/cuda/tests/cudapy/test_device_func.py
index 002165013..8117b453c 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_device_func.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_device_func.py
@@ -190,6 +190,7 @@ def f():
         )
 
     @skip_on_cudasim("cudasim ignores casting by jit decorator signature")
+    @skip_if_cupy_unavailable
     def test_device_casting(self):
         # Ensure that casts to the correct type are forced when calling a
         # device function with a signature. This test ensures that:
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py b/numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py
index 5c9d9ec1f..51b00db9d 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py
@@ -22,6 +22,7 @@
     skip_on_cudasim,
     unittest,
     CUDATestCase,
+    skip_if_cupy_unavailable,
 )
 import math
 
@@ -491,6 +492,7 @@ def add_device(a, b):
         self.assertEqual("Add two integers, device version", add_device.__doc__)
 
     @skip_on_cudasim("Cudasim does not have device pointers")
+    @skip_if_cupy_unavailable
     def test_dispatcher_cpointer_arguments(self):
         ptr = types.CPointer(types.int32)
         sig = void(ptr, int32, ptr, ptr, uint32)
@@ -852,6 +854,7 @@ def add_one_sig(x):
                         if i < len(x):
                             x[i] = i + 1
 
+    @skip_if_cupy_unavailable
     def test_shared_memory_carveout_valid_values(self):
         carveout_values = ["MaxL1", "MaxShared", "default", 0, 50, 100, -1]
 
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py b/numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py
index 235090893..f1d8f3f0d 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py
@@ -10,6 +10,7 @@
 from numba import cuda
 from numba.cuda.testing import (
     skip_on_cudasim,
+    skip_if_cupy_unavailable,
     CUDATestCase,
     DeprecatedDeviceArrayApiWarning,
 )
@@ -54,6 +55,7 @@ def test_gufunc_small(self):
         Gold = np.matmul(A, B)
         self.assertTrue(np.allclose(C, Gold))
 
+    @skip_if_cupy_unavailable
     def test_gufunc_auto_transfer(self):
         gufunc = _get_matmulcore_gufunc()
 
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py b/numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py
index 5bd39a002..9d3f7a35d 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py
@@ -13,6 +13,7 @@
     skip_on_cudasim,
     CUDATestCase,
     DeprecatedDeviceArrayApiWarning,
+    skip_if_cupy_unavailable,
 )
 import unittest
 import cupy as cp
@@ -21,6 +22,7 @@
 
 @skip_on_cudasim("ufunc API unsupported in the simulator")
 class TestGUFuncScalar(CUDATestCase):
+    @skip_if_cupy_unavailable
     def test_gufunc_scalar_output(self):
         #    function type:
         #        - has no void return type
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_idiv.py b/numba_cuda/numba/cuda/tests/cudapy/test_idiv.py
index 47a8cbcc2..4a67a624f 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_idiv.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_idiv.py
@@ -4,7 +4,7 @@
 import numpy as np
 from numba import cuda
 from numba.cuda import config, float32, float64, int32, void
-from numba.cuda.testing import unittest, CUDATestCase
+from numba.cuda.testing import unittest, CUDATestCase, skip_if_cupy_unavailable
 
 if config.ENABLE_CUDASIM:
     import numpy as cp
@@ -25,6 +25,7 @@ def div(grid, l_x, l_y):
         y = grid.get() if not config.ENABLE_CUDASIM else grid
         self.assertTrue(np.all(y == 0.5))
 
+    @skip_if_cupy_unavailable
     def test_inplace_div_double(self):
         @cuda.jit(void(float64[:, :], int32, int32))
         def div_double(grid, l_x, l_y):
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_laplace.py b/numba_cuda/numba/cuda/tests/cudapy/test_laplace.py
index 9c1dff9d2..f484cc388 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_laplace.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_laplace.py
@@ -4,7 +4,7 @@
 import numpy as np
 from numba import cuda
 from numba.cuda import float64, void
-from numba.cuda.testing import unittest, CUDATestCase
+from numba.cuda.testing import unittest, CUDATestCase, skip_if_cupy_unavailable
 from numba.cuda.core import config
 from contextlib import nullcontext
 
@@ -23,6 +23,7 @@
 
 
 class TestCudaLaplace(CUDATestCase):
+    @skip_if_cupy_unavailable
     def test_laplace_small(self):
         @cuda.jit(float64(float64, float64), device=True, inline="always")
         def get_max(a, b):
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_matmul.py b/numba_cuda/numba/cuda/tests/cudapy/test_matmul.py
index 0d1b9aeca..6dc157f10 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_matmul.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_matmul.py
@@ -24,6 +24,7 @@
 
 
 class TestCudaMatMul(CUDATestCase):
+    @skip_if_cupy_unavailable
     def test_func(self):
         @cuda.jit(void(float32[:, ::1], float32[:, ::1], float32[:, ::1]))
         def cu_square_matrix_mul(A, B, C):
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py b/numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py
index 21c9b429f..6f298405d 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py
@@ -4,7 +4,11 @@
 from numba import cuda
 from numba.cuda import config
 import numpy as np
-from numba.cuda.testing import skip_on_cudasim, CUDATestCase
+from numba.cuda.testing import (
+    skip_on_cudasim,
+    CUDATestCase,
+    skip_if_cupy_unavailable,
+)
 import threading
 import unittest
 
@@ -57,6 +61,7 @@ def check(inp, out):
         check(A, B)
 
     @skip_on_cudasim("Simulator does not support multiple threads")
+    @skip_if_cupy_unavailable
     def test_multithreaded(self):
         def work(gpu, dA, results, ridx):
             try:
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py b/numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py
index 826f76d36..15c03558c 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py
@@ -8,6 +8,7 @@
 from numba.cuda.testing import (
     skip_on_cudasim,
     skip_under_cuda_memcheck,
+    skip_if_cupy_unavailable,
     DeprecatedDeviceArrayApiTest,
 )
 import unittest
@@ -19,6 +20,7 @@
 has_mp_get_context = hasattr(multiprocessing, "get_context")
 
 
+@skip_if_cupy_unavailable
 def check_concurrent_compiling():
     @cuda.jit
     def foo(x):
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_nondet.py b/numba_cuda/numba/cuda/tests/cudapy/test_nondet.py
index 6222a543a..4830f8b18 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_nondet.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_nondet.py
@@ -4,7 +4,7 @@
 import numpy as np
 from numba import cuda
 from numba.cuda import config, float32, void
-from numba.cuda.testing import unittest, CUDATestCase
+from numba.cuda.testing import unittest, CUDATestCase, skip_if_cupy_unavailable
 
 if config.ENABLE_CUDASIM:
     import numpy as cp
@@ -19,6 +19,7 @@ def generate_input(n):
 
 
 class TestCudaNonDet(CUDATestCase):
+    @skip_if_cupy_unavailable
     def test_for_pre(self):
         """Test issue with loop not running due to bad sign-extension at the for
         loop precondition.
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_reduction.py b/numba_cuda/numba/cuda/tests/cudapy/test_reduction.py
index 70440a476..a2b06827f 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_reduction.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_reduction.py
@@ -4,7 +4,7 @@
 import numpy as np
 from numba import cuda
 from numba.cuda import config
-from numba.cuda.testing import CUDATestCase
+from numba.cuda.testing import CUDATestCase, skip_if_cupy_unavailable
 import unittest
 from numba.cuda import config
 
@@ -59,6 +59,7 @@ def test_empty_array_host(self):
         got = sum_reduce(A)
         self.assertEqual(expect, got)
 
+    @skip_if_cupy_unavailable
     def test_empty_array_device(self):
         A = np.arange(0, dtype=np.float64) + 1
         dA = cp.asarray(A)
@@ -87,6 +88,7 @@ def test_non_identity_init(self):
         got = sum_reduce(A, init=init)
         self.assertEqual(expect, got)
 
+    @skip_if_cupy_unavailable
     def test_result_on_device(self):
         A = np.arange(10, dtype=np.float64) + 1
         got = cp.zeros(1, dtype=np.float64)
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_sm.py b/numba_cuda/numba/cuda/tests/cudapy/test_sm.py
index a3e11f505..8492453f5 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_sm.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_sm.py
@@ -13,6 +13,7 @@
     unittest,
     CUDATestCase,
     skip_on_cudasim,
+    skip_if_cupy_unavailable,
     DeprecatedDeviceArrayApiWarning,
 )
 
@@ -76,6 +77,7 @@ def test_issue_fp16_support(self):
         self._check_shared_array_size_fp16(2, 2, types.float16)
         self._check_shared_array_size_fp16(2, 2, np.float16)
 
+    @skip_if_cupy_unavailable
     def test_issue_2393(self):
         """
         Test issue of warp misalign address due to nvvm not knowing the
@@ -369,6 +371,7 @@ def slice_nonunit_reverse_stride(x):
         expected = np.array([99, 3, 99, 2, 99, 1], dtype=np.int32)
         self._test_dynshared_slice(slice_nonunit_reverse_stride, arr, expected)
 
+    @skip_if_cupy_unavailable
     def test_issue_5073(self):
         # An example with which Bug #5073 (slices of dynamic shared memory all
         # alias) was discovered. The kernel uses all threads in the block to
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_ssa.py b/numba_cuda/numba/cuda/tests/cudapy/test_ssa.py
index 62c3936eb..cdc100503 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_ssa.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_ssa.py
@@ -17,7 +17,11 @@
 
 from numba.cuda.extending import overload
 from numba.cuda.tests.support import override_config
-from numba.cuda.testing import CUDATestCase, skip_on_cudasim
+from numba.cuda.testing import (
+    CUDATestCase,
+    skip_on_cudasim,
+    skip_if_cupy_unavailable,
+)
 
 if config.ENABLE_CUDASIM:
     import numpy as cp
@@ -65,6 +69,7 @@ class TestSSA(SSABaseTest):
     Contains tests to help isolate problems in SSA
     """
 
+    @skip_if_cupy_unavailable
     def test_argument_name_reused(self):
         @jit
         def foo(result, x):
@@ -73,6 +78,7 @@ def foo(result, x):
 
         self.check_func(foo, np.array([124.0]), 123)
 
+    @skip_if_cupy_unavailable
     def test_if_else_redefine(self):
         @jit
         def foo(result, x, y):
@@ -86,6 +92,7 @@ def foo(result, x, y):
         self.check_func(foo, np.array([2.0]), 3, 2)
         self.check_func(foo, np.array([2.0]), 2, 3)
 
+    @skip_if_cupy_unavailable
     def test_sum_loop(self):
         @jit
         def foo(result, n):
@@ -97,6 +104,7 @@ def foo(result, n):
         self.check_func(foo, np.array([0.0]), 0)
         self.check_func(foo, np.array([45.0]), 10)
 
+    @skip_if_cupy_unavailable
     def test_sum_loop_2vars(self):
         @jit
         def foo(result, n):
@@ -111,6 +119,7 @@ def foo(result, n):
         self.check_func(foo, np.array([0.0, 0.0]), 0)
         self.check_func(foo, np.array([45.0, 110.0]), 10)
 
+    @skip_if_cupy_unavailable
     def test_sum_2d_loop(self):
         @jit
         def foo(result, n):
@@ -153,12 +162,14 @@ def foo(result, n):
     @skip_on_cudasim(
         "Numba variable warnings are not supported in the simulator"
     )
+    @skip_if_cupy_unavailable
     def test_undefined_var(self):
         with override_config("ALWAYS_WARN_UNINIT_VAR", 0):
             self.check_undefined_var(should_warn=False)
         with override_config("ALWAYS_WARN_UNINIT_VAR", 1):
             self.check_undefined_var(should_warn=True)
 
+    @skip_if_cupy_unavailable
     def test_phi_propagation(self):
         @jit
         def foo(result, actions):
@@ -258,6 +269,7 @@ class TestReportedSSAIssues(SSABaseTest):
     # Tests from issues
     # https://github.com/numba/numba/issues?q=is%3Aopen+is%3Aissue+label%3ASSA
 
+    @skip_if_cupy_unavailable
     def test_issue2194(self):
         @jit
         def foo(result, V):
@@ -272,6 +284,7 @@ def foo(result, V):
         V = np.empty(1)
         self.check_func(foo, np.array([1.0]), V)
 
+    @skip_if_cupy_unavailable
     def test_issue3094(self):
         @jit
         def foo(result, pred):
@@ -283,6 +296,7 @@ def foo(result, pred):
 
         self.check_func(foo, np.array([0]), False)
 
+    @skip_if_cupy_unavailable
     def test_issue3931(self):
         @jit
         def foo(result, arr):
@@ -297,6 +311,7 @@ def foo(result, arr):
         result_gpu = np.zeros((3, 2))
         self.check_func(foo, result_gpu, np.zeros((3, 2)))
 
+    @skip_if_cupy_unavailable
     def test_issue3976(self):
         def overload_this(a):
             return 42
@@ -317,6 +332,7 @@ def ol(a):
 
         self.check_func(foo, np.array([42]), True)
 
+    @skip_if_cupy_unavailable
     def test_issue3979(self):
         @jit
         def foo(result, A, B):
@@ -333,6 +349,7 @@ def foo(result, A, B):
             foo, np.array([2, 4]), np.array([1, 2]), np.array([3, 4])
         )
 
+    @skip_if_cupy_unavailable
     def test_issue5219(self):
         def overload_this(a, b=None):
             if isinstance(b, tuple):
@@ -356,6 +373,7 @@ def test_tuple(result, a, b):
 
         self.check_func(test_tuple, np.array([2]), 1, (2,))
 
+    @skip_if_cupy_unavailable
     def test_issue5223(self):
         @jit
         def bar(result, x):
@@ -372,6 +390,7 @@ def bar(result, x):
         expected = np.ones(5)  # Since len(a) == 5, it should return unchanged
         self.check_func(bar, expected, a)
 
+    @skip_if_cupy_unavailable
     def test_issue5243(self):
         @jit
         def foo(result, q, lin):
@@ -382,6 +401,7 @@ def foo(result, q, lin):
         lin = np.array([0.1, 0.6, 0.3])
         self.check_func(foo, np.array([0.1]), np.zeros((2, 2)), lin)
 
+    @skip_if_cupy_unavailable
     def test_issue5482_missing_variable_init(self):
         # Test error that lowering fails because variable is missing
         # a definition before use.
@@ -402,6 +422,7 @@ def foo(result, x, v, n):
 
         self.check_func(foo, np.array([10]), 1, 5, 3)
 
+    @skip_if_cupy_unavailable
     def test_issue5493_unneeded_phi(self):
         # Test error that unneeded phi is inserted because variable does not
         # have a dominance definition.
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py b/numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py
index 99d8f786d..812d31d2f 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py
@@ -12,6 +12,7 @@
 from numba.cuda.cudadrv.driver import CudaAPIError, driver
 from numba.cuda.testing import skip_on_cudasim
 from numba.cuda.testing import CUDATestCase
+from numba.cuda.testing import skip_if_cupy_unavailable
 import unittest
 import cupy as cp
 import pytest
@@ -81,6 +82,7 @@ def vector_add(a, b):
                 np.testing.assert_allclose(expected, actual)
                 self.assertEqual(actual.dtype, ty)
 
+    @skip_if_cupy_unavailable
     def test_1d_async(self):
         for vectorize in vectorize_funcs:
 
@@ -158,6 +160,7 @@ def vector_add(a, b):
                 # to be using addition). Instead, compare against the input dtype.
                 self.assertEqual(dtype, actual.dtype)
 
+    @skip_if_cupy_unavailable
     def test_reduce_async(self):
         for vectorize in vectorize_funcs:
 
@@ -179,6 +182,7 @@ def vector_add(a, b):
                 # Compare against the input dtype as in test_reduce().
                 self.assertEqual(dtype, actual.dtype)
 
+    @skip_if_cupy_unavailable
     def test_manual_transfer(self):
         for vectorize in vectorize_funcs:
 
@@ -194,6 +198,7 @@ def vector_add(a, b):
             np.testing.assert_equal(expected, actual)
             self.assertEqual(expected.dtype, actual.dtype)
 
+    @skip_if_cupy_unavailable
     def test_ufunc_output_2d(self):
         for vectorize in vectorize_funcs:
 
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py b/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py
index c04396b71..de252dc4a 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py
@@ -6,7 +6,11 @@
 
 from numba.cuda import vectorize, int32, uint32, float32, float64
 from numba.cuda import config
-from numba.cuda.testing import skip_on_cudasim, CUDATestCase
+from numba.cuda.testing import (
+    skip_on_cudasim,
+    CUDATestCase,
+    skip_if_cupy_unavailable,
+)
 from numba.cuda.tests.support import CheckWarningsMixin
 
 if config.ENABLE_CUDASIM:
@@ -150,6 +154,7 @@ def fngpu(a, b):
         got = fngpu(a, b)
         np.testing.assert_almost_equal(expect, got)
 
+    @skip_if_cupy_unavailable
     def test_device_broadcast(self):
         """
         Same test as .test_broadcast() but with device array as inputs
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py b/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py
index 6091ad454..0204bf24b 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py
@@ -4,7 +4,11 @@
 import numpy as np
 from numba.cuda import vectorize
 from numba.cuda import float64
-from numba.cuda.testing import skip_on_cudasim, CUDATestCase
+from numba.cuda.testing import (
+    skip_on_cudasim,
+    skip_if_cupy_unavailable,
+    CUDATestCase,
+)
 import unittest
 import cupy as cp
 
@@ -13,6 +17,7 @@
 
 @skip_on_cudasim("ufunc API unsupported in the simulator")
 class TestCUDAVectorizeScalarArg(CUDATestCase):
+    @skip_if_cupy_unavailable
     def test_vectorize_scalar_arg(self):
         @vectorize(sig, target="cuda")
         def vector_add(a, b):
diff --git a/numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py b/numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py
index 565e93b09..2695ef746 100644
--- a/numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py
+++ b/numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py
@@ -3,7 +3,11 @@
 
 import unittest
 
-from numba.cuda.testing import CUDATestCase, skip_on_cudasim
+from numba.cuda.testing import (
+    CUDATestCase,
+    skip_if_cupy_unavailable,
+    skip_on_cudasim,
+)
 from numba.cuda.tests.support import captured_stdout
 import cupy as cp
 
@@ -26,6 +30,7 @@ def tearDown(self):
         self._captured_stdout.__exit__(None, None, None)
         super().tearDown()
 
+    @skip_if_cupy_unavailable
     def test_ex_cpointer(self):
         # ex_cpointer.sig.begin
         import numpy as np
diff --git a/numba_cuda/numba/cuda/tests/doc_examples/test_globals.py b/numba_cuda/numba/cuda/tests/doc_examples/test_globals.py
index 96e2a6bc9..c6ee1776f 100644
--- a/numba_cuda/numba/cuda/tests/doc_examples/test_globals.py
+++ b/numba_cuda/numba/cuda/tests/doc_examples/test_globals.py
@@ -3,7 +3,11 @@
 
 import unittest
 
-from numba.cuda.testing import CUDATestCase, skip_on_cudasim
+from numba.cuda.testing import (
+    CUDATestCase,
+    skip_if_cupy_unavailable,
+    skip_on_cudasim,
+)
 from numba.cuda.tests.support import captured_stdout
 import cupy as cp
 
@@ -26,6 +30,7 @@ def tearDown(self):
         self._captured_stdout.__exit__(None, None, None)
         super().tearDown()
 
+    @skip_if_cupy_unavailable
     def test_ex_globals_constant_capture(self):
         """
         Test demonstrating how global variables are captured as constants.
@@ -63,6 +68,7 @@ def compute_totals(quantities, totals):
         expected = np.array([10.8, 54.0, 16.2, 64.8, 162.0])
         np.testing.assert_allclose(d_totals.get(), expected)
 
+    @skip_if_cupy_unavailable
     def test_ex_globals_device_array_capture(self):
         """
         Test demonstrating how global device arrays are captured by pointer.
diff --git a/numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py b/numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py
index 64947a16e..750aa799f 100644
--- a/numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py
+++ b/numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py
@@ -12,7 +12,11 @@
 
 import unittest
 
-from numba.cuda.testing import CUDATestCase, skip_on_cudasim
+from numba.cuda.testing import (
+    CUDATestCase,
+    skip_if_cupy_unavailable,
+    skip_on_cudasim,
+)
 from numba.cuda.tests.support import captured_stdout
 import cupy as cp
 
@@ -35,6 +39,7 @@ def tearDown(self):
         self._captured_stdout.__exit__(None, None, None)
         super().tearDown()
 
+    @skip_if_cupy_unavailable
     def test_ex_matmul(self):
         """Test of matrix multiplication on various cases."""
         # magictoken.ex_import.begin
diff --git a/numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py b/numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py
index 894a5729d..3a360dc5c 100644
--- a/numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py
+++ b/numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py
@@ -5,6 +5,7 @@
 
 from numba.cuda.testing import (
     CUDATestCase,
+    skip_if_cupy_unavailable,
     skip_on_cudasim,
     skip_on_standalone_numba_cuda,
 )
@@ -30,6 +31,7 @@ def tearDown(self):
         super().tearDown()
 
     @skip_on_standalone_numba_cuda
+    @skip_if_cupy_unavailable
     def test_ex_montecarlo(self):
         # ex_montecarlo.import.begin
         import numba
diff --git a/numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py b/numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py
index 79f686ee9..7a09cb1d3 100644
--- a/numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py
+++ b/numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py
@@ -3,7 +3,11 @@
 
 import unittest
 
-from numba.cuda.testing import CUDATestCase, skip_on_cudasim
+from numba.cuda.testing import (
+    CUDATestCase,
+    skip_if_cupy_unavailable,
+    skip_on_cudasim,
+)
 from numba.cuda.tests.support import captured_stdout
 import cupy as cp
 
diff --git a/numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py b/numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py
index 61e211f32..79fab2248 100644
--- a/numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py
+++ b/numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py
@@ -6,6 +6,7 @@
 from numba.cuda.testing import (
     CUDATestCase,
     skip_if_cudadevrt_missing,
+    skip_if_cupy_unavailable,
     skip_on_cudasim,
     skip_unless_cc_60,
 )
@@ -32,6 +33,7 @@ def tearDown(self):
         self._captured_stdout.__exit__(None, None, None)
         super().tearDown()
 
+    @skip_if_cupy_unavailable
     def test_ex_sessionize(self):
         # ex_sessionize.import.begin
         import numpy as np
diff --git a/numba_cuda/numba/cuda/tests/nrt/test_nrt.py b/numba_cuda/numba/cuda/tests/nrt/test_nrt.py
index bbeecbeb8..0b4a74ee2 100644
--- a/numba_cuda/numba/cuda/tests/nrt/test_nrt.py
+++ b/numba_cuda/numba/cuda/tests/nrt/test_nrt.py
@@ -6,7 +6,11 @@
 
 import numpy as np
 import unittest
-from numba.cuda.testing import CUDATestCase, skip_on_cudasim
+from numba.cuda.testing import (
+    CUDATestCase,
+    skip_on_cudasim,
+    skip_if_cupy_unavailable,
+)
 from numba.cuda.tests.support import run_in_subprocess, override_config
 from numba.cuda import get_current_device
 from numba.cuda.cudadrv.nvrtc import compile
@@ -387,6 +391,7 @@ def foo():
             self.assertEqual(stats.free, stats_free)
             self.assertEqual(stats.mi_free, stats_mi_free)
 
+    @skip_if_cupy_unavailable
     def test_nrt_toggle_enabled(self):
         def array_reshape1d(arr, newshape, got):
             y = arr.reshape(newshape)

From 1497092682b219cd8dd31cb985804bcb9e661c76 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Sat, 31 Jan 2026 15:08:49 -0800
Subject: [PATCH 45/60] CI: skip CuPy install on Python 3.14 and gate CuPy test dependencies

---
 ci/tools/run-tests | 14 +++++++++++++-
 pixi.toml          | 19 +++++++++++++++++--
 pyproject.toml     |  4 ++--
 3 files changed, 32 insertions(+), 5 deletions(-)

diff --git a/ci/tools/run-tests b/ci/tools/run-tests
index dd242379f..0762ca14e 100755
--- a/ci/tools/run-tests
+++ b/ci/tools/run-tests
@@ -9,8 +9,20 @@
 set -euo pipefail
 
 echo "Installing numba-cuda wheel with test dependencies"
+
+# Check Python version to determine if we should install cupy
+PYTHON_VERSION=$(python -c "import sys; print(f'{sys.version_info.major}.{sys.version_info.minor}')")
+INSTALL_CUPY=true
+if [[ "${PYTHON_VERSION}" == "3.14" ]]; then
+  echo "Python 3.14 detected - skipping cupy installation (not yet available)"
+  INSTALL_CUPY=false
+fi
+
 if [[ "${LOCAL_CTK}" == 1 ]]; then
-  pip install "${NUMBA_CUDA_ARTIFACTS_DIR}"/*.whl "cuda-bindings==${TEST_CUDA_MAJOR}.*" "cupy-cuda${TEST_CUDA_MAJOR}x" --group test
+  pip install "${NUMBA_CUDA_ARTIFACTS_DIR}"/*.whl "cuda-bindings==${TEST_CUDA_MAJOR}.*" --group test
+  if [[ "${INSTALL_CUPY}" == "true" ]]; then
+    pip install "cupy-cuda${TEST_CUDA_MAJOR}x"
+  fi
 else
   pip install $(ls "${NUMBA_CUDA_ARTIFACTS_DIR}"/*.whl)["cu${TEST_CUDA_MAJOR}"] "cuda-toolkit==${TEST_CUDA_MAJOR}.${TEST_CUDA_MINOR}.*" --group "test-cu${TEST_CUDA_MAJOR}"
 fi
diff --git a/pixi.toml b/pixi.toml
index ac52447da..f8b2b1900 100644
--- a/pixi.toml
+++ b/pixi.toml
@@ -77,7 +77,10 @@ cffi = ">=1"
 pytest = ">=8,<9"
 pytest-xdist = ">=3.8"
 pytest-benchmark = ">=5.1"
-cupy = "*"
+
+[feature.test-cupy.dependencies]
+# CuPy is not available for Python 3.14 yet
+cupy = { version = "*", python = "<3.14" }
 
 [feature.test.pypi-dependencies]
 ml_dtypes = "*"
@@ -109,13 +112,14 @@ numpydoc = ">=1.9.0"
 nvidia-sphinx-theme = "*"
 
 [environments]
-default = { features = ["cu-13-1", "test", "cu", "cu-13", "cu-rt", "nvvm", "py314"], solve-group = "default" }
+default = { features = ["cu-13-1", "test", "test-cupy", "cu", "cu-13", "cu-rt", "nvvm", "py314"], solve-group = "default" }
 dev = { features = ["ruff"], no-default-feature = true }
 bench-against = { features = ["test"], no-default-feature = true }
 # CUDA 12
 cu-12-0-py310 = { features = [
     "cu-12-0",
     "test",
+    "test-cupy",
     "cu",
     "cu-12",
     "py310",
@@ -123,6 +127,7 @@ cu-12-0-py310 = { features = [
 cu-12-0-py311 = { features = [
     "cu-12-0",
     "test",
+    "test-cupy",
     "cu",
     "cu-12",
     "py311",
@@ -130,6 +135,7 @@ cu-12-0-py311 = { features = [
 cu-12-2-py311 = { features = [
     "cu-12-2",
     "test",
+    "test-cupy",
     "cu",
     "cu-12",
     "nvvm",
@@ -138,6 +144,7 @@ cu-12-2-py311 = { features = [
 cu-12-8-py310 = { features = [
     "cu-12-8",
     "test",
+    "test-cupy",
     "cu",
     "cu-12",
     "cu-rt",
@@ -147,6 +154,7 @@ cu-12-8-py310 = { features = [
 cu-12-8-py311 = { features = [
     "cu-12-8",
     "test",
+    "test-cupy",
     "cu",
     "cu-12",
     "cu-rt",
@@ -156,6 +164,7 @@ cu-12-8-py311 = { features = [
 cu-12-8-py312 = { features = [
     "cu-12-8",
     "test",
+    "test-cupy",
     "cu",
     "cu-12",
     "cu-rt",
@@ -165,6 +174,7 @@ cu-12-8-py312 = { features = [
 cu-12-8-py313 = { features = [
     "cu-12-8",
     "test",
+    "test-cupy",
     "cu",
     "cu-12",
     "cu-rt",
@@ -174,6 +184,7 @@ cu-12-8-py313 = { features = [
 cu-12-9-py312 = { features = [
     "cu-12-9",
     "test",
+    "test-cupy",
     "bench",
     "cu",
     "cu-12",
@@ -185,6 +196,7 @@ cu-12-9-py312 = { features = [
 cu-13-0-py312 = { features = [
     "cu-13-0",
     "test",
+    "test-cupy",
     "cu",
     "cu-13",
     "cu-rt",
@@ -194,6 +206,7 @@ cu-13-0-py312 = { features = [
 cu-13-0-py313 = { features = [
     "cu-13-0",
     "test",
+    "test-cupy",
     "cu",
     "cu-13",
     "cu-rt",
@@ -203,6 +216,7 @@ cu-13-0-py313 = { features = [
 cu-13-0-py314 = { features = [
     "cu-13-0",
     "test",
+    "test-cupy",
     "cu",
     "cu-13",
     "cu-rt",
@@ -212,6 +226,7 @@ cu-13-0-py314 = { features = [
 cu-13-1-py314 = { features = [
     "cu-13-1",
     "test",
+    "test-cupy",
     "cu",
     "cu-13",
     "cu-rt",
diff --git a/pyproject.toml b/pyproject.toml
index ba0d37e23..2e56f5903 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -48,12 +48,12 @@ test = [
 ]
 test-cu12 = [
     "cuda-toolkit[curand]==12.*",
-    "cupy-cuda12x",
+    "cupy-cuda12x; python_version<'3.14'",
     { include-group = "test" }
 ]
 test-cu13 = [
     "cuda-toolkit[curand]==13.*",
-    "cupy-cuda13x",
+    "cupy-cuda13x; python_version<'3.14'",
     { include-group = "test" }
 ]
 

From 2437cd8a24b9eaccd5f3181b6a96769a03867c97 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Mon, 2 Feb 2026 06:43:04 -0800
Subject: [PATCH 46/60] Resolve pixi environments and regenerate pixi.lock

---
 pixi.lock | 2376 ++++++++++++++++++++++++++++++++++++++++++++++++-----
 pixi.toml |   34 +-
 2 files changed, 2164 insertions(+), 246 deletions(-)

diff --git a/pixi.lock b/pixi.lock
index 0a2ceb9fb..e6a1d87d6 100644
--- a/pixi.lock
+++ b/pixi.lock
@@ -13,23 +13,44 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-2_gnu.tar.bz2
       - conda: https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-hda65f42_8.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/ca-certificates-2026.1.4-hbd8a1cb_0.conda
-      - conda: https://conda.anaconda.org/conda-forge/linux-64/cffi-2.0.0-py314h6fefde3_1.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/cffi-2.0.0-py314h4a8dc5f_1.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cfgv-3.5.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_1.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-cccl_linux-64-13.1.115-ha770c72_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-cudart-dev_linux-64-13.1.80-h376f20c_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-cudart-static_linux-64-13.1.80-h376f20c_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-cudart_linux-64-13.1.80-h376f20c_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/cuda-nvrtc-13.1.115-hecca717_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-13.1-h2ff5cdb_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-13.6.0-py314h972ecce_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-core-13.6.0-py314h3ed1f13_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/fastrlock-0.8.3-py314h8c728da_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/icu-78.2-h33c6efd_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/identify-1.2.2-py_0.tar.bz2
       - conda: https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.3.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.45-default_hbd61a6d_105.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/libblas-3.11.0-5_h4a7cf45_openblas.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/libcblas-3.11.0-5_h0358290_openblas.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/libcublas-13.2.1.1-h676940d_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/libcufft-12.1.0.78-hecca717_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/libcurand-10.4.1.81-h676940d_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/libcusolver-12.0.9.81-h676940d_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/libcusparse-12.7.3.1-hecca717_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.7.3-hecca717_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/libffi-3.5.2-h9ec8514_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/libgcc-15.2.0-he0feb66_16.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/libgfortran-15.2.0-h69a702a_16.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/libgfortran5-15.2.0-h68bc16d_16.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/libgomp-15.2.0-he0feb66_16.conda
-      - conda: https://conda.anaconda.org/conda-forge/linux-64/liblzma-5.8.1-hb9d3cd8_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/liblapack-3.11.0-5_h47877c9_openblas.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/liblzma-5.8.2-hb03c661_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/libmpdec-4.0.0-hb9d3cd8_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/libnvjitlink-13.1.115-hecca717_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/libopenblas-0.3.30-pthreads_h94d23a6_4.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.51.2-hf4e2dac_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-15.2.0-h934c35e_16.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/libuuid-2.41.3-h5347b49_0.conda
@@ -37,20 +58,21 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/linux-64/make-4.4.1-hb9d3cd8_2.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.5-h2d0b736_3.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/nodeenv-1.10.0-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/numpy-2.4.2-py314h2b28147_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/openssl-3.6.0-h26f9b46_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/packaging-25.0-pyh29332c3_1.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/platformdirs-4.5.1-pyhcf101f3_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/pluggy-1.6.0-pyhf9edf01_1.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/pre-commit-4.5.1-pyha770c72_0.conda
-      - conda: https://conda.anaconda.org/conda-forge/linux-64/psutil-7.2.1-py314h3f2afee_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/psutil-7.2.2-py314h0f05182_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/py-cpuinfo-9.0.0-pyhd8ed1ab_1.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/pycparser-2.22-pyh29332c3_1.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/pygments-2.19.2-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/pytest-8.4.2-pyhcf101f3_1.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/pytest-benchmark-5.2.3-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/pytest-xdist-3.8.0-pyhd8ed1ab_0.conda
-      - conda: https://conda.anaconda.org/conda-forge/linux-64/python-3.14.2-he1279bd_0_cp314t.conda
-      - conda: https://conda.anaconda.org/conda-forge/noarch/python_abi-3.14-8_cp314t.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/python-3.14.2-h32b2ec7_101_cp314.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/python_abi-3.14-8_cp314.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/pyyaml-6.0.3-pyh7db6752_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/readline-8.3-h853b02a_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/setuptools-80.9.0-pyhff2d567_0.conda
@@ -62,29 +84,50 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/linux-64/yaml-0.2.5-h280c20c_3.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.7-hb78ec9c_6.conda
       - pypi: https://files.pythonhosted.org/packages/5c/40/69ca9ea803303e14301fff9d4931b6d080b9603e134df0419c55e9764df4/filecheck-1.0.3-py3-none-any.whl
-      - pypi: https://files.pythonhosted.org/packages/79/2b/a826ba18d2179a56e144aef69e57fb2ab7c464ef0b2111940ee8a3a223a2/ml_dtypes-0.5.4-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl
-      - pypi: https://files.pythonhosted.org/packages/10/a7/cfbe475c35371cae1358e61f20c5f075badc18c4797ab4354140e1d283cf/numpy-2.4.1-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl
+      - pypi: https://files.pythonhosted.org/packages/c6/bb/82c7dcf38070b46172a517e2334e665c5bf374a262f99a283ea454bece7c/ml_dtypes-0.5.4-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl
       linux-aarch64:
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/_openmp_mutex-4.5-2_gnu.tar.bz2
+      - conda: https://conda.anaconda.org/conda-forge/noarch/arm-variant-1.2.0-sbsa.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/bzip2-1.0.8-h4777abc_8.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/ca-certificates-2026.1.4-hbd8a1cb_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cffi-2.0.0-py314h0bd77cf_1.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cfgv-3.5.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_1.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-cccl_linux-aarch64-13.1.115-h579c4fd_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-cudart-dev_linux-aarch64-13.1.80-h8f3c8d4_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-cudart-static_linux-aarch64-13.1.80-h8f3c8d4_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-cudart_linux-aarch64-13.1.80-h8f3c8d4_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cuda-nvrtc-13.1.115-h8f3c8d4_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-13.1-h2ff5cdb_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-13.6.0-py314h3ec1dcb_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-core-13.6.0-py314heaf0aa5_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/fastrlock-0.8.3-py314h3642cf7_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/icu-78.2-hb1525cb_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/identify-2.6.15-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.3.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/ld_impl_linux-aarch64-2.45-default_h1979696_105.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libblas-3.11.0-5_haddc8a3_openblas.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcblas-3.11.0-5_hd72aa62_openblas.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcublas-13.2.1.1-he38c790_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcufft-12.1.0.78-h8f3c8d4_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcurand-10.4.1.81-he38c790_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcusolver-12.0.9.81-he38c790_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcusparse-12.7.3.1-h8f3c8d4_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libexpat-2.7.3-hfae3067_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libffi-3.5.2-hd65408f_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libgcc-15.2.0-h8acb6b2_16.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libgfortran-15.2.0-he9431aa_16.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libgfortran5-15.2.0-h1b7bec0_16.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libgomp-15.2.0-h8acb6b2_16.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/liblapack-3.11.0-5_h88aeb00_openblas.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/liblzma-5.8.1-h86ecc28_2.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libmpdec-4.0.0-h86ecc28_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libnvjitlink-13.1.115-h8f3c8d4_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libopenblas-0.3.30-pthreads_h9d3fd7e_4.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libsqlite-3.51.2-h10b116e_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libstdcxx-15.2.0-hef695bb_16.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libuuid-2.41.3-h1022ec0_0.conda
@@ -92,6 +135,7 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/make-4.4.1-h2a6d0cb_2.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/ncurses-6.5-ha32ae93_3.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/nodeenv-1.10.0-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/numpy-2.4.2-py314haac167e_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/openssl-3.6.0-h8e36d6e_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/packaging-25.0-pyh29332c3_1.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/platformdirs-4.5.1-pyhcf101f3_0.conda
@@ -119,7 +163,6 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/zstd-1.5.7-h85ac4a6_6.conda
       - pypi: https://files.pythonhosted.org/packages/5c/40/69ca9ea803303e14301fff9d4931b6d080b9603e134df0419c55e9764df4/filecheck-1.0.3-py3-none-any.whl
       - pypi: https://files.pythonhosted.org/packages/04/f9/067b84365c7e83bda15bba2b06c6ca250ce27b20630b1128c435fb7a09aa/ml_dtypes-0.5.4-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl
-      - pypi: https://files.pythonhosted.org/packages/c0/c4/2e7908915c0e32ca636b92e4e4a3bdec4cb1e7eb0f8aedf1ed3c68a0d8cd/numpy-2.4.1-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl
       win-64:
       - conda: https://conda.anaconda.org/conda-forge/win-64/_openmp_mutex-4.5-2_gnu.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/bzip2-1.0.8-h0ad9c76_8.conda
@@ -127,23 +170,49 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/win-64/cffi-2.0.0-py314h5a2d7ad_1.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cfgv-3.5.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_1.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-cccl_win-64-13.1.115-h57928b3_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-cudart-dev_win-64-13.1.80-hac47afa_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-cudart-static_win-64-13.1.80-hac47afa_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-cudart_win-64-13.1.80-hac47afa_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/cuda-nvrtc-13.1.115-hac47afa_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-13.1-h2ff5cdb_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/cupy-13.6.0-py314h59d4d8c_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/cupy-core-13.6.0-py314hc101868_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/fastrlock-0.8.3-py314h8b4fd5f_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/icu-78.2-h637d24d_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/identify-2.6.15-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.3.0-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/libblas-3.11.0-5_hf2e6a31_mkl.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/libcblas-3.11.0-5_h2a3cdd5_mkl.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/libcublas-13.2.1.1-hac47afa_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/libcufft-12.1.0.78-hac47afa_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/libcurand-10.4.1.81-hac47afa_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/libcusolver-12.0.9.81-hac47afa_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/libcusparse-12.7.3.1-hac47afa_0.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/libexpat-2.7.3-hac47afa_0.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/libffi-3.5.2-h52bdfb6_0.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/libgcc-15.2.0-h8ee18e1_16.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/libgomp-15.2.0-h8ee18e1_16.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/libhwloc-2.12.2-default_h4379cf1_1000.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/libiconv-1.18-hc1393d2_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/liblapack-3.11.0-5_hf9ab0e9_mkl.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/liblzma-5.8.1-h2466b09_2.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/libmpdec-4.0.0-h2466b09_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/libnvjitlink-13.1.115-hac47afa_0.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/libsqlite-3.51.2-hf5d6505_0.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/libwinpthread-12.0.0.r4.gg4f2fc60ca-h57928b3_10.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/libxml2-16-2.15.1-h3cfd58e_1.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/libxml2-2.15.1-h779ef1b_1.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/libzlib-1.3.1-h2466b09_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/llvm-openmp-21.1.8-h4fa8253_0.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/make-4.4.1-h0e40799_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/mkl-2025.3.0-hac47afa_455.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/nodeenv-1.10.0-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/numpy-2.4.2-py314h06c3c77_0.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/openssl-3.6.0-h725018a_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/packaging-25.0-pyh29332c3_1.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/platformdirs-4.5.1-pyhcf101f3_0.conda
@@ -160,6 +229,7 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/noarch/python_abi-3.14-8_cp314.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/pyyaml-6.0.3-pyh7db6752_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/setuptools-80.9.0-pyhff2d567_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/tbb-2022.3.0-h3155e25_2.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/tk-8.6.13-h2c6b04d_3.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/tomli-2.4.0-pyhcf101f3_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.15.0-pyhcf101f3_0.conda
@@ -174,7 +244,6 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/win-64/zstd-1.5.7-h534d264_6.conda
       - pypi: https://files.pythonhosted.org/packages/5c/40/69ca9ea803303e14301fff9d4931b6d080b9603e134df0419c55e9764df4/filecheck-1.0.3-py3-none-any.whl
       - pypi: https://files.pythonhosted.org/packages/e9/93/2bfed22d2498c468f6bcd0d9f56b033eaa19f33320389314c19ef6766413/ml_dtypes-0.5.4-cp314-cp314-win_amd64.whl
-      - pypi: https://files.pythonhosted.org/packages/7e/bb/c6513edcce5a831810e2dddc0d3452ce84d208af92405a0c2e58fd8e7881/numpy-2.4.1-cp314-cp314-win_amd64.whl
   cu-12-0-py310:
     channels:
     - url: https://conda.anaconda.org/conda-forge/
@@ -215,9 +284,12 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-pathfinder-1.3.3-pyhcf101f3_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-12.9.5-pyh698daf1_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-12.0-hffde075_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-13.6.0-py310h8c3aed4_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-core-13.6.0-py310hbc0d89f_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/fastrlock-0.8.3-py310h25320af_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/gcc_impl_linux-64-12.4.0-h26ba24d_2.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/gcc_linux-64-12.4.0-h6b7512a_10.conda
@@ -230,8 +302,12 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.45-default_hbd61a6d_105.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/libblas-3.11.0-5_h4a7cf45_openblas.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/libcblas-3.11.0-5_h0358290_openblas.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/libcublas-12.0.1.189-hd3aeb46_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/libcufft-11.0.0.21-hd3aeb46_2.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/libcufile-1.5.0.59-hd3aeb46_1.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/libcurand-10.3.1.50-hd3aeb46_1.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/libcusolver-11.4.2.57-hd3aeb46_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/libcusparse-12.0.0.76-hd3aeb46_2.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.7.3-hecca717_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/libffi-3.5.2-h9ec8514_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/libgcc-15.2.0-he0feb66_16.conda
@@ -321,9 +397,12 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-pathfinder-1.3.3-pyhcf101f3_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-12.9.5-pyh698daf1_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-12.0-hffde075_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-13.6.0-py310h556c47b_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-core-13.6.0-py310h967c7ba_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/fastrlock-0.8.3-py310heccc163_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/gcc_impl_linux-aarch64-12.4.0-h628656a_2.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/gcc_linux-aarch64-12.4.0-heb3b579_10.conda
@@ -336,7 +415,11 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/ld_impl_linux-aarch64-2.45-default_h1979696_105.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libblas-3.11.0-5_haddc8a3_openblas.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcblas-3.11.0-5_hd72aa62_openblas.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcublas-12.0.1.189-hac28a21_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcufft-11.0.0.21-hac28a21_2.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcurand-10.3.1.50-hac28a21_1.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcusolver-11.4.2.57-hac28a21_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcusparse-12.0.0.76-hac28a21_2.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libexpat-2.7.3-hfae3067_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libffi-3.5.2-hd65408f_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libgcc-15.2.0-h8acb6b2_16.conda
@@ -422,16 +505,23 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-pathfinder-1.3.3-pyhcf101f3_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-12.9.5-pyh698daf1_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-12.0-hffde075_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/cupy-13.6.0-py310h9349102_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/cupy-core-13.6.0-py310h867cfc4_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/fastrlock-0.8.3-py310h699e580_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/icu-78.2-h637d24d_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/identify-2.6.15-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.3.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/libblas-3.11.0-5_hf2e6a31_mkl.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/libcblas-3.11.0-5_h2a3cdd5_mkl.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/libcublas-12.0.1.189-h63175ca_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/libcufft-11.0.0.21-h63175ca_2.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/libcurand-10.3.1.50-h63175ca_1.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/libcusolver-11.4.2.57-h63175ca_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/libcusparse-12.0.0.76-h63175ca_2.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/libexpat-2.7.3-hac47afa_0.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/libffi-3.5.2-h52bdfb6_0.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/libgcc-15.2.0-h8ee18e1_16.conda
@@ -485,7 +575,7 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/win-64/yaml-0.2.5-h6a83c73_3.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/zstd-1.5.7-h534d264_6.conda
       - conda: .
-        build: py310hf0cc224_0
+        build: py310h5d23e43_0
       - pypi: https://files.pythonhosted.org/packages/5c/40/69ca9ea803303e14301fff9d4931b6d080b9603e134df0419c55e9764df4/filecheck-1.0.3-py3-none-any.whl
       - pypi: https://files.pythonhosted.org/packages/c7/a3/51886727bd16e2f47587997b802dd56398692ce8c6c03c2e5bb32ecafe26/ml_dtypes-0.5.4-cp310-cp310-win_amd64.whl
   cu-12-0-py311:
@@ -528,9 +618,12 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-pathfinder-1.3.3-pyhcf101f3_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-12.9.5-pyh698daf1_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-12.0-hffde075_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-13.6.0-py311h72da3fd_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-core-13.6.0-py311he30c881_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/fastrlock-0.8.3-py311hc665b79_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/gcc_impl_linux-64-12.4.0-h26ba24d_2.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/gcc_linux-64-12.4.0-h6b7512a_10.conda
@@ -543,8 +636,12 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.45-default_hbd61a6d_105.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/libblas-3.11.0-5_h4a7cf45_openblas.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/libcblas-3.11.0-5_h0358290_openblas.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/libcublas-12.0.1.189-hd3aeb46_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/libcufft-11.0.0.21-hd3aeb46_2.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/libcufile-1.5.0.59-hd3aeb46_1.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/libcurand-10.3.1.50-hd3aeb46_1.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/libcusolver-11.4.2.57-hd3aeb46_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/libcusparse-12.0.0.76-hd3aeb46_2.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.7.3-hecca717_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/libffi-3.5.2-h9ec8514_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/libgcc-15.2.0-he0feb66_16.conda
@@ -634,9 +731,12 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-pathfinder-1.3.3-pyhcf101f3_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-12.9.5-pyh698daf1_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-12.0-hffde075_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-13.6.0-py311h1f68eda_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-core-13.6.0-py311h6a7bbfe_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/fastrlock-0.8.3-py311h8e4e6a5_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/gcc_impl_linux-aarch64-12.4.0-h628656a_2.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/gcc_linux-aarch64-12.4.0-heb3b579_10.conda
@@ -649,7 +749,11 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/ld_impl_linux-aarch64-2.45-default_h1979696_105.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libblas-3.11.0-5_haddc8a3_openblas.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcblas-3.11.0-5_hd72aa62_openblas.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcublas-12.0.1.189-hac28a21_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcufft-11.0.0.21-hac28a21_2.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcurand-10.3.1.50-hac28a21_1.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcusolver-11.4.2.57-hac28a21_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcusparse-12.0.0.76-hac28a21_2.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libexpat-2.7.3-hfae3067_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libffi-3.5.2-hd65408f_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libgcc-15.2.0-h8acb6b2_16.conda
@@ -735,16 +839,23 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-pathfinder-1.3.3-pyhcf101f3_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-12.9.5-pyh698daf1_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-12.0-hffde075_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/cupy-13.6.0-py311h3856ebc_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/cupy-core-13.6.0-py311h3f47771_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/fastrlock-0.8.3-py311h5dfdfe8_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/icu-78.2-h637d24d_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/identify-2.6.15-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.3.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/libblas-3.11.0-5_hf2e6a31_mkl.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/libcblas-3.11.0-5_h2a3cdd5_mkl.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/libcublas-12.0.1.189-h63175ca_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/libcufft-11.0.0.21-h63175ca_2.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/libcurand-10.3.1.50-h63175ca_1.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/libcusolver-11.4.2.57-h63175ca_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/libcusparse-12.0.0.76-h63175ca_2.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/libexpat-2.7.3-hac47afa_0.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/libffi-3.5.2-h52bdfb6_0.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/libgcc-15.2.0-h8ee18e1_16.conda
@@ -798,7 +909,7 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/win-64/yaml-0.2.5-h6a83c73_3.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/zstd-1.5.7-h534d264_6.conda
       - conda: .
-        build: py311h17f48b4_0
+        build: py311hb9e802a_0
       - pypi: https://files.pythonhosted.org/packages/5c/40/69ca9ea803303e14301fff9d4931b6d080b9603e134df0419c55e9764df4/filecheck-1.0.3-py3-none-any.whl
       - pypi: https://files.pythonhosted.org/packages/b4/24/70bd59276883fdd91600ca20040b41efd4902a923283c4d6edcb1de128d2/ml_dtypes-0.5.4-cp311-cp311-win_amd64.whl
   cu-12-2-py311:
@@ -848,9 +959,12 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-pathfinder-1.3.3-pyhcf101f3_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-12.9.5-pyh698daf1_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-12.2-he2b69de_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-13.6.0-py311h72da3fd_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-core-13.6.0-py311he30c881_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/fastrlock-0.8.3-py311hc665b79_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/gcc_impl_linux-64-12.4.0-h26ba24d_2.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/gcc_linux-64-12.4.0-h6b7512a_10.conda
@@ -863,8 +977,12 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.45-default_hbd61a6d_105.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/libblas-3.11.0-5_h4a7cf45_openblas.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/libcblas-3.11.0-5_h0358290_openblas.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/libcublas-12.2.5.6-hd3aeb46_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/libcufft-11.0.8.103-hd3aeb46_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/libcufile-1.7.2.10-hd3aeb46_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/libcurand-10.3.3.141-hd3aeb46_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/libcusolver-11.5.2.141-hd3aeb46_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/libcusparse-12.1.2.141-hd3aeb46_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.7.3-hecca717_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/libffi-3.5.2-h9ec8514_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/libgcc-15.2.0-he0feb66_16.conda
@@ -961,9 +1079,12 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-pathfinder-1.3.3-pyhcf101f3_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-12.9.5-pyh698daf1_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-12.2-he2b69de_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-13.6.0-py311h1f68eda_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-core-13.6.0-py311h6a7bbfe_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/fastrlock-0.8.3-py311h8e4e6a5_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/gcc_impl_linux-aarch64-12.4.0-h628656a_2.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/gcc_linux-aarch64-12.4.0-heb3b579_10.conda
@@ -976,8 +1097,12 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/ld_impl_linux-aarch64-2.45-default_h1979696_105.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libblas-3.11.0-5_haddc8a3_openblas.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcblas-3.11.0-5_hd72aa62_openblas.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcublas-12.2.5.6-hac28a21_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcufft-11.0.8.103-hac28a21_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcufile-1.7.2.10-hac28a21_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcurand-10.3.3.141-hac28a21_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcusolver-11.5.2.141-hac28a21_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcusparse-12.1.2.141-hac28a21_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libexpat-2.7.3-hfae3067_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libffi-3.5.2-hd65408f_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libgcc-15.2.0-h8acb6b2_16.conda
@@ -1070,16 +1195,23 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-pathfinder-1.3.3-pyhcf101f3_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-12.9.5-pyh698daf1_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-12.2-he2b69de_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/cupy-13.6.0-py311h3856ebc_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/cupy-core-13.6.0-py311h3f47771_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/fastrlock-0.8.3-py311h5dfdfe8_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/icu-78.2-h637d24d_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/identify-2.6.15-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.3.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/libblas-3.11.0-5_hf2e6a31_mkl.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/libcblas-3.11.0-5_h2a3cdd5_mkl.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/libcublas-12.2.5.6-h63175ca_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/libcufft-11.0.8.103-h63175ca_0.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/libcurand-10.3.3.141-h63175ca_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/libcusolver-11.5.2.141-h63175ca_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/libcusparse-12.1.2.141-h63175ca_0.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/libexpat-2.7.3-hac47afa_0.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/libffi-3.5.2-h52bdfb6_0.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/libgcc-15.2.0-h8ee18e1_16.conda
@@ -1133,7 +1265,7 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/win-64/yaml-0.2.5-h6a83c73_3.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/zstd-1.5.7-h534d264_6.conda
       - conda: .
-        build: py311h17f48b4_0
+        build: py311hb9e802a_0
       - pypi: https://files.pythonhosted.org/packages/5c/40/69ca9ea803303e14301fff9d4931b6d080b9603e134df0419c55e9764df4/filecheck-1.0.3-py3-none-any.whl
       - pypi: https://files.pythonhosted.org/packages/b4/24/70bd59276883fdd91600ca20040b41efd4902a923283c4d6edcb1de128d2/ml_dtypes-0.5.4-cp311-cp311-win_amd64.whl
   cu-12-8-py310:
@@ -1187,9 +1319,12 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-12.9.5-pyh698daf1_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-runtime-12.8.1-ha804496_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-12.8-h5d125a7_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-13.6.0-py310h8c3aed4_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-core-13.6.0-py310hbc0d89f_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/fastrlock-0.8.3-py310h25320af_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/gcc_impl_linux-64-14.3.0-he8b2097_16.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/gcc_linux-64-14.3.0-h298d278_17.conda
@@ -1316,9 +1451,12 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-12.9.5-pyh698daf1_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-runtime-12.8.1-ha804496_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-12.8-h5d125a7_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-13.6.0-py310h556c47b_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-core-13.6.0-py310h967c7ba_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/fastrlock-0.8.3-py310heccc163_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/gcc_impl_linux-aarch64-14.3.0-hda29b82_16.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/gcc_linux-aarch64-14.3.0-h118592a_17.conda
@@ -1439,9 +1577,12 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-12.9.5-pyh698daf1_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-runtime-12.8.1-h7428d3b_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-12.8-h5d125a7_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/cupy-13.6.0-py310h9349102_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/cupy-core-13.6.0-py310h867cfc4_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/fastrlock-0.8.3-py310h699e580_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/icu-78.2-h637d24d_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/identify-2.6.15-pyhd8ed1ab_0.conda
@@ -1511,7 +1652,7 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/win-64/yaml-0.2.5-h6a83c73_3.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/zstd-1.5.7-h534d264_6.conda
       - conda: .
-        build: py310hf0cc224_0
+        build: py310h5d23e43_0
       - pypi: https://files.pythonhosted.org/packages/5c/40/69ca9ea803303e14301fff9d4931b6d080b9603e134df0419c55e9764df4/filecheck-1.0.3-py3-none-any.whl
       - pypi: https://files.pythonhosted.org/packages/c7/a3/51886727bd16e2f47587997b802dd56398692ce8c6c03c2e5bb32ecafe26/ml_dtypes-0.5.4-cp310-cp310-win_amd64.whl
   cu-12-8-py311:
@@ -1565,9 +1706,12 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-12.9.5-pyh698daf1_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-runtime-12.8.1-ha804496_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-12.8-h5d125a7_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-13.6.0-py311h72da3fd_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-core-13.6.0-py311he30c881_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/fastrlock-0.8.3-py311hc665b79_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/gcc_impl_linux-64-14.3.0-he8b2097_16.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/gcc_linux-64-14.3.0-h298d278_17.conda
@@ -1694,9 +1838,12 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-12.9.5-pyh698daf1_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-runtime-12.8.1-ha804496_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-12.8-h5d125a7_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-13.6.0-py311h1f68eda_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-core-13.6.0-py311h6a7bbfe_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/fastrlock-0.8.3-py311h8e4e6a5_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/gcc_impl_linux-aarch64-14.3.0-hda29b82_16.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/gcc_linux-aarch64-14.3.0-h118592a_17.conda
@@ -1817,9 +1964,12 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-12.9.5-pyh698daf1_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-runtime-12.8.1-h7428d3b_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-12.8-h5d125a7_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/cupy-13.6.0-py311h3856ebc_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/cupy-core-13.6.0-py311h3f47771_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/fastrlock-0.8.3-py311h5dfdfe8_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/icu-78.2-h637d24d_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/identify-2.6.15-pyhd8ed1ab_0.conda
@@ -1889,7 +2039,7 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/win-64/yaml-0.2.5-h6a83c73_3.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/zstd-1.5.7-h534d264_6.conda
       - conda: .
-        build: py311h17f48b4_0
+        build: py311hb9e802a_0
       - pypi: https://files.pythonhosted.org/packages/5c/40/69ca9ea803303e14301fff9d4931b6d080b9603e134df0419c55e9764df4/filecheck-1.0.3-py3-none-any.whl
       - pypi: https://files.pythonhosted.org/packages/b4/24/70bd59276883fdd91600ca20040b41efd4902a923283c4d6edcb1de128d2/ml_dtypes-0.5.4-cp311-cp311-win_amd64.whl
   cu-12-8-py312:
@@ -1943,9 +2093,12 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-12.9.5-pyh698daf1_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-runtime-12.8.1-ha804496_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-12.8-h5d125a7_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-13.6.0-py312h0317cef_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-core-13.6.0-py312h16a6543_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/fastrlock-0.8.3-py312h8285ef7_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/gcc_impl_linux-64-14.3.0-he8b2097_16.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/gcc_linux-64-14.3.0-h298d278_17.conda
@@ -2072,9 +2225,12 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-12.9.5-pyh698daf1_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-runtime-12.8.1-ha804496_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-12.8-h5d125a7_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-13.6.0-py312h500e0d2_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-core-13.6.0-py312hdcd7d0a_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/fastrlock-0.8.3-py312hf55c4e8_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/gcc_impl_linux-aarch64-14.3.0-hda29b82_16.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/gcc_linux-aarch64-14.3.0-h118592a_17.conda
@@ -2195,9 +2351,12 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-12.9.5-pyh698daf1_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-runtime-12.8.1-h7428d3b_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-12.8-h5d125a7_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/cupy-13.6.0-py312hf676df9_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/cupy-core-13.6.0-py312hc3434b0_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/fastrlock-0.8.3-py312ha1a9051_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/icu-78.2-h637d24d_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/identify-2.6.15-pyhd8ed1ab_0.conda
@@ -2267,7 +2426,7 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/win-64/yaml-0.2.5-h6a83c73_3.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/zstd-1.5.7-h534d264_6.conda
       - conda: .
-        build: py312h61be6c2_0
+        build: py312ha067a5a_0
       - pypi: https://files.pythonhosted.org/packages/5c/40/69ca9ea803303e14301fff9d4931b6d080b9603e134df0419c55e9764df4/filecheck-1.0.3-py3-none-any.whl
       - pypi: https://files.pythonhosted.org/packages/f5/f0/0cfadd537c5470378b1b32bd859cf2824972174b51b873c9d95cfd7475a5/ml_dtypes-0.5.4-cp312-cp312-win_amd64.whl
   cu-12-8-py313:
@@ -2321,9 +2480,12 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-12.9.5-pyh698daf1_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-runtime-12.8.1-ha804496_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-12.8-h5d125a7_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-13.6.0-py313h586c94b_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-core-13.6.0-py313h28b6081_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/fastrlock-0.8.3-py313h5d5ffb9_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/gcc_impl_linux-64-14.3.0-he8b2097_16.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/gcc_linux-64-14.3.0-h298d278_17.conda
@@ -2448,9 +2610,12 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-12.9.5-pyh698daf1_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-runtime-12.8.1-ha804496_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-12.8-h5d125a7_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-13.6.0-py313h7988abe_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-core-13.6.0-py313h6b3a76b_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/fastrlock-0.8.3-py313h59403f9_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/gcc_impl_linux-aarch64-14.3.0-hda29b82_16.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/gcc_linux-aarch64-14.3.0-h118592a_17.conda
@@ -2570,9 +2735,12 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-12.9.5-pyh698daf1_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-runtime-12.8.1-h7428d3b_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-12.8-h5d125a7_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/cupy-13.6.0-py313h5dfe2c3_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/cupy-core-13.6.0-py313ha16128a_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/fastrlock-0.8.3-py313h927ade5_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/icu-78.2-h637d24d_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/identify-2.6.15-pyhd8ed1ab_0.conda
@@ -2643,7 +2811,7 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/win-64/yaml-0.2.5-h6a83c73_3.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/zstd-1.5.7-h534d264_6.conda
       - conda: .
-        build: py313h96b86a2_0
+        build: py313he80dd91_0
       - pypi: https://files.pythonhosted.org/packages/5c/40/69ca9ea803303e14301fff9d4931b6d080b9603e134df0419c55e9764df4/filecheck-1.0.3-py3-none-any.whl
       - pypi: https://files.pythonhosted.org/packages/e1/8b/200088c6859d8221454825959df35b5244fa9bdf263fd0249ac5fb75e281/ml_dtypes-0.5.4-cp313-cp313-win_amd64.whl
   cu-12-9-py312:
@@ -3134,7 +3302,7 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/win-64/yaml-0.2.5-h6a83c73_3.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/zstd-1.5.7-h534d264_6.conda
       - conda: .
-        build: py312h61be6c2_0
+        build: py312ha067a5a_0
       - pypi: https://files.pythonhosted.org/packages/5c/40/69ca9ea803303e14301fff9d4931b6d080b9603e134df0419c55e9764df4/filecheck-1.0.3-py3-none-any.whl
       - pypi: https://files.pythonhosted.org/packages/f5/f0/0cfadd537c5470378b1b32bd859cf2824972174b51b873c9d95cfd7475a5/ml_dtypes-0.5.4-cp312-cp312-win_amd64.whl
   cu-13-0-py312:
@@ -3188,9 +3356,12 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-13.1.1-pyhc455866_1.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-runtime-13.0.2-ha804496_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-13.0-hc7b4dd1_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-13.6.0-py312h045ee1a_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-core-13.6.0-py312h1a70bb2_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/fastrlock-0.8.3-py312h8285ef7_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/gcc_impl_linux-64-15.2.0-hc5723f1_16.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/gcc_linux-64-15.2.0-h862fb80_17.conda
@@ -3319,9 +3490,12 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-13.1.1-pyhc455866_1.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-runtime-13.0.2-ha804496_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-13.0-hc7b4dd1_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-13.6.0-py312h63ce5a7_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-core-13.6.0-py312hc495b10_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/fastrlock-0.8.3-py312hf55c4e8_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/gcc_impl_linux-aarch64-15.2.0-habb1d5c_16.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/gcc_linux-aarch64-15.2.0-h0139441_17.conda
@@ -3444,9 +3618,12 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-13.1.1-pyhc455866_1.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-runtime-13.0.2-h7428d3b_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-13.0-hc7b4dd1_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/cupy-13.6.0-py312h050d4bf_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/cupy-core-13.6.0-py312h7babc83_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/fastrlock-0.8.3-py312ha1a9051_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/icu-78.2-h637d24d_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/identify-2.6.15-pyhd8ed1ab_0.conda
@@ -3518,7 +3695,7 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/win-64/yaml-0.2.5-h6a83c73_3.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/zstd-1.5.7-h534d264_6.conda
       - conda: .
-        build: py312h61be6c2_0
+        build: py312ha067a5a_0
       - pypi: https://files.pythonhosted.org/packages/5c/40/69ca9ea803303e14301fff9d4931b6d080b9603e134df0419c55e9764df4/filecheck-1.0.3-py3-none-any.whl
       - pypi: https://files.pythonhosted.org/packages/f5/f0/0cfadd537c5470378b1b32bd859cf2824972174b51b873c9d95cfd7475a5/ml_dtypes-0.5.4-cp312-cp312-win_amd64.whl
   cu-13-0-py313:
@@ -3572,9 +3749,12 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-13.1.1-pyhc455866_1.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-runtime-13.0.2-ha804496_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-13.0-hc7b4dd1_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-13.6.0-py313h727d180_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-core-13.6.0-py313h0630d88_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/fastrlock-0.8.3-py313h5d5ffb9_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/gcc_impl_linux-64-15.2.0-hc5723f1_16.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/gcc_linux-64-15.2.0-h862fb80_17.conda
@@ -3701,9 +3881,12 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-13.1.1-pyhc455866_1.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-runtime-13.0.2-ha804496_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-13.0-hc7b4dd1_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-13.6.0-py313h1bad292_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-core-13.6.0-py313h407dc6c_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/fastrlock-0.8.3-py313h59403f9_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/gcc_impl_linux-aarch64-15.2.0-habb1d5c_16.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/gcc_linux-aarch64-15.2.0-h0139441_17.conda
@@ -3825,9 +4008,12 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-13.1.1-pyhc455866_1.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-runtime-13.0.2-h7428d3b_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-13.0-hc7b4dd1_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/cupy-13.6.0-py313h670e13b_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/cupy-core-13.6.0-py313haef2af9_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/fastrlock-0.8.3-py313h927ade5_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/icu-78.2-h637d24d_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/identify-2.6.15-pyhd8ed1ab_0.conda
@@ -3900,7 +4086,7 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/win-64/yaml-0.2.5-h6a83c73_3.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/zstd-1.5.7-h534d264_6.conda
       - conda: .
-        build: py313h96b86a2_0
+        build: py313he80dd91_0
       - pypi: https://files.pythonhosted.org/packages/5c/40/69ca9ea803303e14301fff9d4931b6d080b9603e134df0419c55e9764df4/filecheck-1.0.3-py3-none-any.whl
       - pypi: https://files.pythonhosted.org/packages/e1/8b/200088c6859d8221454825959df35b5244fa9bdf263fd0249ac5fb75e281/ml_dtypes-0.5.4-cp313-cp313-win_amd64.whl
   cu-13-0-py314:
@@ -3954,9 +4140,12 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-13.1.1-pyhc455866_1.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-runtime-13.0.2-ha804496_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-13.0-hc7b4dd1_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-13.6.0-py314h972ecce_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-core-13.6.0-py314h3ed1f13_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/fastrlock-0.8.3-py314h8c728da_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/gcc_impl_linux-64-15.2.0-hc5723f1_16.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/gcc_linux-64-15.2.0-h862fb80_17.conda
@@ -4083,9 +4272,12 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-13.1.1-pyhc455866_1.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-runtime-13.0.2-ha804496_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-13.0-hc7b4dd1_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-13.6.0-py314h3ec1dcb_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-core-13.6.0-py314heaf0aa5_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/fastrlock-0.8.3-py314h3642cf7_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/gcc_impl_linux-aarch64-15.2.0-habb1d5c_16.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/gcc_linux-aarch64-15.2.0-h0139441_17.conda
@@ -4207,9 +4399,12 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-13.1.1-pyhc455866_1.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-runtime-13.0.2-h7428d3b_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-13.0-hc7b4dd1_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/cupy-13.6.0-py314h59d4d8c_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/cupy-core-13.6.0-py314hc101868_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/fastrlock-0.8.3-py314h8b4fd5f_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/icu-78.2-h637d24d_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/identify-2.6.15-pyhd8ed1ab_0.conda
@@ -4282,7 +4477,7 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/win-64/yaml-0.2.5-h6a83c73_3.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/zstd-1.5.7-h534d264_6.conda
       - conda: .
-        build: py314h3be3d12_0
+        build: py314h625260f_0
       - pypi: https://files.pythonhosted.org/packages/5c/40/69ca9ea803303e14301fff9d4931b6d080b9603e134df0419c55e9764df4/filecheck-1.0.3-py3-none-any.whl
       - pypi: https://files.pythonhosted.org/packages/e9/93/2bfed22d2498c468f6bcd0d9f56b033eaa19f33320389314c19ef6766413/ml_dtypes-0.5.4-cp314-cp314-win_amd64.whl
   cu-13-1-py314:
@@ -4336,9 +4531,12 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-13.1.1-pyhc455866_1.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-runtime-13.1.0-ha804496_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-13.1-h2ff5cdb_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-13.6.0-py314h972ecce_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-core-13.6.0-py314h3ed1f13_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/fastrlock-0.8.3-py314h8c728da_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/gcc_impl_linux-64-15.2.0-hc5723f1_16.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/gcc_linux-64-15.2.0-h862fb80_17.conda
@@ -4465,9 +4663,12 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-13.1.1-pyhc455866_1.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-runtime-13.1.0-ha804496_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-13.1-h2ff5cdb_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-13.6.0-py314h3ec1dcb_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-core-13.6.0-py314heaf0aa5_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/fastrlock-0.8.3-py314h3642cf7_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/gcc_impl_linux-aarch64-15.2.0-habb1d5c_16.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/gcc_linux-aarch64-15.2.0-h0139441_17.conda
@@ -4589,9 +4790,12 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-13.1.1-pyhc455866_1.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-runtime-13.1.0-h7428d3b_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-13.1-h2ff5cdb_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/cupy-13.6.0-py314h59d4d8c_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/cupy-core-13.6.0-py314hc101868_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/fastrlock-0.8.3-py314h8b4fd5f_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/icu-78.2-h637d24d_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/identify-2.6.15-pyhd8ed1ab_0.conda
@@ -4664,7 +4868,7 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/win-64/yaml-0.2.5-h6a83c73_3.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/zstd-1.5.7-h534d264_6.conda
       - conda: .
-        build: py314h3be3d12_0
+        build: py314h625260f_0
       - pypi: https://files.pythonhosted.org/packages/5c/40/69ca9ea803303e14301fff9d4931b6d080b9603e134df0419c55e9764df4/filecheck-1.0.3-py3-none-any.whl
       - pypi: https://files.pythonhosted.org/packages/e9/93/2bfed22d2498c468f6bcd0d9f56b033eaa19f33320389314c19ef6766413/ml_dtypes-0.5.4-cp314-cp314-win_amd64.whl
   default:
@@ -4718,9 +4922,12 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-13.1.1-pyhc455866_1.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-runtime-13.1.0-ha804496_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-13.1-h2ff5cdb_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-13.6.0-py314h972ecce_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-core-13.6.0-py314h3ed1f13_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/fastrlock-0.8.3-py314h8c728da_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/gcc_impl_linux-64-15.2.0-hc5723f1_16.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-64/gcc_linux-64-15.2.0-h862fb80_17.conda
@@ -4847,9 +5054,12 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-13.1.1-pyhc455866_1.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-runtime-13.1.0-ha804496_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-13.1-h2ff5cdb_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-13.6.0-py314h3ec1dcb_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-core-13.6.0-py314heaf0aa5_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/fastrlock-0.8.3-py314h3642cf7_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/gcc_impl_linux-aarch64-15.2.0-habb1d5c_16.conda
       - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/gcc_linux-aarch64-15.2.0-h0139441_17.conda
@@ -4971,9 +5181,12 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-13.1.1-pyhc455866_1.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-runtime-13.1.0-h7428d3b_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-13.1-h2ff5cdb_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/cupy-13.6.0-py314h59d4d8c_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/cupy-core-13.6.0-py314hc101868_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/win-64/fastrlock-0.8.3-py314h8b4fd5f_2.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/icu-78.2-h637d24d_0.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/identify-2.6.16-pyhd8ed1ab_0.conda
@@ -5046,7 +5259,7 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/win-64/yaml-0.2.5-h6a83c73_3.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/zstd-1.5.7-h534d264_6.conda
       - conda: .
-        build: py314h3be3d12_0
+        build: py314h625260f_0
       - pypi: https://files.pythonhosted.org/packages/5c/40/69ca9ea803303e14301fff9d4931b6d080b9603e134df0419c55e9764df4/filecheck-1.0.3-py3-none-any.whl
       - pypi: https://files.pythonhosted.org/packages/e9/93/2bfed22d2498c468f6bcd0d9f56b033eaa19f33320389314c19ef6766413/ml_dtypes-0.5.4-cp314-cp314-win_amd64.whl
   dev:
@@ -5416,7 +5629,7 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/noarch/win_inet_pton-1.1.0-pyh7428d3b_8.conda
       - conda: https://conda.anaconda.org/conda-forge/win-64/zstd-1.5.7-h534d264_6.conda
       - conda: .
-        build: py314h3be3d12_0
+        build: py314h625260f_0
       - pypi: https://files.pythonhosted.org/packages/8d/3f/95338030883d8c8b91223b4e21744b04d11b161a3ef117295d8241f50ab4/accessible_pygments-0.0.5-py3-none-any.whl
       - pypi: https://files.pythonhosted.org/packages/1a/39/47f9197bdd44df24d67ac8893641e16f386c984a0619ef2ee4c51fbbc019/beautifulsoup4-4.14.3-py3-none-any.whl
       - pypi: https://files.pythonhosted.org/packages/8c/79/017fab2f7167a9a9795665f894d04f77aafceca80821b51589bb4b23ff5c/nvidia_sphinx_theme-0.0.9.post1-py3-none-any.whl
@@ -5947,22 +6160,6 @@ packages:
   - pkg:pypi/cffi?source=hash-mapping
   size: 300271
   timestamp: 1761203085220
-- conda: https://conda.anaconda.org/conda-forge/linux-64/cffi-2.0.0-py314h6fefde3_1.conda
-  sha256: b5214aa3e0853240f41c79b6cbdb3b3c6b6bfc384713ffad1b41b1442d44737a
-  md5: 1598bfc06ced45b100bca5117c9b3a4b
-  depends:
-  - __glibc >=2.17,<3.0.a0
-  - libffi >=3.5.2,<3.6.0a0
-  - libgcc >=14
-  - pycparser
-  - python >=3.14,<3.15.0a0
-  - python_abi 3.14.* *_cp314t
-  license: MIT
-  license_family: MIT
-  purls:
-  - pkg:pypi/cffi?source=hash-mapping
-  size: 304412
-  timestamp: 1761202966547
 - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cffi-2.0.0-py310h0826a50_1.conda
   sha256: 63458040026be843a189e319190a0622486017c92ef251d4dff7ec847f9a8418
   md5: 152a5ba791642d8a81fe02d134ab3839
@@ -11588,6 +11785,18 @@ packages:
   purls: []
   size: 68354405
   timestamp: 1757018387981
+- conda: https://conda.anaconda.org/conda-forge/linux-64/cuda-nvrtc-13.1.115-hecca717_0.conda
+  sha256: 9cc4f9df70c02eea5121cdb0e865207b04cd52591f57ebcac2ba44fada10eb5b
+  md5: df16c9049d882cdaf4f83a5b90079589
+  depends:
+  - __glibc >=2.17,<3.0.a0
+  - cuda-version >=13.1,<13.2.0a0
+  - libgcc >=14
+  - libstdcxx >=14
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 35339417
+  timestamp: 1768272955912
 - conda: https://conda.anaconda.org/conda-forge/linux-64/cuda-nvrtc-13.1.80-hecca717_0.conda
   sha256: d6b326bdbf6fa7bfa0fa617dda547dc585159816b8f130f2535740c4e53fd12c
   md5: 7ef874b2dc4ca388ecef3b3893305459
@@ -11663,6 +11872,18 @@ packages:
   purls: []
   size: 32555050
   timestamp: 1757018424779
+- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cuda-nvrtc-13.1.115-h8f3c8d4_0.conda
+  sha256: a1ec61512cecb093797e00590ad381ecd5852d2a32440ff22b34f78c743f3d5a
+  md5: 34da2ff2c64054d65eb8f04d76c40cca
+  depends:
+  - arm-variant * sbsa
+  - cuda-version >=13.1,<13.2.0a0
+  - libgcc >=14
+  - libstdcxx >=14
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 33616576
+  timestamp: 1768272976976
 - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cuda-nvrtc-13.1.80-h8f3c8d4_0.conda
   sha256: 5e10ce4dd84c22c73e58a9f8359fb1e5ef4596afd3a0bc12b9fbde73b388ec0d
   md5: 0473ebdb01f2f4024177b024fc19fa72
@@ -11735,6 +11956,18 @@ packages:
   purls: []
   size: 59235886
   timestamp: 1757018672897
+- conda: https://conda.anaconda.org/conda-forge/win-64/cuda-nvrtc-13.1.115-hac47afa_0.conda
+  sha256: a8869b7d997722f90b9f8a602dc0b1d0d497f2a6f3561dc89383aeb2cd379a66
+  md5: 372d3c612a832d5f87d8dd9702d487b2
+  depends:
+  - cuda-version >=13.1,<13.2.0a0
+  - ucrt >=10.0.20348.0
+  - vc >=14.3,<15
+  - vc14_runtime >=14.44.35208
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 31006920
+  timestamp: 1768273107962
 - conda: https://conda.anaconda.org/conda-forge/win-64/cuda-nvrtc-13.1.80-hac47afa_0.conda
   sha256: 3f67de8a9eb182fa20bbc80bda7185afb676cfe8894f6a0549173bd752a7d2f4
   md5: 7b42337a35cd887ec3eed254b5ed606f
@@ -12881,6 +13114,46 @@ packages:
   purls: []
   size: 19915
   timestamp: 1762823943653
+- conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-13.6.0-py310h8c3aed4_2.conda
+  sha256: bab72866e713729c4824323aa4ff9346a48d0c74dff21d2cebb49331c9c58f57
+  md5: 9e5f2f1fc83026ad80f0660895ea3994
+  depends:
+  - cuda-cudart-dev_linux-64
+  - cuda-nvrtc
+  - cuda-version >=12,<13.0a0
+  - cupy-core 13.6.0 py310hbc0d89f_2
+  - libcublas
+  - libcufft
+  - libcurand
+  - libcusolver
+  - libcusparse
+  - python >=3.10,<3.11.0a0
+  - python_abi 3.10.* *_cp310
+  license: MIT
+  license_family: MIT
+  purls: []
+  size: 359719
+  timestamp: 1757733038131
+- conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-13.6.0-py311h72da3fd_2.conda
+  sha256: 01f0f69dbc66ca8fe7182678258915425573f5ae5aef338efb963aceb444ef1f
+  md5: 7ff80f6526ae96cff25f226544e72baa
+  depends:
+  - cuda-cudart-dev_linux-64
+  - cuda-nvrtc
+  - cuda-version >=12,<13.0a0
+  - cupy-core 13.6.0 py311he30c881_2
+  - libcublas
+  - libcufft
+  - libcurand
+  - libcusolver
+  - libcusparse
+  - python >=3.11,<3.12.0a0
+  - python_abi 3.11.* *_cp311
+  license: MIT
+  license_family: MIT
+  purls: []
+  size: 359669
+  timestamp: 1757732902729
 - conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-13.6.0-py312h0317cef_2.conda
   sha256: 078e83045e252b7c616c4e6b580acc1c12b4ade24b4ecd71be4d5dc767387bca
   md5: 8cee37f4bad743e108f904e902f65df1
@@ -12901,6 +13174,126 @@ packages:
   purls: []
   size: 359152
   timestamp: 1757733115653
+- conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-13.6.0-py312h045ee1a_2.conda
+  sha256: 806110d9c5c6802006eec55d012e2e82dddadf8f7c9743297a25eef5800d6a25
+  md5: 2be1fbddb4658b3325d531e3e8f62abe
+  depends:
+  - cuda-cudart-dev_linux-64
+  - cuda-nvrtc
+  - cuda-version >=13,<14.0a0
+  - cupy-core 13.6.0 py312h1a70bb2_2
+  - libcublas
+  - libcufft
+  - libcurand
+  - libcusolver
+  - libcusparse
+  - python >=3.12,<3.13.0a0
+  - python_abi 3.12.* *_cp312
+  license: MIT
+  license_family: MIT
+  purls: []
+  size: 359604
+  timestamp: 1757731606512
+- conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-13.6.0-py313h586c94b_2.conda
+  sha256: 8e1b0bf555b5ac78d620ccfd20d70c45b717eb6f074631b1a9e962c5d8f0e484
+  md5: 0685ae3980f823b2ca78552f7d8d4033
+  depends:
+  - cuda-cudart-dev_linux-64
+  - cuda-nvrtc
+  - cuda-version >=12,<13.0a0
+  - cupy-core 13.6.0 py313h28b6081_2
+  - libcublas
+  - libcufft
+  - libcurand
+  - libcusolver
+  - libcusparse
+  - python >=3.13,<3.14.0a0
+  - python_abi 3.13.* *_cp313
+  license: MIT
+  license_family: MIT
+  purls: []
+  size: 359537
+  timestamp: 1757732883343
+- conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-13.6.0-py313h727d180_2.conda
+  sha256: 0cf7e5f9461b144320ff2d30f1e7d74c7990e69aa15ec8211cc117f1214a9985
+  md5: 9a9af89f20555cbb1892f81d096b937d
+  depends:
+  - cuda-cudart-dev_linux-64
+  - cuda-nvrtc
+  - cuda-version >=13,<14.0a0
+  - cupy-core 13.6.0 py313h0630d88_2
+  - libcublas
+  - libcufft
+  - libcurand
+  - libcusolver
+  - libcusparse
+  - python >=3.13,<3.14.0a0
+  - python_abi 3.13.* *_cp313
+  license: MIT
+  license_family: MIT
+  purls: []
+  size: 359195
+  timestamp: 1757731600945
+- conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-13.6.0-py314h972ecce_2.conda
+  sha256: fc4c9e4286c943f8ce6e3f5f29e4ac750939b46cecd06ff70b00d6ba0472af02
+  md5: 5efa78fb77f5f07b02dde55a66bbff24
+  depends:
+  - cuda-cudart-dev_linux-64
+  - cuda-nvrtc
+  - cuda-version >=13,<14.0a0
+  - cupy-core 13.6.0 py314h3ed1f13_2
+  - libcublas
+  - libcufft
+  - libcurand
+  - libcusolver
+  - libcusparse
+  - python >=3.14.0rc2,<3.15.0a0
+  - python_abi 3.14.* *_cp314
+  license: MIT
+  license_family: MIT
+  purls: []
+  size: 359816
+  timestamp: 1757731942829
+- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-13.6.0-py310h556c47b_2.conda
+  sha256: 3595e84792c1e36fa79348a404d71b94ad7fd2db8d0ca2551377661dbe40a9ea
+  md5: 6a547864445662481528190824613fef
+  depends:
+  - cuda-cudart-dev_linux-aarch64
+  - cuda-nvrtc
+  - cuda-version >=12,<13.0a0
+  - cupy-core 13.6.0 py310h967c7ba_2
+  - libcublas
+  - libcufft
+  - libcurand
+  - libcusolver
+  - libcusparse
+  - python >=3.10,<3.11.0a0
+  - python_abi 3.10.* *_cp310
+  license: MIT
+  license_family: MIT
+  purls: []
+  size: 359399
+  timestamp: 1757733587754
+- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-13.6.0-py311h1f68eda_2.conda
+  sha256: 9bfa8bbc0a630e331a04359675c3a728bc9a856284807b5042e24bab4cb16f28
+  md5: 0c76272fc6fa05ff39c53ea5ea5d1154
+  depends:
+  - cuda-cudart-dev_linux-aarch64
+  - cuda-nvrtc
+  - cuda-version >=12,<13.0a0
+  - cupy-core 13.6.0 py311h6a7bbfe_2
+  - libcublas
+  - libcufft
+  - libcurand
+  - libcusolver
+  - libcusparse
+  - python >=3.11,<3.12.0a0
+  - python_abi 3.11.* *_cp311
+  license: MIT
+  license_family: MIT
+  purls: []
+  size: 359395
+  timestamp: 1757733506707
 - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-13.6.0-py312h500e0d2_2.conda
   sha256: 05992a9fddee5bded2f68aeaaba937901ef3b5b246132f7f25478579cc99d48a
   md5: 73a45823cac7c3926192682b7a71ed94
@@ -12921,14 +13314,14 @@ packages:
   purls: []
   size: 359411
   timestamp: 1757733170501
-- conda: https://conda.anaconda.org/conda-forge/win-64/cupy-13.6.0-py312hf676df9_2.conda
-  sha256: 6636ac902b44dbc8f8e14d8d2593d057af2f7b722b704edbe01600a2c90c752c
-  md5: 270e90ae04455f4f85b8763ec1755373
+- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-13.6.0-py312h63ce5a7_2.conda
+  sha256: b58e3b72197504103175bfccce853f71de94716e832faa3fb69a22508242185a
+  md5: 6a3767487d9c694dee98bced05c7d048
   depends:
-  - cuda-cudart-dev_win-64
+  - cuda-cudart-dev_linux-aarch64
   - cuda-nvrtc
-  - cuda-version >=12,<13.0a0
-  - cupy-core 13.6.0 py312hc3434b0_2
+  - cuda-version >=13,<14.0a0
+  - cupy-core 13.6.0 py312hc495b10_2
   - libcublas
   - libcufft
   - libcurand
@@ -12939,70 +13332,735 @@ packages:
   license: MIT
   license_family: MIT
   purls: []
-  size: 361552
-  timestamp: 1757734756770
-- conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-core-13.6.0-py312h16a6543_2.conda
-  sha256: ebe205ad39f19067898f4513816d8c44ac8036d0c4b9f1ee5aa0233e0f5dc1d7
-  md5: e0667d2bf17e4ff3bd50861f245ed961
+  size: 359083
+  timestamp: 1757732404821
+- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-13.6.0-py313h1bad292_2.conda
+  sha256: f41cf3dae5a43376eb47a172ebc684337f5ed623a16f165dc5ae1444598b5910
+  md5: 38504562c74c201725045cfbf54699ee
   depends:
-  - __glibc >=2.17,<3.0.a0
-  - fastrlock >=0.8.3,<0.9.0a0
-  - libgcc >=14
-  - libstdcxx >=14
-  - numpy >=1.22
-  - python >=3.12,<3.13.0a0
-  - python_abi 3.12.* *_cp312
-  constrains:
-  - cuda-nvrtc >=12,<13.0a0
-  - libcufft >=11,<12.0a0
-  - libcurand >=10,<11.0a0
-  - scipy >=1.7,<1.17
-  - optuna ~=3.0
-  - cutensor >=2.3.1.0,<3.0a0
-  - cuda-version >=12,<13.0a0
-  - cupy >=13.6.0,<13.7.0a0
-  - libcusparse >=12,<13.0a0
-  - libcusolver >=11,<12.0a0
-  - nccl >=2.27.7.1,<3.0a0
-  - libcublas >=12,<13.0a0
-  - __cuda >=12.0
+  - cuda-cudart-dev_linux-aarch64
+  - cuda-nvrtc
+  - cuda-version >=13,<14.0a0
+  - cupy-core 13.6.0 py313h407dc6c_2
+  - libcublas
+  - libcufft
+  - libcurand
+  - libcusolver
+  - libcusparse
+  - python >=3.13,<3.14.0a0
+  - python_abi 3.13.* *_cp313
   license: MIT
   license_family: MIT
-  purls:
-  - pkg:pypi/cupy?source=hash-mapping
-  size: 56720768
-  timestamp: 1757733006716
-- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-core-13.6.0-py312hdcd7d0a_2.conda
-  sha256: bc3cf5f1f0b0b4653d573507087ee56bfa04900232133e87c9baebfe6a128612
-  md5: 07720f931f710f3d2061b0bdcb808b82
+  purls: []
+  size: 359766
+  timestamp: 1757732380354
+- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-13.6.0-py313h7988abe_2.conda
+  sha256: 3263457e1415b2695cbba24e45b6d200b05f98120169ce56ac266ef9b29f38b7
+  md5: d378f8038cb5acfb9e24650b7b581f48
   depends:
-  - fastrlock >=0.8.3,<0.9.0a0
-  - libgcc >=14
-  - libstdcxx >=14
-  - numpy >=1.22
-  - python >=3.12,<3.13.0a0
-  - python >=3.12,<3.13.0a0 *_cpython
+  - cuda-cudart-dev_linux-aarch64
+  - cuda-nvrtc
+  - cuda-version >=12,<13.0a0
+  - cupy-core 13.6.0 py313h6b3a76b_2
+  - libcublas
+  - libcufft
+  - libcurand
+  - libcusolver
+  - libcusparse
+  - python >=3.13,<3.14.0a0
+  - python_abi 3.13.* *_cp313
+  license: MIT
+  license_family: MIT
+  purls: []
+  size: 359576
+  timestamp: 1757733613485
+- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-13.6.0-py314h3ec1dcb_2.conda
+  sha256: 9b78e1d7c9f42ad09dcad9e8784bfe95aec35ff30c10bc0a8f7cc92033e4c11f
+  md5: ef63ce910ca3d9278fa7b411740e6064
+  depends:
+  - cuda-cudart-dev_linux-aarch64
+  - cuda-nvrtc
+  - cuda-version >=13,<14.0a0
+  - cupy-core 13.6.0 py314heaf0aa5_2
+  - libcublas
+  - libcufft
+  - libcurand
+  - libcusolver
+  - libcusparse
+  - python >=3.14.0rc2,<3.15.0a0
+  - python_abi 3.14.* *_cp314
+  license: MIT
+  license_family: MIT
+  purls: []
+  size: 359844
+  timestamp: 1757732501296
+- conda: https://conda.anaconda.org/conda-forge/win-64/cupy-13.6.0-py310h9349102_2.conda
+  sha256: a9de522e66ff07d1567b4011f7a6e6c858f573053c989bf8a3a91276cf211bdc
+  md5: 3f610f7dce9af31ba31ff4bc8e4cc0ef
+  depends:
+  - cuda-cudart-dev_win-64
+  - cuda-nvrtc
+  - cuda-version >=12,<13.0a0
+  - cupy-core 13.6.0 py310h867cfc4_2
+  - libcublas
+  - libcufft
+  - libcurand
+  - libcusolver
+  - libcusparse
+  - python >=3.10,<3.11.0a0
+  - python_abi 3.10.* *_cp310
+  license: MIT
+  license_family: MIT
+  purls: []
+  size: 361800
+  timestamp: 1757734323240
+- conda: https://conda.anaconda.org/conda-forge/win-64/cupy-13.6.0-py311h3856ebc_2.conda
+  sha256: 7fff0c303355730c2e29386159fab97f31b3423bb5fd856e7e449ec735ef8e07
+  md5: 8e5df8d8969bf8dbf85740207e354e4c
+  depends:
+  - cuda-cudart-dev_win-64
+  - cuda-nvrtc
+  - cuda-version >=12,<13.0a0
+  - cupy-core 13.6.0 py311h3f47771_2
+  - libcublas
+  - libcufft
+  - libcurand
+  - libcusolver
+  - libcusparse
+  - python >=3.11,<3.12.0a0
+  - python_abi 3.11.* *_cp311
+  license: MIT
+  license_family: MIT
+  purls: []
+  size: 361007
+  timestamp: 1757734548861
+- conda: https://conda.anaconda.org/conda-forge/win-64/cupy-13.6.0-py312h050d4bf_2.conda
+  sha256: 2f80b492e9bd02d36583caabc9933db381aa4313b25ff9b98e4386f39e2d6244
+  md5: 083c371b7832142e6ea9842088a96f55
+  depends:
+  - cuda-cudart-dev_win-64
+  - cuda-nvrtc
+  - cuda-version >=13,<14.0a0
+  - cupy-core 13.6.0 py312h7babc83_2
+  - libcublas
+  - libcufft
+  - libcurand
+  - libcusolver
+  - libcusparse
+  - python >=3.12,<3.13.0a0
   - python_abi 3.12.* *_cp312
+  license: MIT
+  license_family: MIT
+  purls: []
+  size: 361055
+  timestamp: 1757732736235
+- conda: https://conda.anaconda.org/conda-forge/win-64/cupy-13.6.0-py312hf676df9_2.conda
+  sha256: 6636ac902b44dbc8f8e14d8d2593d057af2f7b722b704edbe01600a2c90c752c
+  md5: 270e90ae04455f4f85b8763ec1755373
+  depends:
+  - cuda-cudart-dev_win-64
+  - cuda-nvrtc
+  - cuda-version >=12,<13.0a0
+  - cupy-core 13.6.0 py312hc3434b0_2
+  - libcublas
+  - libcufft
+  - libcurand
+  - libcusolver
+  - libcusparse
+  - python >=3.12,<3.13.0a0
+  - python_abi 3.12.* *_cp312
+  license: MIT
+  license_family: MIT
+  purls: []
+  size: 361552
+  timestamp: 1757734756770
+- conda: https://conda.anaconda.org/conda-forge/win-64/cupy-13.6.0-py313h5dfe2c3_2.conda
+  sha256: b36285a74901926ddab1b49e86936957715c1db476207c6e524338867eef9683
+  md5: 01e63e587cf8c7477d53a3e98782e81d
+  depends:
+  - cuda-cudart-dev_win-64
+  - cuda-nvrtc
+  - cuda-version >=12,<13.0a0
+  - cupy-core 13.6.0 py313ha16128a_2
+  - libcublas
+  - libcufft
+  - libcurand
+  - libcusolver
+  - libcusparse
+  - python >=3.13,<3.14.0a0
+  - python_abi 3.13.* *_cp313
+  license: MIT
+  license_family: MIT
+  purls: []
+  size: 361341
+  timestamp: 1757734712476
+- conda: https://conda.anaconda.org/conda-forge/win-64/cupy-13.6.0-py313h670e13b_2.conda
+  sha256: 13b870d34d8df1cd72a60892cc95f150d01e8915f4f11f92a7622602fbe847dc
+  md5: 1c75580206c0367647f7b23bfabb8a93
+  depends:
+  - cuda-cudart-dev_win-64
+  - cuda-nvrtc
+  - cuda-version >=13,<14.0a0
+  - cupy-core 13.6.0 py313haef2af9_2
+  - libcublas
+  - libcufft
+  - libcurand
+  - libcusolver
+  - libcusparse
+  - python >=3.13,<3.14.0a0
+  - python_abi 3.13.* *_cp313
+  license: MIT
+  license_family: MIT
+  purls: []
+  size: 361420
+  timestamp: 1757731939881
+- conda: https://conda.anaconda.org/conda-forge/win-64/cupy-13.6.0-py314h59d4d8c_2.conda
+  sha256: b0755d67f3e501cdfbf6ac6ed5a8a3a37adbb41df25bb6e2922e9b6c59919bd0
+  md5: f60dd8b8db34ab07021459d9a0ad4a8a
+  depends:
+  - cuda-cudart-dev_win-64
+  - cuda-nvrtc
+  - cuda-version >=13,<14.0a0
+  - cupy-core 13.6.0 py314hc101868_2
+  - libcublas
+  - libcufft
+  - libcurand
+  - libcusolver
+  - libcusparse
+  - python >=3.14.0rc2,<3.15.0a0
+  - python_abi 3.14.* *_cp314
+  license: MIT
+  license_family: MIT
+  purls: []
+  size: 361792
+  timestamp: 1757732239805
+- conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-core-13.6.0-py310hbc0d89f_2.conda
+  sha256: 59ee4ca6f4166e575b0f0174941bef5759035e1098abf8f3c6816cc497206c6e
+  md5: 54e7f3bcf179555759acc4341921f3db
+  depends:
+  - __glibc >=2.17,<3.0.a0
+  - fastrlock >=0.8.3,<0.9.0a0
+  - libgcc >=14
+  - libstdcxx >=14
+  - numpy >=1.22
+  - python >=3.10,<3.11.0a0
+  - python_abi 3.10.* *_cp310
+  constrains:
+  - nccl >=2.27.7.1,<3.0a0
+  - cutensor >=2.3.1.0,<3.0a0
+  - cupy >=13.6.0,<13.7.0a0
+  - scipy >=1.7,<1.17
+  - libcusolver >=11,<12.0a0
+  - libcufft >=11,<12.0a0
+  - libcusparse >=12,<13.0a0
+  - cuda-version >=12,<13.0a0
+  - cuda-nvrtc >=12,<13.0a0
+  - libcublas >=12,<13.0a0
+  - optuna ~=3.0
+  - libcurand >=10,<11.0a0
+  - __cuda >=12.0
+  license: MIT
+  license_family: MIT
+  purls:
+  - pkg:pypi/cupy?source=hash-mapping
+  size: 56537348
+  timestamp: 1757732911282
+- conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-core-13.6.0-py311he30c881_2.conda
+  sha256: 45e67d3a56d36935e4189b17e707bf6b887d21df6411fab9d835455a10250db8
+  md5: c9ca2bae852b83675f256aec6c518396
+  depends:
+  - __glibc >=2.17,<3.0.a0
+  - fastrlock >=0.8.3,<0.9.0a0
+  - libgcc >=14
+  - libstdcxx >=14
+  - numpy >=1.22
+  - python >=3.11,<3.12.0a0
+  - python_abi 3.11.* *_cp311
+  constrains:
+  - __cuda >=12.0
+  - cuda-version >=12,<13.0a0
+  - cupy >=13.6.0,<13.7.0a0
+  - nccl >=2.27.7.1,<3.0a0
+  - cuda-nvrtc >=12,<13.0a0
+  - cutensor >=2.3.1.0,<3.0a0
+  - libcusparse >=12,<13.0a0
+  - scipy >=1.7,<1.17
+  - libcufft >=11,<12.0a0
+  - libcurand >=10,<11.0a0
+  - optuna ~=3.0
+  - libcusolver >=11,<12.0a0
+  - libcublas >=12,<13.0a0
+  license: MIT
+  license_family: MIT
+  purls:
+  - pkg:pypi/cupy?source=hash-mapping
+  size: 56743670
+  timestamp: 1757732786905
+- conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-core-13.6.0-py312h16a6543_2.conda
+  sha256: ebe205ad39f19067898f4513816d8c44ac8036d0c4b9f1ee5aa0233e0f5dc1d7
+  md5: e0667d2bf17e4ff3bd50861f245ed961
+  depends:
+  - __glibc >=2.17,<3.0.a0
+  - fastrlock >=0.8.3,<0.9.0a0
+  - libgcc >=14
+  - libstdcxx >=14
+  - numpy >=1.22
+  - python >=3.12,<3.13.0a0
+  - python_abi 3.12.* *_cp312
+  constrains:
+  - cuda-nvrtc >=12,<13.0a0
+  - libcufft >=11,<12.0a0
+  - libcurand >=10,<11.0a0
+  - scipy >=1.7,<1.17
+  - optuna ~=3.0
+  - cutensor >=2.3.1.0,<3.0a0
+  - cuda-version >=12,<13.0a0
+  - cupy >=13.6.0,<13.7.0a0
+  - libcusparse >=12,<13.0a0
+  - libcusolver >=11,<12.0a0
+  - nccl >=2.27.7.1,<3.0a0
+  - libcublas >=12,<13.0a0
+  - __cuda >=12.0
+  license: MIT
+  license_family: MIT
+  purls:
+  - pkg:pypi/cupy?source=hash-mapping
+  size: 56720768
+  timestamp: 1757733006716
+- conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-core-13.6.0-py312h1a70bb2_2.conda
+  sha256: 955e08c61145c77fcafe91d88bded3fe7bfe87e46a08db2f1345980d56a5444d
+  md5: b7613be94326f391c4b6edd7f114d3ee
+  depends:
+  - __glibc >=2.28,<3.0.a0
+  - fastrlock >=0.8.3,<0.9.0a0
+  - libgcc >=14
+  - libstdcxx >=14
+  - numpy >=1.22
+  - python >=3.12,<3.13.0a0
+  - python_abi 3.12.* *_cp312
+  constrains:
+  - optuna ~=3.0
+  - libcurand >=10,<11.0a0
+  - cuda-nvrtc >=13,<14.0a0
+  - __cuda >=13.0
+  - nccl >=2.27.7.1,<3.0a0
+  - cuda-version >=13,<14.0a0
+  - libcublas >=13,<14.0a0
+  - libcusolver >=12,<13.0a0
+  - cupy >=13.6.0,<13.7.0a0
+  - cutensor >=2.3.1.0,<3.0a0
+  - libcufft >=12,<13.0a0
+  - scipy >=1.7,<1.17
+  - libcusparse >=12,<13.0a0
+  license: MIT
+  license_family: MIT
+  purls:
+  - pkg:pypi/cupy?source=hash-mapping
+  size: 31539281
+  timestamp: 1757731547163
+- conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-core-13.6.0-py313h0630d88_2.conda
+  sha256: 82c950c3118d81368ad0dee224ab946c963b57ccad34911cacdcc52fc046d792
+  md5: a2a6a0df7ef6e9ae482bae698cfd7476
+  depends:
+  - __glibc >=2.28,<3.0.a0
+  - fastrlock >=0.8.3,<0.9.0a0
+  - libgcc >=14
+  - libstdcxx >=14
+  - numpy >=1.22
+  - python >=3.13,<3.14.0a0
+  - python_abi 3.13.* *_cp313
+  constrains:
+  - libcusolver >=12,<13.0a0
+  - cuda-nvrtc >=13,<14.0a0
+  - cupy >=13.6.0,<13.7.0a0
+  - libcublas >=13,<14.0a0
+  - __cuda >=13.0
+  - libcufft >=12,<13.0a0
+  - optuna ~=3.0
+  - nccl >=2.27.7.1,<3.0a0
+  - cutensor >=2.3.1.0,<3.0a0
+  - libcurand >=10,<11.0a0
+  - scipy >=1.7,<1.17
+  - cuda-version >=13,<14.0a0
+  - libcusparse >=12,<13.0a0
+  license: MIT
+  license_family: MIT
+  purls:
+  - pkg:pypi/cupy?source=hash-mapping
+  size: 31734692
+  timestamp: 1757731531047
+- conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-core-13.6.0-py313h28b6081_2.conda
+  sha256: 56b2ebb09c8a74746f1dbf660bb7a50af562e9416a2f9733d8e8715503cca81a
+  md5: 388fb72307f756f7f2c7f5928647bc6b
+  depends:
+  - __glibc >=2.17,<3.0.a0
+  - fastrlock >=0.8.3,<0.9.0a0
+  - libgcc >=14
+  - libstdcxx >=14
+  - numpy >=1.22
+  - python >=3.13,<3.14.0a0
+  - python_abi 3.13.* *_cp313
+  constrains:
+  - libcusolver >=11,<12.0a0
+  - cuda-nvrtc >=12,<13.0a0
+  - libcufft >=11,<12.0a0
+  - libcurand >=10,<11.0a0
+  - cupy >=13.6.0,<13.7.0a0
+  - libcusparse >=12,<13.0a0
+  - scipy >=1.7,<1.17
+  - optuna ~=3.0
+  - cutensor >=2.3.1.0,<3.0a0
+  - __cuda >=12.0
+  - cuda-version >=12,<13.0a0
+  - nccl >=2.27.7.1,<3.0a0
+  - libcublas >=12,<13.0a0
+  license: MIT
+  license_family: MIT
+  purls:
+  - pkg:pypi/cupy?source=hash-mapping
+  size: 56733316
+  timestamp: 1757732780713
+- conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-core-13.6.0-py314h3ed1f13_2.conda
+  sha256: f377afaddccdaead2963bdabc3fa550e8d3e5d6aa6fc632cc01eadfd11442ef8
+  md5: d80c89a6489cb472feb8b009c34d3c11
+  depends:
+  - __glibc >=2.28,<3.0.a0
+  - fastrlock >=0.8.3,<0.9.0a0
+  - libgcc >=14
+  - libstdcxx >=14
+  - numpy >=1.22
+  - python >=3.14.0rc2,<3.15.0a0
+  - python_abi 3.14.* *_cp314
+  constrains:
+  - libcusparse >=12,<13.0a0
+  - scipy >=1.7,<1.17
+  - __cuda >=13.0
+  - cuda-version >=13,<14.0a0
+  - cutensor >=2.3.1.0,<3.0a0
+  - cupy >=13.6.0,<13.7.0a0
+  - libcusolver >=12,<13.0a0
+  - nccl >=2.27.7.1,<3.0a0
+  - optuna ~=3.0
+  - libcufft >=12,<13.0a0
+  - cuda-nvrtc >=13,<14.0a0
+  - libcublas >=13,<14.0a0
+  - libcurand >=10,<11.0a0
+  license: MIT
+  license_family: MIT
+  purls:
+  - pkg:pypi/cupy?source=hash-mapping
+  size: 31824862
+  timestamp: 1757731889554
+- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-core-13.6.0-py310h967c7ba_2.conda
+  sha256: ecc9ed490591577e7a0d9a994a1c4ae3a2f0b8605cdb3c67548fd8c1aeb48c95
+  md5: f77bbe8edf8f4c9e1be06aebb99bebec
+  depends:
+  - fastrlock >=0.8.3,<0.9.0a0
+  - libgcc >=14
+  - libstdcxx >=14
+  - numpy >=1.22
+  - python >=3.10,<3.11.0a0
+  - python >=3.10,<3.11.0a0 *_cpython
+  - python_abi 3.10.* *_cp310
+  constrains:
+  - scipy >=1.7,<1.17
+  - __cuda >=12.0
+  - libcufft >=11,<12.0a0
+  - libcurand >=10,<11.0a0
+  - libcusolver >=11,<12.0a0
+  - cuda-version >=12,<13.0a0
+  - libcusparse >=12,<13.0a0
+  - cupy >=13.6.0,<13.7.0a0
+  - cuda-nvrtc >=12,<13.0a0
+  - nccl >=2.27.7.1,<3.0a0
+  - libcublas >=12,<13.0a0
+  - optuna ~=3.0
+  - cutensor >=2.3.1.0,<3.0a0
+  license: MIT
+  license_family: MIT
+  purls:
+  - pkg:pypi/cupy?source=hash-mapping
+  size: 63896600
+  timestamp: 1757733496346
+- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-core-13.6.0-py311h6a7bbfe_2.conda
+  sha256: 8ebdcc75e0cf89d5f73b34dde93dad9387b76b883b197fe3e41cee7b116376fb
+  md5: 5f61a21425c550d4badcdbf96c8723f9
+  depends:
+  - fastrlock >=0.8.3,<0.9.0a0
+  - libgcc >=14
+  - libstdcxx >=14
+  - numpy >=1.22
+  - python >=3.11,<3.12.0a0
+  - python >=3.11,<3.12.0a0 *_cpython
+  - python_abi 3.11.* *_cp311
+  constrains:
+  - cupy >=13.6.0,<13.7.0a0
+  - libcufft >=11,<12.0a0
+  - cuda-version >=12,<13.0a0
+  - libcusolver >=11,<12.0a0
+  - __cuda >=12.0
+  - libcublas >=12,<13.0a0
+  - libcurand >=10,<11.0a0
+  - optuna ~=3.0
+  - cuda-nvrtc >=12,<13.0a0
+  - cutensor >=2.3.1.0,<3.0a0
+  - scipy >=1.7,<1.17
+  - libcusparse >=12,<13.0a0
+  - nccl >=2.27.7.1,<3.0a0
+  license: MIT
+  license_family: MIT
+  purls:
+  - pkg:pypi/cupy?source=hash-mapping
+  size: 64073249
+  timestamp: 1757733413707
+- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-core-13.6.0-py312hc495b10_2.conda
+  sha256: f862a404f82ca39e331802d3373d4b75dc4e53e885d8c5e6e222dfa59feab962
+  md5: 363b56bd0a936dc789f017ef904d4c75
+  depends:
+  - __glibc >=2.28,<3.0.a0
+  - fastrlock >=0.8.3,<0.9.0a0
+  - libgcc >=14
+  - libstdcxx >=14
+  - numpy >=1.22
+  - python >=3.12,<3.13.0a0
+  - python >=3.12,<3.13.0a0 *_cpython
+  - python_abi 3.12.* *_cp312
+  constrains:
+  - cuda-nvrtc >=13,<14.0a0
+  - __cuda >=13.0
+  - libcufft >=12,<13.0a0
+  - nccl >=2.27.7.1,<3.0a0
+  - cutensor >=2.3.1.0,<3.0a0
+  - libcusolver >=12,<13.0a0
+  - libcublas >=13,<14.0a0
+  - libcusparse >=12,<13.0a0
+  - cupy >=13.6.0,<13.7.0a0
+  - cuda-version >=13,<14.0a0
+  - scipy >=1.7,<1.17
+  - libcurand >=10,<11.0a0
+  - optuna ~=3.0
+  license: MIT
+  license_family: MIT
+  purls:
+  - pkg:pypi/cupy?source=hash-mapping
+  size: 36623787
+  timestamp: 1757732346566
+- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-core-13.6.0-py312hdcd7d0a_2.conda
+  sha256: bc3cf5f1f0b0b4653d573507087ee56bfa04900232133e87c9baebfe6a128612
+  md5: 07720f931f710f3d2061b0bdcb808b82
+  depends:
+  - fastrlock >=0.8.3,<0.9.0a0
+  - libgcc >=14
+  - libstdcxx >=14
+  - numpy >=1.22
+  - python >=3.12,<3.13.0a0
+  - python >=3.12,<3.13.0a0 *_cpython
+  - python_abi 3.12.* *_cp312
+  constrains:
+  - cuda-nvrtc >=12,<13.0a0
+  - cupy >=13.6.0,<13.7.0a0
+  - libcusparse >=12,<13.0a0
+  - cuda-version >=12,<13.0a0
+  - libcurand >=10,<11.0a0
+  - libcufft >=11,<12.0a0
+  - nccl >=2.27.7.1,<3.0a0
+  - libcusolver >=11,<12.0a0
+  - cutensor >=2.3.1.0,<3.0a0
+  - __cuda >=12.0
+  - libcublas >=12,<13.0a0
+  - optuna ~=3.0
+  - scipy >=1.7,<1.17
+  license: MIT
+  license_family: MIT
+  purls:
+  - pkg:pypi/cupy?source=hash-mapping
+  size: 63865734
+  timestamp: 1757733078190
+- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-core-13.6.0-py313h407dc6c_2.conda
+  sha256: 69a3cd03e492b76d29d66d83a533d132ffcb99f8f6831191ecb99e8372e8f76e
+  md5: ff191fa08a0238048035b0638e21220b
+  depends:
+  - __glibc >=2.28,<3.0.a0
+  - fastrlock >=0.8.3,<0.9.0a0
+  - libgcc >=14
+  - libstdcxx >=14
+  - numpy >=1.22
+  - python >=3.13,<3.14.0a0
+  - python >=3.13,<3.14.0a0 *_cp313
+  - python_abi 3.13.* *_cp313
+  constrains:
+  - cuda-nvrtc >=13,<14.0a0
+  - scipy >=1.7,<1.17
+  - nccl >=2.27.7.1,<3.0a0
+  - libcurand >=10,<11.0a0
+  - cuda-version >=13,<14.0a0
+  - libcusparse >=12,<13.0a0
+  - __cuda >=13.0
+  - libcufft >=12,<13.0a0
+  - optuna ~=3.0
+  - libcublas >=13,<14.0a0
+  - cupy >=13.6.0,<13.7.0a0
+  - libcusolver >=12,<13.0a0
+  - cutensor >=2.3.1.0,<3.0a0
+  license: MIT
+  license_family: MIT
+  purls:
+  - pkg:pypi/cupy?source=hash-mapping
+  size: 36458540
+  timestamp: 1757732319930
+- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-core-13.6.0-py313h6b3a76b_2.conda
+  sha256: 004036b2bd95684b64ae4a56663988da4026136c57be580de080e137b4e48b43
+  md5: 0fb0ebad3a2eb9f4c860465c47955131
+  depends:
+  - fastrlock >=0.8.3,<0.9.0a0
+  - libgcc >=14
+  - libstdcxx >=14
+  - numpy >=1.22
+  - python >=3.13,<3.14.0a0
+  - python >=3.13,<3.14.0a0 *_cp313
+  - python_abi 3.13.* *_cp313
+  constrains:
+  - __cuda >=12.0
+  - nccl >=2.27.7.1,<3.0a0
+  - cupy >=13.6.0,<13.7.0a0
+  - scipy >=1.7,<1.17
+  - libcurand >=10,<11.0a0
+  - libcusparse >=12,<13.0a0
+  - cuda-nvrtc >=12,<13.0a0
+  - cuda-version >=12,<13.0a0
+  - libcublas >=12,<13.0a0
+  - libcufft >=11,<12.0a0
+  - cutensor >=2.3.1.0,<3.0a0
+  - libcusolver >=11,<12.0a0
+  - optuna ~=3.0
+  license: MIT
+  license_family: MIT
+  purls:
+  - pkg:pypi/cupy?source=hash-mapping
+  size: 63810519
+  timestamp: 1757733528386
+- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-core-13.6.0-py314heaf0aa5_2.conda
+  sha256: 354bc9f675b433909ff8854371e4b3606817dc304c0cd95c63d67d480ab80462
+  md5: c37b0c4ab40f2b23de6ab31042b2476a
+  depends:
+  - __glibc >=2.28,<3.0.a0
+  - fastrlock >=0.8.3,<0.9.0a0
+  - libgcc >=14
+  - libstdcxx >=14
+  - numpy >=1.22
+  - python >=3.14.0rc2,<3.15.0a0
+  - python >=3.14.0rc2,<3.15.0a0 *_cp314
+  - python_abi 3.14.* *_cp314
   constrains:
-  - cuda-nvrtc >=12,<13.0a0
-  - cupy >=13.6.0,<13.7.0a0
+  - scipy >=1.7,<1.17
   - libcusparse >=12,<13.0a0
+  - libcusolver >=12,<13.0a0
+  - nccl >=2.27.7.1,<3.0a0
+  - cupy >=13.6.0,<13.7.0a0
+  - cuda-version >=13,<14.0a0
+  - cutensor >=2.3.1.0,<3.0a0
+  - cuda-nvrtc >=13,<14.0a0
+  - optuna ~=3.0
+  - libcufft >=12,<13.0a0
+  - __cuda >=13.0
+  - libcublas >=13,<14.0a0
+  - libcurand >=10,<11.0a0
+  license: MIT
+  license_family: MIT
+  purls:
+  - pkg:pypi/cupy?source=hash-mapping
+  size: 36619576
+  timestamp: 1757732433081
+- conda: https://conda.anaconda.org/conda-forge/win-64/cupy-core-13.6.0-py310h867cfc4_2.conda
+  sha256: 115131c370be8c410fba05e78d83f197b4c48a8b30fd8a32c3bd8d9a3ad80215
+  md5: 2b22c1eb70d5f8f86c4babac37703437
+  depends:
+  - fastrlock >=0.8.3,<0.9.0a0
+  - numpy >=1.22
+  - python >=3.10,<3.11.0a0
+  - python_abi 3.10.* *_cp310
+  - ucrt >=10.0.20348.0
+  - vc >=14.2,<15
+  - vc14_runtime >=14.29.30139
+  constrains:
   - cuda-version >=12,<13.0a0
+  - optuna ~=3.0
+  - scipy >=1.7,<1.17
+  - __cuda >=12.0
+  - cupy >=13.6.0,<13.7.0a0
+  - libcublas >=12,<13.0a0
   - libcurand >=10,<11.0a0
-  - libcufft >=11,<12.0a0
-  - nccl >=2.27.7.1,<3.0a0
   - libcusolver >=11,<12.0a0
   - cutensor >=2.3.1.0,<3.0a0
-  - __cuda >=12.0
+  - libcusparse >=12,<13.0a0
+  - libcufft >=11,<12.0a0
+  - cuda-nvrtc >=12,<13.0a0
+  license: MIT
+  license_family: MIT
+  purls:
+  - pkg:pypi/cupy?source=hash-mapping
+  size: 54351742
+  timestamp: 1757734211315
+- conda: https://conda.anaconda.org/conda-forge/win-64/cupy-core-13.6.0-py311h3f47771_2.conda
+  sha256: c874fd562f9750b468f009dfcc4ed6de6bb41ccc3bb6d65957ab3c1498613675
+  md5: 16602f6836e4e866c52a78a21feb1560
+  depends:
+  - fastrlock >=0.8.3,<0.9.0a0
+  - numpy >=1.22
+  - python >=3.11,<3.12.0a0
+  - python_abi 3.11.* *_cp311
+  - ucrt >=10.0.20348.0
+  - vc >=14.2,<15
+  - vc14_runtime >=14.29.30139
+  constrains:
+  - libcusolver >=11,<12.0a0
+  - libcurand >=10,<11.0a0
+  - libcufft >=11,<12.0a0
+  - libcusparse >=12,<13.0a0
+  - cuda-version >=12,<13.0a0
   - libcublas >=12,<13.0a0
   - optuna ~=3.0
+  - cutensor >=2.3.1.0,<3.0a0
+  - cuda-nvrtc >=12,<13.0a0
   - scipy >=1.7,<1.17
+  - __cuda >=12.0
+  - cupy >=13.6.0,<13.7.0a0
   license: MIT
   license_family: MIT
   purls:
   - pkg:pypi/cupy?source=hash-mapping
-  size: 63865734
-  timestamp: 1757733078190
+  size: 54764492
+  timestamp: 1757734470749
+- conda: https://conda.anaconda.org/conda-forge/win-64/cupy-core-13.6.0-py312h7babc83_2.conda
+  sha256: 18582d52c9abd1e5008af3e4bd38552b8410713777066ac8024415c99d7a83e8
+  md5: 33050f8f5af87ae8cbfbb4e40de61fbf
+  depends:
+  - fastrlock >=0.8.3,<0.9.0a0
+  - numpy >=1.22
+  - python >=3.12,<3.13.0a0
+  - python_abi 3.12.* *_cp312
+  - ucrt >=10.0.20348.0
+  - vc >=14.2,<15
+  - vc14_runtime >=14.29.30139
+  constrains:
+  - cupy >=13.6.0,<13.7.0a0
+  - libcurand >=10,<11.0a0
+  - cuda-version >=13,<14.0a0
+  - libcufft >=12,<13.0a0
+  - libcusolver >=12,<13.0a0
+  - cutensor >=2.3.1.0,<3.0a0
+  - __cuda >=13.0
+  - scipy >=1.7,<1.17
+  - libcusparse >=12,<13.0a0
+  - cuda-nvrtc >=13,<14.0a0
+  - optuna ~=3.0
+  - libcublas >=13,<14.0a0
+  license: MIT
+  license_family: MIT
+  purls:
+  - pkg:pypi/cupy?source=hash-mapping
+  size: 29913800
+  timestamp: 1757732657370
 - conda: https://conda.anaconda.org/conda-forge/win-64/cupy-core-13.6.0-py312hc3434b0_2.conda
   sha256: 9aae135cb29962786adafa0b3bae094f9fce0b4ca386aaaa7d038ae518efcba6
   md5: 9e1c32b5b8172ae6666850b583355257
@@ -13033,6 +14091,96 @@ packages:
   - pkg:pypi/cupy?source=hash-mapping
   size: 54685402
   timestamp: 1757734676711
+- conda: https://conda.anaconda.org/conda-forge/win-64/cupy-core-13.6.0-py313ha16128a_2.conda
+  sha256: 7df3f437c45ba61754643a2c61f4e6c7c5b4be3bf58fa029d39e4fc8ddb7e54b
+  md5: 5a270c8af5e377ff40932ce8ec8472e3
+  depends:
+  - fastrlock >=0.8.3,<0.9.0a0
+  - numpy >=1.22
+  - python >=3.13,<3.14.0a0
+  - python_abi 3.13.* *_cp313
+  - ucrt >=10.0.20348.0
+  - vc >=14.2,<15
+  - vc14_runtime >=14.29.30139
+  constrains:
+  - libcusolver >=11,<12.0a0
+  - cuda-version >=12,<13.0a0
+  - __cuda >=12.0
+  - scipy >=1.7,<1.17
+  - libcublas >=12,<13.0a0
+  - optuna ~=3.0
+  - cuda-nvrtc >=12,<13.0a0
+  - cutensor >=2.3.1.0,<3.0a0
+  - libcusparse >=12,<13.0a0
+  - cupy >=13.6.0,<13.7.0a0
+  - libcufft >=11,<12.0a0
+  - libcurand >=10,<11.0a0
+  license: MIT
+  license_family: MIT
+  purls:
+  - pkg:pypi/cupy?source=hash-mapping
+  size: 54751648
+  timestamp: 1757734626461
+- conda: https://conda.anaconda.org/conda-forge/win-64/cupy-core-13.6.0-py313haef2af9_2.conda
+  sha256: b207060087be5dcb79c533d4d160730f3a7de23d5e96253fe0770b1dc03cc124
+  md5: 60df31229f6e6084a0c8a7ee07976133
+  depends:
+  - fastrlock >=0.8.3,<0.9.0a0
+  - numpy >=1.22
+  - python >=3.13,<3.14.0a0
+  - python_abi 3.13.* *_cp313
+  - ucrt >=10.0.20348.0
+  - vc >=14.2,<15
+  - vc14_runtime >=14.29.30139
+  constrains:
+  - __cuda >=13.0
+  - cuda-nvrtc >=13,<14.0a0
+  - scipy >=1.7,<1.17
+  - libcusolver >=12,<13.0a0
+  - libcublas >=13,<14.0a0
+  - libcusparse >=12,<13.0a0
+  - libcufft >=12,<13.0a0
+  - cuda-version >=13,<14.0a0
+  - cutensor >=2.3.1.0,<3.0a0
+  - libcurand >=10,<11.0a0
+  - cupy >=13.6.0,<13.7.0a0
+  - optuna ~=3.0
+  license: MIT
+  license_family: MIT
+  purls:
+  - pkg:pypi/cupy?source=hash-mapping
+  size: 29792078
+  timestamp: 1757731883397
+- conda: https://conda.anaconda.org/conda-forge/win-64/cupy-core-13.6.0-py314hc101868_2.conda
+  sha256: f6533a698dd95c9d18efea957d3b524906f9fed4d69ba67b51b158be8aa51a64
+  md5: 06b1af7b5254c0864e82e1105f9f0f2e
+  depends:
+  - fastrlock >=0.8.3,<0.9.0a0
+  - numpy >=1.22
+  - python >=3.14.0rc2,<3.15.0a0
+  - python_abi 3.14.* *_cp314
+  - ucrt >=10.0.20348.0
+  - vc >=14.2,<15
+  - vc14_runtime >=14.29.30139
+  constrains:
+  - cutensor >=2.3.1.0,<3.0a0
+  - libcublas >=13,<14.0a0
+  - cuda-version >=13,<14.0a0
+  - scipy >=1.7,<1.17
+  - cuda-nvrtc >=13,<14.0a0
+  - cupy >=13.6.0,<13.7.0a0
+  - libcurand >=10,<11.0a0
+  - libcusparse >=12,<13.0a0
+  - optuna ~=3.0
+  - libcufft >=12,<13.0a0
+  - __cuda >=13.0
+  - libcusolver >=12,<13.0a0
+  license: MIT
+  license_family: MIT
+  purls:
+  - pkg:pypi/cupy?source=hash-mapping
+  size: 29836019
+  timestamp: 1757732178441
 - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda
   sha256: 6d977f0b2fc24fee21a9554389ab83070db341af6d6f09285360b2e09ef8b26e
   md5: 003b8ba0a94e2f1e117d0bd46aebc901
@@ -13076,6 +14224,38 @@ packages:
   - pkg:pypi/execnet?source=hash-mapping
   size: 39499
   timestamp: 1762974150770
+- conda: https://conda.anaconda.org/conda-forge/linux-64/fastrlock-0.8.3-py310h25320af_2.conda
+  sha256: 95eea806cb216036e4d0446fcff724c334c8899d02be2368a430ec5361ed29a4
+  md5: 8dbd4fc06661c78fdc2daedf23824bfe
+  depends:
+  - python
+  - libgcc >=14
+  - libstdcxx >=14
+  - libgcc >=14
+  - __glibc >=2.17,<3.0.a0
+  - python_abi 3.10.* *_cp310
+  license: MIT
+  license_family: MIT
+  purls:
+  - pkg:pypi/fastrlock?source=hash-mapping
+  size: 40665
+  timestamp: 1756729198132
+- conda: https://conda.anaconda.org/conda-forge/linux-64/fastrlock-0.8.3-py311hc665b79_2.conda
+  sha256: 5299a4aeaf04fbc2f8f46e707ae16c1f4e594905e6df18457f18ba002a886110
+  md5: ac18884886449ce97b76f8906462ff27
+  depends:
+  - python
+  - libgcc >=14
+  - libstdcxx >=14
+  - libgcc >=14
+  - __glibc >=2.17,<3.0.a0
+  - python_abi 3.11.* *_cp311
+  license: MIT
+  license_family: MIT
+  purls:
+  - pkg:pypi/fastrlock?source=hash-mapping
+  size: 41082
+  timestamp: 1756729161435
 - conda: https://conda.anaconda.org/conda-forge/linux-64/fastrlock-0.8.3-py312h8285ef7_2.conda
   sha256: b0e5b19d2148816914920fe5c3148d5b5bf7c46bc34a2cac5124883bd1b83d05
   md5: 94fb93ec1751a3614d3a6f184832fd87
@@ -13090,24 +14270,154 @@ packages:
   license_family: MIT
   purls:
   - pkg:pypi/fastrlock?source=hash-mapping
-  size: 41672
-  timestamp: 1756729175159
-- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/fastrlock-0.8.3-py312hf55c4e8_2.conda
-  sha256: 5c5cfaf55a0165c45ee63beb92abf4aa2ae1ef28d8064f7c884749ec4bd00a22
-  md5: 7ec9d6889be02f9bf66cfb9dd3112c8b
+  size: 41672
+  timestamp: 1756729175159
+- conda: https://conda.anaconda.org/conda-forge/linux-64/fastrlock-0.8.3-py313h5d5ffb9_2.conda
+  sha256: 30498ed45133f457fd9ed14d5fac6512347f05d11fe1ed89842c7dfdb516f78f
+  md5: 9bcbd351966dc56a24fc0c368da5ad99
+  depends:
+  - python
+  - __glibc >=2.17,<3.0.a0
+  - libstdcxx >=14
+  - libgcc >=14
+  - python_abi 3.13.* *_cp313
+  license: MIT
+  license_family: MIT
+  purls:
+  - pkg:pypi/fastrlock?source=hash-mapping
+  size: 41201
+  timestamp: 1756729160955
+- conda: https://conda.anaconda.org/conda-forge/linux-64/fastrlock-0.8.3-py314h8c728da_2.conda
+  sha256: 1ea4fd24e37d27692b04b57fa51f14fd2217ea251087ce1c0701af234c1452d9
+  md5: f1f936bb0ff435f3190ca1c17fa327e7
+  depends:
+  - python
+  - libstdcxx >=14
+  - libgcc >=14
+  - __glibc >=2.17,<3.0.a0
+  - libgcc >=14
+  - python_abi 3.14.* *_cp314
+  license: MIT
+  license_family: MIT
+  purls:
+  - pkg:pypi/fastrlock?source=hash-mapping
+  size: 41496
+  timestamp: 1756729160091
+- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/fastrlock-0.8.3-py310heccc163_2.conda
+  sha256: aad519c924568a72bd4dcab74c793d4b09e339dce6bd3c5c027bd498eef7ccc4
+  md5: caafa6b88cc2cff22a72280c8f083a31
+  depends:
+  - python
+  - python 3.10.* *_cpython
+  - libgcc >=14
+  - libstdcxx >=14
+  - libgcc >=14
+  - python_abi 3.10.* *_cp310
+  license: MIT
+  license_family: MIT
+  purls:
+  - pkg:pypi/fastrlock?source=hash-mapping
+  size: 44918
+  timestamp: 1756729193056
+- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/fastrlock-0.8.3-py311h8e4e6a5_2.conda
+  sha256: f186881661b83be9fb8d47c71340997b929fa5e0673ead9070082b8e390d6a73
+  md5: 9251413f2e3ea6eb586b21423f849536
+  depends:
+  - python
+  - libstdcxx >=14
+  - libgcc >=14
+  - python 3.11.* *_cpython
+  - libgcc >=14
+  - python_abi 3.11.* *_cp311
+  license: MIT
+  license_family: MIT
+  purls:
+  - pkg:pypi/fastrlock?source=hash-mapping
+  size: 45171
+  timestamp: 1756729186510
+- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/fastrlock-0.8.3-py312hf55c4e8_2.conda
+  sha256: 5c5cfaf55a0165c45ee63beb92abf4aa2ae1ef28d8064f7c884749ec4bd00a22
+  md5: 7ec9d6889be02f9bf66cfb9dd3112c8b
+  depends:
+  - python
+  - libgcc >=14
+  - python 3.12.* *_cpython
+  - libstdcxx >=14
+  - libgcc >=14
+  - python_abi 3.12.* *_cp312
+  license: MIT
+  license_family: MIT
+  purls:
+  - pkg:pypi/fastrlock?source=hash-mapping
+  size: 45432
+  timestamp: 1756729166837
+- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/fastrlock-0.8.3-py313h59403f9_2.conda
+  sha256: e28da81b99c8970e19e4f3ef7758a7a695263e0d3ff7d9fbdf232690bef6519d
+  md5: 59043167df894cee605e4cf470302bda
+  depends:
+  - python
+  - python 3.13.* *_cp313
+  - libgcc >=14
+  - libstdcxx >=14
+  - libgcc >=14
+  - python_abi 3.13.* *_cp313
+  license: MIT
+  license_family: MIT
+  purls:
+  - pkg:pypi/fastrlock?source=hash-mapping
+  size: 44449
+  timestamp: 1756729165562
+- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/fastrlock-0.8.3-py314h3642cf7_2.conda
+  sha256: 512662de1d9d4231feaf6f818014317dad4f2a60d8ef0d859f72116a69062583
+  md5: 685382bf317bd1d7f174e763c91d98a1
+  depends:
+  - python
+  - python 3.14.* *_cp314
+  - libstdcxx >=14
+  - libgcc >=14
+  - python_abi 3.14.* *_cp314
+  license: MIT
+  license_family: MIT
+  purls:
+  - pkg:pypi/fastrlock?source=hash-mapping
+  size: 44705
+  timestamp: 1756729193250
+- conda: https://conda.anaconda.org/conda-forge/win-64/fastrlock-0.8.3-py310h699e580_2.conda
+  sha256: 57deb00090c09edc841a43499f23396bb35d51aa5aaa6886d4ae1d0ff969b3dd
+  md5: 3207527dea58c115e7e97856709465db
+  depends:
+  - python
+  - vc >=14.3,<15
+  - vc14_runtime >=14.44.35208
+  - ucrt >=10.0.20348.0
+  - vc >=14.3,<15
+  - vc14_runtime >=14.44.35208
+  - ucrt >=10.0.20348.0
+  - python_abi 3.10.* *_cp310
+  license: MIT
+  license_family: MIT
+  purls:
+  - pkg:pypi/fastrlock?source=hash-mapping
+  size: 36960
+  timestamp: 1756729187087
+- conda: https://conda.anaconda.org/conda-forge/win-64/fastrlock-0.8.3-py311h5dfdfe8_2.conda
+  sha256: dd0a2552a36565545aedc65739ffc11574167c263340b32ff6314ce998168e08
+  md5: 4fb7d2650ac4a3967e8e57d68e801db3
   depends:
   - python
-  - libgcc >=14
-  - python 3.12.* *_cpython
-  - libstdcxx >=14
-  - libgcc >=14
-  - python_abi 3.12.* *_cp312
+  - vc >=14.3,<15
+  - vc14_runtime >=14.44.35208
+  - ucrt >=10.0.20348.0
+  - vc >=14.3,<15
+  - vc14_runtime >=14.44.35208
+  - ucrt >=10.0.20348.0
+  - python_abi 3.11.* *_cp311
   license: MIT
   license_family: MIT
   purls:
   - pkg:pypi/fastrlock?source=hash-mapping
-  size: 45432
-  timestamp: 1756729166837
+  size: 37145
+  timestamp: 1756729198099
 - conda: https://conda.anaconda.org/conda-forge/win-64/fastrlock-0.8.3-py312ha1a9051_2.conda
   sha256: bbcc17eb4acf110032fe8092d4e54a6d262b72d504597103e72a958fb248579f
   md5: b6ff9e7af087d51a24353f16d1a3ed06
@@ -13126,6 +14436,42 @@ packages:
   - pkg:pypi/fastrlock?source=hash-mapping
   size: 37498
   timestamp: 1756729168844
+- conda: https://conda.anaconda.org/conda-forge/win-64/fastrlock-0.8.3-py313h927ade5_2.conda
+  sha256: 2a23cce182f04de8e522d47a9e41f9f9a85eb25a2d67d52356ce1d6522bbbe79
+  md5: 1fc8d6295c7ebff653118d2ba22cf226
+  depends:
+  - python
+  - vc >=14.3,<15
+  - vc14_runtime >=14.44.35208
+  - ucrt >=10.0.20348.0
+  - vc >=14.3,<15
+  - vc14_runtime >=14.44.35208
+  - ucrt >=10.0.20348.0
+  - python_abi 3.13.* *_cp313
+  license: MIT
+  license_family: MIT
+  purls:
+  - pkg:pypi/fastrlock?source=hash-mapping
+  size: 36385
+  timestamp: 1756729186432
+- conda: https://conda.anaconda.org/conda-forge/win-64/fastrlock-0.8.3-py314h8b4fd5f_2.conda
+  sha256: 1d341146022014b0f0d4b33630ba1757246dd6b5ecefdada0d49e6db774a18a9
+  md5: ac8c973aff08071df98933eccd5a7fa5
+  depends:
+  - python
+  - vc >=14.3,<15
+  - vc14_runtime >=14.44.35208
+  - ucrt >=10.0.20348.0
+  - vc >=14.3,<15
+  - vc14_runtime >=14.44.35208
+  - ucrt >=10.0.20348.0
+  - python_abi 3.14.* *_cp314
+  license: MIT
+  license_family: MIT
+  purls:
+  - pkg:pypi/fastrlock?source=hash-mapping
+  size: 36661
+  timestamp: 1756729190828
 - pypi: https://files.pythonhosted.org/packages/5c/40/69ca9ea803303e14301fff9d4931b6d080b9603e134df0419c55e9764df4/filecheck-1.0.3-py3-none-any.whl
   name: filecheck
   version: 1.0.3
@@ -13975,6 +15321,32 @@ packages:
   purls: []
   size: 68079
   timestamp: 1765819124349
+- conda: https://conda.anaconda.org/conda-forge/linux-64/libcublas-12.0.1.189-hd3aeb46_3.conda
+  sha256: a3c89c1b6018d16c22fc583887f728b3065a1f50a82d8a40a793a973aac606c5
+  md5: 626745031f369cf70670283436cc6742
+  depends:
+  - __glibc >=2.17,<3.0.a0
+  - cuda-nvrtc
+  - cuda-version >=12.0,<12.1.0a0
+  - libgcc-ng >=12
+  - libstdcxx-ng >=12
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 233989011
+  timestamp: 1701931830910
+- conda: https://conda.anaconda.org/conda-forge/linux-64/libcublas-12.2.5.6-hd3aeb46_0.conda
+  sha256: 7af6a21b53736b5a53c1044808ffd781a6ee1f0a66b618bf3c834a71bdb706aa
+  md5: c216c28589360a5acee904b480911c14
+  depends:
+  - __glibc >=2.17,<3.0.a0
+  - cuda-nvrtc
+  - cuda-version >=12.2,<12.3.0a0
+  - libgcc-ng >=12
+  - libstdcxx-ng >=12
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 258710189
+  timestamp: 1702976169266
 - conda: https://conda.anaconda.org/conda-forge/linux-64/libcublas-12.8.4.1-h9ab20c4_1.conda
   sha256: 3d3f7344db000feced2f9154cf0b3f3d245a1d317a1981e43b8b15f7baaaf6f1
   md5: 3ba4fd8bef181c020173d29ac67cae68
@@ -14027,6 +15399,47 @@ packages:
   purls: []
   size: 393920044
   timestamp: 1764897195935
+- conda: https://conda.anaconda.org/conda-forge/linux-64/libcublas-13.2.1.1-h676940d_0.conda
+  sha256: c38f5041d0a99d94cee17f26029e4c02f3247bfb39cbe12d8f2c3dcf5f656eaa
+  md5: f904a04f3e173de15d3c31bd3dfc21c7
+  depends:
+  - __glibc >=2.28,<3.0.a0
+  - cuda-nvrtc
+  - cuda-version >=13.1,<13.2.0a0
+  - libgcc >=14
+  - libstdcxx >=14
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 376501341
+  timestamp: 1768276465220
+- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcublas-12.0.1.189-hac28a21_3.conda
+  sha256: befa2389febbff1541fa2bf542c98b3b32f2c569c53fca95c439796224c0dae3
+  md5: b16ccbf9d633bdce9cf5b3363a468c41
+  depends:
+  - cuda-nvrtc
+  - cuda-version >=12.0,<12.1.0a0
+  - libgcc-ng >=12
+  - libstdcxx-ng >=12
+  constrains:
+  - arm-variant * sbsa
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 233886126
+  timestamp: 1701931743428
+- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcublas-12.2.5.6-hac28a21_0.conda
+  sha256: 52ae33e756f22f7a82038a409d58ce52fa8a9c45896417662f101d83c36139a6
+  md5: b8f9003432a6b58e2bbd174910f9df84
+  depends:
+  - cuda-nvrtc
+  - cuda-version >=12.2,<12.3.0a0
+  - libgcc-ng >=12
+  - libstdcxx-ng >=12
+  constrains:
+  - arm-variant * sbsa
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 259896450
+  timestamp: 1702976080471
 - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcublas-12.8.4.1-hd55a8e4_1.conda
   sha256: 7d10a5b2750faccc39dd66d28ca5b74cb618d3445ed8c933d51736dba2b7bcc4
   md5: 8d6b39fb6f62e3e1b278774c00b115ac
@@ -14088,6 +15501,48 @@ packages:
   purls: []
   size: 516220026
   timestamp: 1764897082131
+- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcublas-13.2.1.1-he38c790_0.conda
+  sha256: ba0e73bc783f6eb34770dbd2296c437b1b4c8ea888ac76beb2fe30643eb62883
+  md5: 295ab160a641ff6f42b9ba50669f7e1a
+  depends:
+  - __glibc >=2.28,<3.0.a0
+  - arm-variant * sbsa
+  - cuda-nvrtc
+  - cuda-version >=13.1,<13.2.0a0
+  - libgcc >=14
+  - libstdcxx >=14
+  constrains:
+  - arm-variant * sbsa
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 481495336
+  timestamp: 1768276502914
+- conda: https://conda.anaconda.org/conda-forge/win-64/libcublas-12.0.1.189-h63175ca_3.conda
+  sha256: d39c6d2e01dad4e9b06707f7343150e423042fd2c65cc5772333ab82d4132bb1
+  md5: c69ce5f6ea90ad064df6960636acaf15
+  depends:
+  - cuda-nvrtc
+  - cuda-version >=12.0,<12.1.0a0
+  - ucrt >=10.0.20348.0
+  - vc >=14.2,<15
+  - vc14_runtime >=14.29.30139
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 247048799
+  timestamp: 1701932385460
+- conda: https://conda.anaconda.org/conda-forge/win-64/libcublas-12.2.5.6-h63175ca_0.conda
+  sha256: 2e0abbb96a9aefd0e6c284df7ca6223e48ee55304cb0fea72cd7db59489eac8e
+  md5: d695bf389c6314948a130aa6334c58c2
+  depends:
+  - cuda-nvrtc
+  - cuda-version >=12.2,<12.3.0a0
+  - ucrt >=10.0.20348.0
+  - vc >=14.2,<15
+  - vc14_runtime >=14.29.30139
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 288422075
+  timestamp: 1702976743208
 - conda: https://conda.anaconda.org/conda-forge/win-64/libcublas-12.8.4.1-he0c23c2_1.conda
   sha256: 7a4c53bbcf77c37033777acd1ff60b4664615ae67fff245718d43db422feac59
   md5: 626453d0b7f7b9f3c3a92e4398314714
@@ -14140,6 +15595,19 @@ packages:
   purls: []
   size: 388564116
   timestamp: 1764897124611
+- conda: https://conda.anaconda.org/conda-forge/win-64/libcublas-13.2.1.1-hac47afa_0.conda
+  sha256: 0e7180aed3a41eff2c5a3df079abb3ea86612eea18f833febe858cebac0a3e96
+  md5: d56da2a29117df5d879594b5e58fc3a5
+  depends:
+  - cuda-nvrtc
+  - cuda-version >=13.1,<13.2.0a0
+  - ucrt >=10.0.20348.0
+  - vc >=14.3,<15
+  - vc14_runtime >=14.44.35208
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 371899218
+  timestamp: 1768276556597
 - conda: https://conda.anaconda.org/conda-forge/linux-64/libcudnn-9.10.2.21-hf7e9902_0.conda
   sha256: dc6b89e874867b2cdf08224059bd1543cbb72ed646da177c1454596469c9a4bb
   md5: a178a1f3642521f104ecceeefa138d01
@@ -14293,6 +15761,30 @@ packages:
   purls: []
   size: 61127411
   timestamp: 1761105599209
+- conda: https://conda.anaconda.org/conda-forge/linux-64/libcufft-11.0.0.21-hd3aeb46_2.conda
+  sha256: ed62279e20761c033525a550dc753327103f53aa37bf441c40db2f37950b7b50
+  md5: 5dbf17a732e01fed414a22bdf89aaaad
+  depends:
+  - __glibc >=2.17,<3.0.a0
+  - cuda-version >=12.0,<12.1.0a0
+  - libgcc-ng >=12
+  - libstdcxx-ng >=12
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 44795345
+  timestamp: 1701904310549
+- conda: https://conda.anaconda.org/conda-forge/linux-64/libcufft-11.0.8.103-hd3aeb46_0.conda
+  sha256: af72a643d81c2401be7e5ccb8f2eb033e8254531ccd521101e9af8609817b5bf
+  md5: e6ca97f313721442e41e725ce7b3b75a
+  depends:
+  - __glibc >=2.17,<3.0.a0
+  - cuda-version >=12.2,<12.3.0a0
+  - libgcc-ng >=12
+  - libstdcxx-ng >=12
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 62856006
+  timestamp: 1702938780985
 - conda: https://conda.anaconda.org/conda-forge/linux-64/libcufft-11.3.3.83-h5888daf_1.conda
   sha256: 1a38727a9666b7020ad844fd5074693b2c378d0161f58401d9f8488bdeb920a1
   md5: d0d12b6842be47267e3214e7ab2b1b02
@@ -14341,6 +15833,44 @@ packages:
   purls: []
   size: 192378644
   timestamp: 1764880073980
+- conda: https://conda.anaconda.org/conda-forge/linux-64/libcufft-12.1.0.78-hecca717_0.conda
+  sha256: 4f8951e942210116ee6e1548c25774009afddc59e494b5eac0e5ca539196d1b5
+  md5: 58a7aa38206ea03a9eb6ccbcc012901e
+  depends:
+  - __glibc >=2.17,<3.0.a0
+  - cuda-version >=13.1,<13.2.0a0
+  - libgcc >=14
+  - libstdcxx >=14
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 192379210
+  timestamp: 1768273636415
+- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcufft-11.0.0.21-hac28a21_2.conda
+  sha256: c9647dedc5da9a60ca1d88e8f82a42e7b1837f3d2bccd294bb46b218795d498e
+  md5: cbd87df968670b2d4d752b22657591fe
+  depends:
+  - cuda-version >=12.0,<12.1.0a0
+  - libgcc-ng >=12
+  - libstdcxx-ng >=12
+  constrains:
+  - arm-variant * sbsa
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 44814329
+  timestamp: 1701904278310
+- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcufft-11.0.8.103-hac28a21_0.conda
+  sha256: ba19464e72391d1f7b45b862fa93c8e87cb0821148ae36b91cadcb3833f35b57
+  md5: b7a1c44db1312dd191ff21ecd82076c5
+  depends:
+  - cuda-version >=12.2,<12.3.0a0
+  - libgcc-ng >=12
+  - libstdcxx-ng >=12
+  constrains:
+  - arm-variant * sbsa
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 62915617
+  timestamp: 1702938781901
 - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcufft-11.3.3.83-h3ae8b8a_1.conda
   sha256: d5cb9df683d7ea22184714b5c0569a5decf0a332d81c241b60ff68599a5ccc06
   md5: 093577dd6d3b9be7d3f7a6ecb01dcf01
@@ -14394,6 +15924,44 @@ packages:
   purls: []
   size: 192843651
   timestamp: 1764880098927
+- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcufft-12.1.0.78-h8f3c8d4_0.conda
+  sha256: 82f4715e0c6aa59080531d816bb847e3096635625645fdd8046fa6c1d248ef2e
+  md5: 1bd80ebee861a876bdf7860d559f4866
+  depends:
+  - arm-variant * sbsa
+  - cuda-version >=13.1,<13.2.0a0
+  - libgcc >=14
+  - libstdcxx >=14
+  constrains:
+  - arm-variant * sbsa
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 192700443
+  timestamp: 1768273669731
+- conda: https://conda.anaconda.org/conda-forge/win-64/libcufft-11.0.0.21-h63175ca_2.conda
+  sha256: b06554c3106338de6dd85b6b697dfd27d823067adcf0e7236110fa0ea49cc6b9
+  md5: 403b53342b3588579e16772a18722739
+  depends:
+  - cuda-version >=12.0,<12.1.0a0
+  - ucrt >=10.0.20348.0
+  - vc >=14.2,<15
+  - vc14_runtime >=14.29.30139
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 44262157
+  timestamp: 1701904877029
+- conda: https://conda.anaconda.org/conda-forge/win-64/libcufft-11.0.8.103-h63175ca_0.conda
+  sha256: af4f043218b7584fe9c1d4f0cf40edfdfd01637fedbfaf100273a8ba131dafc0
+  md5: 3e0d3168dcaea961f6ffa665b0c27c40
+  depends:
+  - cuda-version >=12.2,<12.3.0a0
+  - ucrt >=10.0.20348.0
+  - vc >=14.2,<15
+  - vc14_runtime >=14.29.30139
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 63117431
+  timestamp: 1702939178613
 - conda: https://conda.anaconda.org/conda-forge/win-64/libcufft-11.3.3.83-he0c23c2_1.conda
   sha256: 083ba1d13f5512dae13fd7e3785336d578bc66f01c88917bbf1f53923339a5e4
   md5: 6e4c0fa04966e643cbe847321bdeee54
@@ -14442,6 +16010,18 @@ packages:
   purls: []
   size: 192328577
   timestamp: 1764880153393
+- conda: https://conda.anaconda.org/conda-forge/win-64/libcufft-12.1.0.78-hac47afa_0.conda
+  sha256: 7bf34c7298350325e0f23b2483f53e015fff446c03dd8d75c500cc5dbb5cee62
+  md5: a8ce534392102f2b3109dcee4702468a
+  depends:
+  - cuda-version >=13.1,<13.2.0a0
+  - ucrt >=10.0.20348.0
+  - vc >=14.3,<15
+  - vc14_runtime >=14.44.35208
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 192328586
+  timestamp: 1768273720164
 - conda: https://conda.anaconda.org/conda-forge/linux-64/libcufile-1.13.1.3-h628e99a_1.conda
   sha256: 213f5df6ed25d19c4390666708a32ea457b1dcda64aca121f861b94671e2ed63
   md5: 9a97a35e7e63910013d638c389fa3514
@@ -14666,6 +16246,18 @@ packages:
   purls: []
   size: 43737577
   timestamp: 1764879942081
+- conda: https://conda.anaconda.org/conda-forge/linux-64/libcurand-10.4.1.81-h676940d_0.conda
+  sha256: bba28a650b35f221eaad9537df4a6f1d86b2fa617e52f56194ad2a959f84736c
+  md5: 5926fbc6df184a110130a310608cb5e8
+  depends:
+  - __glibc >=2.28,<3.0.a0
+  - cuda-version >=13.1,<13.2.0a0
+  - libgcc >=14
+  - libstdcxx >=14
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 43775293
+  timestamp: 1768273736749
 - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcurand-10.3.1.50-hac28a21_1.conda
   sha256: 6c749658411c13e639977cce1da74dfacb693c4348fadffe09780c04fa4809b5
   md5: 72936062b7c649fc03b0a52e2ba54275
@@ -14749,6 +16341,21 @@ packages:
   purls: []
   size: 44154661
   timestamp: 1764879984766
+- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcurand-10.4.1.81-he38c790_0.conda
+  sha256: ef4300b83ea202e459e917a4f159478074fdc10c51f3061374361e9b89b6ba04
+  md5: b02eb8fbb430bd99f7a870382a91c24d
+  depends:
+  - __glibc >=2.28,<3.0.a0
+  - arm-variant * sbsa
+  - cuda-version >=13.1,<13.2.0a0
+  - libgcc >=14
+  - libstdcxx >=14
+  constrains:
+  - arm-variant * sbsa
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 44099763
+  timestamp: 1768273767993
 - conda: https://conda.anaconda.org/conda-forge/win-64/libcurand-10.3.1.50-h63175ca_1.conda
   sha256: 3030074dcf96f4e397e4ba778d802900249a61388876cde06dc97257b2a2bc16
   md5: af9c9c9ae729b884dcc5dc48b3bb205a
@@ -14821,6 +16428,47 @@ packages:
   purls: []
   size: 46140551
   timestamp: 1764880079531
+- conda: https://conda.anaconda.org/conda-forge/win-64/libcurand-10.4.1.81-hac47afa_0.conda
+  sha256: 807515b768161a684b097a6959fabd013fad813ca595b3fd25e9b53b0c796487
+  md5: 753cb0f8717a35b53215a18c009953b2
+  depends:
+  - cuda-version >=13.1,<13.2.0a0
+  - ucrt >=10.0.20348.0
+  - vc >=14.3,<15
+  - vc14_runtime >=14.44.35208
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 46201230
+  timestamp: 1768273862521
+- conda: https://conda.anaconda.org/conda-forge/linux-64/libcusolver-11.4.2.57-hd3aeb46_2.conda
+  sha256: 65e4acdce5c358c57f0d263c87c39346695d0954855868bff60cb066043c7632
+  md5: a684e4ff8d2a6a100249377aa9d37a5c
+  depends:
+  - __glibc >=2.17,<3.0.a0
+  - cuda-version >=12.0,<12.1.0a0
+  - libcublas >=12.0.1.189,<12.1.0a0
+  - libgcc-ng >=12
+  - libnvjitlink >=12.0.76,<13.0.0a0
+  - libstdcxx-ng >=12
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 58748256
+  timestamp: 1701944344928
+- conda: https://conda.anaconda.org/conda-forge/linux-64/libcusolver-11.5.2.141-hd3aeb46_0.conda
+  sha256: a83322a1ede77e652acc3330d68f0428e28b198c3f7517bd3f1aeaf577232363
+  md5: 4ee6abbff18849a3036a1678771e4800
+  depends:
+  - __glibc >=2.17,<3.0.a0
+  - cuda-version >=12.2,<12.3.0a0
+  - libcublas >=12.2.5.6,<12.3.0a0
+  - libcusparse >=12.1.2.141,<12.2.0a0
+  - libgcc-ng >=12
+  - libnvjitlink >=12.2.140,<13.0.0a0
+  - libstdcxx-ng >=12
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 79957553
+  timestamp: 1703004799401
 - conda: https://conda.anaconda.org/conda-forge/linux-64/libcusolver-11.7.3.90-h9ab20c4_1.conda
   sha256: 868ba1b0b0ae15f7621ee960a459a74b9a17b69ba629c510a11bb37480e7b6df
   md5: 2d58a7eb9150525ea89195cf1bcfbc4c
@@ -14879,8 +16527,54 @@ packages:
   - libstdcxx >=14
   license: LicenseRef-NVIDIA-End-User-License-Agreement
   purls: []
-  size: 161086488
-  timestamp: 1764943396933
+  size: 161086488
+  timestamp: 1764943396933
+- conda: https://conda.anaconda.org/conda-forge/linux-64/libcusolver-12.0.9.81-h676940d_0.conda
+  sha256: d6181d5fe7fbc36304577fbb50add02382ae9e7c6b1b598d310945bd12272f0b
+  md5: 17a342e69a0821ecf76a0e79a2044288
+  depends:
+  - __glibc >=2.28,<3.0.a0
+  - cuda-version >=13.1,<13.2.0a0
+  - libcublas >=13.2.1.1,<13.3.0a0
+  - libcusparse >=12.7.3.1,<12.8.0a0
+  - libgcc >=14
+  - libnvjitlink >=13.1.115,<14.0a0
+  - libstdcxx >=14
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 161188241
+  timestamp: 1768286542683
+- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcusolver-11.4.2.57-hac28a21_2.conda
+  sha256: 83e01fddb31617623fc7475aa84db9efc0498cc76aca88e42e86f71442872f6c
+  md5: 7fbef3231f572b4b7c3bfe8efd6fcb5c
+  depends:
+  - cuda-version >=12.0,<12.1.0a0
+  - libcublas >=12.0.1.189,<12.1.0a0
+  - libgcc-ng >=12
+  - libnvjitlink >=12.0.76,<13.0.0a0
+  - libstdcxx-ng >=12
+  constrains:
+  - arm-variant * sbsa
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 58531702
+  timestamp: 1701944296106
+- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcusolver-11.5.2.141-hac28a21_0.conda
+  sha256: 8b84ec1fcee407676bc5dee930747ee6fd4b887d8a3f9ad69d98705260c7ae2d
+  md5: 4b628857805683900422fea3a166cd6f
+  depends:
+  - cuda-version >=12.2,<12.3.0a0
+  - libcublas >=12.2.5.6,<12.3.0a0
+  - libcusparse >=12.1.2.141,<12.2.0a0
+  - libgcc-ng >=12
+  - libnvjitlink >=12.2.140,<13.0.0a0
+  - libstdcxx-ng >=12
+  constrains:
+  - arm-variant * sbsa
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 79719976
+  timestamp: 1703004749317
 - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcusolver-11.7.3.90-hd55a8e4_1.conda
   sha256: 5016ad770146b3eb3739ee4213f82d3afed125626dbb77f0ee4b421cb9ab6d63
   md5: 7b044a3b61ea805e90e91f750c0e70dd
@@ -14950,6 +16644,53 @@ packages:
   purls: []
   size: 177727995
   timestamp: 1764943428002
+- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcusolver-12.0.9.81-he38c790_0.conda
+  sha256: ce671884833cfed45128a7be1d6102242c394524a654b4ba3921ec49a856a6e7
+  md5: c1aa3d742409b794d096fcaf6aaf3c1a
+  depends:
+  - __glibc >=2.28,<3.0.a0
+  - arm-variant * sbsa
+  - cuda-version >=13.1,<13.2.0a0
+  - libcublas >=13.2.1.1,<13.3.0a0
+  - libcusparse >=12.7.3.1,<12.8.0a0
+  - libgcc >=14
+  - libnvjitlink >=13.1.115,<14.0a0
+  - libstdcxx >=14
+  constrains:
+  - arm-variant * sbsa
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 177825017
+  timestamp: 1768286571769
+- conda: https://conda.anaconda.org/conda-forge/win-64/libcusolver-11.4.2.57-h63175ca_2.conda
+  sha256: 1486f5ced304b13ec1e8fb2af3e4134aeb8c1bc98d5c13c864c48c2f9e42cfa6
+  md5: 11f11b1971bd9a2e39eade3206c6e63a
+  depends:
+  - cuda-version >=12.0,<12.1.0a0
+  - libcublas >=12.0.1.189,<12.1.0a0
+  - libnvjitlink >=12.0.76,<13.0.0a0
+  - ucrt >=10.0.20348.0
+  - vc >=14.2,<15
+  - vc14_runtime >=14.29.30139
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 56552114
+  timestamp: 1701944947700
+- conda: https://conda.anaconda.org/conda-forge/win-64/libcusolver-11.5.2.141-h63175ca_0.conda
+  sha256: 7073d934f6d2dd607a4f987efa2c2d16b0e68340db7637b8e98ff4a1004d3ca3
+  md5: 0ff5423da121b524f887e8f24c6a55df
+  depends:
+  - cuda-version >=12.2,<12.3.0a0
+  - libcublas >=12.2.5.6,<12.3.0a0
+  - libcusparse >=12.1.2.141,<12.2.0a0
+  - libnvjitlink >=12.2.140,<13.0.0a0
+  - ucrt >=10.0.20348.0
+  - vc >=14.2,<15
+  - vc14_runtime >=14.29.30139
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 77792716
+  timestamp: 1703005402425
 - conda: https://conda.anaconda.org/conda-forge/win-64/libcusolver-11.7.3.90-he0c23c2_1.conda
   sha256: c967651aab88a4a9a761be0b027b460c36850a9cd9df03890ce5bf833cef8c9f
   md5: 830a8909cfd5427f57b93ca6e468c1dd
@@ -15010,6 +16751,47 @@ packages:
   purls: []
   size: 156777611
   timestamp: 1764943590003
+- conda: https://conda.anaconda.org/conda-forge/win-64/libcusolver-12.0.9.81-hac47afa_0.conda
+  sha256: 660e6b88a56b9b125e9f3e44975baf75249bee32505960b7906c1e8ba84bc9e3
+  md5: 79dca8cbbf9f76e1b298f3538c6c3bb8
+  depends:
+  - cuda-version >=13.1,<13.2.0a0
+  - libcublas >=13.2.1.1,<13.3.0a0
+  - libcusparse >=12.7.3.1,<12.8.0a0
+  - libnvjitlink >=13.1.115,<14.0a0
+  - ucrt >=10.0.20348.0
+  - vc >=14.3,<15
+  - vc14_runtime >=14.44.35208
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 156887400
+  timestamp: 1768286696520
+- conda: https://conda.anaconda.org/conda-forge/linux-64/libcusparse-12.0.0.76-hd3aeb46_2.conda
+  sha256: def44b0e57a59bc060bc69fb1c79c39cf281efe8980cd78840cb092ada5eda19
+  md5: 91072eaa64ea11a9f804547806dbacf0
+  depends:
+  - __glibc >=2.17,<3.0.a0
+  - cuda-version >=12.0,<12.1.0a0
+  - libgcc-ng >=12
+  - libnvjitlink >=12.0.76,<13.0.0a0
+  - libstdcxx-ng >=12
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 98176542
+  timestamp: 1701931152417
+- conda: https://conda.anaconda.org/conda-forge/linux-64/libcusparse-12.1.2.141-hd3aeb46_0.conda
+  sha256: 48ab25898ae3315a9dce7f5a5ad2c1d5bce84c78c757f54dce4a43c65d436af4
+  md5: 3b4528c647c041ec53a883023ef4f054
+  depends:
+  - __glibc >=2.17,<3.0.a0
+  - cuda-version >=12.2,<12.3.0a0
+  - libgcc-ng >=12
+  - libnvjitlink >=12.2.140,<13.0.0a0
+  - libstdcxx-ng >=12
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 112121082
+  timestamp: 1702970684025
 - conda: https://conda.anaconda.org/conda-forge/linux-64/libcusparse-12.5.10.65-hecca717_2.conda
   sha256: 7b511549a22df408d36dadbeabdfd9c35b124d9d6f000b29ffcbe4b38b7faeb7
   md5: 890ebfaad48c887d3d82847ec9d6bc79
@@ -15062,6 +16844,47 @@ packages:
   purls: []
   size: 144184696
   timestamp: 1764886592758
+- conda: https://conda.anaconda.org/conda-forge/linux-64/libcusparse-12.7.3.1-hecca717_0.conda
+  sha256: 86b31339206cb44c2cddeea4684de748d39ecc89c45c884a92e653f0af2986c6
+  md5: 915b747d67493ba94a0d9b79095cc06d
+  depends:
+  - __glibc >=2.17,<3.0.a0
+  - cuda-version >=13.1,<13.2.0a0
+  - libgcc >=14
+  - libnvjitlink >=13.1.115,<14.0a0
+  - libstdcxx >=14
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 145513192
+  timestamp: 1768280223267
+- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcusparse-12.0.0.76-hac28a21_2.conda
+  sha256: fa7b204b0b25ab4a61db98ef8c0d8ccc7d5fc158bcc89f95eedd4286af67ba9b
+  md5: 2d5bbfce1a53628178df9d711445cd60
+  depends:
+  - cuda-version >=12.0,<12.1.0a0
+  - libgcc-ng >=12
+  - libnvjitlink >=12.0.76,<13.0.0a0
+  - libstdcxx-ng >=12
+  constrains:
+  - arm-variant * sbsa
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 98122549
+  timestamp: 1701931113993
+- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcusparse-12.1.2.141-hac28a21_0.conda
+  sha256: 50b0c4c09aa576dce40ae62fe45253b244fd01c4024b0efbc34bec3532db8ded
+  md5: 4cfd7e21691a81d22e483b08f384b594
+  depends:
+  - cuda-version >=12.2,<12.3.0a0
+  - libgcc-ng >=12
+  - libnvjitlink >=12.2.140,<13.0.0a0
+  - libstdcxx-ng >=12
+  constrains:
+  - arm-variant * sbsa
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 112124005
+  timestamp: 1702970635167
 - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcusparse-12.5.10.65-h8f3c8d4_2.conda
   sha256: 9dbee8f1bfa9a876d24b12a34d4a022f33e584669c59bf93368b79d0bf55cd2f
   md5: 1e0731f3e9f303e6106a8fdd359a272e
@@ -15120,6 +16943,47 @@ packages:
   purls: []
   size: 160004278
   timestamp: 1764886666561
+- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcusparse-12.7.3.1-h8f3c8d4_0.conda
+  sha256: 85f20536cc261bf285ca2d9730d2b27669d862a38fa70a54a236d574be913f7b
+  md5: 73816ec8be4d675a1933cd0dc382382a
+  depends:
+  - arm-variant * sbsa
+  - cuda-version >=13.1,<13.2.0a0
+  - libgcc >=14
+  - libnvjitlink >=13.1.115,<14.0a0
+  - libstdcxx >=14
+  constrains:
+  - arm-variant * sbsa
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 161564363
+  timestamp: 1768280242337
+- conda: https://conda.anaconda.org/conda-forge/win-64/libcusparse-12.0.0.76-h63175ca_2.conda
+  sha256: 7ac8438172e0712ae6e2ebe790f4a9117b1764a6a30f29513b0b4c6a36ae9211
+  md5: 18a3190fb1e98ce0765dca19a880997a
+  depends:
+  - cuda-version >=12.0,<12.1.0a0
+  - libnvjitlink >=12.0.76,<13.0.0a0
+  - ucrt >=10.0.20348.0
+  - vc >=14.2,<15
+  - vc14_runtime >=14.29.30139
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 97602376
+  timestamp: 1701931624725
+- conda: https://conda.anaconda.org/conda-forge/win-64/libcusparse-12.1.2.141-h63175ca_0.conda
+  sha256: d58adb5b76459c082c0c903ce798c9057b6c6e284b60117efc811b46b39abf96
+  md5: c689031410d83ceefe2c2299040f9de6
+  depends:
+  - cuda-version >=12.2,<12.3.0a0
+  - libnvjitlink >=12.2.140,<13.0.0a0
+  - ucrt >=10.0.20348.0
+  - vc >=14.2,<15
+  - vc14_runtime >=14.29.30139
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 111548772
+  timestamp: 1702971058166
 - conda: https://conda.anaconda.org/conda-forge/win-64/libcusparse-12.5.10.65-hac47afa_2.conda
   sha256: fc911af27ab28af77d4b7203c6c9ebb15f4ddf27af8e8331d9a9983f4dd96483
   md5: 4e84a8282a9c1802ec4f516090164228
@@ -15172,6 +17036,19 @@ packages:
   purls: []
   size: 142426523
   timestamp: 1764886657256
+- conda: https://conda.anaconda.org/conda-forge/win-64/libcusparse-12.7.3.1-hac47afa_0.conda
+  sha256: 1ac52f373db5c5e00c1978f0bc6b2c2c576c80fba8801086ccb142d46eff0a4e
+  md5: 36a861ab5d2c5fd0a63395bbd6bab7d2
+  depends:
+  - cuda-version >=13.1,<13.2.0a0
+  - libnvjitlink >=13.1.115,<14.0a0
+  - ucrt >=10.0.20348.0
+  - vc >=14.3,<15
+  - vc14_runtime >=14.44.35208
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 143956601
+  timestamp: 1768280260283
 - conda: https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.7.3-hecca717_0.conda
   sha256: 1e1b08f6211629cbc2efe7a5bca5953f8f6b3cae0eeb04ca4dacee1bd4e2db2f
   md5: 8b09ae86839581147ef2e5c5e229d164
@@ -15578,6 +17455,7 @@ packages:
   constrains:
   - xz 5.8.2.*
   license: 0BSD
+  purls: []
   size: 113207
   timestamp: 1768752626120
 - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/liblzma-5.8.1-h86ecc28_2.conda
@@ -16096,6 +17974,18 @@ packages:
   purls: []
   size: 31218311
   timestamp: 1757021832026
+- conda: https://conda.anaconda.org/conda-forge/linux-64/libnvjitlink-13.1.115-hecca717_0.conda
+  sha256: 1ce8ac2f6fb3aaab065599f74b1e1bc68affc0804a081da239ab2c727abdc1cb
+  md5: 6cd0aefa03c679824ee5047ed39b0a09
+  depends:
+  - __glibc >=2.17,<3.0.a0
+  - cuda-version >=13,<13.2.0a0
+  - libgcc >=14
+  - libstdcxx >=14
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 31331227
+  timestamp: 1768274146966
 - conda: https://conda.anaconda.org/conda-forge/linux-64/libnvjitlink-13.1.80-hecca717_0.conda
   sha256: 1ccfcadcd096e225a4d3a10c7d35363fa3ef02e97b54efb6ef50c8849aec4804
   md5: 12c045632ae898f40024b7a1d61fc100
@@ -16147,6 +18037,20 @@ packages:
   purls: []
   size: 29710724
   timestamp: 1757021907780
+- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libnvjitlink-13.1.115-h8f3c8d4_0.conda
+  sha256: 49ff65205602d2535586e646008ff0577a92bf6f16de9c4cc6a10473caf3d700
+  md5: e211b0e0846d538f23296214de1d35a6
+  depends:
+  - arm-variant * sbsa
+  - cuda-version >=13,<13.2.0a0
+  - libgcc >=14
+  - libstdcxx >=14
+  constrains:
+  - arm-variant * sbsa
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 29775481
+  timestamp: 1768274109937
 - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libnvjitlink-13.1.80-h8f3c8d4_0.conda
   sha256: 3ffb88544e6407cad2b82a9e6b405a28ba6d56d600f8f58c3b6cda62d844f94e
   md5: d69b83167de6fd594dcf3b93ef82cf90
@@ -16197,6 +18101,18 @@ packages:
   purls: []
   size: 27704690
   timestamp: 1757021910611
+- conda: https://conda.anaconda.org/conda-forge/win-64/libnvjitlink-13.1.115-hac47afa_0.conda
+  sha256: 7a07c089f3d58552caad6151a0aaa6366231078f4dec4c6b4bd15aa06490daf6
+  md5: 27d92a3cc46bebee72ad41931c8442f5
+  depends:
+  - cuda-version >=13,<13.2.0a0
+  - ucrt >=10.0.20348.0
+  - vc >=14.3,<15
+  - vc14_runtime >=14.44.35208
+  license: LicenseRef-NVIDIA-End-User-License-Agreement
+  purls: []
+  size: 28186019
+  timestamp: 1768274186462
 - conda: https://conda.anaconda.org/conda-forge/win-64/libnvjitlink-13.1.80-hac47afa_0.conda
   sha256: e83551c06b6594ad5bc3eeeed09ead80607b422dee660657262e77fa26648d51
   md5: 792c82dd2a996b65970ec5789c43840f
@@ -17776,22 +19692,6 @@ packages:
   - pylint>=2.6.0 ; extra == 'dev'
   - pyink ; extra == 'dev'
   requires_python: '>=3.9'
-- pypi: https://files.pythonhosted.org/packages/79/2b/a826ba18d2179a56e144aef69e57fb2ab7c464ef0b2111940ee8a3a223a2/ml_dtypes-0.5.4-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl
-  name: ml-dtypes
-  version: 0.5.4
-  sha256: 0d2ffd05a2575b1519dc928c0b93c06339eb67173ff53acb00724502cda231cf
-  requires_dist:
-  - numpy>=1.21
-  - numpy>=1.21.2 ; python_full_version >= '3.10'
-  - numpy>=1.23.3 ; python_full_version >= '3.11'
-  - numpy>=1.26.0 ; python_full_version >= '3.12'
-  - numpy>=2.1.0 ; python_full_version >= '3.13'
-  - absl-py ; extra == 'dev'
-  - pytest ; extra == 'dev'
-  - pytest-xdist ; extra == 'dev'
-  - pylint>=2.6.0 ; extra == 'dev'
-  - pyink ; extra == 'dev'
-  requires_python: '>=3.9'
 - pypi: https://files.pythonhosted.org/packages/a9/80/19189ea605017473660e43762dc853d2797984b3c7bf30ce656099add30c/ml_dtypes-0.5.4-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl
   name: ml-dtypes
   version: 0.5.4
@@ -18462,7 +20362,7 @@ packages:
   timestamp: 1765466860567
 - conda: .
   name: numba-cuda
-  version: 0.25.0
+  version: 0.24.0
   build: py310h04c9772_0
   subdir: linux-64
   variants:
@@ -18484,7 +20384,7 @@ packages:
   license: BSD-2-Clause
 - conda: .
   name: numba-cuda
-  version: 0.25.0
+  version: 0.24.0
   build: py310h3ca6f64_0
   subdir: linux-aarch64
   variants:
@@ -18506,11 +20406,10 @@ packages:
   license: BSD-2-Clause
 - conda: .
   name: numba-cuda
-  version: 0.25.0
-  build: py310hf0cc224_0
+  version: 0.24.0
+  build: py310h5d23e43_0
   subdir: win-64
   variants:
-    cxx_compiler: vs2022
     python: 3.10.*
     target_platform: win-64
   depends:
@@ -18522,21 +20421,19 @@ packages:
   - cuda-python >=12.9,<14
   - cuda-pathfinder >=1.3.1,<2
   - cuda-cudart
-  - vc >=14.3,<15
-  - vc14_runtime >=14.44.35208
-  - ucrt >=10.0.20348.0
+  - vc >=14.1,<15
+  - vc14_runtime >=14.16.27033
   - python_abi 3.10.* *_cp310
   - numpy >=1.21,<3
   license: BSD-2-Clause
 - conda: .
   name: numba-cuda
-  version: 0.25.0
-  build: py311h17f48b4_0
-  subdir: win-64
+  version: 0.24.0
+  build: py311h2894be0_0
+  subdir: linux-aarch64
   variants:
-    cxx_compiler: vs2022
     python: 3.11.*
-    target_platform: win-64
+    target_platform: linux-aarch64
   depends:
   - python
   - packaging
@@ -18546,20 +20443,19 @@ packages:
   - cuda-python >=12.9,<14
   - cuda-pathfinder >=1.3.1,<2
   - cuda-cudart
-  - vc >=14.3,<15
-  - vc14_runtime >=14.44.35208
-  - ucrt >=10.0.20348.0
+  - libstdcxx >=14
+  - libgcc >=14
   - python_abi 3.11.* *_cp311
   - numpy >=1.23,<3
   license: BSD-2-Clause
 - conda: .
   name: numba-cuda
-  version: 0.25.0
-  build: py311h2894be0_0
-  subdir: linux-aarch64
+  version: 0.24.0
+  build: py311hb9e802a_0
+  subdir: win-64
   variants:
     python: 3.11.*
-    target_platform: linux-aarch64
+    target_platform: win-64
   depends:
   - python
   - packaging
@@ -18569,14 +20465,14 @@ packages:
   - cuda-python >=12.9,<14
   - cuda-pathfinder >=1.3.1,<2
   - cuda-cudart
-  - libstdcxx >=14
-  - libgcc >=14
+  - vc >=14.1,<15
+  - vc14_runtime >=14.16.27033
   - python_abi 3.11.* *_cp311
   - numpy >=1.23,<3
   license: BSD-2-Clause
 - conda: .
   name: numba-cuda
-  version: 0.25.0
+  version: 0.24.0
   build: py311he8c1319_0
   subdir: linux-64
   variants:
@@ -18598,7 +20494,7 @@ packages:
   license: BSD-2-Clause
 - conda: .
   name: numba-cuda
-  version: 0.25.0
+  version: 0.24.0
   build: py312h3eebbd5_0
   subdir: linux-64
   variants:
@@ -18620,13 +20516,12 @@ packages:
   license: BSD-2-Clause
 - conda: .
   name: numba-cuda
-  version: 0.25.0
-  build: py312h61be6c2_0
-  subdir: win-64
+  version: 0.24.0
+  build: py312h8e85db0_0
+  subdir: linux-aarch64
   variants:
-    cxx_compiler: vs2022
     python: 3.12.*
-    target_platform: win-64
+    target_platform: linux-aarch64
   depends:
   - python
   - packaging
@@ -18636,20 +20531,19 @@ packages:
   - cuda-python >=12.9,<14
   - cuda-pathfinder >=1.3.1,<2
   - cuda-cudart
-  - vc >=14.3,<15
-  - vc14_runtime >=14.44.35208
-  - ucrt >=10.0.20348.0
+  - libstdcxx >=14
+  - libgcc >=14
   - python_abi 3.12.* *_cp312
   - numpy >=1.23,<3
   license: BSD-2-Clause
 - conda: .
   name: numba-cuda
-  version: 0.25.0
-  build: py312h8e85db0_0
-  subdir: linux-aarch64
+  version: 0.24.0
+  build: py312ha067a5a_0
+  subdir: win-64
   variants:
     python: 3.12.*
-    target_platform: linux-aarch64
+    target_platform: win-64
   depends:
   - python
   - packaging
@@ -18659,14 +20553,14 @@ packages:
   - cuda-python >=12.9,<14
   - cuda-pathfinder >=1.3.1,<2
   - cuda-cudart
-  - libstdcxx >=14
-  - libgcc >=14
+  - vc >=14.1,<15
+  - vc14_runtime >=14.16.27033
   - python_abi 3.12.* *_cp312
   - numpy >=1.23,<3
   license: BSD-2-Clause
 - conda: .
   name: numba-cuda
-  version: 0.25.0
+  version: 0.24.0
   build: py313h66129c8_0
   subdir: linux-aarch64
   variants:
@@ -18688,11 +20582,10 @@ packages:
   license: BSD-2-Clause
 - conda: .
   name: numba-cuda
-  version: 0.25.0
-  build: py313h96b86a2_0
+  version: 0.24.0
+  build: py313he80dd91_0
   subdir: win-64
   variants:
-    cxx_compiler: vs2022
     python: 3.13.*
     target_platform: win-64
   depends:
@@ -18704,15 +20597,14 @@ packages:
   - cuda-python >=12.9,<14
   - cuda-pathfinder >=1.3.1,<2
   - cuda-cudart
-  - vc >=14.3,<15
-  - vc14_runtime >=14.44.35208
-  - ucrt >=10.0.20348.0
+  - vc >=14.1,<15
+  - vc14_runtime >=14.16.27033
   - python_abi 3.13.* *_cp313
   - numpy >=1.23,<3
   license: BSD-2-Clause
 - conda: .
   name: numba-cuda
-  version: 0.25.0
+  version: 0.24.0
   build: py313hf75ce08_0
   subdir: linux-64
   variants:
@@ -18734,13 +20626,12 @@ packages:
   license: BSD-2-Clause
 - conda: .
   name: numba-cuda
-  version: 0.25.0
-  build: py314h3be3d12_0
-  subdir: win-64
+  version: 0.24.0
+  build: py314h59f3c06_0
+  subdir: linux-64
   variants:
-    cxx_compiler: vs2022
     python: 3.14.*
-    target_platform: win-64
+    target_platform: linux-64
   depends:
   - python
   - packaging
@@ -18750,20 +20641,19 @@ packages:
   - cuda-python >=12.9,<14
   - cuda-pathfinder >=1.3.1,<2
   - cuda-cudart
-  - vc >=14.3,<15
-  - vc14_runtime >=14.44.35208
-  - ucrt >=10.0.20348.0
+  - libstdcxx >=15
+  - libgcc >=15
   - python_abi 3.14.* *_cp314
   - numpy >=1.23,<3
   license: BSD-2-Clause
 - conda: .
   name: numba-cuda
-  version: 0.25.0
-  build: py314h59f3c06_0
-  subdir: linux-64
+  version: 0.24.0
+  build: py314h625260f_0
+  subdir: win-64
   variants:
     python: 3.14.*
-    target_platform: linux-64
+    target_platform: win-64
   depends:
   - python
   - packaging
@@ -18773,14 +20663,14 @@ packages:
   - cuda-python >=12.9,<14
   - cuda-pathfinder >=1.3.1,<2
   - cuda-cudart
-  - libstdcxx >=15
-  - libgcc >=15
+  - vc >=14.1,<15
+  - vc14_runtime >=14.16.27033
   - python_abi 3.14.* *_cp314
   - numpy >=1.23,<3
   license: BSD-2-Clause
 - conda: .
   name: numba-cuda
-  version: 0.25.0
+  version: 0.24.0
   build: py314ha479ada_0
   subdir: linux-aarch64
   variants:
@@ -18800,21 +20690,6 @@ packages:
   - python_abi 3.14.* *_cp314
   - numpy >=1.23,<3
   license: BSD-2-Clause
-- pypi: https://files.pythonhosted.org/packages/10/a7/cfbe475c35371cae1358e61f20c5f075badc18c4797ab4354140e1d283cf/numpy-2.4.1-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl
-  name: numpy
-  version: 2.4.1
-  sha256: 52b5f61bdb323b566b528899cc7db2ba5d1015bda7ea811a8bcf3c89c331fa42
-  requires_python: '>=3.11'
-- pypi: https://files.pythonhosted.org/packages/7e/bb/c6513edcce5a831810e2dddc0d3452ce84d208af92405a0c2e58fd8e7881/numpy-2.4.1-cp314-cp314-win_amd64.whl
-  name: numpy
-  version: 2.4.1
-  sha256: 7d5d7999df434a038d75a748275cd6c0094b0ecdb0837342b332a82defc4dc4d
-  requires_python: '>=3.11'
-- pypi: https://files.pythonhosted.org/packages/c0/c4/2e7908915c0e32ca636b92e4e4a3bdec4cb1e7eb0f8aedf1ed3c68a0d8cd/numpy-2.4.1-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl
-  name: numpy
-  version: 2.4.1
-  sha256: 5d558123217a83b2d1ba316b986e9248a1ed1971ad495963d555ccd75dcb1556
-  requires_python: '>=3.11'
 - conda: https://conda.anaconda.org/conda-forge/linux-64/numpy-2.2.6-py310hefbff90_0.conda
   sha256: 0ba94a61f91d67413e60fa8daa85627a8f299b5054b0eff8f93d26da83ec755e
   md5: b0cea2c364bf65cd19e023040eeab05d
@@ -18915,6 +20790,25 @@ packages:
   - pkg:pypi/numpy?source=hash-mapping
   size: 8983076
   timestamp: 1766383421113
+- conda: https://conda.anaconda.org/conda-forge/linux-64/numpy-2.4.2-py314h2b28147_0.conda
+  sha256: 80a1929df6252fe9a32f383c50f9ad18c38377843580902f9fb2906cd552ece1
+  md5: e5ff0b238f18fd2a2aca8ca068794df6
+  depends:
+  - python
+  - libgcc >=14
+  - libstdcxx >=14
+  - __glibc >=2.17,<3.0.a0
+  - python_abi 3.14.* *_cp314
+  - liblapack >=3.9.0,<4.0a0
+  - libblas >=3.9.0,<4.0a0
+  - libcblas >=3.9.0,<4.0a0
+  constrains:
+  - numpy-base <0a0
+  license: BSD-3-Clause
+  purls:
+  - pkg:pypi/numpy?source=hash-mapping
+  size: 8927060
+  timestamp: 1770020140979
 - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/numpy-2.2.6-py310h6e5608f_0.conda
   sha256: d7234b9c45e4863c7d4c5221c1e91d69b0e0009464bf361c3fea47e64dc4adc2
   md5: 9e9f1f279eb02c41bda162a42861adc0
@@ -19015,6 +20909,25 @@ packages:
   - pkg:pypi/numpy?source=hash-mapping
   size: 7815157
   timestamp: 1766383452981
+- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/numpy-2.4.2-py314haac167e_0.conda
+  sha256: 65206193d2f348a715ca190580bbca1917dcf1f3a2a56124584636b7ed6c3caa
+  md5: 02e3507a8fa2f34d5e44004d5e0f6423
+  depends:
+  - python
+  - libgcc >=14
+  - libstdcxx >=14
+  - python 3.14.* *_cp314
+  - libcblas >=3.9.0,<4.0a0
+  - python_abi 3.14.* *_cp314
+  - libblas >=3.9.0,<4.0a0
+  - liblapack >=3.9.0,<4.0a0
+  constrains:
+  - numpy-base <0a0
+  license: BSD-3-Clause
+  purls:
+  - pkg:pypi/numpy?source=hash-mapping
+  size: 8006748
+  timestamp: 1770020188593
 - conda: https://conda.anaconda.org/conda-forge/win-64/numpy-2.2.6-py310h4987827_0.conda
   sha256: 6f628e51763b86a535a723664e3aa1e38cb7147a2697f80b75c1980c1ed52f3e
   md5: d2596785ac2cf5bab04e2ee9e5d04041
@@ -19115,6 +21028,25 @@ packages:
   - pkg:pypi/numpy?source=compressed-mapping
   size: 7584934
   timestamp: 1766383321713
+- conda: https://conda.anaconda.org/conda-forge/win-64/numpy-2.4.2-py314h06c3c77_0.conda
+  sha256: a0e6059c41fd1fb6b991ba965cb6bee7514da40c1664ecddfeb09516f926e281
+  md5: a5691a20b4523bacc0947b3051f3fddb
+  depends:
+  - python
+  - vc >=14.3,<15
+  - vc14_runtime >=14.44.35208
+  - ucrt >=10.0.20348.0
+  - liblapack >=3.9.0,<4.0a0
+  - libblas >=3.9.0,<4.0a0
+  - python_abi 3.14.* *_cp314
+  - libcblas >=3.9.0,<4.0a0
+  constrains:
+  - numpy-base <0a0
+  license: BSD-3-Clause
+  purls:
+  - pkg:pypi/numpy?source=hash-mapping
+  size: 7309039
+  timestamp: 1770020044956
 - conda: https://conda.anaconda.org/conda-forge/noarch/numpydoc-1.10.0-pyhcf101f3_0.conda
   sha256: 482d94fce136c4352b18c6397b9faf0a3149bfb12499ab1ffebad8db0cb6678f
   md5: 3aa4b625f20f55cf68e92df5e5bf3c39
@@ -19379,20 +21311,20 @@ packages:
   - pkg:pypi/psutil?source=hash-mapping
   size: 228170
   timestamp: 1767012382363
-- conda: https://conda.anaconda.org/conda-forge/linux-64/psutil-7.2.1-py314h3f2afee_0.conda
-  sha256: 3571148467c49837027099ec5c4bbb5473202917d66279a317f05896bd7586e7
-  md5: b2911c190fb8f5eb62be3a60adb105db
+- conda: https://conda.anaconda.org/conda-forge/linux-64/psutil-7.2.2-py314h0f05182_0.conda
+  sha256: f15574ed6c8c8ed8c15a0c5a00102b1efe8b867c0bd286b498cd98d95bd69ae5
+  md5: 4f225a966cfee267a79c5cb6382bd121
   depends:
   - python
   - libgcc >=14
   - __glibc >=2.17,<3.0.a0
-  - python_abi 3.14.* *_cp314t
+  - python_abi 3.14.* *_cp314
   license: BSD-3-Clause
   license_family: BSD
   purls:
-  - pkg:pypi/psutil?source=hash-mapping
-  size: 228971
-  timestamp: 1767012384426
+  - pkg:pypi/psutil?source=compressed-mapping
+  size: 231303
+  timestamp: 1769678156552
 - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/psutil-7.2.1-py310hef25091_0.conda
   sha256: d6deeea23c1c40be15d8ac4171f00ebac2a2028bb09152151ec3d0a479018f31
   md5: 6d96240ee0dcba494ab8ed1b8517bdf5
@@ -19890,9 +21822,10 @@ packages:
   size: 36790521
   timestamp: 1765021515427
   python_site_packages_path: lib/python3.14/site-packages
-- conda: https://conda.anaconda.org/conda-forge/linux-64/python-3.14.2-he1279bd_0_cp314t.conda
-  sha256: 79a4be7901d977858bdf1b0024b30360d8448e30fe38bece903f855b21b88cf6
-  md5: 08a2a24f4e6907bea0ebfe22eecae6be
+- conda: https://conda.anaconda.org/conda-forge/linux-64/python-3.14.2-h32b2ec7_101_cp314.conda
+  build_number: 101
+  sha256: 24719868a471dd94041aa9873c6f87adf3b86c07878ad4e242ac97228f9e6460
+  md5: 051f60a9d1e3aae7160d173aeb7029f8
   depends:
   - __glibc >=2.17,<3.0.a0
   - bzip2 >=1.0.8,<2.0a0
@@ -19900,25 +21833,23 @@ packages:
   - libexpat >=2.7.3,<3.0a0
   - libffi >=3.5.2,<3.6.0a0
   - libgcc >=14
-  - liblzma >=5.8.1,<6.0a0
+  - liblzma >=5.8.2,<6.0a0
   - libmpdec >=4.0.0,<5.0a0
-  - libsqlite >=3.51.1,<4.0a0
-  - libuuid >=2.41.2,<3.0a0
+  - libsqlite >=3.51.2,<4.0a0
+  - libuuid >=2.41.3,<3.0a0
   - libzlib >=1.3.1,<2.0a0
   - ncurses >=6.5,<7.0a0
   - openssl >=3.5.4,<4.0a0
-  - python_abi 3.14.* *_cp314t
-  - readline >=8.2,<9.0a0
+  - python_abi 3.14.* *_cp314
+  - readline >=8.3,<9.0a0
   - tk >=8.6.13,<8.7.0a0
   - tzdata
   - zstd >=1.5.7,<1.6.0a0
-  track_features:
-  - py_freethreading
   license: Python-2.0
   purls: []
-  size: 47658766
-  timestamp: 1765021403755
-  python_site_packages_path: lib/python3.14t/site-packages
+  size: 36833080
+  timestamp: 1769458770373
+  python_site_packages_path: lib/python3.14/site-packages
 - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/python-3.10.19-h28be5d3_2_cpython.conda
   build_number: 2
   sha256: 9bdbc749cd9ee99ae4d72116aad5140e908fdf1215a417375f5e351f96372c77
@@ -20226,17 +22157,6 @@ packages:
   purls: []
   size: 6989
   timestamp: 1752805904792
-- conda: https://conda.anaconda.org/conda-forge/noarch/python_abi-3.14-8_cp314t.conda
-  build_number: 8
-  sha256: d9ed2538fba61265a330ee1b1afe99a4bb23ace706172b9464546c7e01259d63
-  md5: 3251796e09870c978e0f69fa05e38fb6
-  constrains:
-  - python 3.14.* *_cp314t
-  license: BSD-3-Clause
-  license_family: BSD
-  purls: []
-  size: 7020
-  timestamp: 1752805919426
 - conda: https://conda.anaconda.org/conda-forge/linux-64/pytorch-2.9.1-cuda129_mkl_py312_h2ff76c1_302.conda
   sha256: 3f27e7b4da22d6a6c61f16f3bf82b4766e128d8339380eca1ecb769a85a975de
   md5: 062a64a99d83ebf707bc5ee5fa32ff50
diff --git a/pixi.toml b/pixi.toml
index e0da41671..656dea385 100644
--- a/pixi.toml
+++ b/pixi.toml
@@ -77,10 +77,7 @@ cffi = ">=1"
 pytest = ">=8,<9"
 pytest-xdist = ">=3.8"
 pytest-benchmark = ">=5.1"
-
-[feature.test-cupy.dependencies]
-# CuPy is not available for Python 3.14 yet
-cupy = { version = "*", python = "<3.14" }
+cupy = "*"
 
 [feature.test.pypi-dependencies]
 ml_dtypes = "*"
@@ -111,112 +108,114 @@ numpydoc = ">=1.9.0"
 [feature.docs.pypi-dependencies]
 nvidia-sphinx-theme = "*"
 
+[feature.test-cupy.dependencies]
+cupy = "*"
+
 [environments]
-default = { features = ["cu-13-1", "test", "test-cupy", "cu", "cu-13", "cu-rt", "nvvm", "py314"], solve-group = "default" }
+default = { features = ["cu-13-1", "test", "cu", "cu-13", "cu-rt", "nvvm", "py314"], solve-group = "default" }
 dev = { features = ["ruff"], no-default-feature = true }
 bench-against = { features = ["test"], no-default-feature = true }
 # CUDA 12
 cu-12-0-py310 = { features = [
     "cu-12-0",
     "test",
-    "test-cupy",
     "cu",
     "cu-12",
     "py310",
+    "test-cupy",
 ], solve-group = "cu-12-0-py310" }
 cu-12-0-py311 = { features = [
     "cu-12-0",
     "test",
-    "test-cupy",
     "cu",
     "cu-12",
     "py311",
+    "test-cupy",
 ], solve-group = "cu-12-0-py311" }
 cu-12-2-py311 = { features = [
     "cu-12-2",
     "test",
-    "test-cupy",
     "cu",
     "cu-12",
     "nvvm",
     "py311",
+    "test-cupy",
 ], solve-group = "cu-12-2-py311" }
 cu-12-8-py310 = { features = [
     "cu-12-8",
     "test",
-    "test-cupy",
     "cu",
     "cu-12",
     "cu-rt",
     "nvvm",
     "py310",
+    "test-cupy",
 ], solve-group = "cu-12-8-py310" }
 cu-12-8-py311 = { features = [
     "cu-12-8",
     "test",
-    "test-cupy",
     "cu",
     "cu-12",
     "cu-rt",
     "nvvm",
     "py311",
+    "test-cupy",
 ], solve-group = "cu-12-8-py311" }
 cu-12-8-py312 = { features = [
     "cu-12-8",
     "test",
-    "test-cupy",
     "cu",
     "cu-12",
     "cu-rt",
     "nvvm",
     "py312",
+    "test-cupy",
 ], solve-group = "cu-12-8-py312" }
 cu-12-8-py313 = { features = [
     "cu-12-8",
     "test",
-    "test-cupy",
     "cu",
     "cu-12",
     "cu-rt",
     "nvvm",
     "py313",
+    "test-cupy",
 ], solve-group = "cu-12-8-py313" }
 cu-12-9-py312 = { features = [
     "cu-12-9",
     "test",
-    "test-cupy",
     "bench",
     "cu",
     "cu-12",
     "cu-rt",
     "nvvm",
     "py312",
+    "test-cupy",
 ], solve-group = "cu-12-9-py312" }
 # CUDA 13
 cu-13-0-py312 = { features = [
     "cu-13-0",
     "test",
-    "test-cupy",
     "cu",
     "cu-13",
     "cu-rt",
     "nvvm",
     "py312",
+    "test-cupy",
 ], solve-group = "cu-13-0-py312" }
 cu-13-0-py313 = { features = [
     "cu-13-0",
     "test",
-    "test-cupy",
     "cu",
     "cu-13",
     "cu-rt",
     "nvvm",
     "py313",
+    "test-cupy",
 ], solve-group = "cu-13-0-py313" }
 cu-13-0-py314 = { features = [
     "cu-13-0",
     "test",
-    "test-cupy",
     "cu",
     "cu-13",
     "cu-rt",
@@ -226,7 +225,6 @@ cu-13-0-py314 = { features = [
 cu-13-1-py314 = { features = [
     "cu-13-1",
     "test",
-    "test-cupy",
     "cu",
     "cu-13",
     "cu-rt",
@@ -299,7 +297,7 @@ NUMBA_ENABLE_CUDASIM = "1"
 
 [package]
 name = "numba-cuda"
-version = "0.26.0"
+version = "0.24.0"
 
 [package.build]
 backend = { name = "pixi-build-python", version = "*" }

From cf612cf664096ef3ad61f364c70b06b89796dc56 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Mon, 2 Feb 2026 07:07:02 -0800
Subject: [PATCH 47/60] fix test_matmul: use cp.cuda.Stream() and re-indent the C[y, x] store

---
 numba_cuda/numba/cuda/tests/cudapy/test_matmul.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_matmul.py b/numba_cuda/numba/cuda/tests/cudapy/test_matmul.py
index 157d16d26..013bb55df 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_matmul.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_matmul.py
@@ -52,15 +52,15 @@ def cu_square_matrix_mul(A, B, C):
 
             cuda.syncthreads()
 
-        if x < n and y < n:
-            C[y, x] = acc
+            if x < n and y < n:
+                C[y, x] = acc
 
     np.random.seed(42)
     A = np.array(np.random.random((n, n)), dtype=np.float32)
     B = np.array(np.random.random((n, n)), dtype=np.float32)
     C = np.empty_like(A)
 
-    stream = cp.cuda.stream()
+    stream = cp.cuda.Stream()
     nb_stream = cuda.api.external_stream(stream.ptr)
     with stream:
         dA = cp.asarray(A, stream)

From 61b9719ea5e8dadab687170511a49e6e98e5de47 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Mon, 2 Feb 2026 07:12:39 -0800
Subject: [PATCH 48/60] fix test_matmul: drop stream argument from cp.asarray calls

---
 numba_cuda/numba/cuda/tests/cudapy/test_matmul.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_matmul.py b/numba_cuda/numba/cuda/tests/cudapy/test_matmul.py
index 013bb55df..c7e69d479 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_matmul.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_matmul.py
@@ -63,9 +63,9 @@ def cu_square_matrix_mul(A, B, C):
     stream = cp.cuda.Stream()
     nb_stream = cuda.api.external_stream(stream.ptr)
     with stream:
-        dA = cp.asarray(A, stream)
-        dB = cp.asarray(B, stream)
-        dC = cp.asarray(C, stream)
+        dA = cp.asarray(A)
+        dB = cp.asarray(B)
+        dC = cp.asarray(C)
 
     cu_square_matrix_mul[(bpg, bpg), (tpb, tpb), nb_stream](dA, dB, dC)
     with stream:

From 3d17c0ea79ec99d883848e11850f33a5b0b472d0 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Mon, 2 Feb 2026 07:55:18 -0800
Subject: [PATCH 49/60] correct device array

---
 numba_cuda/numba/cuda/tests/cudapy/test_matmul.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_matmul.py b/numba_cuda/numba/cuda/tests/cudapy/test_matmul.py
index c7e69d479..c249f38bd 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_matmul.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_matmul.py
@@ -69,7 +69,7 @@ def cu_square_matrix_mul(A, B, C):
 
     cu_square_matrix_mul[(bpg, bpg), (tpb, tpb), nb_stream](dA, dB, dC)
     with stream:
-        c = dC.get() if not config.ENABLE_CUDASIM else dC
+        C = dC.get() if not config.ENABLE_CUDASIM else dC
 
     # Host compute
     Cans = np.dot(A, B)

From 83c28b0739645dc5525d2ca23ef3f3d0fd1d1972 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Mon, 2 Feb 2026 08:25:08 -0800
Subject: [PATCH 50/60] change cupy import pattern

---
 numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py   | 5 ++++-
 numba_cuda/numba/cuda/tests/cudadrv/test_events.py          | 5 ++++-
 numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py       | 6 +++++-
 numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py        | 6 +++++-
 numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py    | 5 ++++-
 numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py | 6 +++++-
 numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py         | 5 ++++-
 numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py     | 5 ++++-
 numba_cuda/numba/cuda/tests/cudapy/test_caching.py          | 5 ++++-
 .../numba/cuda/tests/cudapy/test_cuda_array_interface.py    | 6 +++++-
 .../numba/cuda/tests/cudapy/test_cuda_jit_no_types.py       | 5 ++++-
 numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py        | 6 +++++-
 .../numba/cuda/tests/cudapy/test_device_array_capture.py    | 6 +++++-
 numba_cuda/numba/cuda/tests/cudapy/test_device_func.py      | 5 ++++-
 numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py       | 5 ++++-
 numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py           | 6 +++++-
 numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py    | 6 +++++-
 numba_cuda/numba/cuda/tests/cudapy/test_idiv.py             | 5 ++++-
 numba_cuda/numba/cuda/tests/cudapy/test_laplace.py          | 5 ++++-
 numba_cuda/numba/cuda/tests/cudapy/test_matmul.py           | 5 ++++-
 numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py         | 5 ++++-
 numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py     | 6 +++++-
 numba_cuda/numba/cuda/tests/cudapy/test_nondet.py           | 5 ++++-
 numba_cuda/numba/cuda/tests/cudapy/test_reduction.py        | 5 ++++-
 numba_cuda/numba/cuda/tests/cudapy/test_sm.py               | 5 ++++-
 numba_cuda/numba/cuda/tests/cudapy/test_ssa.py              | 5 ++++-
 numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py        | 6 +++++-
 numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py  | 5 ++++-
 .../numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py    | 6 +++++-
 numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py   | 6 +++++-
 .../numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py    | 6 +++++-
 numba_cuda/numba/cuda/tests/doc_examples/test_globals.py    | 6 +++++-
 numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py    | 6 +++++-
 numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py     | 6 +++++-
 numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py | 6 +++++-
 numba_cuda/numba/cuda/tests/doc_examples/test_random.py     | 6 +++++-
 numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py  | 6 +++++-
 numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py | 6 +++++-
 numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py     | 6 +++++-
 numba_cuda/numba/cuda/tests/nrt/test_nrt.py                 | 5 ++++-
 40 files changed, 181 insertions(+), 40 deletions(-)

diff --git a/numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py b/numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py
index 1bb41b6c8..6c083821a 100644
--- a/numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py
+++ b/numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py
@@ -12,7 +12,10 @@
 if config.ENABLE_CUDASIM:
     import numpy as cp
 else:
-    import cupy as cp
+    try:
+        import cupy as cp
+    except ImportError:
+        cp = None
 
 
 class TestContextStack(CUDATestCase):
diff --git a/numba_cuda/numba/cuda/tests/cudadrv/test_events.py b/numba_cuda/numba/cuda/tests/cudadrv/test_events.py
index 36a2be623..963317a03 100644
--- a/numba_cuda/numba/cuda/tests/cudadrv/test_events.py
+++ b/numba_cuda/numba/cuda/tests/cudadrv/test_events.py
@@ -11,7 +11,10 @@
 if config.ENABLE_CUDASIM:
     import numpy as cp
 else:
-    import cupy as cp
+    try:
+        import cupy as cp
+    except ImportError:
+        cp = None
 
 
 class TestCudaEvent(CUDATestCase):
diff --git a/numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py b/numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py
index 3a2576d46..2a243ca73 100644
--- a/numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py
+++ b/numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py
@@ -11,7 +11,11 @@
 
 from numba import cuda
 from numba.cuda import config
-import cupy as cp
+
+try:
+    import cupy as cp
+except ImportError:
+    cp = None
 
 import os
 import io
diff --git a/numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py b/numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py
index 9a745cfdf..4d44927ff 100644
--- a/numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py
+++ b/numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py
@@ -6,7 +6,11 @@
 from numba import cuda
 from numba.cuda.testing import skip_on_cudasim
 from numba.cuda.testing import skip_if_cupy_unavailable
-import cupy as cp
+
+try:
+    import cupy as cp
+except ImportError:
+    cp = None
 
 
 @skip_on_cudasim("CUDA Profiler unsupported in the simulator")
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py b/numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py
index c04b10e27..03a25d24e 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py
@@ -10,7 +10,10 @@
 if config.ENABLE_CUDASIM:
     import numpy as cp
 else:
-    import cupy as cp
+    try:
+        import cupy as cp
+    except ImportError:
+        cp = None
 
 
 def reinterpret_array_type(byte_arr, start, stop, output):
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py b/numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py
index f2d116615..81f70dd40 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py
@@ -11,7 +11,11 @@
 )
 from numba.cuda.misc.special import literal_unroll
 from numba.cuda import config
-import cupy as cp
+
+try:
+    import cupy as cp
+except ImportError:
+    cp = None
 
 
 @skip_on_cudasim("doesn't work in the simulator")
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py b/numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py
index 8c1512594..f5c866b88 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py
@@ -20,7 +20,10 @@
 if config.ENABLE_CUDASIM:
     import numpy as cp
 else:
-    import cupy as cp
+    try:
+        import cupy as cp
+    except ImportError:
+        cp = None
 
 if not config.ENABLE_CUDASIM:
     from numba.cuda.bf16 import (
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py b/numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py
index 5baa8b8c6..f78abfbab 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py
@@ -10,7 +10,10 @@
 if config.ENABLE_CUDASIM:
     import numpy as cp
 else:
-    import cupy as cp
+    try:
+        import cupy as cp
+    except ImportError:
+        cp = None
 
 RISKFREE = 0.02
 VOLATILITY = 0.30
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_caching.py b/numba_cuda/numba/cuda/tests/cudapy/test_caching.py
index dc6a720a9..7a61bd26f 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_caching.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_caching.py
@@ -32,7 +32,10 @@
 if config.ENABLE_CUDASIM:
     import numpy as cp
 else:
-    import cupy as cp
+    try:
+        import cupy as cp
+    except ImportError:
+        cp = None
 from pickle import PicklingError
 
 # Module-level global for testing that caching rejects global device arrays
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py b/numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py
index 2fbe2c161..dc06765b6 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py
@@ -17,7 +17,11 @@
 )
 from numba.cuda.tests.support import linux_only, override_config
 from unittest.mock import call, patch
-import cupy as cp
+
+try:
+    import cupy as cp
+except ImportError:
+    cp = None
 
 import pytest
 
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py b/numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py
index 17fde5ca9..20339010a 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py
@@ -11,7 +11,10 @@
 if config.ENABLE_CUDASIM:
     import numpy as cp
 else:
-    import cupy as cp
+    try:
+        import cupy as cp
+    except ImportError:
+        cp = None
 
 
 class TestCudaJitNoTypes(CUDATestCase):
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py b/numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py
index aa39ca97c..045563134 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py
@@ -22,7 +22,11 @@
 from numba.cuda.tests.support import ignore_internal_warnings
 import numpy as np
 import inspect
-import cupy as cp
+
+try:
+    import cupy as cp
+except ImportError:
+    cp = None
 
 
 @skip_on_cudasim("Simulator does not produce debug dumps")
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_device_array_capture.py b/numba_cuda/numba/cuda/tests/cudapy/test_device_array_capture.py
index e85ab2a6e..3bb82e100 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_device_array_capture.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_device_array_capture.py
@@ -16,7 +16,11 @@
 from numba.cuda.testing import unittest, CUDATestCase, ForeignArray
 from numba.cuda.testing import skip_on_cudasim
 from numba.cuda.testing import skip_if_cupy_unavailable
-import cupy as cp
+
+try:
+    import cupy as cp
+except ImportError:
+    cp = None
 
 
 def make_numba_array(host_arr):
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_device_func.py b/numba_cuda/numba/cuda/tests/cudapy/test_device_func.py
index a2758b33f..d807d994e 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_device_func.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_device_func.py
@@ -28,7 +28,10 @@
 if config.ENABLE_CUDASIM:
     import numpy as cp
 else:
-    import cupy as cp
+    try:
+        import cupy as cp
+    except ImportError:
+        cp = None
 
 
 class TestDeviceFunc(CUDATestCase):
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py b/numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py
index 51b00db9d..4e48c4869 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py
@@ -29,7 +29,10 @@
 if config.ENABLE_CUDASIM:
     import numpy as cp
 else:
-    import cupy as cp
+    try:
+        import cupy as cp
+    except ImportError:
+        cp = None
 
 
 def add(x, y):
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py b/numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py
index f1d8f3f0d..23aee6869 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py
@@ -17,7 +17,11 @@
 import unittest
 from numba.cuda.core.errors import NumbaPerformanceWarning, TypingError
 from numba.cuda.tests.support import override_config
-import cupy as cp
+
+try:
+    import cupy as cp
+except ImportError:
+    cp = None
 
 
 def _get_matmulcore_gufunc(dtype=float32):
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py b/numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py
index 9d3f7a35d..26d8f4de7 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py
@@ -16,9 +16,13 @@
     skip_if_cupy_unavailable,
 )
 import unittest
-import cupy as cp
 import pytest
 
+try:
+    import cupy as cp
+except ImportError:
+    cp = None
+
 
 @skip_on_cudasim("ufunc API unsupported in the simulator")
 class TestGUFuncScalar(CUDATestCase):
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_idiv.py b/numba_cuda/numba/cuda/tests/cudapy/test_idiv.py
index 4a67a624f..263591e90 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_idiv.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_idiv.py
@@ -9,7 +9,10 @@
 if config.ENABLE_CUDASIM:
     import numpy as cp
 else:
-    import cupy as cp
+    try:
+        import cupy as cp
+    except ImportError:
+        cp = None
 
 
 class TestCudaIDiv(CUDATestCase):
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_laplace.py b/numba_cuda/numba/cuda/tests/cudapy/test_laplace.py
index f484cc388..46b9493b2 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_laplace.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_laplace.py
@@ -11,7 +11,10 @@
 if config.ENABLE_CUDASIM:
     import numpy as cp
 else:
-    import cupy as cp
+    try:
+        import cupy as cp
+    except ImportError:
+        cp = None
 
 # NOTE: CUDA kernel does not return any value
 
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_matmul.py b/numba_cuda/numba/cuda/tests/cudapy/test_matmul.py
index c249f38bd..9b739d7eb 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_matmul.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_matmul.py
@@ -10,7 +10,10 @@
 if config.ENABLE_CUDASIM:
     import numpy as cp
 else:
-    import cupy as cp
+    try:
+        import cupy as cp
+    except ImportError:
+        cp = None
 
 # Ensure the test takes a reasonable amount of time in the simulator
 if config.ENABLE_CUDASIM:
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py b/numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py
index 6f298405d..35976ff6a 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py
@@ -15,7 +15,10 @@
 if config.ENABLE_CUDASIM:
     import numpy as cp
 else:
-    import cupy as cp
+    try:
+        import cupy as cp
+    except ImportError:
+        cp = None
 
 
 class TestMultiGPUContext(CUDATestCase):
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py b/numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py
index 15c03558c..7c83bf2bd 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py
@@ -12,7 +12,11 @@
     DeprecatedDeviceArrayApiTest,
 )
 import unittest
-import cupy as cp
+
+try:
+    import cupy as cp
+except ImportError:
+    cp = None
 
 from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor
 
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_nondet.py b/numba_cuda/numba/cuda/tests/cudapy/test_nondet.py
index 4830f8b18..df291a215 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_nondet.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_nondet.py
@@ -9,7 +9,10 @@
 if config.ENABLE_CUDASIM:
     import numpy as cp
 else:
-    import cupy as cp
+    try:
+        import cupy as cp
+    except ImportError:
+        cp = None
 
 
 def generate_input(n):
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_reduction.py b/numba_cuda/numba/cuda/tests/cudapy/test_reduction.py
index a2b06827f..2add394e5 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_reduction.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_reduction.py
@@ -11,7 +11,10 @@
 if config.ENABLE_CUDASIM:
     import numpy as cp
 else:
-    import cupy as cp
+    try:
+        import cupy as cp
+    except ImportError:
+        cp = None
 
 # Avoid recompilation of the sum_reduce function by keeping it at global scope
 sum_reduce = cuda.Reduce(lambda a, b: a + b)
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_sm.py b/numba_cuda/numba/cuda/tests/cudapy/test_sm.py
index 8492453f5..a6f177db7 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_sm.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_sm.py
@@ -24,7 +24,10 @@
 if config.ENABLE_CUDASIM:
     import numpy as cp
 else:
-    import cupy as cp
+    try:
+        import cupy as cp
+    except ImportError:
+        cp = None
 from .extensions_usecases import struct_model_type, MyStruct
 import pytest
 
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_ssa.py b/numba_cuda/numba/cuda/tests/cudapy/test_ssa.py
index cdc100503..5fa03deda 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_ssa.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_ssa.py
@@ -26,7 +26,10 @@
 if config.ENABLE_CUDASIM:
     import numpy as cp
 else:
-    import cupy as cp
+    try:
+        import cupy as cp
+    except ImportError:
+        cp = None
 
 _DEBUG = False
 
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py b/numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py
index 812d31d2f..569cc1db4 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py
@@ -14,8 +14,12 @@
 from numba.cuda.testing import CUDATestCase
 from numba.cuda.testing import skip_if_cupy_unavailable
 import unittest
-import cupy as cp
 import pytest
+
+try:
+    import cupy as cp
+except ImportError:
+    cp = None
 from numba.cuda.testing import DeprecatedDeviceArrayApiWarning
 
 
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py b/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py
index 4be2bb254..356a82515 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py
@@ -16,7 +16,10 @@
 if config.ENABLE_CUDASIM:
     import numpy as cp
 else:
-    import cupy as cp
+    try:
+        import cupy as cp
+    except ImportError:
+        cp = None
 import unittest
 
 
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py b/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py
index 0204bf24b..b6c53731f 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py
@@ -10,7 +10,11 @@
     CUDATestCase,
 )
 import unittest
-import cupy as cp
+
+try:
+    import cupy as cp
+except ImportError:
+    cp = None
 
 sig = [float64(float64, float64)]
 
diff --git a/numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py b/numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py
index 2695ef746..3370a01e3 100644
--- a/numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py
+++ b/numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py
@@ -9,7 +9,11 @@
     skip_on_cudasim,
 )
 from numba.cuda.tests.support import captured_stdout
-import cupy as cp
+
+try:
+    import cupy as cp
+except ImportError:
+    cp = None
 
 
 @skip_on_cudasim("cudasim doesn't support cuda import at non-top-level")
diff --git a/numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py b/numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py
index e253c84fa..4dcbfe23a 100644
--- a/numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py
+++ b/numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py
@@ -10,7 +10,11 @@
 )
 from numba.cuda.tests.support import captured_stdout
 import numpy as np
-import cupy as cp
+
+try:
+    import cupy as cp
+except ImportError:
+    cp = None
 
 
 @skip_on_cudasim("cudasim doesn't support cuda import at non-top-level")
diff --git a/numba_cuda/numba/cuda/tests/doc_examples/test_globals.py b/numba_cuda/numba/cuda/tests/doc_examples/test_globals.py
index c6ee1776f..6b50aac9f 100644
--- a/numba_cuda/numba/cuda/tests/doc_examples/test_globals.py
+++ b/numba_cuda/numba/cuda/tests/doc_examples/test_globals.py
@@ -9,7 +9,11 @@
     skip_on_cudasim,
 )
 from numba.cuda.tests.support import captured_stdout
-import cupy as cp
+
+try:
+    import cupy as cp
+except ImportError:
+    cp = None
 
 
 @skip_on_cudasim("cudasim doesn't support cuda import at non-top-level")
diff --git a/numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py b/numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py
index 73c963a6a..cec938f22 100644
--- a/numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py
+++ b/numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py
@@ -10,7 +10,11 @@
     skip_unless_cc_60,
 )
 from numba.cuda.tests.support import captured_stdout
-import cupy as cp
+
+try:
+    import cupy as cp
+except ImportError:
+    cp = None
 
 
 @skip_if_cudadevrt_missing
diff --git a/numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py b/numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py
index 0d19800a2..ab152074c 100644
--- a/numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py
+++ b/numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py
@@ -18,7 +18,11 @@
     skip_on_cudasim,
 )
 from numba.cuda.tests.support import captured_stdout
-import cupy as cp
+
+try:
+    import cupy as cp
+except ImportError:
+    cp = None
 
 
 @skip_on_cudasim("cudasim doesn't support cuda import at non-top-level")
diff --git a/numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py b/numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py
index 3a360dc5c..d7ddb9f27 100644
--- a/numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py
+++ b/numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py
@@ -10,7 +10,11 @@
     skip_on_standalone_numba_cuda,
 )
 from numba.cuda.tests.support import captured_stdout
-import cupy as cp
+
+try:
+    import cupy as cp
+except ImportError:
+    cp = None
 
 
 @skip_on_cudasim("cudasim doesn't support cuda import at non-top-level")
diff --git a/numba_cuda/numba/cuda/tests/doc_examples/test_random.py b/numba_cuda/numba/cuda/tests/doc_examples/test_random.py
index 13be2930f..a4bf925a4 100644
--- a/numba_cuda/numba/cuda/tests/doc_examples/test_random.py
+++ b/numba_cuda/numba/cuda/tests/doc_examples/test_random.py
@@ -7,7 +7,11 @@
 import unittest
 
 from numba.cuda.testing import CUDATestCase, skip_on_cudasim
-import cupy as cp
+
+try:
+    import cupy as cp
+except ImportError:
+    cp = None
 
 
 @skip_on_cudasim("cudasim doesn't support cuda import at non-top-level")
diff --git a/numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py b/numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py
index 7a09cb1d3..8e54ae717 100644
--- a/numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py
+++ b/numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py
@@ -9,7 +9,11 @@
     skip_on_cudasim,
 )
 from numba.cuda.tests.support import captured_stdout
-import cupy as cp
+
+try:
+    import cupy as cp
+except ImportError:
+    cp = None
 
 
 @skip_on_cudasim("cudasim doesn't support cuda import at non-top-level")
diff --git a/numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py b/numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py
index 79fab2248..1b165b3f5 100644
--- a/numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py
+++ b/numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py
@@ -11,7 +11,11 @@
     skip_unless_cc_60,
 )
 from numba.cuda.tests.support import captured_stdout
-import cupy as cp
+
+try:
+    import cupy as cp
+except ImportError:
+    cp = None
 
 
 @skip_if_cudadevrt_missing
diff --git a/numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py b/numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py
index ddfa49336..4ba4399a4 100644
--- a/numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py
+++ b/numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py
@@ -5,7 +5,11 @@
 
 from numba.cuda.testing import CUDATestCase, skip_on_cudasim
 from numba.cuda.tests.support import captured_stdout
-import cupy as cp
+
+try:
+    import cupy as cp
+except ImportError:
+    cp = None
 
 
 @skip_on_cudasim("cudasim doesn't support cuda import at non-top-level")
diff --git a/numba_cuda/numba/cuda/tests/nrt/test_nrt.py b/numba_cuda/numba/cuda/tests/nrt/test_nrt.py
index 0b4a74ee2..0d2011f8e 100644
--- a/numba_cuda/numba/cuda/tests/nrt/test_nrt.py
+++ b/numba_cuda/numba/cuda/tests/nrt/test_nrt.py
@@ -31,7 +31,10 @@
 if config.ENABLE_CUDASIM:
     import numpy as cp
 else:
-    import cupy as cp
+    try:
+        import cupy as cp
+    except ImportError:
+        cp = None
 
 TEST_BIN_DIR = os.getenv("NUMBA_CUDA_TEST_BIN_DIR")
 

From d3394fd75db0435409961f34ffe0bc06284de7f4 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Mon, 2 Feb 2026 08:49:01 -0800
Subject: [PATCH 51/60] fix several CI issues

---
 numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py      | 1 +
 numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py | 6 ++++++
 testing/pytest.ini                                          | 1 +
 3 files changed, 8 insertions(+)

diff --git a/numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py b/numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py
index a796af0eb..46e9a3a79 100644
--- a/numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py
+++ b/numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py
@@ -10,6 +10,7 @@
 from numba.cuda.np.numpy_support import numpy_version
 from numba.cuda.np import numpy_support
 from numba.cuda import types
+import warnings
 
 import numpy as np
 
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py b/numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py
index 81f70dd40..8ee9c385f 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py
@@ -146,6 +146,9 @@ def kernel(out):
         self.assertPreciseEqual(expected, out.get())
 
     @skip_if_cupy_unavailable
+    @skip_on_nvjitlink_13_1_sm_120(
+        "var fails at link time on sm_120 + CUDA 13.1"
+    )
     def test_var_basic(self):
         arrays = (
             np.float64([1.0, 2.0, 0.0, -0.0, 1.0, -1.5]),
@@ -170,6 +173,9 @@ def kernel(out):
         self.assertPreciseEqual(expected, out.get(), prec="double")
 
     @skip_if_cupy_unavailable
+    @skip_on_nvjitlink_13_1_sm_120(
+        "std fails at link time on sm_120 + CUDA 13.1"
+    )
     def test_std_basic(self):
         arrays = (
             np.float64([1.0, 2.0, 0.0, -0.0, 1.0, -1.5]),
diff --git a/testing/pytest.ini b/testing/pytest.ini
index 5815ecb45..cbdb7defe 100644
--- a/testing/pytest.ini
+++ b/testing/pytest.ini
@@ -26,4 +26,5 @@ filterwarnings =
     ignore:overflow encountered in scalar .+:RuntimeWarning
     ignore:.*Host array used in CUDA kernel will incur copy overhead.*:numba.cuda.core.errors.NumbaPerformanceWarning
     ignore:NVRTC log messages.*Architectures prior to.*are deprecated.*:UserWarning
+    ignore:CUDA path could not be detected.*:UserWarning:cupy._environment
     ignore:Benchmark machine_info is different:pytest_benchmark.logger.PytestBenchmarkWarning

From e3c1c0996adc4bfa5df074e5501d12452cbcaed4 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Mon, 2 Feb 2026 09:09:05 -0800
Subject: [PATCH 52/60] tweak simulator warnings

---
 numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py       | 4 ----
 numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py | 5 +----
 2 files changed, 1 insertion(+), 8 deletions(-)

diff --git a/numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py b/numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py
index 46e9a3a79..fcbde51f8 100644
--- a/numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py
+++ b/numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py
@@ -360,10 +360,6 @@ def check_array_compatibility(ary1, ary2):
 
 
 def to_device(ary, stream=0, copy=True, to=None):
-    warnings.warn(
-        "to_device is deprecated. Please prefer cupy for array functions",
-        DeprecatedDeviceArrayApiWarning,
-    )
     ary = np.array(
         ary, copy=False if numpy_version < (2, 0) else None, subok=True
     )
diff --git a/numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py b/numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py
index e492e594e..79b70a577 100644
--- a/numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py
+++ b/numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py
@@ -15,10 +15,7 @@
     reason="no reason to run benchmarks in the simulator",
 )
 
-if not config.ENABLE_CUDASIM:
-    with pytest.warns(DeprecatedDeviceArrayApiWarning):
-        devary_arg = cuda.device_array(128, dtype=np.float32)
-else:
+with pytest.warns(DeprecatedDeviceArrayApiWarning):
     devary_arg = cuda.device_array(128, dtype=np.float32)
 
 

From 75fcce98f69211a972491fae16976073cd44b0dc Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Mon, 2 Feb 2026 09:14:37 -0800
Subject: [PATCH 53/60] conditional cupy import

---
 numba_cuda/numba/cuda/tests/cudapy/test_matmul.py          | 6 ++----
 .../numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py   | 5 +----
 numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py   | 5 +----
 numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py    | 7 +++----
 numba_cuda/numba/cuda/tests/doc_examples/test_random.py    | 6 ++----
 numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py | 6 ++----
 numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py    | 6 ++----
 7 files changed, 13 insertions(+), 28 deletions(-)

diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_matmul.py b/numba_cuda/numba/cuda/tests/cudapy/test_matmul.py
index 9b739d7eb..9c82fce7f 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_matmul.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_matmul.py
@@ -6,14 +6,12 @@
 from numba import cuda
 from numba.cuda import float32, void
 from numba.cuda.core import config
+import pytest
 
 if config.ENABLE_CUDASIM:
     import numpy as cp
 else:
-    try:
-        import cupy as cp
-    except ImportError:
-        cp = None
+    cp = pytest.importorskip("cupy")
 
 # Ensure the test takes a reasonable amount of time in the simulator
 if config.ENABLE_CUDASIM:
diff --git a/numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py b/numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py
index 4dcbfe23a..dfa6c8178 100644
--- a/numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py
+++ b/numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py
@@ -11,10 +11,7 @@
 from numba.cuda.tests.support import captured_stdout
 import numpy as np
 
-try:
-    import cupy as cp
-except ImportError:
-    cp = None
+cp = pytest.importorskip("cupy")
 
 
 @skip_on_cudasim("cudasim doesn't support cuda import at non-top-level")
diff --git a/numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py b/numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py
index cec938f22..e9d7a1829 100644
--- a/numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py
+++ b/numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py
@@ -11,10 +11,7 @@
 )
 from numba.cuda.tests.support import captured_stdout
 
-try:
-    import cupy as cp
-except ImportError:
-    cp = None
+cp = pytest.importorskip("cupy")
 
 
 @skip_if_cudadevrt_missing
diff --git a/numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py b/numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py
index ab152074c..91f6ab98a 100644
--- a/numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py
+++ b/numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py
@@ -18,11 +18,10 @@
     skip_on_cudasim,
 )
 from numba.cuda.tests.support import captured_stdout
+import pytest
 
-try:
-    import cupy as cp
-except ImportError:
-    cp = None
+
+cupy = pytest.importorskip("cupy")
 
 
 @skip_on_cudasim("cudasim doesn't support cuda import at non-top-level")
diff --git a/numba_cuda/numba/cuda/tests/doc_examples/test_random.py b/numba_cuda/numba/cuda/tests/doc_examples/test_random.py
index a4bf925a4..f34553fe6 100644
--- a/numba_cuda/numba/cuda/tests/doc_examples/test_random.py
+++ b/numba_cuda/numba/cuda/tests/doc_examples/test_random.py
@@ -7,11 +7,9 @@
 import unittest
 
 from numba.cuda.testing import CUDATestCase, skip_on_cudasim
+import pytest
 
-try:
-    import cupy as cp
-except ImportError:
-    cp = None
+cp = pytest.importorskip("cupy")
 
 
 @skip_on_cudasim("cudasim doesn't support cuda import at non-top-level")
diff --git a/numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py b/numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py
index 8e54ae717..024d5713a 100644
--- a/numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py
+++ b/numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py
@@ -9,11 +9,9 @@
     skip_on_cudasim,
 )
 from numba.cuda.tests.support import captured_stdout
+import pytest
 
-try:
-    import cupy as cp
-except ImportError:
-    cp = None
+cp = pytest.importorskip("cupy")
 
 
 @skip_on_cudasim("cudasim doesn't support cuda import at non-top-level")
diff --git a/numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py b/numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py
index 4ba4399a4..1dd5aad85 100644
--- a/numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py
+++ b/numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py
@@ -5,11 +5,9 @@
 
 from numba.cuda.testing import CUDATestCase, skip_on_cudasim
 from numba.cuda.tests.support import captured_stdout
+import pytest
 
-try:
-    import cupy as cp
-except ImportError:
-    cp = None
+cp = pytest.importorskip("cupy")
 
 
 @skip_on_cudasim("cudasim doesn't support cuda import at non-top-level")

From a0f7dcce6d04a1a15ef50f9352d79f68374b1e20 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Mon, 2 Feb 2026 09:59:18 -0800
Subject: [PATCH 54/60] more simulator fixups

---
 numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py | 5 +++--
 numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py      | 5 ++++-
 numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py | 5 ++++-
 numba_cuda/numba/cuda/tests/cudapy/test_matmul.py            | 3 ++-
 .../numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py     | 1 +
 numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py     | 1 +
 numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py      | 2 +-
 7 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py b/numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py
index 79b70a577..6852d01e4 100644
--- a/numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py
+++ b/numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py
@@ -15,8 +15,9 @@
     reason="no reason to run benchmarks in the simulator",
 )
 
-with pytest.warns(DeprecatedDeviceArrayApiWarning):
-    devary_arg = cuda.device_array(128, dtype=np.float32)
+if not config.ENABLE_CUDASIM:
+    with pytest.warns(DeprecatedDeviceArrayApiWarning):
+        devary_arg = cuda.device_array(128, dtype=np.float32)
 
 
 @pytest.mark.parametrize(
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py b/numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py
index f78abfbab..ae39dc28b 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py
@@ -6,6 +6,7 @@
 from numba import cuda
 from numba.cuda import config, double, void
 from numba.cuda.testing import unittest, CUDATestCase, skip_if_cupy_unavailable
+from contextlib import nullcontext
 
 if config.ENABLE_CUDASIM:
     import numpy as cp
@@ -135,7 +136,9 @@ def black_scholes_cuda(callResult, putResult, S, X, T, R, V):
         # numba
         blockdim = 512, 1
         griddim = int(math.ceil(float(OPT_N) / blockdim[0])), 1
-        stream = cp.cuda.Stream()
+        stream = (
+            cp.cuda.Stream() if not config.ENABLE_CUDASIM else nullcontext()
+        )
         nb_stream = cuda.api.external_stream(stream.ptr)
 
         with stream:
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py b/numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py
index 20339010a..daf0880a5 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py
@@ -7,6 +7,7 @@
 from numba.cuda.testing import CUDATestCase, skip_if_cupy_unavailable
 from numba.cuda.tests.support import override_config
 import unittest
+from contextlib import nullcontext
 
 if config.ENABLE_CUDASIM:
     import numpy as cp
@@ -81,7 +82,9 @@ def outer(argin, argout):
         a = np.zeros(1)
         b = np.zeros(1)
 
-        stream = cp.cuda.Stream()
+        stream = (
+            cp.cuda.Stream() if not config.ENABLE_CUDASIM else nullcontext()
+        )
         nb_stream = cuda.api.external_stream(stream.ptr)
         with stream:
             d_a = cp.asarray(a)
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_matmul.py b/numba_cuda/numba/cuda/tests/cudapy/test_matmul.py
index 9c82fce7f..a7a90f183 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_matmul.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_matmul.py
@@ -7,6 +7,7 @@
 from numba.cuda import float32, void
 from numba.cuda.core import config
 import pytest
+from contextlib import nullcontext
 
 if config.ENABLE_CUDASIM:
     import numpy as cp
@@ -61,7 +62,7 @@ def cu_square_matrix_mul(A, B, C):
     B = np.array(np.random.random((n, n)), dtype=np.float32)
     C = np.empty_like(A)
 
-    stream = cp.cuda.Stream()
+    stream = cp.cuda.Stream() if not config.ENABLE_CUDASIM else nullcontext()
     nb_stream = cuda.api.external_stream(stream.ptr)
     with stream:
         dA = cp.asarray(A)
diff --git a/numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py b/numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py
index dfa6c8178..22ce0dbb2 100644
--- a/numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py
+++ b/numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py
@@ -10,6 +10,7 @@
 )
 from numba.cuda.tests.support import captured_stdout
 import numpy as np
+import pytest
 
 cp = pytest.importorskip("cupy")
 
diff --git a/numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py b/numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py
index e9d7a1829..be0517bff 100644
--- a/numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py
+++ b/numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py
@@ -10,6 +10,7 @@
     skip_unless_cc_60,
 )
 from numba.cuda.tests.support import captured_stdout
+import pytest
 
 cp = pytest.importorskip("cupy")
 
diff --git a/numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py b/numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py
index 91f6ab98a..3b0ad46ff 100644
--- a/numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py
+++ b/numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py
@@ -21,7 +21,7 @@
 import pytest
 
 
-cupy = pytest.importorskip("cupy")
+cp = pytest.importorskip("cupy")
 
 
 @skip_on_cudasim("cudasim doesn't support cuda import at non-top-level")

From 78b8d9c0b19de7ba101dbc7b36fc34b6f1d777b9 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Mon, 2 Feb 2026 10:30:27 -0800
Subject: [PATCH 55/60] skip event and idiv tests when cupy is unavailable

---
 numba_cuda/numba/cuda/tests/cudadrv/test_events.py | 3 ++-
 numba_cuda/numba/cuda/tests/cudapy/test_idiv.py    | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/numba_cuda/numba/cuda/tests/cudadrv/test_events.py b/numba_cuda/numba/cuda/tests/cudadrv/test_events.py
index 963317a03..fc63559c0 100644
--- a/numba_cuda/numba/cuda/tests/cudadrv/test_events.py
+++ b/numba_cuda/numba/cuda/tests/cudadrv/test_events.py
@@ -6,7 +6,7 @@
 from numba.cuda import config
 from numba.cuda.testing import unittest, CUDATestCase
 from numba.cuda._compat import Device
-from numba.cuda.testing import skip_on_cudasim
+from numba.cuda.testing import skip_on_cudasim, skip_if_cupy_unavailable
 
 if config.ENABLE_CUDASIM:
     import numpy as cp
@@ -18,6 +18,7 @@
 
 
 class TestCudaEvent(CUDATestCase):
+    @skip_if_cupy_unavailable
     def test_event_elapsed(self):
         N = 32
         evtstart = cuda.event()
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_idiv.py b/numba_cuda/numba/cuda/tests/cudapy/test_idiv.py
index 263591e90..a0fe365a8 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_idiv.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_idiv.py
@@ -16,6 +16,7 @@
 
 
 class TestCudaIDiv(CUDATestCase):
+    @skip_if_cupy_unavailable
     def test_inplace_div(self):
         @cuda.jit(void(float32[:, :], int32, int32))
         def div(grid, l_x, l_y):

From 79e8d1e2eb8138e3aff319f7b6670e37eb8233bf Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Mon, 2 Feb 2026 11:12:27 -0800
Subject: [PATCH 56/60] fix stream behavior in sim

---
 numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py | 9 +++++++--
 numba_cuda/numba/cuda/tests/cudapy/test_matmul.py       | 6 +++++-
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py b/numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py
index ae39dc28b..cf31585ff 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py
@@ -139,7 +139,11 @@ def black_scholes_cuda(callResult, putResult, S, X, T, R, V):
         stream = (
             cp.cuda.Stream() if not config.ENABLE_CUDASIM else nullcontext()
         )
-        nb_stream = cuda.api.external_stream(stream.ptr)
+        nb_stream = (
+            cuda.api.external_stream(stream.ptr)
+            if not config.ENABLE_CUDASIM
+            else cuda.stream()
+        )
 
         with stream:
             d_callResult = cp.asarray(callResultNumba)
@@ -169,7 +173,8 @@ def black_scholes_cuda(callResult, putResult, S, X, T, R, V):
                 d_putResult.get() if not config.ENABLE_CUDASIM else d_putResult
             )
 
-        stream.synchronize()
+        if not config.ENABLE_CUDASIM:
+            stream.synchronize()
 
         delta = np.abs(callResultNumpy - callResultNumba)
         L1norm = delta.sum() / np.abs(callResultNumpy).sum()
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_matmul.py b/numba_cuda/numba/cuda/tests/cudapy/test_matmul.py
index a7a90f183..36ffda2e7 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_matmul.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_matmul.py
@@ -63,7 +63,11 @@ def cu_square_matrix_mul(A, B, C):
     C = np.empty_like(A)
 
     stream = cp.cuda.Stream() if not config.ENABLE_CUDASIM else nullcontext()
-    nb_stream = cuda.api.external_stream(stream.ptr)
+    nb_stream = (
+        cuda.api.external_stream(stream.ptr)
+        if not config.ENABLE_CUDASIM
+        else cuda.stream()
+    )
     with stream:
         dA = cp.asarray(A)
         dB = cp.asarray(B)

From f704f6f55b0dd0f91608e5e0719e0463843b67ae Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Mon, 2 Feb 2026 13:28:33 -0800
Subject: [PATCH 57/60] fix a few lingering test failures

---
 .../numba/cuda/tests/cudadrv/test_cuda_ndarray.py  | 14 +++++++++++---
 .../cuda/tests/cudapy/test_cuda_jit_no_types.py    |  6 +++++-
 2 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py b/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py
index d945b0252..4b3c4bf25 100644
--- a/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py
+++ b/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py
@@ -6,11 +6,19 @@
 from numba.cuda.cudadrv import devicearray
 from numba import cuda
 from numba.cuda.testing import unittest, DeprecatedDeviceArrayApiTest
-from numba.cuda.testing import skip_on_cudasim
+from numba.cuda.testing import skip_on_cudasim, skip_if_cupy_unavailable
 from numba.cuda.tests.support import IS_NUMPY_2
 
 import pytest
 
+if config.ENABLE_CUDASIM:
+    import numpy as cp
+else:
+    try:
+        import cupy as cp
+    except ImportError:
+        cp = None
+
 
 class TestCudaNDArray(DeprecatedDeviceArrayApiTest):
     def test_device_array_interface(self):
@@ -618,11 +626,10 @@ def test_2d_view_f(self):
         view = np.zeros(shape, order="F")[::2, ::2]
         self._test_against_array_core(view)
 
+    @skip_if_cupy_unavailable
     def test_kernel_with_buffer(self):
         from cuda.core import Buffer
 
-        cp = pytest.importorskip("cupy")
-
         @cuda.jit
         def kernel(buf, n):
             n[0] = len(buf)
@@ -635,6 +642,7 @@ def kernel(buf, n):
         func(buf, out)
         assert out[0] == n
 
+    @skip_if_cupy_unavailable
     def test_kernel_with_strided_memory_view(self):
         from cuda.core.utils import StridedMemoryView
 
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py b/numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py
index daf0880a5..13440e1cc 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py
@@ -85,7 +85,11 @@ def outer(argin, argout):
         stream = (
             cp.cuda.Stream() if not config.ENABLE_CUDASIM else nullcontext()
         )
-        nb_stream = cuda.api.external_stream(stream.ptr)
+        nb_stream = (
+            cuda.api.external_stream(stream.ptr)
+            if not config.ENABLE_CUDASIM
+            else cuda.stream()
+        )
         with stream:
             d_a = cp.asarray(a)
             d_b = cp.asarray(b)

From fdd36804a58a2118a7635b0873b10482c03f185b Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Tue, 3 Feb 2026 05:39:28 -0800
Subject: [PATCH 58/60] fix multithreaded tests in the simulator

---
 .../numba/cuda/tests/cudadrv/test_cuda_ndarray.py |  1 +
 .../cuda/tests/cudadrv/test_select_device.py      | 15 +++++++++++++--
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py b/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py
index 4b3c4bf25..9f2747b9f 100644
--- a/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py
+++ b/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py
@@ -8,6 +8,7 @@
 from numba.cuda.testing import unittest, DeprecatedDeviceArrayApiTest
 from numba.cuda.testing import skip_on_cudasim, skip_if_cupy_unavailable
 from numba.cuda.tests.support import IS_NUMPY_2
+from numba.cuda import config
 
 import pytest
 
diff --git a/numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py b/numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py
index 5b8f4164b..301369bfc 100644
--- a/numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py
+++ b/numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py
@@ -9,15 +9,26 @@
 
 import numpy as np
 from numba import cuda
-from numba.cuda.testing import unittest, CUDATestCase
+from numba.cuda.testing import (
+    unittest,
+    CUDATestCase,
+    DeprecatedDeviceArrayApiWarning,
+)
+import pytest
 
 
 def newthread(exception_queue):
     try:
+        from numba.cuda import config
+
         cuda.select_device(0)
         stream = cuda.stream()
         A = np.arange(100)
-        dA = cuda._api._to_device(A, stream=stream)
+        if not config.ENABLE_CUDASIM:
+            with pytest.warns(DeprecatedDeviceArrayApiWarning):
+                dA = cuda.to_device(A, stream=stream)
+        else:
+            dA = cuda.to_device(A, stream=stream)
         stream.synchronize()
         del dA
         del stream

From ee66e1fd9ca710441677f3c6dab68c9e46f13932 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Tue, 3 Feb 2026 06:36:56 -0800
Subject: [PATCH 59/60] simulator doesn't need cupy

---
 .github/workflows/simulator-test.yaml |  2 +-
 pixi.toml                             | 24 ++++++++++++++++++++++++
 2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/simulator-test.yaml b/.github/workflows/simulator-test.yaml
index 5cb67a1f5..f415ce34f 100644
--- a/.github/workflows/simulator-test.yaml
+++ b/.github/workflows/simulator-test.yaml
@@ -102,7 +102,7 @@ jobs:
           set -euo pipefail
           CUDA_VER_PART="$(cut -d. -f1-2 <<< "${{ matrix.CUDA_VER }}" | tr . -)"
           PY_VER_PART="py$(tr -d . <<< "${{ matrix.PY_VER }}")"
-          PIXI_ENV="cu-${CUDA_VER_PART}-${PY_VER_PART}"
+          PIXI_ENV="sim-cu-${CUDA_VER_PART}-${PY_VER_PART}"
           echo "PIXI_ENV=${PIXI_ENV}" >> "${GITHUB_ENV}"
       - name: Setup pixi
         uses: prefix-dev/setup-pixi@fef5c9568ca6c4ff7707bf840ab0692ba3f08293  # v0.9.0
diff --git a/pixi.toml b/pixi.toml
index 656dea385..d67064179 100644
--- a/pixi.toml
+++ b/pixi.toml
@@ -111,6 +111,20 @@ nvidia-sphinx-theme = "*"
 [feature.test-cupy.dependencies]
 cupy = "*"
 
+[feature.test-sim.dependencies]
+# Simulator testing - same as test feature but without cupy
+make = "*"
+pre-commit = ">=4.3"
+psutil = ">=6"
+cffi = ">=1"
+pytest = ">=8,<9"
+pytest-xdist = ">=3.8"
+pytest-benchmark = ">=5.1"
+
+[feature.test-sim.pypi-dependencies]
+ml_dtypes = "*"
+filecheck = "*"
+
 [environments]
 default = { features = ["cu-13-1", "test", "cu", "cu-13", "cu-rt", "nvvm", "py314"], solve-group = "default" }
 dev = { features = ["ruff"], no-default-feature = true }
@@ -231,6 +245,16 @@ cu-13-1-py314 = { features = [
     "nvvm",
     "py314",
 ], solve-group = "cu-13-1-py314" }
+# Simulator (no CUDA required, no CuPy)
+sim-cu-12-8-py312 = { features = [
+    "cu-12-8",
+    "test-sim",
+    "cu",
+    "cu-12",
+    "cu-rt",
+    "nvvm",
+    "py312",
+], solve-group = "sim-cu-12-8-py312" }
 
 docs = { features = ["docs"], solve-group = "docs" }
 

From de75f7028f7dc3086b021ad7d4acb5de80530f03 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Tue, 3 Feb 2026 06:42:02 -0800
Subject: [PATCH 60/60] restore old importorskip-based cupy skips in test_cuda_ndarray

---
 numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py b/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py
index 9f2747b9f..b4f232de8 100644
--- a/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py
+++ b/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py
@@ -627,10 +627,11 @@ def test_2d_view_f(self):
         view = np.zeros(shape, order="F")[::2, ::2]
         self._test_against_array_core(view)
 
-    @skip_if_cupy_unavailable
     def test_kernel_with_buffer(self):
         from cuda.core import Buffer
 
+        cp = pytest.importorskip("cupy")
+
         @cuda.jit
         def kernel(buf, n):
             n[0] = len(buf)
@@ -643,7 +644,6 @@ def kernel(buf, n):
         func(buf, out)
         assert out[0] == n
 
-    @skip_if_cupy_unavailable
     def test_kernel_with_strided_memory_view(self):
         from cuda.core.utils import StridedMemoryView