diff --git a/.github/workflows/simulator-test.yaml b/.github/workflows/simulator-test.yaml index 5cb67a1f5..f415ce34f 100644 --- a/.github/workflows/simulator-test.yaml +++ b/.github/workflows/simulator-test.yaml @@ -102,7 +102,7 @@ jobs: set -euo pipefail CUDA_VER_PART="$(cut -d. -f1-2 <<< "${{ matrix.CUDA_VER }}" | tr . -)" PY_VER_PART="py$(tr -d . <<< "${{ matrix.PY_VER }}")" - PIXI_ENV="cu-${CUDA_VER_PART}-${PY_VER_PART}" + PIXI_ENV="sim-cu-${CUDA_VER_PART}-${PY_VER_PART}" echo "PIXI_ENV=${PIXI_ENV}" >> "${GITHUB_ENV}" - name: Setup pixi uses: prefix-dev/setup-pixi@fef5c9568ca6c4ff7707bf840ab0692ba3f08293 # v0.9.0 diff --git a/ci/coverage_report.sh b/ci/coverage_report.sh index dce352e36..cad6ce976 100755 --- a/ci/coverage_report.sh +++ b/ci/coverage_report.sh @@ -5,6 +5,7 @@ set -euo pipefail CUDA_VER_MAJOR_MINOR=${CUDA_VER%.*} +CUDA_VER_MAJOR=${CUDA_VER%.*.*} rapids-logger "Install wheel with test dependencies and coverage tools" package=$(realpath "${NUMBA_CUDA_ARTIFACTS_DIR}"/*.whl) @@ -13,6 +14,7 @@ python -m pip install \ "${package}" \ "cuda-python==${CUDA_VER_MAJOR_MINOR%.*}.*" \ "cuda-core" \ + "cupy-cuda${CUDA_VER_MAJOR}x" \ pytest-cov \ coverage \ --group test diff --git a/ci/tools/run-tests b/ci/tools/run-tests index c936bf439..0762ca14e 100755 --- a/ci/tools/run-tests +++ b/ci/tools/run-tests @@ -9,8 +9,20 @@ set -euo pipefail echo "Installing numba-cuda wheel with test dependencies" + +# Check Python version to determine if we should install cupy +PYTHON_VERSION=$(python -c "import sys; print(f'{sys.version_info.major}.{sys.version_info.minor}')") +INSTALL_CUPY=true +if [[ "${PYTHON_VERSION}" == "3.14" ]]; then + echo "Python 3.14 detected - skipping cupy installation (not yet available)" + INSTALL_CUPY=false +fi + if [[ "${LOCAL_CTK}" == 1 ]]; then pip install "${NUMBA_CUDA_ARTIFACTS_DIR}"/*.whl "cuda-bindings==${TEST_CUDA_MAJOR}.*" --group test + if [[ "${INSTALL_CUPY}" == "true" ]]; then + pip install "cupy-cuda${TEST_CUDA_MAJOR}x" + fi else 
pip install $(ls "${NUMBA_CUDA_ARTIFACTS_DIR}"/*.whl)["cu${TEST_CUDA_MAJOR}"] "cuda-toolkit==${TEST_CUDA_MAJOR}.${TEST_CUDA_MINOR}.*" --group "test-cu${TEST_CUDA_MAJOR}" fi diff --git a/docs/source/user/ipc.rst b/docs/source/user/ipc.rst index 904f1fd10..80acb0577 100644 --- a/docs/source/user/ipc.rst +++ b/docs/source/user/ipc.rst @@ -18,6 +18,10 @@ Sharing between processes is implemented using the Legacy CUDA IPC API Export device array to another process -------------------------------------- +.. note:: + DeviceNDArray is deprecated. Prefer CuPy for array operations including + inter-process communication. + A device array can be shared with another process in the same machine using the CUDA IPC API. To do so, use the ``.get_ipc_handle()`` method on the device array to get a ``IpcArrayHandle`` object, which can be transferred to another @@ -34,6 +38,10 @@ process. Import IPC memory from another process -------------------------------------- +.. note:: + DeviceNDArray is deprecated. Prefer CuPy for array operations including + inter-process communication. + The following function is used to open IPC handle from another process as a device array. diff --git a/docs/source/user/memory.rst b/docs/source/user/memory.rst index be4236619..3dd471745 100644 --- a/docs/source/user/memory.rst +++ b/docs/source/user/memory.rst @@ -11,6 +11,10 @@ Memory management Data transfer ============= +.. note:: + DeviceNDArray is deprecated. Prefer CuPy for array operations including + memory transfers. + Even though Numba can automatically transfer NumPy arrays to the device, it can only do so conservatively by always transferring device memory back to the host when a kernel finishes. To avoid the unnecessary transfer for @@ -38,6 +42,9 @@ buffer using the following APIs: Device arrays ------------- +.. note:: + DeviceNDArray is deprecated. Prefer CuPy for array operations. + Device array references have the following methods. 
These methods are to be called in host code, not within CUDA-jitted functions. @@ -52,6 +59,11 @@ called in host code, not within CUDA-jitted functions. Pinned memory ============= +.. note:: + DeviceNDArray is deprecated. Prefer CuPy for array operations including + allocating pinned memory. + + .. autofunction:: numba.cuda.pinned :noindex: .. autofunction:: numba.cuda.pinned_array @@ -63,6 +75,10 @@ Pinned memory Mapped memory ============= +.. note:: + DeviceNDArray is deprecated. Prefer CuPy for array operations including + allocating mapped memory. + .. autofunction:: numba.cuda.mapped :noindex: .. autofunction:: numba.cuda.mapped_array @@ -76,6 +92,10 @@ Mapped memory Managed memory ============== +.. note:: + DeviceNDArray is deprecated. Prefer CuPy for array operations including + allocating managed memory. + .. autofunction:: numba.cuda.managed_array :noindex: diff --git a/numba_cuda/numba/cuda/_api.py b/numba_cuda/numba/cuda/_api.py new file mode 100644 index 000000000..24284f56e --- /dev/null +++ b/numba_cuda/numba/cuda/_api.py @@ -0,0 +1,355 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: BSD-2-Clause + +""" +APIs that are exported to numba.cuda +""" + +import contextlib + +import numpy as np +from .cudadrv import devicearray, devices, driver +from numba.cuda.core import config +from numba.cuda.api_util import prepare_shape_strides_dtype +from numba.cuda.cudadrv.devicearray import ( + DeviceNDArray, +) + +# NDarray device helper + +require_context = devices.require_context +current_context = devices.get_context +gpus = devices.gpus + + +@require_context +def external_stream(ptr): + """Create a Numba stream object for a stream allocated outside Numba.
+ + :param ptr: Pointer to the external stream to wrap in a Numba Stream + :type ptr: int + """ + return current_context().create_external_stream(ptr) + + +def _from_cuda_array_interface(desc, owner=None, sync=True): + """Create a _DeviceNDArray from a cuda-array-interface description. + The ``owner`` is the owner of the underlying memory. + The resulting _DeviceNDArray will acquire a reference from it. + + If ``sync`` is ``True``, then the imported stream (if present) will be + synchronized. + """ + version = desc.get("version") + # Mask introduced in version 1 + if 1 <= version: + mask = desc.get("mask") + # Would ideally be better to detect if the mask is all valid + if mask is not None: + raise NotImplementedError("Masked arrays are not supported") + + shape = desc["shape"] + strides = desc.get("strides") + + shape, strides, dtype = prepare_shape_strides_dtype( + shape, strides, desc["typestr"], order="C" + ) + size = driver.memory_size_from_info(shape, strides, dtype.itemsize) + + cudevptr_class = driver.binding.CUdeviceptr + devptr = cudevptr_class(desc["data"][0]) + data = driver.MemoryPointer( + current_context(), devptr, size=size, owner=owner + ) + stream_ptr = desc.get("stream", None) + if stream_ptr is not None: + stream = external_stream(stream_ptr) + if sync and config.CUDA_ARRAY_INTERFACE_SYNC: + stream.synchronize() + else: + stream = 0 # No "Numba default stream", not the CUDA default stream + da = devicearray.DeviceNDArray._create_nowarn( + shape=shape, strides=strides, dtype=dtype, gpu_data=data, stream=stream + ) + return da + + +def _as_cuda_array(obj, sync=True): + """Create a _DeviceNDArray from any object that implements + the :ref:`cuda array interface `. + + A view of the underlying GPU buffer is created. No copying of the data + is done. The resulting _DeviceNDArray will acquire a reference from `obj`. + + If ``sync`` is ``True``, then the imported stream (if present) will be + synchronized. 
+ """ + if ( + interface := getattr(obj, "__cuda_array_interface__", None) + ) is not None: + return _from_cuda_array_interface(interface, owner=obj, sync=sync) + raise TypeError("*obj* doesn't implement the cuda array interface.") + + +def _is_cuda_array(obj): + """Test if the object has defined the `__cuda_array_interface__` attribute. + + Does not verify the validity of the interface. + """ + return hasattr(obj, "__cuda_array_interface__") + + +@require_context +def _to_device(obj, stream=0, copy=True, to=None): + """to_device(obj, stream=0, copy=True, to=None) + + Allocate and transfer a numpy ndarray or structured scalar to the device. + + To copy host->device a numpy array:: + + ary = np.arange(10) + d_ary = cuda.to_device(ary) + + To enqueue the transfer to a stream:: + + stream = cuda.stream() + d_ary = cuda.to_device(ary, stream=stream) + + The resulting ``d_ary`` is a ``DeviceNDArray``. + + To copy device->host:: + + hary = d_ary.copy_to_host() + + To copy device->host to an existing array:: + + ary = np.empty(shape=d_ary.shape, dtype=d_ary.dtype) + d_ary.copy_to_host(ary) + + To enqueue the transfer to a stream:: + + hary = d_ary.copy_to_host(stream=stream) + """ + if to is None: + to, new = devicearray.auto_device( + obj, stream=stream, copy=copy, user_explicit=True + ) + return to + if copy: + to.copy_to_device(obj, stream=stream) + return to + + +@require_context +def _device_array(shape, dtype=np.float64, strides=None, order="C", stream=0): + """device_array(shape, dtype=np.float64, strides=None, order='C', stream=0) + + Allocate an empty device ndarray. Similar to :meth:`numpy.empty`. 
+ """ + shape, strides, dtype = prepare_shape_strides_dtype( + shape, strides, dtype, order + ) + return DeviceNDArray._create_nowarn( + shape=shape, strides=strides, dtype=dtype, stream=stream + ) + + +@require_context +def _managed_array( + shape, + dtype=np.float64, + strides=None, + order="C", + stream=0, + attach_global=True, +): + """managed_array(shape, dtype=np.float64, strides=None, order='C', stream=0, + attach_global=True) + + Allocate a np.ndarray with a buffer that is managed. + Similar to np.empty(). + + Managed memory is supported on Linux / x86 and PowerPC, and is considered + experimental on Windows and Linux / AArch64. + + :param attach_global: A flag indicating whether to attach globally. Global + attachment implies that the memory is accessible from + any stream on any device. If ``False``, attachment is + *host*, and memory is only accessible by devices + with Compute Capability 6.0 and later. + """ + shape, strides, dtype = prepare_shape_strides_dtype( + shape, strides, dtype, order + ) + bytesize = driver.memory_size_from_info(shape, strides, dtype.itemsize) + buffer = current_context().memallocmanaged( + bytesize, attach_global=attach_global + ) + npary = np.ndarray( + shape=shape, strides=strides, dtype=dtype, order=order, buffer=buffer + ) + managedview = np.ndarray.view(npary, type=devicearray.ManagedNDArray) + managedview.device_setup(buffer, stream=stream) + return managedview + + +@require_context +def _pinned_array(shape, dtype=np.float64, strides=None, order="C"): + """pinned_array(shape, dtype=np.float64, strides=None, order='C') + + Allocate an :class:`ndarray ` with a buffer that is pinned + (pagelocked). Similar to :func:`np.empty() `. 
+ """ + shape, strides, dtype = prepare_shape_strides_dtype( + shape, strides, dtype, order + ) + bytesize = driver.memory_size_from_info(shape, strides, dtype.itemsize) + buffer = current_context().memhostalloc(bytesize) + return np.ndarray( + shape=shape, strides=strides, dtype=dtype, order=order, buffer=buffer + ) + + +@require_context +def _mapped_array( + shape, + dtype=np.float64, + strides=None, + order="C", + stream=0, + portable=False, + wc=False, +): + """mapped_array(shape, dtype=np.float64, strides=None, order='C', stream=0, + portable=False, wc=False) + + Allocate a mapped ndarray with a buffer that is pinned and mapped on + to the device. Similar to np.empty() + + :param portable: a boolean flag to allow the allocated device memory to be + usable in multiple devices. + :param wc: a boolean flag to enable writecombined allocation which is faster + to write by the host and to read by the device, but slower to + read by the host and slower to write by the device. + """ + shape, strides, dtype = prepare_shape_strides_dtype( + shape, strides, dtype, order + ) + bytesize = driver.memory_size_from_info(shape, strides, dtype.itemsize) + buffer = current_context().memhostalloc(bytesize, mapped=True) + npary = np.ndarray( + shape=shape, strides=strides, dtype=dtype, order=order, buffer=buffer + ) + mappedview = np.ndarray.view(npary, type=devicearray.MappedNDArray) + mappedview.device_setup(buffer, stream=stream) + return mappedview + + +@contextlib.contextmanager +@require_context +def _open_ipc_array(handle, shape, dtype, strides=None, offset=0): + """ + A context manager that opens a IPC *handle* (*CUipcMemHandle*) that is + represented as a sequence of bytes (e.g. *bytes*, tuple of int) + and represent it as an array of the given *shape*, *strides* and *dtype*. + The *strides* can be omitted. In that case, it is assumed to be a 1D + C contiguous array. + + Yields a device array. + + The IPC handle is closed automatically when context manager exits.
+ """ + dtype = np.dtype(dtype) + # compute size + size = np.prod(shape) * dtype.itemsize + # manually recreate the IPC mem handle + driver_handle = driver.binding.CUipcMemHandle() + driver_handle.reserved = handle + # use *IpcHandle* to open the IPC memory + ipchandle = driver.IpcHandle(None, driver_handle, size, offset=offset) + yield ipchandle.open_array( + current_context(), shape=shape, strides=strides, dtype=dtype + ) + ipchandle.close() + + +def _contiguous_strides_like_array(ary): + """ + Given an array, compute strides for a new contiguous array of the same + shape. + """ + # Don't recompute strides if the default strides will be sufficient to + # create a contiguous array. + if ary.flags["C_CONTIGUOUS"] or ary.flags["F_CONTIGUOUS"] or ary.ndim <= 1: + return None + + # Otherwise, we need to compute new strides using an algorithm adapted from + # NumPy v1.17.4's PyArray_NewLikeArrayWithShape in + # core/src/multiarray/ctors.c. We permute the strides in ascending order + # then compute the stride for the dimensions with the same permutation. + + # Stride permutation. E.g. a stride array (4, -2, 12) becomes + # [(1, -2), (0, 4), (2, 12)] + strideperm = [x for x in enumerate(ary.strides)] + strideperm.sort(key=lambda x: x[1]) + + # Compute new strides using permutation + strides = [0] * len(ary.strides) + stride = ary.dtype.itemsize + for i_perm, _ in strideperm: + strides[i_perm] = stride + stride *= ary.shape[i_perm] + return tuple(strides) + + +def _order_like_array(ary): + if ary.flags["F_CONTIGUOUS"] and not ary.flags["C_CONTIGUOUS"]: + return "F" + else: + return "C" + + +def _device_array_like(ary, stream=0): + """ + Call :func:`device_array() ` with information from + the array. 
+ """ + strides = _contiguous_strides_like_array(ary) + order = _order_like_array(ary) + return _device_array( + shape=ary.shape, + dtype=ary.dtype, + strides=strides, + order=order, + stream=stream, + ) + + +def _mapped_array_like(ary, stream=0, portable=False, wc=False): + """ + Call :func:`mapped_array() ` with the information + from the array. + """ + strides = _contiguous_strides_like_array(ary) + order = _order_like_array(ary) + return _mapped_array( + shape=ary.shape, + dtype=ary.dtype, + strides=strides, + order=order, + stream=stream, + portable=portable, + wc=wc, + ) + + +def _pinned_array_like(ary): + """ + Call :func:`pinned_array() ` with the information + from the array. + """ + strides = _contiguous_strides_like_array(ary) + order = _order_like_array(ary) + return _pinned_array( + shape=ary.shape, dtype=ary.dtype, strides=strides, order=order + ) diff --git a/numba_cuda/numba/cuda/api.py b/numba_cuda/numba/cuda/api.py index 81c338806..b105e94f7 100644 --- a/numba_cuda/numba/cuda/api.py +++ b/numba_cuda/numba/cuda/api.py @@ -9,10 +9,11 @@ import os import numpy as np - +import warnings from .cudadrv import devicearray, devices, driver -from numba.cuda.core import config from numba.cuda.api_util import prepare_shape_strides_dtype +from numba.cuda.cudadrv.devicearray import DeprecatedDeviceArrayApiWarning +from . import _api # NDarray device helper @@ -30,38 +31,12 @@ def from_cuda_array_interface(desc, owner=None, sync=True): If ``sync`` is ``True``, then the imported stream (if present) will be synchronized. 
""" - version = desc.get("version") - # Mask introduced in version 1 - if 1 <= version: - mask = desc.get("mask") - # Would ideally be better to detect if the mask is all valid - if mask is not None: - raise NotImplementedError("Masked arrays are not supported") - - shape = desc["shape"] - strides = desc.get("strides") - - shape, strides, dtype = prepare_shape_strides_dtype( - shape, strides, desc["typestr"], order="C" + warnings.warn( + "Constructing DeviceNDArray objects via the __cuda_array_interface__ " + "is now deprecated. Please prefer cupy for constructing device arrays.", + DeprecatedDeviceArrayApiWarning, ) - size = driver.memory_size_from_info(shape, strides, dtype.itemsize) - - cudevptr_class = driver.binding.CUdeviceptr - devptr = cudevptr_class(desc["data"][0]) - data = driver.MemoryPointer( - current_context(), devptr, size=size, owner=owner - ) - stream_ptr = desc.get("stream", None) - if stream_ptr is not None: - stream = external_stream(stream_ptr) - if sync and config.CUDA_ARRAY_INTERFACE_SYNC: - stream.synchronize() - else: - stream = 0 # No "Numba default stream", not the CUDA default stream - da = devicearray.DeviceNDArray( - shape=shape, strides=strides, dtype=dtype, gpu_data=data, stream=stream - ) - return da + return _api._from_cuda_array_interface(desc, owner=owner, sync=sync) def as_cuda_array(obj, sync=True): @@ -74,11 +49,12 @@ def as_cuda_array(obj, sync=True): If ``sync`` is ``True``, then the imported stream (if present) will be synchronized. """ - if ( - interface := getattr(obj, "__cuda_array_interface__", None) - ) is not None: - return from_cuda_array_interface(interface, owner=obj, sync=sync) - raise TypeError("*obj* doesn't implement the cuda array interface.") + warnings.warn( + "Constructing DeviceNDArray objects via as_cuda_array is now deprecated. 
" + "Please prefer cupy for constructing device arrays.", + DeprecatedDeviceArrayApiWarning, + ) + return _api._as_cuda_array(obj, sync=sync) def is_cuda_array(obj): @@ -86,7 +62,11 @@ def is_cuda_array(obj): Does not verify the validity of the interface. """ - return hasattr(obj, "__cuda_array_interface__") + warnings.warn( + "is_cuda_array is deprecated. Please prefer cupy for device array operations.", + DeprecatedDeviceArrayApiWarning, + ) + return _api._is_cuda_array(obj) def is_float16_supported(): @@ -136,6 +116,10 @@ def to_device(obj, stream=0, copy=True, to=None): hary = d_ary.copy_to_host(stream=stream) """ + warnings.warn( + "to_device is deprecated. Please prefer cupy for moving numpy arrays to the device.", + DeprecatedDeviceArrayApiWarning, + ) if to is None: to, new = devicearray.auto_device( obj, stream=stream, copy=copy, user_explicit=True @@ -152,10 +136,20 @@ def device_array(shape, dtype=np.float64, strides=None, order="C", stream=0): Allocate an empty device ndarray. Similar to :meth:`numpy.empty`. """ + warnings.warn( + "device_array is deprecated. Please prefer cupy for moving numpy arrays to the device.", + DeprecatedDeviceArrayApiWarning, + ) + return _device_array( + shape, dtype=dtype, strides=strides, order=order, stream=stream + ) + + +def _device_array(shape, dtype=np.float64, strides=None, order="C", stream=0): shape, strides, dtype = prepare_shape_strides_dtype( shape, strides, dtype, order ) - return devicearray.DeviceNDArray( + return devicearray.DeviceNDArray._create_nowarn( shape=shape, strides=strides, dtype=dtype, stream=stream ) @@ -184,6 +178,10 @@ def managed_array( *host*, and memory is only accessible by devices with Compute Capability 6.0 and later. """ + warnings.warn( + "managed_array is deprecated. 
Please prefer cupy for moving numpy arrays to the device.", + DeprecatedDeviceArrayApiWarning, + ) shape, strides, dtype = prepare_shape_strides_dtype( shape, strides, dtype, order ) @@ -206,6 +204,10 @@ def pinned_array(shape, dtype=np.float64, strides=None, order="C"): Allocate an :class:`ndarray ` with a buffer that is pinned (pagelocked). Similar to :func:`np.empty() `. """ + warnings.warn( + "pinned_array is deprecated. Please prefer cupy for moving numpy arrays to the device.", + DeprecatedDeviceArrayApiWarning, + ) shape, strides, dtype = prepare_shape_strides_dtype( shape, strides, dtype, order ) @@ -238,6 +240,10 @@ def mapped_array( to write by the host and to read by the device, but slower to write by the host and slower to write by the device. """ + warnings.warn( + "mapped_array is deprecated. Please prefer cupy for moving numpy arrays to the device.", + DeprecatedDeviceArrayApiWarning, + ) shape, strides, dtype = prepare_shape_strides_dtype( shape, strides, dtype, order ) @@ -410,7 +416,7 @@ def external_stream(ptr): :param ptr: Pointer to the external stream to wrap in a Numba Stream :type ptr: int """ - return current_context().create_external_stream(ptr) + return _api.external_stream(ptr) # Page lock @@ -446,7 +452,7 @@ def mapped(*arylist, **kws): mapped=True, ) pmlist.append(pm) - devary = devicearray.from_array_like(ary, gpu_data=pm, stream=stream) + devary = devicearray._from_array_like(ary, gpu_data=pm, stream=stream) devarylist.append(devary) try: if len(devarylist) == 1: diff --git a/numba_cuda/numba/cuda/args.py b/numba_cuda/numba/cuda/args.py index f350613aa..20e22b9d5 100644 --- a/numba_cuda/numba/cuda/args.py +++ b/numba_cuda/numba/cuda/args.py @@ -22,7 +22,7 @@ def to_device(self, retr, stream=0): :param retr: a list of clean-up work to do after the kernel's been run. Append 0-arg lambdas to it! 
- :return: a value (usually an `DeviceNDArray`) to be passed to + :return: a value (usually an `_DeviceNDArray`) to be passed to the kernel """ diff --git a/numba_cuda/numba/cuda/cudadrv/devicearray.py b/numba_cuda/numba/cuda/cudadrv/devicearray.py index 7a313f4e0..153c0f94c 100644 --- a/numba_cuda/numba/cuda/cudadrv/devicearray.py +++ b/numba_cuda/numba/cuda/cudadrv/devicearray.py @@ -12,6 +12,7 @@ import operator import copy from ctypes import c_void_p +import warnings import numpy as np @@ -30,6 +31,22 @@ from warnings import warn +class DeprecatedDeviceArrayApiWarning(FutureWarning): + pass + + +def deprecated_array_api(func): + @functools.wraps(func) + def wrapper(*args, **kwargs): + warnings.warn( + f"{func.__name__} api is deprecated. Please prefer cupy for array functions", + DeprecatedDeviceArrayApiWarning, + ) + return func(*args, **kwargs) + + return wrapper + + def is_cuda_ndarray(obj): "Check if an object is a CUDA ndarray" return getattr(obj, "__cuda_ndarray__", False) @@ -290,11 +307,15 @@ def copy_to_host(self, ary=None, stream=0): ) return hostary + @deprecated_array_api def split(self, section, stream=0): """Split the array into equal partition of the `section` size. If the array cannot be equally divided, the last section will be smaller. 
""" + return self._split(section, stream) + + def _split(self, section, stream=0): stream = self._default_stream(stream) if self.ndim != 1: raise ValueError("only support 1d array") @@ -308,7 +329,7 @@ def split(self, section, stream=0): end = min(begin + section, self.size) shape = (end - begin,) gpu_data = self.gpu_data.view(begin * itemsize, end * itemsize) - yield DeviceNDArray( + yield DeviceNDArray._create_nowarn( shape, strides, dtype=self.dtype, @@ -320,6 +341,7 @@ def as_cuda_arg(self): """Returns a device memory object that is used as the argument.""" return self.gpu_data + @deprecated_array_api def get_ipc_handle(self): """ Returns a *IpcArrayHandle* object that is safe to serialize and transfer @@ -331,6 +353,7 @@ def get_ipc_handle(self): desc = dict(shape=self.shape, strides=self.strides, dtype=self.dtype) return IpcArrayHandle(ipc_handle=ipch, array_desc=desc) + @deprecated_array_api def squeeze(self, axis=None, stream=0): """ Remove axes of size one from the array shape. @@ -350,8 +373,11 @@ def squeeze(self, axis=None, stream=0): Squeezed view into the array. """ + return self._squeeze(axis=axis, stream=stream) + + def _squeeze(self, axis=None, stream=0): new_dummy, _ = self._dummy.squeeze(axis=axis) - return DeviceNDArray( + return DeviceNDArray._create_nowarn( shape=new_dummy.shape, strides=new_dummy.strides, dtype=self.dtype, @@ -359,6 +385,7 @@ def squeeze(self, axis=None, stream=0): gpu_data=self.gpu_data, ) + @deprecated_array_api def view(self, dtype): """Returns a new object by reinterpretting the dtype without making a copy of the data. 
@@ -387,7 +414,7 @@ def view(self, dtype): strides[-1] = dtype.itemsize - return DeviceNDArray( + return DeviceNDArray._create_nowarn( shape=shape, strides=strides, dtype=dtype, @@ -476,7 +503,7 @@ def _do_getitem(self, item, stream=0): shape, strides, dtype = prepare_shape_strides_dtype( typ.shape, None, typ.subdtype[0], "C" ) - return DeviceNDArray( + return DeviceNDArray._create_nowarn( shape=shape, strides=strides, dtype=dtype, @@ -572,6 +599,20 @@ class DeviceNDArray(DeviceNDArrayBase): An on-GPU array type """ + def __init__(self, *args, **kwargs): + warnings.warn( + "DeviceNDArray is deprecated. Please prefer cupy for array operations.", + DeprecatedDeviceArrayApiWarning, + ) + super().__init__(*args, **kwargs) + + @classmethod + def _create_nowarn(cls, *args, **kwargs): + """Create a DeviceNDArray without the deprecation warning.""" + instance = cls.__new__(cls) + DeviceNDArrayBase.__init__(instance, *args, **kwargs) + return instance + def is_f_contiguous(self): """ Return true if the array is Fortran-contiguous. 
@@ -622,7 +663,7 @@ def reshape(self, *newshape, **kws): cls = type(self) if newshape == self.shape: # nothing to do - return cls( + return cls._create_nowarn( shape=self.shape, strides=self.strides, dtype=self.dtype, @@ -632,7 +673,7 @@ def reshape(self, *newshape, **kws): newarr, extents = self._dummy.reshape(*newshape, **kws) if extents == [self._dummy.extent]: - return cls( + return cls._create_nowarn( shape=newarr.shape, strides=newarr.strides, dtype=self.dtype, @@ -652,7 +693,7 @@ def ravel(self, order="C", stream=0): newarr, extents = self._dummy.ravel(order=order) if extents == [self._dummy.extent]: - return cls( + return cls._create_nowarn( shape=newarr.shape, strides=newarr.strides, dtype=self.dtype, @@ -698,7 +739,7 @@ def _do_getitem(self, item, stream=0): ) return hostary[0] else: - return cls( + return cls._create_nowarn( shape=arr.shape, strides=arr.strides, dtype=self.dtype, @@ -707,7 +748,7 @@ def _do_getitem(self, item, stream=0): ) else: newdata = self.gpu_data.view(*arr.extent) - return cls( + return cls._create_nowarn( shape=arr.shape, strides=arr.strides, dtype=self.dtype, @@ -748,7 +789,7 @@ def _do_setitem(self, key, value, stream=0): shape = arr.shape strides = arr.strides - lhs = type(self)( + lhs = type(self)._create_nowarn( shape=shape, strides=strides, dtype=self.dtype, @@ -806,13 +847,14 @@ def __init__(self, ipc_handle, array_desc): self._array_desc = array_desc self._ipc_handle = ipc_handle + @deprecated_array_api def open(self): """ Returns a new *DeviceNDArray* that shares the allocation from the original process. Must not be used on the original process. """ dptr = self._ipc_handle.open(devices.get_context()) - return DeviceNDArray(gpu_data=dptr, **self._array_desc) + return DeviceNDArray._create_nowarn(gpu_data=dptr, **self._array_desc) def close(self): """ @@ -849,7 +891,16 @@ def device_setup(self, gpu_data, stream=0): def from_array_like(ary, stream=0, gpu_data=None): "Create a DeviceNDArray object that is like ary." 
- return DeviceNDArray( + + warnings.warn( + "from_array_like is deprecated. Please prefer cupy for array functions", + DeprecatedDeviceArrayApiWarning, + ) + return _from_array_like(ary, stream=stream, gpu_data=gpu_data) + + +def _from_array_like(ary, stream=0, gpu_data=None): + return DeviceNDArray._create_nowarn( ary.shape, ary.strides, ary.dtype, stream=stream, gpu_data=gpu_data ) @@ -918,9 +969,9 @@ def auto_device(obj, stream=0, copy=True, user_explicit=False): elif ( interface := getattr(obj, "__cuda_array_interface__", None) ) is not None: - from numba.cuda.api import from_cuda_array_interface + from numba.cuda._api import _from_cuda_array_interface - return from_cuda_array_interface(interface, owner=obj), False + return _from_cuda_array_interface(interface, owner=obj), False else: if isinstance(obj, np.void): devobj = from_record_like(obj, stream=stream) @@ -934,7 +985,7 @@ def auto_device(obj, stream=0, copy=True, user_explicit=False): obj, copy=False if numpy_version < (2, 0) else None, subok=True ) sentry_contiguous(obj) - devobj = from_array_like(obj, stream=stream) + devobj = _from_array_like(obj, stream=stream) if copy: if ( config.CUDA_WARN_ON_IMPLICIT_COPY @@ -1094,17 +1145,23 @@ def _to_strided_memory_view( def check_array_compatibility(ary1, ary2): - ary1sq, ary2sq = ary1.squeeze(), ary2.squeeze() + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", category=DeprecatedDeviceArrayApiWarning + ) + ary1sq = ary1.squeeze() + ary2sq = ary2.squeeze() + if ary1.dtype != ary2.dtype: raise TypeError( "incompatible dtype: %s vs. %s" % (ary1.dtype, ary2.dtype) ) + if ary1sq.shape != ary2sq.shape: raise ValueError( "incompatible shape: %s vs. %s" % (ary1.shape, ary2.shape) ) - # We check strides only if the size is nonzero, because strides are - # irrelevant (and can differ) for zero-length copies. + if ary1.size and ary1sq.strides != ary2sq.strides: raise ValueError( "incompatible strides: %s vs. 
%s" % (ary1.strides, ary2.strides) diff --git a/numba_cuda/numba/cuda/cudadrv/driver.py b/numba_cuda/numba/cuda/cudadrv/driver.py index cc84a385e..4fe41fec5 100644 --- a/numba_cuda/numba/cuda/cudadrv/driver.py +++ b/numba_cuda/numba/cuda/cudadrv/driver.py @@ -1598,7 +1598,7 @@ def open_array(self, context, shape, dtype, strides=None): strides = dtype.itemsize dptr = self.open(context) # read the device pointer as an array - return devicearray.DeviceNDArray( + return devicearray.DeviceNDArray._create_nowarn( shape=shape, strides=strides, dtype=dtype, gpu_data=dptr ) diff --git a/numba_cuda/numba/cuda/deviceufunc.py b/numba_cuda/numba/cuda/deviceufunc.py index f69093ee0..171174303 100644 --- a/numba_cuda/numba/cuda/deviceufunc.py +++ b/numba_cuda/numba/cuda/deviceufunc.py @@ -369,7 +369,6 @@ def attempt_ravel(a): if out is None: # No output is provided devout = cr.allocate_device_array(shape, resty, stream=stream) - devarys.extend([devout]) cr.launch(func, shape[0], stream, devarys) diff --git a/numba_cuda/numba/cuda/kernels/reduction.py b/numba_cuda/numba/cuda/kernels/reduction.py index da0a555eb..908b898c5 100644 --- a/numba_cuda/numba/cuda/kernels/reduction.py +++ b/numba_cuda/numba/cuda/kernels/reduction.py @@ -239,7 +239,7 @@ def __call__(self, arr, size=None, res=None, init=0, stream=0): partials_size = full_blockct if size_partial: partials_size += 1 - partials = cuda.device_array(shape=partials_size, dtype=arr.dtype) + partials = cuda._api._device_array(shape=partials_size, dtype=arr.dtype) if size_full: # kernel for the fully populated threadblocks @@ -259,7 +259,9 @@ def __call__(self, arr, size=None, res=None, init=0, stream=0): # handle return value if res is not None: - res[:1].copy_to_device(partials[:1], stream=stream) + cuda._api._from_cuda_array_interface(res.__cuda_array_interface__)[ + :1 + ].copy_to_device(partials[:1], stream=stream) return else: return partials[0] diff --git a/numba_cuda/numba/cuda/kernels/transpose.py 
b/numba_cuda/numba/cuda/kernels/transpose.py index 01e2670b0..e23820547 100644 --- a/numba_cuda/numba/cuda/kernels/transpose.py +++ b/numba_cuda/numba/cuda/kernels/transpose.py @@ -5,6 +5,8 @@ from numba.cuda.cudadrv.driver import driver import math from numba.cuda.np import numpy_support as nps +from numba.cuda.cudadrv.devicearray import DeprecatedDeviceArrayApiWarning +import warnings def transpose(a, b=None): @@ -19,14 +21,22 @@ def transpose(a, b=None): the device its stream will be used to perform the transpose (and to copy `b` to the device if necessary). """ + warnings.warn( + "The DeviceNDArray API for transposing device arrays is deprecated. " + "Please prefer cupy for device array operations.", + DeprecatedDeviceArrayApiWarning, + ) + return _transpose(a, b=b) + +def _transpose(a, b=None): # prefer `a`'s stream if stream = getattr(a, "stream", 0) if not b: cols, rows = a.shape strides = a.dtype.itemsize * cols, a.dtype.itemsize - b = cuda.cudadrv.devicearray.DeviceNDArray( + b = cuda.cudadrv.devicearray.DeviceNDArray._create_nowarn( (rows, cols), strides, dtype=a.dtype, stream=stream ) diff --git a/numba_cuda/numba/cuda/memory_management/nrt.py b/numba_cuda/numba/cuda/memory_management/nrt.py index dc1ac13fb..3b7583bc1 100644 --- a/numba_cuda/numba/cuda/memory_management/nrt.py +++ b/numba_cuda/numba/cuda/memory_management/nrt.py @@ -152,7 +152,7 @@ def allocate(self, stream=None): """ Allocate memsys on global memory """ - from numba.cuda import device_array + from numba.cuda._api import _device_array # Check if memsys module is defined if self._memsys_module is None: @@ -167,7 +167,7 @@ def allocate(self, stream=None): driver.cuMemcpyDtoH( ctypes.addressof(memsys_size), device_memsys_size, nbytes ) - self._memsys = device_array( + self._memsys = _device_array( (memsys_size.value,), dtype="i1", stream=stream ) self.set_memsys_to_module(self._memsys_module, stream=stream) @@ -237,7 +237,7 @@ def memsys_stats_enabled(self, stream=None): Return a boolean 
indicating whether memsys is enabled. Synchronizes context """ - enabled_ar = cuda.managed_array(1, np.uint8) + enabled_ar = cuda._api._managed_array(1, np.uint8) enabled_ptr = enabled_ar.device_ctypes_pointer self._single_thread_launch( @@ -264,7 +264,7 @@ def _copy_memsys_to_host(self, stream): ] ) - stats_for_read = cuda.managed_array(1, dt) + stats_for_read = cuda._api._managed_array(1, dt) stats_ptr = stats_for_read.device_ctypes_pointer self._single_thread_launch( @@ -295,7 +295,7 @@ def _get_single_stat(self, stat, stream=None): """ Get a single stat from the memsys """ - got = cuda.managed_array(1, np.uint64) + got = cuda._api._managed_array(1, np.uint64) got_ptr = got.device_ctypes_pointer self._single_thread_launch( diff --git a/numba_cuda/numba/cuda/random.py b/numba_cuda/numba/cuda/random.py index e8e620d5c..d51df6106 100644 --- a/numba_cuda/numba/cuda/random.py +++ b/numba_cuda/numba/cuda/random.py @@ -3,7 +3,6 @@ import math -from numba import cuda from numba.cuda import ( float32, float64, @@ -14,6 +13,7 @@ ) from numba.cuda.np.numpy_support import from_dtype from numba.cuda import config +from numba.cuda import _api if HAS_NUMBA: from numba import jit @@ -303,6 +303,6 @@ def create_xoroshiro128p_states(n, seed, subsequence_start=0, stream=0): :type stream: CUDA stream :param stream: stream to run initialization kernel on """ - states = cuda.device_array(n, dtype=xoroshiro128p_dtype, stream=stream) + states = _api._device_array(n, dtype=xoroshiro128p_dtype, stream=stream) init_xoroshiro128p_states(states, seed, subsequence_start, stream) return states diff --git a/numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py b/numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py index 76c3696a1..fcbde51f8 100644 --- a/numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +++ b/numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py @@ -10,6 +10,7 @@ from numba.cuda.np.numpy_support import numpy_version from numba.cuda.np import numpy_support from 
numba.cuda import types +import warnings import numpy as np @@ -111,6 +112,29 @@ def __init__(self, ary, stream=0): self._ary = ary self.stream = stream + @classmethod + def _create_nowarn(cls, shape, strides, dtype, stream=0, gpu_data=None): + """Create a FakeCUDAArray without the deprecation warning. + + This matches the signature of DeviceNDArrayBase.__init__() but + creates a numpy array from the parameters since FakeCUDAArray + wraps numpy arrays. + """ + if isinstance(shape, int): + shape = (shape,) + else: + shape = tuple(shape) + + # Create dtype + dtype = np.dtype(dtype) + + # For the simulator, we create a simple numpy array with the given shape + # and dtype. The strides parameter is typically standard C-order strides, + # so numpy's default behavior should work fine for most cases. + ary = np.empty(shape, dtype=dtype) + + return FakeCUDAArray(ary, stream=stream) + @property def _numba_type_(self): """ @@ -272,6 +296,13 @@ def split(self, section, stream=0): ] +DeviceNDArray = FakeCUDAArray + + +class DeprecatedDeviceArrayApiWarning(Warning): + pass + + def array_core(ary): """ Extract the repeated core of a broadcast array. 
diff --git a/numba_cuda/numba/cuda/testing.py b/numba_cuda/numba/cuda/testing.py index d9a295059..972c7f530 100644 --- a/numba_cuda/numba/cuda/testing.py +++ b/numba_cuda/numba/cuda/testing.py @@ -13,12 +13,13 @@ from numba.cuda import config from numba.cuda.tests.support import TestCase from pathlib import Path - +import warnings from typing import Iterable, Union from io import StringIO import unittest import numpy as np from numba.cuda import HAS_NUMBA +from numba.cuda.cudadrv.devicearray import DeprecatedDeviceArrayApiWarning if PYVERSION >= (3, 10): from filecheck.matcher import Matcher @@ -185,6 +186,20 @@ def assertFileCheckMatches( ) +class DeprecatedDeviceArrayApiTest(CUDATestCase): + def setUp(self): + self._warnings_filters = warnings.filters[:] + + warnings.filterwarnings( + "ignore", category=DeprecatedDeviceArrayApiWarning + ) + super().setUp() + + def tearDown(self): + warnings.filters = self._warnings_filters + super().tearDown() + + def skip_on_cudasim(reason): """Skip this test if running on the CUDA simulator""" assert isinstance(reason, str) @@ -202,6 +217,31 @@ def skip_unless_cudasim(reason): return unittest.skipUnless(config.ENABLE_CUDASIM, reason) +def skip_if_cupy_unavailable(fn): + """ + Skip test if CuPy is not available, unless running in simulator mode. + + When running in simulator mode, the test will execute using NumPy arrays + (via 'import numpy as cp' pattern). When not in simulator mode, the test + is skipped if CuPy cannot be imported. + + This decorator should be used for tests that: + 1. Use device arrays via cupy (cp.asarray, cp.zeros, etc.) + 2. Should still run in simulator mode with numpy arrays + 3. 
Should be skipped on hardware when cupy is unavailable + """ + if config.ENABLE_CUDASIM: + # In simulator mode, tests use numpy as cp, so don't skip + return fn + + try: + import cupy + + return fn + except ImportError: + return unittest.skip("CuPy not available")(fn) + + def skip_unless_conda_cudatoolkit(reason): """Skip test if the CUDA toolkit was not installed by Conda""" assert isinstance(reason, str) diff --git a/numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py b/numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py index 88c8fff63..6852d01e4 100644 --- a/numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py +++ b/numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py @@ -7,6 +7,7 @@ import numpy as np import pytest from pytest import param +from numba.cuda.testing import DeprecatedDeviceArrayApiWarning pytestmark = pytest.mark.skipif( @@ -14,12 +15,16 @@ reason="no reason to run benchmarks in the simulator", ) +if not config.ENABLE_CUDASIM: + with pytest.warns(DeprecatedDeviceArrayApiWarning): + devary_arg = cuda.device_array(128, dtype=np.float32) + @pytest.mark.parametrize( "array_func", [ param( - lambda: cuda.device_array(128, dtype=np.float32), + lambda: devary_arg, id="device_array", ), param( @@ -53,10 +58,7 @@ def one_arg(arr1): "array_func", [ param( - lambda: [ - cuda.device_array(128, dtype=np.float32) - for _ in range(len(string.ascii_lowercase)) - ], + lambda: [devary_arg for _ in range(len(string.ascii_lowercase))], id="device_array", ), param( diff --git a/numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py b/numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py index 9bcb78309..bdf6dda33 100644 --- a/numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +++ b/numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py @@ -3,10 +3,14 @@ import numpy as np from numba import cuda -from numba.cuda.testing import unittest, CUDATestCase, skip_on_cudasim +from numba.cuda.testing import ( + unittest, + 
DeprecatedDeviceArrayApiTest, + skip_on_cudasim, +) -class TestArrayAttr(CUDATestCase): +class TestArrayAttr(DeprecatedDeviceArrayApiTest): def test_contigous_2d(self): ary = np.arange(10) cary = ary.reshape(2, 5) diff --git a/numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py b/numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py index f2b171f97..6c083821a 100644 --- a/numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +++ b/numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py @@ -4,9 +4,19 @@ import numbers from numba import cuda +from numba.cuda import config from numba.cuda.testing import unittest, CUDATestCase, skip_on_cudasim +from numba.cuda.testing import skip_if_cupy_unavailable from numba.cuda.cudadrv import driver +if config.ENABLE_CUDASIM: + import numpy as cp +else: + try: + import cupy as cp + except ImportError: + cp = None + class TestContextStack(CUDATestCase): def setUp(self): @@ -140,6 +150,7 @@ def test_attached_non_primary(self): finally: the_driver.cuCtxDestroy(hctx) + @skip_if_cupy_unavailable def test_cudajit_in_attached_primary_context(self): def do(): from numba import cuda @@ -149,9 +160,9 @@ def foo(a): for i in range(a.size): a[i] = i - a = cuda.device_array(10) + a = cp.empty(10) foo[1, 1](a) - self.assertEqual(list(a.copy_to_host()), list(range(10))) + self.assertEqual(list(a.get()), list(range(10))) self.test_attached_primary(do) diff --git a/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py b/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py index 93794d34b..55619aa95 100644 --- a/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +++ b/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py @@ -6,11 +6,15 @@ import numpy as np from numba import cuda -from numba.cuda.testing import unittest, CUDATestCase, skip_on_cudasim +from numba.cuda.testing import ( + unittest, + DeprecatedDeviceArrayApiTest, + skip_on_cudasim, +) from unittest.mock import patch -class 
CudaArrayIndexing(CUDATestCase): +class CudaArrayIndexing(DeprecatedDeviceArrayApiTest): def test_index_1d(self): arr = np.arange(10) darr = cuda.to_device(arr) @@ -60,7 +64,7 @@ def test_index_3d(self): darr[0, 0, z] -class CudaArrayStridedSlice(CUDATestCase): +class CudaArrayStridedSlice(DeprecatedDeviceArrayApiTest): def test_strided_index_1d(self): arr = np.arange(10) darr = cuda.to_device(arr) @@ -90,7 +94,7 @@ def test_strided_index_3d(self): ) -class CudaArraySlicing(CUDATestCase): +class CudaArraySlicing(DeprecatedDeviceArrayApiTest): def test_prefix_1d(self): arr = np.arange(5) darr = cuda.to_device(arr) @@ -217,7 +221,7 @@ def test_empty_slice_2d(self): ) -class CudaArraySetting(CUDATestCase): +class CudaArraySetting(DeprecatedDeviceArrayApiTest): """ Most of the slicing logic is tested in the cases above, so these tests focus on the setting logic. diff --git a/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py b/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py index 272090d90..32f405ee1 100644 --- a/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +++ b/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py @@ -3,10 +3,10 @@ import numpy as np from numba import cuda -from numba.cuda.testing import unittest, CUDATestCase +from numba.cuda.testing import unittest, DeprecatedDeviceArrayApiTest -class TestCudaAutoContext(CUDATestCase): +class TestCudaAutoContext(DeprecatedDeviceArrayApiTest): def test_auto_context(self): """A problem was revealed by a customer that the use cuda.to_device does not create a CUDA context. 
diff --git a/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py b/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py index 36f21cc92..0da0ca0f0 100644 --- a/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +++ b/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py @@ -8,7 +8,7 @@ from_record_like, auto_device, ) -from numba.cuda.testing import unittest, CUDATestCase +from numba.cuda.testing import unittest, DeprecatedDeviceArrayApiTest from numba.cuda.testing import skip_on_cudasim from numba.cuda.np import numpy_support from numba import cuda @@ -33,7 +33,7 @@ @skip_on_cudasim("Device Record API unsupported in the simulator") -class TestCudaDeviceRecord(CUDATestCase): +class TestCudaDeviceRecord(DeprecatedDeviceArrayApiTest): """ Tests the DeviceRecord class with np.void host types. """ @@ -113,12 +113,12 @@ class TestCudaDeviceRecordWithRecord(TestCudaDeviceRecord): """ def setUp(self): - CUDATestCase.setUp(self) + DeprecatedDeviceArrayApiTest.setUp(self) self._create_data(np.recarray) @skip_on_cudasim("Structured array attr access not supported in simulator") -class TestRecordDtypeWithStructArrays(CUDATestCase): +class TestRecordDtypeWithStructArrays(DeprecatedDeviceArrayApiTest): """ Test operation of device arrays on structured arrays. 
""" diff --git a/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py b/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py index be5aa6796..287f0f1fe 100644 --- a/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +++ b/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py @@ -14,8 +14,14 @@ ) from numba import cuda -from numba.cuda.cudadrv import devices, nvrtc -from numba.cuda.testing import unittest, CUDATestCase, skip_unless_cc_90 +from numba.cuda.cudadrv import devices +from numba.cuda.testing import ( + unittest, + CUDATestCase, + DeprecatedDeviceArrayApiTest, +) +from numba.cuda.cudadrv import nvrtc +from numba.cuda.testing import skip_unless_cc_90 from numba.cuda.testing import skip_on_cudasim from numba.cuda.tests.support import override_config from numba.core import types @@ -76,7 +82,7 @@ @skip_on_cudasim("CUDA Driver API unsupported in the simulator") -class TestCudaDriver(CUDATestCase): +class TestCudaDriver(DeprecatedDeviceArrayApiTest): def setUp(self): super().setUp() self.assertTrue(len(devices.gpus) > 0) diff --git a/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py b/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py index bea1363e0..b4f232de8 100644 --- a/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +++ b/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py @@ -5,14 +5,23 @@ import numpy as np from numba.cuda.cudadrv import devicearray from numba import cuda -from numba.cuda.testing import unittest, CUDATestCase -from numba.cuda.testing import skip_on_cudasim +from numba.cuda.testing import unittest, DeprecatedDeviceArrayApiTest +from numba.cuda.testing import skip_on_cudasim, skip_if_cupy_unavailable from numba.cuda.tests.support import IS_NUMPY_2 +from numba.cuda import config import pytest +if config.ENABLE_CUDASIM: + import numpy as cp +else: + try: + import cupy as cp + except ImportError: + cp = None -class TestCudaNDArray(CUDATestCase): + +class TestCudaNDArray(DeprecatedDeviceArrayApiTest): def 
test_device_array_interface(self): dary = cuda.device_array(shape=100) devicearray.verify_cuda_ndarray_interface(dary) @@ -498,7 +507,7 @@ def test_issue_8477(self): dev_array_from_host.copy_to_device(dev_array) -class TestArrayMethod(CUDATestCase): +class TestArrayMethod(DeprecatedDeviceArrayApiTest): """Tests of the __array__() method via np.array""" def test_np_array(self): @@ -529,7 +538,7 @@ def test_np_array_copy_true(self): np.testing.assert_equal(dev_array.copy_to_host(), host_array) -class TestRecarray(CUDATestCase): +class TestRecarray(DeprecatedDeviceArrayApiTest): def test_recarray(self): # From issue #4111 a = np.recarray( @@ -559,7 +568,7 @@ def test(x, out1, out2): np.testing.assert_array_equal(expect2, got2) -class TestCoreContiguous(CUDATestCase): +class TestCoreContiguous(DeprecatedDeviceArrayApiTest): def _test_against_array_core(self, view): self.assertEqual( devicearray.is_contiguous(view), diff --git a/numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py b/numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py index 63af86a3f..d85f031e1 100644 --- a/numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +++ b/numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py @@ -26,10 +26,10 @@ def test_max_pending_count(self): self.assertEqual(len(deallocs), 0) # deallocate to maximum count for i in range(config.CUDA_DEALLOCS_COUNT): - cuda.to_device(np.arange(1)) + cuda._api._to_device(np.arange(1)) self.assertEqual(len(deallocs), i + 1) # one more to trigger .clear() - cuda.to_device(np.arange(1)) + cuda._api._to_device(np.arange(1)) self.assertEqual(len(deallocs), 0) @skip_if_external_memmgr("Deallocation specific to Numba memory management") @@ -55,12 +55,12 @@ def test_max_pending_bytes(self): # allocate half the max size # this will not trigger deallocation - cuda.to_device(np.ones(max_pending // 2, dtype=np.int8)) + cuda._api._to_device(np.ones(max_pending // 2, dtype=np.int8)) self.assertEqual(len(deallocs), 1) # allocate another 
remaining # this will not trigger deallocation - cuda.to_device( + cuda._api._to_device( np.ones( deallocs._max_pending_bytes - deallocs._size, dtype=np.int8 ) @@ -68,7 +68,7 @@ def test_max_pending_bytes(self): self.assertEqual(len(deallocs), 2) # another byte to trigger .clear() - cuda.to_device(np.ones(1, dtype=np.int8)) + cuda._api._to_device(np.ones(1, dtype=np.int8)) self.assertEqual(len(deallocs), 0) finally: # restore old ratio @@ -77,12 +77,12 @@ def test_max_pending_bytes(self): @skip_if_external_memmgr("Deallocation specific to Numba memory management") def test_defer_cleanup(self): harr = np.arange(5) - darr1 = cuda.to_device(harr) + darr1 = cuda._api._to_device(harr) deallocs = cuda.current_context().memory_manager.deallocations deallocs.clear() self.assertEqual(len(deallocs), 0) with cuda.defer_cleanup(): - darr2 = cuda.to_device(harr) + darr2 = cuda._api._to_device(harr) del darr1 self.assertEqual(len(deallocs), 1) del darr2 @@ -96,13 +96,13 @@ def test_defer_cleanup(self): @skip_if_external_memmgr("Deallocation specific to Numba memory management") def test_nested_defer_cleanup(self): harr = np.arange(5) - darr1 = cuda.to_device(harr) + darr1 = cuda._api._to_device(harr) deallocs = cuda.current_context().memory_manager.deallocations deallocs.clear() self.assertEqual(len(deallocs), 0) with cuda.defer_cleanup(): with cuda.defer_cleanup(): - darr2 = cuda.to_device(harr) + darr2 = cuda._api._to_device(harr) del darr1 self.assertEqual(len(deallocs), 1) del darr2 @@ -118,7 +118,7 @@ def test_nested_defer_cleanup(self): @skip_if_external_memmgr("Deallocation specific to Numba memory management") def test_exception(self): harr = np.arange(5) - darr1 = cuda.to_device(harr) + darr1 = cuda._api._to_device(harr) deallocs = cuda.current_context().memory_manager.deallocations deallocs.clear() self.assertEqual(len(deallocs), 0) @@ -128,7 +128,7 @@ class CustomError(Exception): with self.assertRaises(CustomError): with cuda.defer_cleanup(): - darr2 = 
cuda.to_device(harr) + darr2 = cuda._api._to_device(harr) del darr2 self.assertEqual(len(deallocs), 1) deallocs.clear() diff --git a/numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py b/numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py index a41e13dcc..93fa53fa3 100644 --- a/numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +++ b/numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py @@ -7,7 +7,12 @@ from numba import cuda from numba.cuda.core import config -from numba.cuda.testing import unittest, CUDATestCase, skip_on_cudasim +from numba.cuda.testing import ( + unittest, + DeprecatedDeviceArrayApiTest, + skip_on_cudasim, + CUDATestCase, +) from numba.cuda.tests.support import linux_only if not config.ENABLE_CUDASIM: @@ -103,7 +108,7 @@ def interface_version(self): @skip_on_cudasim("EMM Plugins not supported on CUDA simulator") -class TestDeviceOnlyEMMPlugin(CUDATestCase): +class TestDeviceOnlyEMMPlugin(DeprecatedDeviceArrayApiTest): """ Tests that the API of an EMM Plugin that implements device allocations only is used correctly by Numba. 
diff --git a/numba_cuda/numba/cuda/tests/cudadrv/test_events.py b/numba_cuda/numba/cuda/tests/cudadrv/test_events.py index 6ebe5cf58..fc63559c0 100644 --- a/numba_cuda/numba/cuda/tests/cudadrv/test_events.py +++ b/numba_cuda/numba/cuda/tests/cudadrv/test_events.py @@ -3,20 +3,29 @@ import numpy as np from numba import cuda +from numba.cuda import config from numba.cuda.testing import unittest, CUDATestCase from numba.cuda._compat import Device -from numba.cuda.testing import skip_on_cudasim +from numba.cuda.testing import skip_on_cudasim, skip_if_cupy_unavailable + +if config.ENABLE_CUDASIM: + import numpy as cp +else: + try: + import cupy as cp + except ImportError: + cp = None class TestCudaEvent(CUDATestCase): + @skip_if_cupy_unavailable def test_event_elapsed(self): N = 32 - dary = cuda.device_array(N, dtype=np.double) evtstart = cuda.event() evtend = cuda.event() evtstart.record() - cuda.to_device(np.arange(N, dtype=np.double), to=dary) + dary = cp.array(np.arange(N, dtype=np.double)) # noqa: F841 evtend.record() evtend.wait() evtend.synchronize() @@ -35,13 +44,17 @@ def test_event_elapsed_cuda_core_stream(self): self.event_elapsed_inner(stream) def event_elapsed_inner(self, stream): - N = 32 - dary = cuda.device_array(N, dtype=np.double) + @cuda.jit + def kernel(): + pass + evtstart = cuda.event() evtend = cuda.event() evtstart.record(stream=stream) - cuda.to_device(np.arange(N, dtype=np.double), to=dary, stream=stream) + + kernel[1, 1, stream]() + evtend.record(stream=stream) evtend.wait(stream=stream) evtend.synchronize() diff --git a/numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py b/numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py index a4363fc44..381a05dd8 100644 --- a/numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +++ b/numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py @@ -4,10 +4,10 @@ import numpy as np from numba.cuda.cudadrv import driver from numba import cuda -from numba.cuda.testing import unittest, CUDATestCase +from 
numba.cuda.testing import unittest, DeprecatedDeviceArrayApiTest -class TestHostAlloc(CUDATestCase): +class TestHostAlloc(DeprecatedDeviceArrayApiTest): def tearDown(self): cuda.current_context().reset() diff --git a/numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py b/numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py index e768d9dbb..6cec7005a 100644 --- a/numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +++ b/numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py @@ -4,7 +4,7 @@ import numpy as np from numba.cuda.cudadrv.driver import device_memset, driver from numba import cuda -from numba.cuda.testing import unittest, CUDATestCase +from numba.cuda.testing import unittest, DeprecatedDeviceArrayApiTest from numba.cuda.testing import skip_on_cudasim, skip_on_arm from numba.cuda.tests.support import linux_only @@ -12,7 +12,7 @@ @skip_on_cudasim("CUDA Driver API unsupported in the simulator") @linux_only @skip_on_arm("Managed Alloc support is experimental/untested on ARM") -class TestManagedAlloc(CUDATestCase): +class TestManagedAlloc(DeprecatedDeviceArrayApiTest): def tearDown(self): super().tearDown() cuda.current_context().reset() diff --git a/numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py b/numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py index ff51db4f1..2a243ca73 100644 --- a/numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +++ b/numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py @@ -5,12 +5,18 @@ from numba.cuda.testing import unittest from numba.cuda.testing import skip_on_cudasim from numba.cuda.testing import CUDATestCase +from numba.cuda.testing import skip_if_cupy_unavailable from numba.cuda import get_current_device from numba.cuda.cudadrv.driver import _Linker, _have_nvjitlink from numba import cuda from numba.cuda import config +try: + import cupy as cp +except ImportError: + cp = None + import os import io import contextlib @@ -82,6 +88,7 @@ def test_nvjitlink_test_add_file_guess_ext_invalid_input(self): 
# because there's no way to know what kind of file to treat it as linker.add_file_guess_ext(content) + @skip_if_cupy_unavailable def test_nvjitlink_jit_with_linkable_code(self): files = ( test_device_functions_a, @@ -101,7 +108,7 @@ def test_nvjitlink_jit_with_linkable_code(self): def kernel(result): result[0] = add_from_numba(1, 2) - result = cuda.device_array(1) + result = cp.zeros(1) kernel[1, 1](result) assert result[0] == 3 @@ -132,6 +139,7 @@ def tearDown(self): config.DUMP_ASSEMBLY = self._prev_dump_assembly super().tearDown() + @skip_if_cupy_unavailable def test_nvjitlink_jit_with_linkable_code_lto_dump_assembly(self): files = ( test_device_functions_cu, @@ -158,12 +166,13 @@ def test_nvjitlink_jit_with_linkable_code_lto_dump_assembly(self): def kernel(result): result[0] = add_from_numba(1, 2) - result = cuda.device_array(1) + result = cp.zeros(1) kernel[1, 1](result) assert result[0] == 3 self.assertTrue("ASSEMBLY (AFTER LTO)" in f.getvalue()) + @skip_if_cupy_unavailable def test_nvjitlink_jit_with_linkable_code_lto_dump_assembly_warn(self): files = ( test_device_functions_a, @@ -190,7 +199,7 @@ def test_nvjitlink_jit_with_linkable_code_lto_dump_assembly_warn(self): def kernel(result): result[0] = add_from_numba(1, 2) - result = cuda.device_array(1) + result = cp.zeros(1) func = kernel[1, 1] with pytest.warns( UserWarning, diff --git a/numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py b/numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py index 919b4428d..94222066b 100644 --- a/numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +++ b/numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py @@ -5,10 +5,11 @@ import platform from numba import cuda -from numba.cuda.testing import unittest, CUDATestCase +from numba.cuda.testing import unittest, DeprecatedDeviceArrayApiTest -class TestPinned(CUDATestCase): +# TODO +class TestPinned(DeprecatedDeviceArrayApiTest): def _run_copies(self, A): A0 = np.copy(A) diff --git a/numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py 
b/numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py index f36013ca9..4d44927ff 100644 --- a/numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +++ b/numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py @@ -5,17 +5,24 @@ from numba.cuda.testing import CUDATestCase from numba import cuda from numba.cuda.testing import skip_on_cudasim +from numba.cuda.testing import skip_if_cupy_unavailable + +try: + import cupy as cp +except ImportError: + cp = None @skip_on_cudasim("CUDA Profiler unsupported in the simulator") class TestProfiler(CUDATestCase): + @skip_if_cupy_unavailable def test_profiling(self): with cuda.profiling(): - a = cuda.device_array(10) + a = cp.zeros(10) del a with cuda.profiling(): - a = cuda.device_array(100) + a = cp.zeros(100) del a diff --git a/numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py b/numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py index 68a4f1db1..301369bfc 100644 --- a/numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +++ b/numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py @@ -9,15 +9,26 @@ import numpy as np from numba import cuda -from numba.cuda.testing import unittest, CUDATestCase +from numba.cuda.testing import ( + unittest, + CUDATestCase, + DeprecatedDeviceArrayApiWarning, +) +import pytest def newthread(exception_queue): try: + from numba.cuda import config + cuda.select_device(0) stream = cuda.stream() A = np.arange(100) - dA = cuda.to_device(A, stream=stream) + if not config.ENABLE_CUDASIM: + with pytest.warns(DeprecatedDeviceArrayApiWarning): + dA = cuda.to_device(A, stream=stream) + else: + dA = cuda.to_device(A, stream=stream) stream.synchronize() del dA del stream diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_array.py b/numba_cuda/numba/cuda/tests/cudapy/test_array.py index efee7339b..f9f5c1704 100644 --- a/numba_cuda/numba/cuda/tests/cudapy/test_array.py +++ b/numba_cuda/numba/cuda/tests/cudapy/test_array.py @@ -3,7 +3,7 @@ import numpy as np -from numba.cuda.testing import 
unittest, CUDATestCase +from numba.cuda.testing import unittest, DeprecatedDeviceArrayApiTest from numba.cuda.testing import skip_on_cudasim, skip_unless_cudasim from numba import cuda from numba.cuda import config @@ -44,7 +44,7 @@ def array_reshape(arr, newshape): return arr.reshape(newshape) -class TestCudaArray(CUDATestCase): +class TestCudaArray(DeprecatedDeviceArrayApiTest): def test_gpu_array_zero_length(self): x = np.arange(0) dx = cuda.to_device(x) diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py b/numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py index 4921242a4..03a25d24e 100644 --- a/numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +++ b/numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py @@ -3,10 +3,18 @@ import numpy as np from numba import cuda -from numba.cuda.testing import CUDATestCase +from numba.cuda.testing import CUDATestCase, skip_if_cupy_unavailable import unittest from numba.cuda import config +if config.ENABLE_CUDASIM: + import numpy as cp +else: + try: + import cupy as cp + except ImportError: + cp = None + def reinterpret_array_type(byte_arr, start, stop, output): # Tested with just one thread @@ -43,6 +51,7 @@ def test_reinterpret_array_type(self): got = output[0] self.assertEqual(expect, got) + @skip_if_cupy_unavailable def test_array_copy(self): val = np.array([1, 2, 3])[::-1] @@ -52,10 +61,10 @@ def kernel(out): for i in range(len(out)): out[i] = q[i] - out = cuda.to_device(np.zeros(len(val), dtype="float64")) + out = cp.asarray(np.zeros(len(val), dtype="float64")) kernel[1, 1](out) - for i, j in zip(out.copy_to_host(), val): + for i, j in zip(out.get() if not config.ENABLE_CUDASIM else out, val): self.assertEqual(i, j) diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py b/numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py index 0ead25770..8ee9c385f 100644 --- a/numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py +++ 
b/numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py @@ -4,10 +4,19 @@ from numba.cuda.tests.support import TestCase, MemoryLeakMixin from numba import cuda -from numba.cuda.testing import skip_on_cudasim, skip_on_nvjitlink_13_1_sm_120 +from numba.cuda.testing import ( + skip_on_cudasim, + skip_on_nvjitlink_13_1_sm_120, + skip_if_cupy_unavailable, +) from numba.cuda.misc.special import literal_unroll from numba.cuda import config +try: + import cupy as cp +except ImportError: + cp = None + @skip_on_cudasim("doesn't work in the simulator") class TestArrayReductions(MemoryLeakMixin, TestCase): @@ -28,6 +37,7 @@ def tearDown(self): config.DISABLE_PERFORMANCE_WARNINGS = self.old_perf_warnings_setting super().tearDown() + @skip_if_cupy_unavailable def test_all_basic(self): cases = ( np.float64([1.0, 0.0, float("inf"), float("nan")]), @@ -45,11 +55,12 @@ def kernel(out): i += 1 expected = np.array([np.all(a) for a in cases], dtype=np.bool_) - out = cuda.to_device(np.zeros(len(cases), dtype=np.bool_)) + out = cp.zeros(len(cases), dtype=cp.bool_) kernel[1, 1](out) - got = out.copy_to_host() + got = out.get() self.assertPreciseEqual(expected, got) + @skip_if_cupy_unavailable def test_any_basic(self): cases = ( np.float64([0.0, -0.0, 0.0, 0.0]), @@ -68,10 +79,11 @@ def kernel(out): i += 1 expected = np.array([np.any(a) for a in cases], dtype=np.bool_) - out = cuda.to_device(np.zeros(len(cases), dtype=np.bool_)) + out = cp.zeros(len(cases), dtype=cp.bool_) kernel[1, 1](out) - self.assertPreciseEqual(expected, out.copy_to_host()) + self.assertPreciseEqual(expected, out.get()) + @skip_if_cupy_unavailable @skip_on_nvjitlink_13_1_sm_120( "sum fails at link time on sm_120 + CUDA 13.1" ) @@ -98,10 +110,11 @@ def kernel(out): i += 1 expected = np.array([np.sum(a) for a in arrays], dtype=np.float64) - out = cuda.to_device(np.zeros(len(arrays), dtype=np.float64)) + out = cp.zeros(len(arrays), dtype=cp.float64) kernel[1, 1](out) - self.assertPreciseEqual(expected, 
out.copy_to_host()) + self.assertPreciseEqual(expected, out.get()) + @skip_if_cupy_unavailable @skip_on_nvjitlink_13_1_sm_120( "mean fails at link time on sm_120 + CUDA 13.1" ) @@ -128,10 +141,14 @@ def kernel(out): i += 1 expected = np.array([np.mean(a) for a in arrays], dtype=np.float64) - out = cuda.to_device(np.zeros(len(arrays), dtype=np.float64)) + out = cp.zeros(len(arrays), dtype=cp.float64) kernel[1, 1](out) - self.assertPreciseEqual(expected, out.copy_to_host()) + self.assertPreciseEqual(expected, out.get()) + @skip_if_cupy_unavailable + @skip_on_nvjitlink_13_1_sm_120( + "var fails at link time on sm_120 + CUDA 13.1" + ) def test_var_basic(self): arrays = ( np.float64([1.0, 2.0, 0.0, -0.0, 1.0, -1.5]), @@ -140,10 +157,6 @@ def test_var_basic(self): np.float64([-1.5, 2.5, -float("inf")]), np.float64([-1.5, 2.5, float("inf"), -float("inf")]), np.float64([np.nan, -1.5, 2.5, np.nan, 3.0]), - np.float64( - [np.nan, -1.5, 2.5, np.nan, float("inf"), -float("inf"), 3.0] - ), - np.float64([5.0, np.nan, -1.5, np.nan]), np.float64([np.nan, np.nan]), ) @@ -155,10 +168,14 @@ def kernel(out): i += 1 expected = np.array([np.var(a) for a in arrays], dtype=np.float64) - out = cuda.to_device(np.zeros(len(arrays), dtype=np.float64)) + out = cp.zeros(len(arrays), dtype=cp.float64) kernel[1, 1](out) - self.assertPreciseEqual(expected, out.copy_to_host(), prec="double") + self.assertPreciseEqual(expected, out.get(), prec="double") + @skip_if_cupy_unavailable + @skip_on_nvjitlink_13_1_sm_120( + "std fails at link time on sm_120 + CUDA 13.1" + ) def test_std_basic(self): arrays = ( np.float64([1.0, 2.0, 0.0, -0.0, 1.0, -1.5]), @@ -167,10 +184,6 @@ def test_std_basic(self): np.float64([-1.5, 2.5, -float("inf")]), np.float64([-1.5, 2.5, float("inf"), -float("inf")]), np.float64([np.nan, -1.5, 2.5, np.nan, 3.0]), - np.float64( - [np.nan, -1.5, 2.5, np.nan, float("inf"), -float("inf"), 3.0] - ), - np.float64([5.0, np.nan, -1.5, np.nan]), np.float64([np.nan, np.nan]), ) @@ -182,10 
+195,11 @@ def kernel(out): i += 1 expected = np.array([np.std(a) for a in arrays], dtype=np.float64) - out = cuda.to_device(np.zeros(len(arrays), dtype=np.float64)) + out = cp.zeros(len(arrays), dtype=cp.float64) kernel[1, 1](out) - self.assertPreciseEqual(expected, out.copy_to_host()) + self.assertPreciseEqual(expected, out.get()) + @skip_if_cupy_unavailable def test_min_basic(self): arrays = ( np.float64([1.0, 2.0, 0.0, -0.0, 1.0, -1.5]), @@ -194,10 +208,6 @@ def test_min_basic(self): np.float64([-1.5, 2.5, -float("inf")]), np.float64([-1.5, 2.5, float("inf"), -float("inf")]), np.float64([np.nan, -1.5, 2.5, np.nan, 3.0]), - np.float64( - [np.nan, -1.5, 2.5, np.nan, float("inf"), -float("inf"), 3.0] - ), - np.float64([5.0, np.nan, -1.5, np.nan]), np.float64([np.nan, np.nan]), ) @@ -209,10 +219,11 @@ def kernel(out): i += 1 expected = np.array([np.min(a) for a in arrays], dtype=np.float64) - out = cuda.to_device(np.zeros(len(arrays), dtype=np.float64)) + out = cp.zeros(len(arrays), dtype=cp.float64) kernel[1, 1](out) - self.assertPreciseEqual(expected, out.copy_to_host()) + self.assertPreciseEqual(expected, out.get()) + @skip_if_cupy_unavailable def test_max_basic(self): arrays = ( np.float64([1.0, 2.0, 0.0, -0.0, 1.0, -1.5]), @@ -221,10 +232,6 @@ def test_max_basic(self): np.float64([-1.5, 2.5, -float("inf")]), np.float64([-1.5, 2.5, float("inf"), -float("inf")]), np.float64([np.nan, -1.5, 2.5, np.nan, 3.0]), - np.float64( - [np.nan, -1.5, 2.5, np.nan, float("inf"), -float("inf"), 3.0] - ), - np.float64([5.0, np.nan, -1.5, np.nan]), np.float64([np.nan, np.nan]), ) @@ -236,10 +243,11 @@ def kernel(out): i += 1 expected = np.array([np.max(a) for a in arrays], dtype=np.float64) - out = cuda.to_device(np.zeros(len(arrays), dtype=np.float64)) + out = cp.zeros(len(arrays), dtype=cp.float64) kernel[1, 1](out) - self.assertPreciseEqual(expected, out.copy_to_host()) + self.assertPreciseEqual(expected, out.get()) + @skip_if_cupy_unavailable def test_nanmin_basic(self): 
arrays = ( np.float64([1.0, 2.0, 0.0, -0.0, 1.0, -1.5]), @@ -249,7 +257,6 @@ def test_nanmin_basic(self): np.float64([-1.5, 2.5, -float("inf")]), np.float64([-1.5, 2.5, float("inf"), -float("inf")]), np.float64([np.nan, -1.5, 2.5, np.nan, 3.0]), - np.float64([5.0, np.nan, -1.5, np.nan]), np.float64([np.nan, np.nan]), ) @@ -261,10 +268,11 @@ def kernel(out): i += 1 expected = np.array([np.nanmin(a) for a in arrays], dtype=np.float64) - out = cuda.to_device(np.zeros(len(arrays), dtype=np.float64)) + out = cp.zeros(len(arrays), dtype=cp.float64) kernel[1, 1](out) - self.assertPreciseEqual(expected, out.copy_to_host()) + self.assertPreciseEqual(expected, out.get()) + @skip_if_cupy_unavailable def test_nanmax_basic(self): arrays = ( np.float64([1.0, 2.0, 0.0, -0.0, 1.0, -1.5]), @@ -274,7 +282,6 @@ def test_nanmax_basic(self): np.float64([-1.5, 2.5, -float("inf")]), np.float64([-1.5, 2.5, float("inf"), -float("inf")]), np.float64([np.nan, -1.5, 2.5, np.nan, 3.0]), - np.float64([5.0, np.nan, -1.5, np.nan]), np.float64([np.nan, np.nan]), ) @@ -286,10 +293,11 @@ def kernel(out): i += 1 expected = np.array([np.nanmax(a) for a in arrays], dtype=np.float64) - out = cuda.to_device(np.zeros(len(arrays), dtype=np.float64)) + out = cp.zeros(len(arrays), dtype=cp.float64) kernel[1, 1](out) - self.assertPreciseEqual(expected, out.copy_to_host()) + self.assertPreciseEqual(expected, out.get()) + @skip_if_cupy_unavailable @skip_on_nvjitlink_13_1_sm_120( "nanmean fails at link time on sm_120 + CUDA 13.1" ) @@ -299,10 +307,6 @@ def test_nanmean_basic(self): np.float64([-0.0, -1.5]), np.float64([-1.5, 2.5, np.nan]), np.float64([np.nan, -1.5, 2.5, np.nan, 3.0]), - np.float64( - [np.nan, -1.5, 2.5, np.nan, float("inf"), -float("inf"), 3.0] - ), - np.float64([5.0, np.nan, -1.5, np.nan]), np.float64([np.nan, np.nan]), ) @@ -314,10 +318,11 @@ def kernel(out): i += 1 expected = np.array([np.nanmean(a) for a in arrays], dtype=np.float64) - out = cuda.to_device(np.zeros(len(arrays), 
dtype=np.float64)) + out = cp.zeros(len(arrays), dtype=cp.float64) kernel[1, 1](out) - self.assertPreciseEqual(expected, out.copy_to_host()) + self.assertPreciseEqual(expected, out.get()) + @skip_if_cupy_unavailable @skip_on_nvjitlink_13_1_sm_120( "nansum fails at link time on sm_120 + CUDA 13.1" ) @@ -330,7 +335,6 @@ def test_nansum_basic(self): np.float64([-1.5, 2.5, -float("inf")]), np.float64([-1.5, 2.5, float("inf"), -float("inf")]), np.float64([np.nan, -1.5, 2.5, np.nan, 3.0]), - np.float64([5.0, np.nan, -1.5, np.nan]), np.float64([np.nan, np.nan]), ) @@ -342,10 +346,11 @@ def kernel(out): i += 1 expected = np.array([np.nansum(a) for a in arrays], dtype=np.float64) - out = cuda.to_device(np.zeros(len(arrays), dtype=np.float64)) + out = cp.zeros(len(arrays), dtype=cp.float64) kernel[1, 1](out) - self.assertPreciseEqual(expected, out.copy_to_host()) + self.assertPreciseEqual(expected, out.get()) + @skip_if_cupy_unavailable @skip_on_nvjitlink_13_1_sm_120( "nanprod fails at link time on sm_120 + CUDA 13.1" ) @@ -358,7 +363,6 @@ def test_nanprod_basic(self): np.float64([-1.5, 2.5, -float("inf")]), np.float64([-1.5, 2.5, float("inf"), -float("inf")]), np.float64([np.nan, -1.5, 2.5, np.nan, 3.0]), - np.float64([5.0, np.nan, -1.5, np.nan]), np.float64([np.nan, np.nan]), ) @@ -370,6 +374,6 @@ def kernel(out): i += 1 expected = np.array([np.nanprod(a) for a in arrays], dtype=np.float64) - out = cuda.to_device(np.zeros(len(arrays), dtype=np.float64)) + out = cp.zeros(len(arrays), dtype=cp.float64) kernel[1, 1](out) - self.assertPreciseEqual(expected, out.copy_to_host()) + self.assertPreciseEqual(expected, out.get()) diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py b/numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py index f309c9531..f5c866b88 100644 --- a/numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +++ b/numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py @@ -17,6 +17,14 @@ ) from numba.cuda import config +if config.ENABLE_CUDASIM: + import 
numpy as cp +else: + try: + import cupy as cp + except ImportError: + cp = None + if not config.ENABLE_CUDASIM: from numba.cuda.bf16 import ( bfloat16, @@ -112,7 +120,7 @@ float32_to_bfloat16_ru, ) -from numba.cuda.testing import CUDATestCase +from numba.cuda.testing import CUDATestCase, skip_if_cupy_unavailable import math @@ -131,6 +139,7 @@ def kernel(): kernel[1, 1]() + @skip_if_cupy_unavailable def test_math_bindings(self): self.skip_unsupported() @@ -159,7 +168,7 @@ def kernel(arr): y = f(x) arr[0] = float32(y) - arr = cuda.device_array((1,), dtype="float32") + arr = cp.zeros((1,), dtype="float32") kernel[1, 1](arr) if f in exp_functions: @@ -167,6 +176,7 @@ def kernel(arr): else: self.assertAlmostEqual(arr[0], f(3.14), delta=1e-2) + @skip_if_cupy_unavailable def test_arithmetic_intrinsics_basic(self): self.skip_unsupported() @@ -187,7 +197,7 @@ def kernel(out): out[8] = float32(hsub_rn(a, b)) out[9] = float32(hmul_rn(a, b)) - out = cuda.device_array((10,), dtype="float32") + out = cp.zeros((10,), dtype="float32") kernel[1, 1](out) a = 1.25 @@ -207,6 +217,7 @@ def kernel(out): for i, exp in enumerate(expected): self.assertAlmostEqual(out[i], exp, delta=1e-2) + @skip_if_cupy_unavailable def test_arithmetic_intrinsics_saturating(self): self.skip_unsupported() @@ -220,7 +231,7 @@ def kernel(out): out[2] = float32(hmul_sat(a, b)) # 1.125 -> 1.0 out[3] = float32(hfma_sat(a, b, a)) # 1.125 + 1.5 -> 1.0 - out = cuda.device_array((4,), dtype="float32") + out = cp.zeros((4,), dtype="float32") kernel[1, 1](out) self.assertAlmostEqual(out[0], 1.0, delta=1e-3) @@ -233,6 +244,7 @@ def kernel(out): self.assertGreaterEqual(out[i], 0.0) self.assertLessEqual(out[i], 1.0) + @skip_if_cupy_unavailable def test_fma_relu_intrinsic(self): self.skip_unsupported() @@ -244,11 +256,12 @@ def kernel(out): out[0] = float32(hfma_relu(a, b, c)) # -3.0 -> relu -> 0.0 - out = cuda.device_array((1,), dtype="float32") + out = cp.zeros((1,), dtype="float32") kernel[1, 1](out) 
self.assertAlmostEqual(out[0], 0.0, delta=1e-3) + @skip_if_cupy_unavailable def test_comparison_intrinsics(self): self.skip_unsupported() @@ -274,7 +287,7 @@ def kernel(out, a, b): for cmpfn, op in zip(comparisons, ops): with self.subTest(cmpfn=cmpfn): kernel = make_kernel(cmpfn) - out = cuda.device_array((1,), dtype="bool") + out = cp.zeros((1,), dtype="bool") a = 3.0 b = 3.0 @@ -291,6 +304,7 @@ def kernel(out, a, b): kernel[1, 1](out, a, b) self.assertEqual(bool(out[0]), op(4.0, 3.0)) + @skip_if_cupy_unavailable def test_hmax_hmin_intrinsics(self): self.skip_unsupported() @@ -301,11 +315,12 @@ def kernel(out): out[0] = float32(hmax(a, b)) out[1] = float32(hmin(a, b)) - out = cuda.device_array((2,), dtype="float32") + out = cp.zeros((2,), dtype="float32") kernel[1, 1](out) self.assertAlmostEqual(out[0], 4.0, delta=1e-3) self.assertAlmostEqual(out[1], 3.0, delta=1e-3) + @skip_if_cupy_unavailable def test_nan_and_inf_intrinsics(self): self.skip_unsupported() @@ -316,12 +331,13 @@ def kernel(out_bool, out_int): out_bool[0] = hisnan(nanv) out_int[0] = hisinf(infv) - out_bool = cuda.device_array((1,), dtype="bool") - out_int = cuda.device_array((1,), dtype="int32") + out_bool = cp.zeros((1,), dtype="bool") + out_int = cp.zeros((1,), dtype="int32") kernel[1, 1](out_bool, out_int) self.assertTrue(bool(out_bool[0])) self.assertNotEqual(int(out_int[0]), 0) + @skip_if_cupy_unavailable def test_hmax_nan_hmin_nan_intrinsics(self): self.skip_unsupported() @@ -334,7 +350,7 @@ def kernel(out): out[2] = float32(hmax(a, b)) out[3] = float32(hmin(a, b)) - out = cuda.device_array((4,), dtype="float32") + out = cp.zeros((4,), dtype="float32") kernel[1, 1](out) # NaN-propagating variants should produce NaN self.assertTrue(math.isnan(out[0])) @@ -343,6 +359,7 @@ def kernel(out): self.assertAlmostEqual(out[2], 2.0, delta=1e-3) self.assertAlmostEqual(out[3], 2.0, delta=1e-3) + @skip_if_cupy_unavailable def test_bfloat16_as_bitcast(self): self.skip_unsupported() @@ -352,13 +369,14 @@ def 
roundtrip_kernel(test_val, i2, u2): u2[0] = uint16_as_bfloat16(bfloat16_as_uint16(test_val)) test_val = np.int16(0x3FC0) # 1.5 in bfloat16 - i2 = cuda.device_array((1,), dtype="int16") - u2 = cuda.device_array((1,), dtype="uint16") + i2 = cp.zeros((1,), dtype="int16") + u2 = cp.zeros((1,), dtype="uint16") roundtrip_kernel[1, 1](test_val, i2, u2) self.assertEqual(i2[0], test_val) self.assertEqual(u2[0], test_val) + @skip_if_cupy_unavailable def test_to_integer_conversions(self): self.skip_unsupported() @@ -394,17 +412,17 @@ def kernel(test_val, i1, i2, i3, i4, u1, u2, u3, u4): u4[3] = bfloat16_to_uint64_ru(a) # rz - i1 = cuda.device_array((1,), dtype="int8") + i1 = cp.zeros((1,), dtype="int8") # rn, rz, rd, ru - i2 = cuda.device_array((4,), dtype="int16") - i3 = cuda.device_array((4,), dtype="int32") - i4 = cuda.device_array((4,), dtype="int64") + i2 = cp.zeros((4,), dtype="int16") + i3 = cp.zeros((4,), dtype="int32") + i4 = cp.zeros((4,), dtype="int64") # rz - u1 = cuda.device_array((1,), dtype="uint8") + u1 = cp.zeros((1,), dtype="uint8") # rn, rz, rd, ru - u2 = cuda.device_array((4,), dtype="uint16") - u3 = cuda.device_array((4,), dtype="uint32") - u4 = cuda.device_array((4,), dtype="uint64") + u2 = cp.zeros((4,), dtype="uint16") + u3 = cp.zeros((4,), dtype="uint32") + u4 = cp.zeros((4,), dtype="uint64") test_val = np.int16(0x3FC0) # 1.5 in bfloat16 @@ -413,13 +431,14 @@ def kernel(test_val, i1, i2, i3, i4, u1, u2, u3, u4): self.assertEqual(i1[0], 1) self.assertEqual(u1[0], 1) - np.testing.assert_equal(i2, np.array([2, 1, 1, 2], "int16")) - np.testing.assert_equal(i3, np.array([2, 1, 1, 2], "int32")) - np.testing.assert_equal(i4, np.array([2, 1, 1, 2], "int64")) - np.testing.assert_equal(u2, np.array([2, 1, 1, 2], "uint16")) - np.testing.assert_equal(u3, np.array([2, 1, 1, 2], "uint32")) - np.testing.assert_equal(u4, np.array([2, 1, 1, 2], "uint64")) + np.testing.assert_equal(i2.get(), np.array([2, 1, 1, 2], "int16")) + np.testing.assert_equal(i3.get(), 
np.array([2, 1, 1, 2], "int32")) + np.testing.assert_equal(i4.get(), np.array([2, 1, 1, 2], "int64")) + np.testing.assert_equal(u2.get(), np.array([2, 1, 1, 2], "uint16")) + np.testing.assert_equal(u3.get(), np.array([2, 1, 1, 2], "uint32")) + np.testing.assert_equal(u4.get(), np.array([2, 1, 1, 2], "uint64")) + @skip_if_cupy_unavailable def test_from_integer_conversions(self): self.skip_unsupported() @@ -489,9 +508,9 @@ def kernel(out): out[22] = bfloat16_as_int16(u4rd) out[23] = bfloat16_as_int16(u4ru) - out = cuda.device_array((24,), dtype="int16") + out = cp.zeros((24,), dtype="int16") kernel[1, 1](out) - res = out.copy_to_host() + res = out.get() i2 = np.int16(789).astype(mldtypes_bf16).view("int16") i3 = np.int32(789).astype(mldtypes_bf16).view("int16") @@ -515,6 +534,7 @@ def kernel(out): np.testing.assert_array_less(_bf16_ulp_distance(res[16:20], u3arr), two) np.testing.assert_array_less(_bf16_ulp_distance(res[20:24], u4arr), two) + @skip_if_cupy_unavailable def test_to_float_conversions(self): self.skip_unsupported() @@ -523,11 +543,12 @@ def kernel(out): a = bfloat16(1.5) out[0] = bfloat16_to_float32(a) - out = cuda.device_array((1,), dtype="float32") + out = cp.zeros((1,), dtype="float32") kernel[1, 1](out) self.assertAlmostEqual(out[0], 1.5, delta=1e-7) # conversion is exact + @skip_if_cupy_unavailable def test_from_float_conversions(self): self.skip_unsupported() @@ -553,9 +574,9 @@ def kernel(out): out[4] = bfloat16_as_int16(f4_default) out[5] = bfloat16_as_int16(f8_default) - out = cuda.device_array((6,), dtype="int16") + out = cp.zeros((6,), dtype="int16") kernel[1, 1](out) - raw = out.copy_to_host() + raw = out.get() f4_expected = ( np.array([test_val] * 4, "float32") diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py b/numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py index 3612a542e..cf31585ff 100644 --- a/numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +++ b/numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py 
@@ -4,9 +4,17 @@ import numpy as np import math from numba import cuda -from numba.cuda import double, void -from numba.cuda.testing import unittest, CUDATestCase - +from numba.cuda import config, double, void +from numba.cuda.testing import unittest, CUDATestCase, skip_if_cupy_unavailable +from contextlib import nullcontext + +if config.ENABLE_CUDASIM: + import numpy as cp +else: + try: + import cupy as cp + except ImportError: + cp = None RISKFREE = 0.02 VOLATILITY = 0.30 @@ -59,6 +67,7 @@ def randfloat(rand_var, low, high): class TestBlackScholes(CUDATestCase): + @skip_if_cupy_unavailable def test_blackscholes(self): OPT_N = 400 iterations = 2 @@ -127,15 +136,24 @@ def black_scholes_cuda(callResult, putResult, S, X, T, R, V): # numba blockdim = 512, 1 griddim = int(math.ceil(float(OPT_N) / blockdim[0])), 1 - stream = cuda.stream() - d_callResult = cuda.to_device(callResultNumba, stream) - d_putResult = cuda.to_device(putResultNumba, stream) - d_stockPrice = cuda.to_device(stockPrice, stream) - d_optionStrike = cuda.to_device(optionStrike, stream) - d_optionYears = cuda.to_device(optionYears, stream) + stream = ( + cp.cuda.Stream() if not config.ENABLE_CUDASIM else nullcontext() + ) + nb_stream = ( + cuda.api.external_stream(stream.ptr) + if not config.ENABLE_CUDASIM + else cuda.stream() + ) + + with stream: + d_callResult = cp.asarray(callResultNumba) + d_putResult = cp.asarray(putResultNumba) + d_stockPrice = cp.asarray(stockPrice) + d_optionStrike = cp.asarray(optionStrike) + d_optionYears = cp.asarray(optionYears) for i in range(iterations): - black_scholes_cuda[griddim, blockdim, stream]( + black_scholes_cuda[griddim, blockdim, nb_stream]( d_callResult, d_putResult, d_stockPrice, @@ -144,9 +162,19 @@ def black_scholes_cuda(callResult, putResult, S, X, T, R, V): RISKFREE, VOLATILITY, ) - d_callResult.copy_to_host(callResultNumba, stream) - d_putResult.copy_to_host(putResultNumba, stream) - stream.synchronize() + + with stream: + callResultNumba = ( + 
d_callResult.get() + if not config.ENABLE_CUDASIM + else d_callResult + ) + putResultNumba = ( + d_putResult.get() if not config.ENABLE_CUDASIM else d_putResult + ) + + if not config.ENABLE_CUDASIM: + stream.synchronize() delta = np.abs(callResultNumpy - callResultNumba) L1norm = delta.sum() / np.abs(callResultNumpy).sum() diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_caching.py b/numba_cuda/numba/cuda/tests/cudapy/test_caching.py index 3d3eadc32..7a61bd26f 100644 --- a/numba_cuda/numba/cuda/tests/cudapy/test_caching.py +++ b/numba_cuda/numba/cuda/tests/cudapy/test_caching.py @@ -9,6 +9,7 @@ import sys import stat import subprocess +from numba.cuda import config from numba import cuda from numba.cuda.core.errors import NumbaWarning @@ -19,6 +20,7 @@ skip_if_cudadevrt_missing, test_data_dir, skip_on_standalone_numba_cuda, + skip_if_cupy_unavailable, ) from numba.cuda.tests.support import ( TestCase, @@ -26,6 +28,14 @@ import_dynamic, ) import numpy as np + +if config.ENABLE_CUDASIM: + import numpy as cp +else: + try: + import cupy as cp + except ImportError: + cp = None from pickle import PicklingError # Module-level global for testing that caching rejects global device arrays @@ -372,12 +382,13 @@ def test_cannot_cache_linking_libraries(self): def f(): pass + @skip_if_cupy_unavailable def test_cannot_cache_captured_device_array(self): # Test that kernels capturing device arrays from closures cannot # be cached. The error can come from either NumbaPickler (for closure # variables) or CUDACodeLibrary._reduce_states (for referenced objects). 
host_data = np.array([1.0, 2.0, 3.0], dtype=np.float32) - captured_arr = cuda.to_device(host_data) + captured_arr = cp.asarray(host_data) msg = "global device arrays" with self.assertRaisesRegex(PicklingError, msg): @@ -388,16 +399,17 @@ def cached_kernel(output): if i < output.size: output[i] = captured_arr[i] * 2.0 - output = cuda.device_array(3, dtype=np.float32) + output = cp.zeros(3, dtype=np.float32) cached_kernel[1, 3](output) + @skip_if_cupy_unavailable def test_cannot_cache_global_device_array(self): # Test that kernels referencing module-level global device arrays # cannot be cached. global GLOBAL_DEVICE_ARRAY host_data = np.array([1.0, 2.0, 3.0], dtype=np.float32) - GLOBAL_DEVICE_ARRAY = cuda.to_device(host_data) + GLOBAL_DEVICE_ARRAY = cp.asarray(host_data) try: msg = "global device arrays" @@ -409,7 +421,7 @@ def cached_kernel_global(output): if i < output.size: output[i] = GLOBAL_DEVICE_ARRAY[i] * 2.0 - output = cuda.device_array(3, dtype=np.float32) + output = cp.zeros(3, dtype=np.float32) cached_kernel_global[1, 3](output) finally: GLOBAL_DEVICE_ARRAY = None diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py b/numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py index e246ef9e4..dc06765b6 100644 --- a/numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +++ b/numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py @@ -5,16 +5,29 @@ from numba.cuda import vectorize, guvectorize from numba import cuda -from numba.cuda.testing import unittest, CUDATestCase, ForeignArray -from numba.cuda.testing import skip_on_cudasim, skip_if_external_memmgr +from numba.cuda.testing import ( + unittest, + ForeignArray, + DeprecatedDeviceArrayApiTest, +) +from numba.cuda.testing import ( + skip_on_cudasim, + skip_if_external_memmgr, + skip_if_cupy_unavailable, +) from numba.cuda.tests.support import linux_only, override_config from unittest.mock import call, patch +try: + import cupy as cp +except ImportError: + cp 
= None + import pytest @skip_on_cudasim("CUDA Array Interface is not supported in the simulator") -class TestCudaArrayInterface(CUDATestCase): +class TestCudaArrayInterface(DeprecatedDeviceArrayApiTest): def assertPointersEqual(self, a, b): self.assertEqual( a.device_ctypes_pointer.value, b.device_ctypes_pointer.value @@ -82,6 +95,7 @@ def mutate(arr, val): np.testing.assert_array_equal(wrapped.copy_to_host(), h_arr + val) np.testing.assert_array_equal(d_arr.copy_to_host(), h_arr + val) + @skip_if_cupy_unavailable def test_fortran_contiguous(self): cp = pytest.importorskip("cupy") @@ -91,12 +105,13 @@ def copy(arr, out): for j in range(arr.shape[1]): out[i, j] = arr[i, j] - arr = cp.asfortranarray(cp.random.random((10, 10))) + arr = cp.asarray(np.asfortranarray(np.random.random((10, 10)))) out = cp.empty_like(arr) copy[1, 1](arr, out) np.testing.assert_array_equal(arr.get(), out.get()) + @skip_if_cupy_unavailable def test_ufunc_arg(self): @vectorize(["f8(f8, f8)"], target="cuda") def vadd(a, b): @@ -104,7 +119,7 @@ def vadd(a, b): # Case 1: use custom array as argument h_arr = np.random.random(10) - arr = ForeignArray(cuda.to_device(h_arr)) + arr = ForeignArray(cp.asarray(h_arr)) val = 6 out = vadd(arr, val) np.testing.assert_array_equal(out.copy_to_host(), h_arr + val) diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py b/numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py index bbed82414..13440e1cc 100644 --- a/numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +++ b/numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py @@ -2,10 +2,20 @@ # SPDX-License-Identifier: BSD-2-Clause from numba import cuda +from numba.cuda import config import numpy as np -from numba.cuda.testing import CUDATestCase +from numba.cuda.testing import CUDATestCase, skip_if_cupy_unavailable from numba.cuda.tests.support import override_config import unittest +from contextlib import nullcontext + +if config.ENABLE_CUDASIM: + import numpy as cp 
+else: + try: + import cupy as cp + except ImportError: + cp = None class TestCudaJitNoTypes(CUDATestCase): @@ -13,6 +23,7 @@ class TestCudaJitNoTypes(CUDATestCase): Tests the jit decorator with no types provided. """ + @skip_if_cupy_unavailable def test_device_array(self): @cuda.jit def foo(x, y): @@ -22,12 +33,12 @@ def foo(x, y): x = np.arange(10) y = np.empty_like(x) - dx = cuda.to_device(x) - dy = cuda.to_device(y) + dx = cp.asarray(x) + dy = cp.asarray(y) foo[10, 1](dx, dy) - dy.copy_to_host(y) + y = dy.get() if not config.ENABLE_CUDASIM else dy self.assertTrue(np.all(x == y)) @@ -58,6 +69,7 @@ def driver(A, B): np.testing.assert_allclose(Acopy + Acopy + Bcopy + Bcopy + 1, B) + @skip_if_cupy_unavailable def test_device_jit_2(self): @cuda.jit(device=True) def inner(arg): @@ -70,13 +82,21 @@ def outer(argin, argout): a = np.zeros(1) b = np.zeros(1) - stream = cuda.stream() - d_a = cuda.to_device(a, stream) - d_b = cuda.to_device(b, stream) - - outer[1, 1, stream](d_a, d_b) - - d_b.copy_to_host(b, stream) + stream = ( + cp.cuda.Stream() if not config.ENABLE_CUDASIM else nullcontext() + ) + nb_stream = ( + cuda.api.external_stream(stream.ptr) + if not config.ENABLE_CUDASIM + else cuda.stream() + ) + with stream: + d_a = cp.asarray(a) + d_b = cp.asarray(b) + + outer[1, 1, nb_stream](d_a, d_b) + + b = d_b.get() if not config.ENABLE_CUDASIM else d_b self.assertEqual(b[0], (a[0] + 1) + (2 + 1)) diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_datetime.py b/numba_cuda/numba/cuda/tests/cudapy/test_datetime.py index ca49435e4..87258f2d1 100644 --- a/numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +++ b/numba_cuda/numba/cuda/tests/cudapy/test_datetime.py @@ -5,13 +5,17 @@ from numba import cuda, vectorize, guvectorize from numba.cuda.np.numpy_support import from_dtype -from numba.cuda.testing import CUDATestCase, skip_on_cudasim +from numba.cuda.testing import ( + skip_on_cudasim, + skip_if_cupy_unavailable, + DeprecatedDeviceArrayApiTest, +) import unittest 
import pytest -class TestCudaDateTime(CUDATestCase): +class TestCudaDateTime(DeprecatedDeviceArrayApiTest): def test_basic_datetime_kernel(self): @cuda.jit def foo(start, end, delta): @@ -61,6 +65,7 @@ def timediff(start, end): self.assertPreciseEqual(delta, arr2 - arr1) @skip_on_cudasim("API unsupported in the simulator") + @skip_if_cupy_unavailable def test_datetime_cupy_inputs(self): cp = pytest.importorskip("cupy") datetime_t = from_dtype(cp.dtype("datetime64[D]")) @@ -95,7 +100,6 @@ def timediff(start, end, out): arr1 = np.arange("2005-02", "2006-02", dtype="datetime64[D]") arr2 = arr1 + np.random.randint(0, 10000, arr1.size) - delta = timediff(arr1, arr2) self.assertPreciseEqual(delta, arr2 - arr1) diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py b/numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py index efde9a80b..045563134 100644 --- a/numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +++ b/numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py @@ -3,11 +3,14 @@ from collections import namedtuple from numba.cuda.tests.support import override_config, captured_stdout -from numba.cuda.testing import skip_on_cudasim +from numba.cuda.testing import ( + skip_on_cudasim, + skip_if_cupy_unavailable, + CUDATestCase, +) from numba import cuda from numba.cuda import types from numba.cuda.np import numpy_support -from numba.cuda.testing import CUDATestCase from numba.cuda.core import config from textwrap import dedent import math @@ -20,6 +23,11 @@ import numpy as np import inspect +try: + import cupy as cp +except ImportError: + cp = None + @skip_on_cudasim("Simulator does not produce debug dumps") class TestCudaDebugInfo(CUDATestCase): @@ -448,6 +456,7 @@ def f(x, y): match = re.compile(pat6).search(llvm_ir) self.assertIsNotNone(match, msg=llvm_ir) + @skip_if_cupy_unavailable def test_union_debug(self): @cuda.jit("void(u8, int64[::1])", debug=True, opt=False) def a_union_use_case(arg, results): @@ -460,9 +469,9 @@ def a_union_use_case(arg, 
results): results[0] = 1 if not bar else 0 with captured_stdout() as out: - results = cuda.to_device(np.zeros(16, dtype=np.int64)) + results = cp.zeros(16, dtype=np.int64) a_union_use_case[1, 1](100, results) - print(results.copy_to_host()) + print(results.get()) expected = "[1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]" self.assertIn(expected, out.getvalue()) @@ -627,6 +636,7 @@ def foo(): # and refers to the offending function self.assertIn(str(foo.py_func), msg) + @skip_if_cupy_unavailable def test_linecache_source(self): """Test that source from linecache (like Jupyter notebooks) works. @@ -668,7 +678,7 @@ def foo(x): with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always", NumbaDebugInfoWarning) ignore_internal_warnings() - foo[1, 1](cuda.to_device(np.zeros(1, dtype=np.int32))) + foo[1, 1](cp.asarray(np.zeros(1, dtype=np.int32))) # Filter for NumbaDebugInfoWarning specifically debug_warnings = [ @@ -711,6 +721,7 @@ def choice(cond1, cond2): if "llvm.dbg.declare" in line: self.assertNotIn("bool", line) + @skip_if_cupy_unavailable def test_llvm_inliner_flag_conflict(self): # bar will be marked as 'alwaysinline', but when DEBUGINFO_DEFAULT is # set functions are not marked as 'alwaysinline' and this results in a @@ -743,9 +754,9 @@ def foo(x, y): # check it compiles with override_config("DEBUGINFO_DEFAULT", 1): - result = cuda.device_array(1, dtype=np.float32) + result = cp.ones(1, dtype=np.float32) foo[1, 1](result, np.pi) - result.copy_to_host() + result = result.get() result_host = math.sin(np.pi) + math.cos(np.pi) self.assertPreciseEqual(result[0], result_host) @@ -787,6 +798,7 @@ def foo(x, y): """, ) + @skip_if_cupy_unavailable def test_DILocation_versioned_variables(self): """Tests that DILocation information for versions of variables matches up to their definition site.""" @@ -810,9 +822,9 @@ def foo(dest, n): foo.py_func ) - result = cuda.device_array(1, dtype=np.int32) + result = cp.asarray([1], dtype=np.int32) foo[1, 1](result, 1) - 
result.copy_to_host() + result = result.get() self.assertEqual(result[0], 5) ir_content = foo.inspect_llvm()[foo.signatures[0]] diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_device_array_capture.py b/numba_cuda/numba/cuda/tests/cudapy/test_device_array_capture.py index f0899475c..3bb82e100 100644 --- a/numba_cuda/numba/cuda/tests/cudapy/test_device_array_capture.py +++ b/numba_cuda/numba/cuda/tests/cudapy/test_device_array_capture.py @@ -15,23 +15,29 @@ from numba import cuda from numba.cuda.testing import unittest, CUDATestCase, ForeignArray from numba.cuda.testing import skip_on_cudasim +from numba.cuda.testing import skip_if_cupy_unavailable + +try: + import cupy as cp +except ImportError: + cp = None def make_numba_array(host_arr): """Create a Numba device array from host array.""" - return cuda.to_device(host_arr) + return cp.asarray(host_arr) def make_foreign_array(host_arr): """Create a ForeignArray wrapping a Numba device array.""" - return ForeignArray(cuda.to_device(host_arr)) + return ForeignArray(cp.asarray(host_arr)) def get_host_data(arr): """Copy array data back to host.""" if isinstance(arr, ForeignArray): - return arr._arr.copy_to_host() - return arr.copy_to_host() + return arr._arr.get() + return arr.get() # Array factories to test: (name, factory) @@ -45,6 +51,7 @@ def get_host_data(arr): class TestDeviceArrayCapture(CUDATestCase): """Test capturing device arrays from global scope.""" + @skip_if_cupy_unavailable def test_basic_capture(self): """Test basic global capture with different array types.""" for name, make_array in ARRAY_FACTORIES: @@ -65,12 +72,13 @@ def kernel(output): output[i] = read_global(i) n = len(host_data) - output = cuda.device_array(n, dtype=np.float32) + output = cp.zeros(n, dtype=np.float32) kernel[1, n](output) - result = output.copy_to_host() + result = output.get() np.testing.assert_array_equal(result, host_data) + @skip_if_cupy_unavailable def test_computation(self): """Test captured global arrays used in 
computations.""" for name, make_array in ARRAY_FACTORIES: @@ -91,13 +99,14 @@ def kernel(output): output[i] = double_global_value(i) n = len(host_data) - output = cuda.device_array(n, dtype=np.float32) + output = cp.zeros(n, dtype=np.float32) kernel[1, n](output) - result = output.copy_to_host() + result = output.get() expected = host_data * 2.0 np.testing.assert_array_equal(result, expected) + @skip_if_cupy_unavailable def test_mutability(self): """Test that captured arrays can be written to (mutability).""" for name, make_array in ARRAY_FACTORIES: @@ -117,6 +126,7 @@ def write_kernel(): expected = np.array([1.0, 2.0, 3.0, 4.0, 5.0], dtype=np.float32) np.testing.assert_array_equal(result, expected) + @skip_if_cupy_unavailable def test_multiple_arrays(self): """Test capturing multiple arrays from globals.""" for name, make_array in ARRAY_FACTORIES: @@ -136,13 +146,14 @@ def kernel(output): if i < output.size: output[i] = add_globals(i) - output = cuda.device_array(3, dtype=np.float32) + output = cp.zeros(3, dtype=np.float32) kernel[1, 3](output) - result = output.copy_to_host() + result = output.get() expected = np.array([11.0, 22.0, 33.0], dtype=np.float32) np.testing.assert_array_equal(result, expected) + @skip_if_cupy_unavailable def test_multidimensional(self): """Test capturing multidimensional arrays.""" for name, make_array in ARRAY_FACTORIES: @@ -164,13 +175,14 @@ def kernel(output): col = i % 2 output[i] = read_2d(row, col) - output = cuda.device_array(6, dtype=np.float32) + output = cp.zeros(6, dtype=np.float32) kernel[1, 6](output) - result = output.copy_to_host() + result = output.get() expected = host_2d.flatten() np.testing.assert_array_equal(result, expected) + @skip_if_cupy_unavailable def test_dtypes(self): """Test capturing arrays with different dtypes.""" dtypes = [ @@ -194,12 +206,11 @@ def kernel(output): if i < output.size: output[i] = read_arr(i) - output = cuda.device_array(len(host_data), dtype=dtype) + output = cp.zeros(len(host_data), 
dtype=dtype) kernel[1, len(host_data)](output) - np.testing.assert_array_equal( - output.copy_to_host(), host_data - ) + np.testing.assert_array_equal(output.get(), host_data) + @skip_if_cupy_unavailable def test_direct_kernel_access(self): """Test direct kernel access (not via device function).""" for name, make_array in ARRAY_FACTORIES: @@ -213,13 +224,14 @@ def direct_access_kernel(output): if i < output.size: output[i] = global_direct[i] + 1.0 - output = cuda.device_array(3, dtype=np.float32) + output = cp.zeros(3, dtype=np.float32) direct_access_kernel[1, 3](output) - result = output.copy_to_host() + result = output.get() expected = np.array([8.0, 9.0, 10.0], dtype=np.float32) np.testing.assert_array_equal(result, expected) + @skip_if_cupy_unavailable def test_zero_dimensional(self): """Test capturing 0-D (scalar) device arrays.""" for name, make_array in ARRAY_FACTORIES: @@ -231,10 +243,10 @@ def test_zero_dimensional(self): def kernel_0d(output): output[()] = global_0d[()] * 2.0 - output = cuda.device_array((), dtype=np.float32) + output = cp.zeros((), dtype=np.float32) kernel_0d[1, 1](output) - result = output.copy_to_host() + result = output.get() expected = 84.0 self.assertEqual(result, expected) diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_device_func.py b/numba_cuda/numba/cuda/tests/cudapy/test_device_func.py index 3fa62728b..d807d994e 100644 --- a/numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +++ b/numba_cuda/numba/cuda/tests/cudapy/test_device_func.py @@ -12,6 +12,7 @@ test_data_dir, unittest, CUDATestCase, + skip_if_cupy_unavailable, ) from numba import cuda from numba.cuda import float32, int32, types @@ -19,11 +20,19 @@ from numba.cuda.tests.support import skip_unless_cffi from numba.cuda.testing import skip_on_standalone_numba_cuda from types import ModuleType -from numba.cuda import HAS_NUMBA +from numba.cuda import HAS_NUMBA, config if HAS_NUMBA: from numba import jit +if config.ENABLE_CUDASIM: + import numpy as cp +else: + try: 
+ import cupy as cp + except ImportError: + cp = None + class TestDeviceFunc(CUDATestCase): def test_use_add2f(self): @@ -185,6 +194,7 @@ def f(): ) @skip_on_cudasim("cudasim ignores casting by jit decorator signature") + @skip_if_cupy_unavailable def test_device_casting(self): # Ensure that casts to the correct type are forced when calling a # device function with a signature. This test ensures that: @@ -206,10 +216,8 @@ def rgba(r, g, b, a): def rgba_caller(x, channels): x[0] = rgba(channels[0], channels[1], channels[2], channels[3]) - x = cuda.device_array(1, dtype=np.int32) - channels = cuda.to_device( - np.asarray([1.0, 2.0, 3.0, 4.0], dtype=np.float32) - ) + x = cp.asarray([1], dtype=np.int32) + channels = cp.array([1.0, 2.0, 3.0, 4.0], dtype=np.float32) rgba_caller[1, 1](x, channels) diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py b/numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py index aa837dc1b..4e48c4869 100644 --- a/numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +++ b/numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py @@ -22,9 +22,18 @@ skip_on_cudasim, unittest, CUDATestCase, + skip_if_cupy_unavailable, ) import math +if config.ENABLE_CUDASIM: + import numpy as cp +else: + try: + import cupy as cp + except ImportError: + cp = None + def add(x, y): return x + y @@ -486,6 +495,7 @@ def add_device(a, b): self.assertEqual("Add two integers, device version", add_device.__doc__) @skip_on_cudasim("Cudasim does not have device pointers") + @skip_if_cupy_unavailable def test_dispatcher_cpointer_arguments(self): ptr = types.CPointer(types.int32) sig = void(ptr, int32, ptr, ptr, uint32) @@ -500,9 +510,9 @@ def axpy(r, a, x, y, n): a = 5 hx = np.arange(10, dtype=np.int32) hy = np.arange(10, dtype=np.int32) * 2 - dx = cuda.to_device(hx) - dy = cuda.to_device(hy) - dr = cuda.device_array_like(dx) + dx = cp.array(hx) + dy = cp.array(hy) + dr = cp.empty_like(dx) r_ptr = dr.__cuda_array_interface__["data"][0] x_ptr =
dx.__cuda_array_interface__["data"][0] @@ -511,7 +521,7 @@ def axpy(r, a, x, y, n): axpy[1, 32](r_ptr, a, x_ptr, y_ptr, N) expected = a * hx + hy - actual = dr.copy_to_host() + actual = dr.get() np.testing.assert_equal(expected, actual) @@ -847,6 +857,7 @@ def add_one_sig(x): if i < len(x): x[i] = i + 1 + @skip_if_cupy_unavailable def test_shared_memory_carveout_valid_values(self): carveout_values = ["MaxL1", "MaxShared", "default", 0, 50, 100, -1] @@ -862,9 +873,9 @@ def add_one(x): if i < x.size: x[i] = i + 1 - d_x = cuda.to_device(x) + d_x = cp.asarray(x) add_one[1, 10](d_x) - np.testing.assert_array_equal(d_x.copy_to_host(), expected) + np.testing.assert_array_equal(d_x.get(), expected) # with signature @cuda.jit("void(int32[:])", shared_memory_carveout=carveout) @@ -873,9 +884,9 @@ def add_one_sig(x): if i < x.size: x[i] = i + 1 - d_x = cuda.to_device(x) + d_x = cp.asarray(x) add_one_sig[1, 10](d_x) - np.testing.assert_array_equal(d_x.copy_to_host(), expected) + np.testing.assert_array_equal(d_x.get(), expected) if __name__ == "__main__": diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py b/numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py index da36e8635..23aee6869 100644 --- a/numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +++ b/numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py @@ -8,11 +8,21 @@ from numba.cuda import void, int32, float32, float64 from numba.cuda import guvectorize from numba import cuda -from numba.cuda.testing import skip_on_cudasim, CUDATestCase +from numba.cuda.testing import ( + skip_on_cudasim, + skip_if_cupy_unavailable, + CUDATestCase, + DeprecatedDeviceArrayApiWarning, +) import unittest from numba.cuda.core.errors import NumbaPerformanceWarning, TypingError from numba.cuda.tests.support import override_config +try: + import cupy as cp +except ImportError: + cp = None + def _get_matmulcore_gufunc(dtype=float32): @guvectorize( @@ -49,6 +59,7 @@ def test_gufunc_small(self): Gold = np.matmul(A, B) 
self.assertTrue(np.allclose(C, Gold)) + @skip_if_cupy_unavailable def test_gufunc_auto_transfer(self): gufunc = _get_matmulcore_gufunc() @@ -60,7 +71,7 @@ def test_gufunc_auto_transfer(self): matrix_ct, 4, 5 ) - dB = cuda.to_device(B) + dB = cp.asarray(B) C = gufunc(A, dB).copy_to_host() Gold = np.matmul(A, B) @@ -119,12 +130,16 @@ def test_gufunc_stream(self): ) stream = cuda.stream() - dA = cuda.to_device(A, stream) - dB = cuda.to_device(B, stream) - - dC = cuda.device_array(shape=(1001, 2, 5), dtype=A.dtype, stream=stream) - dC = gufunc(dA, dB, out=dC, stream=stream) - C = dC.copy_to_host(stream=stream) + with pytest.warns(DeprecatedDeviceArrayApiWarning): + dA = cuda.to_device(A, stream) + with pytest.warns(DeprecatedDeviceArrayApiWarning): + dB = cuda.to_device(B, stream) + + dC = cuda.device_array( + shape=(1001, 2, 5), dtype=A.dtype, stream=stream + ) + dC = gufunc(dA, dB, out=dC, stream=stream) + C = dC.copy_to_host(stream=stream) stream.synchronize() Gold = np.matmul(A, B) diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py b/numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py index 64769f585..26d8f4de7 100644 --- a/numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +++ b/numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py @@ -9,12 +9,24 @@ import numpy as np from numba import cuda, guvectorize -from numba.cuda.testing import skip_on_cudasim, CUDATestCase +from numba.cuda.testing import ( + skip_on_cudasim, + CUDATestCase, + DeprecatedDeviceArrayApiWarning, + skip_if_cupy_unavailable, +) import unittest +import pytest + +try: + import cupy as cp +except ImportError: + cp = None @skip_on_cudasim("ufunc API unsupported in the simulator") class TestGUFuncScalar(CUDATestCase): + @skip_if_cupy_unavailable def test_gufunc_scalar_output(self): # function type: # - has no void return type @@ -42,13 +54,13 @@ def sum_row(inp, out): out1 = np.empty(100, dtype=inp.dtype) out2 = np.empty(100, dtype=inp.dtype) - dev_inp = 
cuda.to_device(inp) # alloc and copy input data - dev_out1 = cuda.to_device(out1, copy=False) # alloc only + dev_inp = cp.asarray(inp) # alloc and copy input data + dev_out1 = cp.empty(out1.shape, dtype=out1.dtype) # alloc only sum_row(dev_inp, out=dev_out1) # invoke the gufunc dev_out2 = sum_row(dev_inp) # invoke the gufunc - dev_out1.copy_to_host(out1) # retrieve the result + out1 = dev_out1.get() # retrieve the result dev_out2.copy_to_host(out2) # retrieve the result # verify result @@ -119,7 +131,11 @@ def foo(a, b, out): # test error a = np.array(a) - da = cuda.to_device(a) + + # As this test specifically tests the behavior of passing a DeviceNDArray, + # we'll catch the expected warning explicitly here. + with pytest.warns(DeprecatedDeviceArrayApiWarning): + da = cuda.to_device(a) self.assertEqual(da.dtype, np.int64) with self.assertRaises(TypeError) as raises: foo(da, b) diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_idiv.py b/numba_cuda/numba/cuda/tests/cudapy/test_idiv.py index 9550e3e3b..a0fe365a8 100644 --- a/numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +++ b/numba_cuda/numba/cuda/tests/cudapy/test_idiv.py @@ -3,11 +3,20 @@ import numpy as np from numba import cuda -from numba.cuda import float32, float64, int32, void -from numba.cuda.testing import unittest, CUDATestCase +from numba.cuda import config, float32, float64, int32, void +from numba.cuda.testing import unittest, CUDATestCase, skip_if_cupy_unavailable + +if config.ENABLE_CUDASIM: + import numpy as cp +else: + try: + import cupy as cp + except ImportError: + cp = None class TestCudaIDiv(CUDATestCase): + @skip_if_cupy_unavailable def test_inplace_div(self): @cuda.jit(void(float32[:, :], int32, int32)) def div(grid, l_x, l_y): @@ -15,12 +24,12 @@ def div(grid, l_x, l_y): for y in range(l_y): grid[x, y] /= 2.0 - x = np.ones((2, 2), dtype=np.float32) - grid = cuda.to_device(x) + grid = cp.ones((2, 2), dtype=np.float32) div[1, 1](grid, 2, 2) - y = grid.copy_to_host() + y = grid.get() if not 
config.ENABLE_CUDASIM else grid self.assertTrue(np.all(y == 0.5)) + @skip_if_cupy_unavailable def test_inplace_div_double(self): @cuda.jit(void(float64[:, :], int32, int32)) def div_double(grid, l_x, l_y): @@ -28,10 +37,9 @@ def div_double(grid, l_x, l_y): for y in range(l_y): grid[x, y] /= 2.0 - x = np.ones((2, 2), dtype=np.float64) - grid = cuda.to_device(x) + grid = cp.ones((2, 2), dtype=np.float64) div_double[1, 1](grid, 2, 2) - y = grid.copy_to_host() + y = grid.get() if not config.ENABLE_CUDASIM else grid self.assertTrue(np.all(y == 0.5)) diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_ipc.py b/numba_cuda/numba/cuda/tests/cudapy/test_ipc.py index f22eec2e0..126a1d93a 100644 --- a/numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +++ b/numba_cuda/numba/cuda/tests/cudapy/test_ipc.py @@ -16,7 +16,7 @@ skip_on_cudasim, skip_under_cuda_memcheck, skip_on_wsl2, - CUDATestCase, + DeprecatedDeviceArrayApiTest, ForeignArray, ) from numba.cuda.tests.support import linux_only, windows_only @@ -64,11 +64,21 @@ def ipc_array_test(ipcarr, parent_pid): return arr -class CUDAIpcTestCase(CUDATestCase): +def _suppress_deprecated_warnings(): + import warnings + + # Local import: this runs in spawned worker processes via the pool initializer + from numba.cuda.testing import DeprecatedDeviceArrayApiWarning + + warnings.filterwarnings("ignore", category=DeprecatedDeviceArrayApiWarning) + + +class CUDAIpcTestCase(DeprecatedDeviceArrayApiTest): @classmethod def setUpClass(cls) -> None: cls.exe = concurrent.futures.ProcessPoolExecutor( - mp_context=mp.get_context("spawn") + mp_context=mp.get_context("spawn"), + initializer=_suppress_deprecated_warnings, ) @classmethod @@ -249,7 +259,7 @@ def test_ipc_array(self): @windows_only @skip_on_cudasim("Ipc not available in CUDASIM") -class TestIpcNotSupported(CUDATestCase): +class TestIpcNotSupported(DeprecatedDeviceArrayApiTest): def test_unsupported(self): arr = np.arange(10, dtype=np.intp) devarr = cuda.to_device(arr) diff --git 
a/numba_cuda/numba/cuda/tests/cudapy/test_laplace.py b/numba_cuda/numba/cuda/tests/cudapy/test_laplace.py index 8874c449d..46b9493b2 100644 --- a/numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +++ b/numba_cuda/numba/cuda/tests/cudapy/test_laplace.py @@ -4,8 +4,17 @@ import numpy as np from numba import cuda from numba.cuda import float64, void -from numba.cuda.testing import unittest, CUDATestCase +from numba.cuda.testing import unittest, CUDATestCase, skip_if_cupy_unavailable from numba.cuda.core import config +from contextlib import nullcontext + +if config.ENABLE_CUDASIM: + import numpy as cp +else: + try: + import cupy as cp + except ImportError: + cp = None # NOTE: CUDA kernel does not return any value @@ -17,6 +26,7 @@ class TestCudaLaplace(CUDATestCase): + @skip_if_cupy_unavailable def test_laplace_small(self): @cuda.jit(float64(float64, float64), device=True, inline="always") def get_max(a, b): @@ -93,21 +103,31 @@ def jocabi_relax_core(A, Anew, error): error_grid = np.zeros(griddim) - stream = cuda.stream() + cp_stream = ( + cp.cuda.Stream() if not config.ENABLE_CUDASIM else nullcontext() + ) + stream = ( + cuda.api.external_stream(cp_stream.ptr) + if not config.ENABLE_CUDASIM + else cuda.stream() + ) - dA = cuda.to_device(A, stream) # to device and don't come back - dAnew = cuda.to_device(Anew, stream) # to device and don't come back - derror_grid = cuda.to_device(error_grid, stream) + with cp_stream: + dA = cp.asarray(A) # to device and don't come back + dAnew = cp.asarray(Anew) # to device and don't come back + derror_grid = cp.asarray(error_grid) while error > tol and iter < iter_max: self.assertTrue(error_grid.dtype == np.float64) jocabi_relax_core[griddim, blockdim, stream](dA, dAnew, derror_grid) - derror_grid.copy_to_host(error_grid, stream=stream) - - # error_grid is available on host - stream.synchronize() + with cp_stream: + error_grid = ( + derror_grid.get() + if not config.ENABLE_CUDASIM + else derror_grid + ) error = 
np.abs(error_grid).max() diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_matmul.py b/numba_cuda/numba/cuda/tests/cudapy/test_matmul.py index 4c47207e6..36ffda2e7 100644 --- a/numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +++ b/numba_cuda/numba/cuda/tests/cudapy/test_matmul.py @@ -6,6 +6,13 @@ from numba import cuda from numba.cuda import float32, void from numba.cuda.core import config +import pytest +from contextlib import nullcontext + +if config.ENABLE_CUDASIM: + import numpy as cp +else: + cp = pytest.importorskip("cupy") # Ensure the test takes a reasonable amount of time in the simulator if config.ENABLE_CUDASIM: @@ -47,21 +54,28 @@ def cu_square_matrix_mul(A, B, C): cuda.syncthreads() - if x < n and y < n: - C[y, x] = acc + if x < n and y < n: + C[y, x] = acc np.random.seed(42) A = np.array(np.random.random((n, n)), dtype=np.float32) B = np.array(np.random.random((n, n)), dtype=np.float32) C = np.empty_like(A) - stream = cuda.stream() - with stream.auto_synchronize(): - dA = cuda.to_device(A, stream) - dB = cuda.to_device(B, stream) - dC = cuda.to_device(C, stream) - cu_square_matrix_mul[(bpg, bpg), (tpb, tpb), stream](dA, dB, dC) - dC.copy_to_host(C, stream) + stream = cp.cuda.Stream() if not config.ENABLE_CUDASIM else nullcontext() + nb_stream = ( + cuda.api.external_stream(stream.ptr) + if not config.ENABLE_CUDASIM + else cuda.stream() + ) + with stream: + dA = cp.asarray(A) + dB = cp.asarray(B) + dC = cp.asarray(C) + + cu_square_matrix_mul[(bpg, bpg), (tpb, tpb), nb_stream](dA, dB, dC) + with stream: + C = dC.get() if not config.ENABLE_CUDASIM else dC # Host compute Cans = np.dot(A, B) diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py b/numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py index 10df01fc0..35976ff6a 100644 --- a/numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +++ b/numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py @@ -2,11 +2,24 @@ # SPDX-License-Identifier: BSD-2-Clause from numba import cuda +from numba.cuda 
import config import numpy as np -from numba.cuda.testing import skip_on_cudasim, CUDATestCase +from numba.cuda.testing import ( + skip_on_cudasim, + CUDATestCase, + skip_if_cupy_unavailable, +) import threading import unittest +if config.ENABLE_CUDASIM: + import numpy as cp +else: + try: + import cupy as cp + except ImportError: + cp = None + class TestMultiGPUContext(CUDATestCase): @unittest.skipIf(len(cuda.gpus) < 2, "need more than 1 gpus") @@ -51,11 +64,12 @@ def check(inp, out): check(A, B) @skip_on_cudasim("Simulator does not support multiple threads") + @skip_if_cupy_unavailable def test_multithreaded(self): def work(gpu, dA, results, ridx): try: with gpu: - arr = dA.copy_to_host() + arr = dA.get() except Exception as e: results[ridx] = e @@ -63,7 +77,7 @@ def work(gpu, dA, results, ridx): else: results[ridx] = np.all(arr == np.arange(10)) - dA = cuda.to_device(np.arange(10)) + dA = cp.asarray(np.arange(10)) nthreads = 10 results = [None] * nthreads diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py b/numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py index d432d2939..7c83bf2bd 100644 --- a/numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +++ b/numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py @@ -8,16 +8,23 @@ from numba.cuda.testing import ( skip_on_cudasim, skip_under_cuda_memcheck, - CUDATestCase, + skip_if_cupy_unavailable, + DeprecatedDeviceArrayApiTest, ) import unittest +try: + import cupy as cp +except ImportError: + cp = None + from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor has_mp_get_context = hasattr(multiprocessing, "get_context") +@skip_if_cupy_unavailable def check_concurrent_compiling(): @cuda.jit def foo(x): @@ -27,17 +34,17 @@ def use_foo(x): foo[1, 1](x) return x - arrays = [cuda.to_device(np.arange(10)) for i in range(10)] + arrays = [cp.asarray(np.arange(10)) for i in range(10)] expected = np.arange(10) expected[0] += 1 with ThreadPoolExecutor(max_workers=4) as e: for ary in 
e.map(use_foo, arrays): - np.testing.assert_equal(ary, expected) + np.testing.assert_equal(ary.get(), expected) @skip_under_cuda_memcheck("Hangs cuda-memcheck") @skip_on_cudasim("disabled for cudasim") -class TestMultiThreadCompiling(CUDATestCase): +class TestMultiThreadCompiling(DeprecatedDeviceArrayApiTest): def test_concurrent_compiling(self): check_concurrent_compiling() diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_nondet.py b/numba_cuda/numba/cuda/tests/cudapy/test_nondet.py index 2becebd0a..df291a215 100644 --- a/numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +++ b/numba_cuda/numba/cuda/tests/cudapy/test_nondet.py @@ -3,17 +3,26 @@ import numpy as np from numba import cuda -from numba.cuda import float32, void -from numba.cuda.testing import unittest, CUDATestCase +from numba.cuda import config, float32, void +from numba.cuda.testing import unittest, CUDATestCase, skip_if_cupy_unavailable + +if config.ENABLE_CUDASIM: + import numpy as cp +else: + try: + import cupy as cp + except ImportError: + cp = None def generate_input(n): - A = np.array(np.arange(n * n).reshape(n, n), dtype=np.float32) - B = np.array(np.arange(n) + 0, dtype=A.dtype) + A = cp.array(np.arange(n * n).reshape(n, n), dtype=np.float32) + B = cp.array(np.arange(n) + 0, dtype=A.dtype) return A, B class TestCudaNonDet(CUDATestCase): + @skip_if_cupy_unavailable def test_for_pre(self): """Test issue with loop not running due to bad sign-extension at the for loop precondition. 
@@ -33,20 +42,21 @@ def diagproduct(c, a, b): N = 8 - A, B = generate_input(N) - - F = np.empty(A.shape, dtype=A.dtype) + dA, dB = generate_input(N) + dF = cp.empty(dA.shape, dtype=dA.dtype) blockdim = (32, 8) griddim = (1, 1) - dA = cuda.to_device(A) - dB = cuda.to_device(B) - dF = cuda.to_device(F, copy=False) diagproduct[griddim, blockdim](dF, dA, dB) - E = np.dot(A, np.diag(B)) - np.testing.assert_array_almost_equal(dF.copy_to_host(), E) + E = np.dot( + dA.get() if not config.ENABLE_CUDASIM else dA, + np.diag(dB.get() if not config.ENABLE_CUDASIM else dB), + ) + np.testing.assert_array_almost_equal( + dF.get() if not config.ENABLE_CUDASIM else dF, E + ) if __name__ == "__main__": diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_random.py b/numba_cuda/numba/cuda/tests/cudapy/test_random.py index c99e29aa5..4c3faeb99 100644 --- a/numba_cuda/numba/cuda/tests/cudapy/test_random.py +++ b/numba_cuda/numba/cuda/tests/cudapy/test_random.py @@ -20,7 +20,6 @@ xoroshiro128p_normal_float64, ) - # Distributions UNIFORM = 1 NORMAL = 2 diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_reduction.py b/numba_cuda/numba/cuda/tests/cudapy/test_reduction.py index 9a741b938..2add394e5 100644 --- a/numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +++ b/numba_cuda/numba/cuda/tests/cudapy/test_reduction.py @@ -3,9 +3,18 @@ import numpy as np from numba import cuda -from numba.cuda.core.config import ENABLE_CUDASIM -from numba.cuda.testing import CUDATestCase +from numba.cuda import config +from numba.cuda.testing import CUDATestCase, skip_if_cupy_unavailable import unittest +from numba.cuda import config + +if config.ENABLE_CUDASIM: + import numpy as cp +else: + try: + import cupy as cp + except ImportError: + cp = None # Avoid recompilation of the sum_reduce function by keeping it at global scope sum_reduce = cuda.Reduce(lambda a, b: a + b) @@ -19,7 +28,7 @@ def _sum_reduce(self, n): self.assertEqual(expect, got) def test_sum_reduce(self): - if ENABLE_CUDASIM: + if 
config.ENABLE_CUDASIM: # Minimal test set for the simulator (which only wraps # functools.reduce) test_sizes = [1, 16] @@ -53,9 +62,10 @@ def test_empty_array_host(self): got = sum_reduce(A) self.assertEqual(expect, got) + @skip_if_cupy_unavailable def test_empty_array_device(self): A = np.arange(0, dtype=np.float64) + 1 - dA = cuda.to_device(A) + dA = cp.asarray(A) expect = A.sum() got = sum_reduce(dA) self.assertEqual(expect, got) @@ -81,9 +91,10 @@ def test_non_identity_init(self): got = sum_reduce(A, init=init) self.assertEqual(expect, got) + @skip_if_cupy_unavailable def test_result_on_device(self): A = np.arange(10, dtype=np.float64) + 1 - got = cuda.to_device(np.zeros(1, dtype=np.float64)) + got = cp.zeros(1, dtype=np.float64) expect = A.sum() res = sum_reduce(A, res=got) self.assertIsNone(res) diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_slicing.py b/numba_cuda/numba/cuda/tests/cudapy/test_slicing.py index 9dedfcfd2..22d47d370 100644 --- a/numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +++ b/numba_cuda/numba/cuda/tests/cudapy/test_slicing.py @@ -3,7 +3,7 @@ import numpy as np from numba import cuda -from numba.cuda.testing import unittest, CUDATestCase +from numba.cuda.testing import unittest, DeprecatedDeviceArrayApiTest def foo(inp, out): @@ -16,7 +16,7 @@ def copy(inp, out): cufoo(inp[i, :], out[i, :]) -class TestCudaSlicing(CUDATestCase): +class TestCudaSlicing(DeprecatedDeviceArrayApiTest): def test_slice_as_arg(self): global cufoo cufoo = cuda.jit("void(int32[:], int32[:])", device=True)(foo) diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_sm.py b/numba_cuda/numba/cuda/tests/cudapy/test_sm.py index 663cf88fd..a6f177db7 100644 --- a/numba_cuda/numba/cuda/tests/cudapy/test_sm.py +++ b/numba_cuda/numba/cuda/tests/cudapy/test_sm.py @@ -9,12 +9,27 @@ from numba.core.errors import TypingError as NumbaTypingError from numba.cuda.core.errors import TypingError from numba.cuda import types -from numba.cuda.testing import unittest, 
CUDATestCase, skip_on_cudasim +from numba.cuda.testing import ( + unittest, + CUDATestCase, + skip_on_cudasim, + skip_if_cupy_unavailable, + DeprecatedDeviceArrayApiWarning, +) import numpy as np +from numba.cuda import config from numba.cuda.np import numpy_support as nps +if config.ENABLE_CUDASIM: + import numpy as cp +else: + try: + import cupy as cp + except ImportError: + cp = None from .extensions_usecases import struct_model_type, MyStruct +import pytest recordwith2darray = np.dtype([("i", np.int32), ("j", np.float32, (3, 2))]) @@ -65,6 +80,7 @@ def test_issue_fp16_support(self): self._check_shared_array_size_fp16(2, 2, types.float16) self._check_shared_array_size_fp16(2, 2, np.float16) + @skip_if_cupy_unavailable def test_issue_2393(self): """ Test issue of warp misalign address due to nvvm not knowing the @@ -91,7 +107,7 @@ def costs_func(d_block_costs): d_block_costs[0] = s_initialcost[0] + prediction block_costs = np.zeros(num_blocks, dtype=np.float64) - d_block_costs = cuda.to_device(block_costs) + d_block_costs = cp.asarray(block_costs) costs_func[num_blocks, threads_per_block](d_block_costs) @@ -130,7 +146,13 @@ def use_sm_chunk_copy(x, y): for j in range(nthreads): y[bd * bx + j] = sm[j] - d_result = cuda.device_array_like(arr) + if not config.ENABLE_CUDASIM: + with pytest.warns(DeprecatedDeviceArrayApiWarning): + # waiting on cupy support for record dtypes + d_result = cuda.to_device(arr) + else: + d_result = cuda.to_device(arr) + use_sm_chunk_copy[nblocks, nthreads](arr, d_result) host_result = d_result.copy_to_host() np.testing.assert_array_equal(arr, host_result) @@ -352,6 +374,7 @@ def slice_nonunit_reverse_stride(x): expected = np.array([99, 3, 99, 2, 99, 1], dtype=np.int32) self._test_dynshared_slice(slice_nonunit_reverse_stride, arr, expected) + @skip_if_cupy_unavailable def test_issue_5073(self): # An example with which Bug #5073 (slices of dynamic shared memory all # alias) was discovered. 
The kernel uses all threads in the block to @@ -393,9 +416,9 @@ def sm_slice_copy(x, y, chunksize): y[bd * bx + j] = sm1[j] y[bd * bx + j + chunksize] = sm2[j] - d_result = cuda.device_array_like(arr) + d_result = cp.empty_like(arr) sm_slice_copy[nblocks, nthreads, 0, nshared](arr, d_result, chunksize) - host_result = d_result.copy_to_host() + host_result = d_result.get() if not config.ENABLE_CUDASIM else d_result np.testing.assert_array_equal(arr, host_result) @skip_on_cudasim("Can't check typing in simulator") diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_ssa.py b/numba_cuda/numba/cuda/tests/cudapy/test_ssa.py index 2f242451f..5fa03deda 100644 --- a/numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +++ b/numba_cuda/numba/cuda/tests/cudapy/test_ssa.py @@ -12,13 +12,24 @@ from numba.cuda import types from numba import cuda -from numba.cuda import jit +from numba.cuda import config, jit from numba.cuda.core import errors from numba.cuda.extending import overload from numba.cuda.tests.support import override_config -from numba.cuda.testing import CUDATestCase, skip_on_cudasim - +from numba.cuda.testing import ( + CUDATestCase, + skip_on_cudasim, + skip_if_cupy_unavailable, +) + +if config.ENABLE_CUDASIM: + import numpy as cp +else: + try: + import cupy as cp + except ImportError: + cp = None _DEBUG = False @@ -38,11 +49,15 @@ class SSABaseTest(CUDATestCase): def check_func(self, func, result_array, *args): # For CUDA kernels, we need to create output arrays and call with [1,1] launch config # Create GPU array with same shape as expected result array - gpu_result_array = cuda.to_device(np.zeros_like(result_array)) + gpu_result_array = cp.asarray(np.zeros_like(result_array)) # Call the CUDA kernel func[1, 1](gpu_result_array, *copy.deepcopy(args)) - gpu_result = gpu_result_array.copy_to_host() + gpu_result = ( + gpu_result_array.get() + if not config.ENABLE_CUDASIM + else gpu_result_array + ) # Call the original Python function for expected result cpu_result =
np.zeros_like(result_array) @@ -57,6 +72,7 @@ class TestSSA(SSABaseTest): Contains tests to help isolate problems in SSA """ + @skip_if_cupy_unavailable def test_argument_name_reused(self): @jit def foo(result, x): @@ -65,6 +81,7 @@ def foo(result, x): self.check_func(foo, np.array([124.0]), 123) + @skip_if_cupy_unavailable def test_if_else_redefine(self): @jit def foo(result, x, y): @@ -78,6 +95,7 @@ def foo(result, x, y): self.check_func(foo, np.array([2.0]), 3, 2) self.check_func(foo, np.array([2.0]), 2, 3) + @skip_if_cupy_unavailable def test_sum_loop(self): @jit def foo(result, n): @@ -89,6 +107,7 @@ def foo(result, n): self.check_func(foo, np.array([0.0]), 0) self.check_func(foo, np.array([45.0]), 10) + @skip_if_cupy_unavailable def test_sum_loop_2vars(self): @jit def foo(result, n): @@ -103,6 +122,7 @@ def foo(result, n): self.check_func(foo, np.array([0.0, 0.0]), 0) self.check_func(foo, np.array([45.0, 110.0]), 10) + @skip_if_cupy_unavailable def test_sum_2d_loop(self): @jit def foo(result, n): @@ -145,12 +165,14 @@ def foo(result, n): @skip_on_cudasim( "Numba variable warnings are not supported in the simulator" ) + @skip_if_cupy_unavailable def test_undefined_var(self): with override_config("ALWAYS_WARN_UNINIT_VAR", 0): self.check_undefined_var(should_warn=False) with override_config("ALWAYS_WARN_UNINIT_VAR", 1): self.check_undefined_var(should_warn=True) + @skip_if_cupy_unavailable def test_phi_propagation(self): @jit def foo(result, actions): @@ -250,6 +272,7 @@ class TestReportedSSAIssues(SSABaseTest): # Tests from issues # https://github.com/numba/numba/issues?q=is%3Aopen+is%3Aissue+label%3ASSA + @skip_if_cupy_unavailable def test_issue2194(self): @jit def foo(result, V): @@ -264,6 +287,7 @@ def foo(result, V): V = np.empty(1) self.check_func(foo, np.array([1.0]), V) + @skip_if_cupy_unavailable def test_issue3094(self): @jit def foo(result, pred): @@ -275,6 +299,7 @@ def foo(result, pred): self.check_func(foo, np.array([0]), False) + 
@skip_if_cupy_unavailable def test_issue3931(self): @jit def foo(result, arr): @@ -289,6 +314,7 @@ def foo(result, arr): result_gpu = np.zeros((3, 2)) self.check_func(foo, result_gpu, np.zeros((3, 2))) + @skip_if_cupy_unavailable def test_issue3976(self): def overload_this(a): return 42 @@ -309,6 +335,7 @@ def ol(a): self.check_func(foo, np.array([42]), True) + @skip_if_cupy_unavailable def test_issue3979(self): @jit def foo(result, A, B): @@ -325,6 +352,7 @@ def foo(result, A, B): foo, np.array([2, 4]), np.array([1, 2]), np.array([3, 4]) ) + @skip_if_cupy_unavailable def test_issue5219(self): def overload_this(a, b=None): if isinstance(b, tuple): @@ -348,6 +376,7 @@ def test_tuple(result, a, b): self.check_func(test_tuple, np.array([2]), 1, (2,)) + @skip_if_cupy_unavailable def test_issue5223(self): @jit def bar(result, x): @@ -364,6 +393,7 @@ def bar(result, x): expected = np.ones(5) # Since len(a) == 5, it should return unchanged self.check_func(bar, expected, a) + @skip_if_cupy_unavailable def test_issue5243(self): @jit def foo(result, q, lin): @@ -374,6 +404,7 @@ def foo(result, q, lin): lin = np.array([0.1, 0.6, 0.3]) self.check_func(foo, np.array([0.1]), np.zeros((2, 2)), lin) + @skip_if_cupy_unavailable def test_issue5482_missing_variable_init(self): # Test error that lowering fails because variable is missing # a definition before use. @@ -394,6 +425,7 @@ def foo(result, x, v, n): self.check_func(foo, np.array([10]), 1, 5, 3) + @skip_if_cupy_unavailable def test_issue5493_unneeded_phi(self): # Test error that unneeded phi is inserted because variable does not # have a dominance definition. 
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_transpose.py b/numba_cuda/numba/cuda/tests/cudapy/test_transpose.py index 776936849..63130fe83 100644 --- a/numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +++ b/numba_cuda/numba/cuda/tests/cudapy/test_transpose.py @@ -5,14 +5,14 @@ from numba import cuda from numba.cuda.kernels.transpose import transpose from numba.cuda.testing import unittest -from numba.cuda.testing import skip_on_cudasim, CUDATestCase +from numba.cuda.testing import skip_on_cudasim, DeprecatedDeviceArrayApiTest recordwith2darray = np.dtype([("i", np.int32), ("j", np.float32, (3, 2))]) @skip_on_cudasim("Device Array API unsupported in the simulator") -class TestTranspose(CUDATestCase): +class TestTranspose(DeprecatedDeviceArrayApiTest): def test_transpose(self): variants = ( (5, 6, np.float64), diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py b/numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py index 222dee7d0..569cc1db4 100644 --- a/numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +++ b/numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py @@ -12,7 +12,15 @@ from numba.cuda.cudadrv.driver import CudaAPIError, driver from numba.cuda.testing import skip_on_cudasim from numba.cuda.testing import CUDATestCase +from numba.cuda.testing import skip_if_cupy_unavailable import unittest +import pytest + +try: + import cupy as cp +except ImportError: + cp = None +from numba.cuda.testing import DeprecatedDeviceArrayApiWarning # Signatures to test with - these are all homogeneous in dtype, so the output @@ -78,6 +86,7 @@ def vector_add(a, b): np.testing.assert_allclose(expected, actual) self.assertEqual(actual.dtype, ty) + @skip_if_cupy_unavailable def test_1d_async(self): for vectorize in vectorize_funcs: @@ -85,13 +94,15 @@ def test_1d_async(self): def vector_add(a, b): return a + b - stream = cuda.stream() + nb_stream = cuda.stream() + stream = cp.cuda.Stream() for ty in dtypes: data = np.array(np.random.random(self.N), 
dtype=ty) - device_data = cuda.to_device(data, stream) + with stream: + device_data = cp.asarray(data) - dresult = vector_add(device_data, device_data, stream=stream) + dresult = vector_add(device_data, device_data, stream=nb_stream) actual = dresult.copy_to_host() expected = np.add(data, data) @@ -153,6 +164,7 @@ def vector_add(a, b): # to be using addition). Instead, compare against the input dtype. self.assertEqual(dtype, actual.dtype) + @skip_if_cupy_unavailable def test_reduce_async(self): for vectorize in vectorize_funcs: @@ -160,18 +172,21 @@ def test_reduce_async(self): def vector_add(a, b): return a + b - stream = cuda.stream() + nb_stream = cuda.stream() + stream = cp.cuda.Stream() dtype = np.int32 for n in input_sizes: x = np.arange(n, dtype=dtype) expected = np.add.reduce(x) - dx = cuda.to_device(x, stream) - actual = vector_add.reduce(dx, stream=stream) + with stream: + dx = cp.asarray(x) + actual = vector_add.reduce(dx, stream=nb_stream) np.testing.assert_allclose(expected, actual) # Compare against the input dtype as in test_reduce(). 
self.assertEqual(dtype, actual.dtype) + @skip_if_cupy_unavailable def test_manual_transfer(self): for vectorize in vectorize_funcs: @@ -181,12 +196,13 @@ def vector_add(a, b): n = 10 x = np.arange(n, dtype=np.int32) - dx = cuda.to_device(x) + dx = cp.asarray(x) expected = x + x actual = vector_add(x, dx).copy_to_host() np.testing.assert_equal(expected, actual) self.assertEqual(expected.dtype, actual.dtype) + @skip_if_cupy_unavailable def test_ufunc_output_2d(self): for vectorize in vectorize_funcs: @@ -196,11 +212,11 @@ def vector_add(a, b): n = 10 x = np.arange(n, dtype=np.int32).reshape(2, 5) - dx = cuda.to_device(x) + dx = cp.asarray(x) vector_add(dx, dx, out=dx) expected = x + x - actual = dx.copy_to_host() + actual = dx.get() np.testing.assert_equal(expected, actual) self.assertEqual(expected.dtype, actual.dtype) @@ -256,56 +272,76 @@ def bar(x): self.assertEqual(bar.__name__, "bar") - def test_no_transfer_for_device_data(self): - for vectorize in vectorize_funcs: - # Initialize test data on the device prior to banning host <-> device - # transfer - - noise = np.random.randn(1, 3, 64, 64).astype(np.float32) - noise = cuda.to_device(noise) - - # A mock of a CUDA function that always raises a CudaAPIError - - def raising_transfer(*args, **kwargs): - raise CudaAPIError(999, "Transfer not allowed") - - # Use the mock for transfers between the host and device - old_HtoD = getattr(driver, "cuMemcpyHtoD", None) - old_DtoH = getattr(driver, "cuMemcpyDtoH", None) - - driver.cuMemcpyHtoD = raising_transfer - driver.cuMemcpyDtoH = raising_transfer - - # Ensure that the mock functions are working as expected +@skip_on_cudasim("ufunc API unsupported in the simulator") +class TestCUDAVectorizeNoTransfer(CUDATestCase): + """Test that vectorize operations on device data don't induce transfers.""" + + def setUp(self): + """Set up mocks to block host <-> device transfers.""" + super().setUp() + + # Initialize test data on the device prior to banning host <-> device + # transfer 
+ self.noise = np.random.randn(1, 3, 64, 64).astype(np.float32) + with pytest.warns(DeprecatedDeviceArrayApiWarning): + self.device_noise = cuda.to_device(self.noise) + + # A mock of a CUDA function that always raises a CudaAPIError + def raising_transfer(*args, **kwargs): + raise CudaAPIError(999, "Transfer not allowed") + + # Save the original implementations + self.old_HtoD = getattr(driver, "cuMemcpyHtoD", None) + self.old_DtoH = getattr(driver, "cuMemcpyDtoH", None) + + # Replace with mocks that prevent transfers + driver.cuMemcpyHtoD = raising_transfer + driver.cuMemcpyDtoH = raising_transfer + + def tearDown(self): + """Restore original transfer functions.""" + # Replace our mocks with the original implementations. If there was + # no original implementation, simply remove ours. + if self.old_HtoD is not None: + driver.cuMemcpyHtoD = self.old_HtoD + else: + if hasattr(driver, "cuMemcpyHtoD"): + del driver.cuMemcpyHtoD + + if self.old_DtoH is not None: + driver.cuMemcpyDtoH = self.old_DtoH + else: + if hasattr(driver, "cuMemcpyDtoH"): + del driver.cuMemcpyDtoH + + super().tearDown() + + def test_mock_blocks_device_to_host_transfer(self): + """Verify that the mock successfully blocks device-to-host transfers.""" + with self.assertRaisesRegex(CudaAPIError, "Transfer not allowed"): + self.device_noise.copy_to_host() + + def test_mock_blocks_host_to_device_transfer(self): + """Verify that the mock successfully blocks host-to-device transfers.""" + with self.assertRaisesRegex(CudaAPIError, "Transfer not allowed"): + with pytest.warns(DeprecatedDeviceArrayApiWarning): + cuda.to_device([1]) - with self.assertRaisesRegex(CudaAPIError, "Transfer not allowed"): - noise.copy_to_host() + def test_vectorize_with_device_data_no_transfer(self): + """Test that vectorize operations on device data don't induce transfers.""" + for vectorize in vectorize_funcs: + # Define and call a ufunc with data on the device + # This should not induce any transfers (which would raise 
CudaAPIError) + @vectorize(["float32(float32)"]) + def func(noise): + return noise + 1.0 - with self.assertRaisesRegex(CudaAPIError, "Transfer not allowed"): - cuda.to_device([1]) + # This should succeed without raising CudaAPIError + result = func(self.device_noise) - try: - # Check that defining and calling a ufunc with data on the device - # induces no transfers - - @vectorize(["float32(float32)"]) - def func(noise): - return noise + 1.0 - - func(noise) - finally: - # Replace our mocks with the original implementations. If there was - # no original implementation, simply remove ours. - - if old_HtoD is not None: - driver.cuMemcpyHtoD = old_HtoD - else: - del driver.cuMemcpyHtoD - if old_DtoH is not None: - driver.cuMemcpyDtoH = old_DtoH - else: - del driver.cuMemcpyDtoH + # Verify the result is still on the device and has the right shape + self.assertEqual(result.shape, self.device_noise.shape) if __name__ == "__main__": diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py b/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py index b2b8b2015..356a82515 100644 --- a/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +++ b/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py @@ -4,11 +4,22 @@ import numpy as np import math -from numba import cuda from numba.cuda import vectorize, int32, uint32, float32, float64 -from numba.cuda.testing import skip_on_cudasim, CUDATestCase +from numba.cuda import config +from numba.cuda.testing import ( + skip_on_cudasim, + CUDATestCase, + skip_if_cupy_unavailable, +) from numba.cuda.tests.support import CheckWarningsMixin +if config.ENABLE_CUDASIM: + import numpy as cp +else: + try: + import cupy as cp + except ImportError: + cp = None import unittest @@ -146,6 +157,7 @@ def fngpu(a, b): got = fngpu(a, b) np.testing.assert_almost_equal(expect, got) + @skip_if_cupy_unavailable def test_device_broadcast(self): """ Same test as .test_broadcast() but with device array as inputs @@ -162,7 +174,7 
@@ def fngpu(a, b): return a - b expect = fn(a, b) - got = fngpu(cuda.to_device(a), cuda.to_device(b)) + got = fngpu(cp.asarray(a), cp.asarray(b)) np.testing.assert_almost_equal(expect, got.copy_to_host()) diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py b/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py index 169e2c6b8..b6c53731f 100644 --- a/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +++ b/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py @@ -3,23 +3,32 @@ import numpy as np from numba.cuda import vectorize -from numba import cuda from numba.cuda import float64 -from numba.cuda.testing import skip_on_cudasim, CUDATestCase +from numba.cuda.testing import ( + skip_on_cudasim, + skip_if_cupy_unavailable, + CUDATestCase, +) import unittest +try: + import cupy as cp +except ImportError: + cp = None + sig = [float64(float64, float64)] @skip_on_cudasim("ufunc API unsupported in the simulator") class TestCUDAVectorizeScalarArg(CUDATestCase): + @skip_if_cupy_unavailable def test_vectorize_scalar_arg(self): @vectorize(sig, target="cuda") def vector_add(a, b): return a + b A = np.arange(10, dtype=np.float64) - dA = cuda.to_device(A) + dA = cp.asarray(A) v = vector_add(1.0, dA) np.testing.assert_array_almost_equal( diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_warning.py b/numba_cuda/numba/cuda/tests/cudapy/test_warning.py index 94f83c24d..5bae6ce77 100644 --- a/numba_cuda/numba/cuda/tests/cudapy/test_warning.py +++ b/numba_cuda/numba/cuda/tests/cudapy/test_warning.py @@ -21,6 +21,7 @@ ) from numba.cuda.core import config import warnings +from numba.cuda.testing import DeprecatedDeviceArrayApiWarning @skip_on_cudasim("cudasim does not raise performance warnings") @@ -87,7 +88,8 @@ def foo(r, x): r[0] = x + 1 N = 10 - ary = cuda.pinned_array(N, dtype=np.float32) + with pytest.warns(DeprecatedDeviceArrayApiWarning): + ary = cuda.pinned_array(N, dtype=np.float32) func = foo[1, N] with 
override_config("CUDA_WARN_ON_IMPLICIT_COPY", 1): @@ -103,7 +105,8 @@ def foo(r, x): r[0] = x + 1 N = 10 - ary = cuda.mapped_array(N, dtype=np.float32) + with pytest.warns(DeprecatedDeviceArrayApiWarning): + ary = cuda.mapped_array(N, dtype=np.float32) with override_config("CUDA_WARN_ON_IMPLICIT_COPY", 1): with warnings.catch_warnings(record=True) as w: @@ -118,7 +121,8 @@ def foo(r, x): r[0] = x + 1 N = 10 - ary = cuda.managed_array(N, dtype=np.float32) + with pytest.warns(DeprecatedDeviceArrayApiWarning): + ary = cuda.managed_array(N, dtype=np.float32) with override_config("CUDA_WARN_ON_IMPLICIT_COPY", 1): with warnings.catch_warnings(record=True) as w: @@ -132,7 +136,9 @@ def foo(r, x): r[0] = x + 1 N = 10 - ary = cuda.device_array(N, dtype=np.float32) + + with pytest.warns(DeprecatedDeviceArrayApiWarning): + ary = cuda.device_array(N, dtype=np.float32) with override_config("CUDA_WARN_ON_IMPLICIT_COPY", 1): with warnings.catch_warnings(record=True) as w: diff --git a/numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py b/numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py index 2d85c1241..3370a01e3 100644 --- a/numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +++ b/numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py @@ -3,9 +3,18 @@ import unittest -from numba.cuda.testing import CUDATestCase, skip_on_cudasim +from numba.cuda.testing import ( + CUDATestCase, + skip_if_cupy_unavailable, + skip_on_cudasim, +) from numba.cuda.tests.support import captured_stdout +try: + import cupy as cp +except ImportError: + cp = None + @skip_on_cudasim("cudasim doesn't support cuda import at non-top-level") class TestCPointer(CUDATestCase): @@ -25,6 +34,7 @@ def tearDown(self): self._captured_stdout.__exit__(None, None, None) super().tearDown() + @skip_if_cupy_unavailable def test_ex_cpointer(self): # ex_cpointer.sig.begin import numpy as np @@ -47,10 +57,10 @@ def add_one(x, n): # ex_cpointer.kernel.end # ex_cpointer.launch.begin - x = 
cuda.to_device(np.arange(10, dtype=np.uint8)) + x = cp.arange(10, dtype=np.uint8) # Print initial values of x - print(x.copy_to_host()) # [0 1 2 3 4 5 6 7 8 9] + print(x.get()) # [0 1 2 3 4 5 6 7 8 9] # Obtain a pointer to the data from from the CUDA Array Interface x_ptr = x.__cuda_array_interface__["data"][0] @@ -60,7 +70,7 @@ def add_one(x, n): add_one[1, 32](x_ptr, x_len) # Demonstrate that the data was updated by the kernel - print(x.copy_to_host()) # [ 1 2 3 4 5 6 7 8 9 10] + print(x.get()) # [ 1 2 3 4 5 6 7 8 9 10] # ex_cpointer.launch.end diff --git a/numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py b/numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py index 1a578a9af..22ce0dbb2 100644 --- a/numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +++ b/numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py @@ -10,6 +10,9 @@ ) from numba.cuda.tests.support import captured_stdout import numpy as np +import pytest + +cp = pytest.importorskip("cupy") @skip_on_cudasim("cudasim doesn't support cuda import at non-top-level") @@ -39,10 +42,10 @@ def test_ex_cpu_gpu_compat(self): # ex_cpu_gpu_compat.import.end # ex_cpu_gpu_compat.allocate.begin - X = cuda.to_device([1, 10, 234]) - Y = cuda.to_device([2, 2, 4014]) - Z = cuda.to_device([3, 14, 2211]) - results = cuda.to_device([0.0, 0.0, 0.0]) + X = cp.asarray([1, 10, 234]) + Y = cp.asarray([2, 2, 4014]) + Z = cp.asarray([3, 14, 2211]) + results = cp.asarray([0.0, 0.0, 0.0]) # ex_cpu_gpu_compat.allocate.end # ex_cpu_gpu_compat.define.begin @@ -72,9 +75,12 @@ def f(res, xarr, yarr, zarr): # [-126.79644737231007, 416.28324559588634, -218912930.2987788] # ex_cpu_gpu_compat.launch.end - expect = [business_logic(x, y, z) for x, y, z in zip(X, Y, Z)] + expect = [ + business_logic(x, y, z) + for x, y, z in zip(X.get(), Y.get(), Z.get()) + ] - np.testing.assert_equal(expect, results.copy_to_host()) + np.testing.assert_equal(expect, results.get()) if __name__ == "__main__": diff 
--git a/numba_cuda/numba/cuda/tests/doc_examples/test_globals.py b/numba_cuda/numba/cuda/tests/doc_examples/test_globals.py index 40913a150..6b50aac9f 100644 --- a/numba_cuda/numba/cuda/tests/doc_examples/test_globals.py +++ b/numba_cuda/numba/cuda/tests/doc_examples/test_globals.py @@ -3,9 +3,18 @@ import unittest -from numba.cuda.testing import CUDATestCase, skip_on_cudasim +from numba.cuda.testing import ( + CUDATestCase, + skip_if_cupy_unavailable, + skip_on_cudasim, +) from numba.cuda.tests.support import captured_stdout +try: + import cupy as cp +except ImportError: + cp = None + @skip_on_cudasim("cudasim doesn't support cuda import at non-top-level") class TestGlobals(CUDATestCase): @@ -25,6 +34,7 @@ def tearDown(self): self._captured_stdout.__exit__(None, None, None) super().tearDown() + @skip_if_cupy_unavailable def test_ex_globals_constant_capture(self): """ Test demonstrating how global variables are captured as constants. @@ -42,14 +52,12 @@ def compute_totals(quantities, totals): if i < totals.size: totals[i] = quantities[i] * PRICES[i] * (1 + TAX_RATE) - d_quantities = cuda.to_device( - np.array([1, 2, 3, 4, 5], dtype=np.float64) - ) - d_totals = cuda.device_array(5, dtype=np.float64) + d_quantities = cp.asarray(np.array([1, 2, 3, 4, 5], dtype=np.float64)) + d_totals = cp.zeros(5, dtype=np.float64) # First kernel call - compiles and captures values compute_totals[1, 32](d_quantities, d_totals) - print("Value of d_totals:", d_totals.copy_to_host()) + print("Value of d_totals:", d_totals.get()) # These modifications have no effect on subsequent kernel calls TAX_RATE = 0.10 # noqa: F841 @@ -57,13 +65,14 @@ def compute_totals(quantities, totals): # Second kernel call still uses the original values compute_totals[1, 32](d_quantities, d_totals) - print("Value of d_totals:", d_totals.copy_to_host()) + print("Value of d_totals:", d_totals.get()) # magictoken.ex_globals_constant_capture.end # Verify the values are the same (original values were captured) 
expected = np.array([10.8, 54.0, 16.2, 64.8, 162.0]) - np.testing.assert_allclose(d_totals.copy_to_host(), expected) + np.testing.assert_allclose(d_totals.get(), expected) + @skip_if_cupy_unavailable def test_ex_globals_device_array_capture(self): """ Test demonstrating how global device arrays are captured by pointer. @@ -73,7 +82,7 @@ def test_ex_globals_device_array_capture(self): from numba import cuda # Global device array - pointer is captured, not data - PRICES = cuda.to_device( + PRICES = cp.asarray( np.array([10.0, 25.0, 5.0, 15.0, 30.0], dtype=np.float32) ) @@ -83,28 +92,26 @@ def compute_totals(quantities, totals): if i < totals.size: totals[i] = quantities[i] * PRICES[i] - d_quantities = cuda.to_device( + d_quantities = cp.asarray( np.array([1.0, 1.0, 1.0, 1.0, 1.0], dtype=np.float32) ) - d_totals = cuda.device_array(5, dtype=np.float32) + d_totals = cp.zeros(5, dtype=np.float32) # First kernel call compute_totals[1, 32](d_quantities, d_totals) - print(d_totals.copy_to_host()) # [10. 25. 5. 15. 30.] + print(d_totals.get()) # [10. 25. 5. 15. 30.] # Mutate the device array in-place - PRICES.copy_to_device( - np.array([20.0, 50.0, 10.0, 30.0, 60.0], dtype=np.float32) - ) + PRICES[:] = cp.array([20.0, 50.0, 10.0, 30.0, 60.0], dtype=np.float32) # Second kernel call sees the updated values compute_totals[1, 32](d_quantities, d_totals) - print(d_totals.copy_to_host()) # [20. 50. 10. 30. 60.] + print(d_totals.get()) # [20. 50. 10. 30. 60.] 
# magictoken.ex_globals_device_array_capture.end # Verify the second call sees updated values expected = np.array([20.0, 50.0, 10.0, 30.0, 60.0], dtype=np.float32) - np.testing.assert_allclose(d_totals.copy_to_host(), expected) + np.testing.assert_allclose(d_totals.get(), expected) if __name__ == "__main__": diff --git a/numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py b/numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py index 01eefbf4f..be0517bff 100644 --- a/numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +++ b/numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py @@ -10,6 +10,9 @@ skip_unless_cc_60, ) from numba.cuda.tests.support import captured_stdout +import pytest + +cp = pytest.importorskip("cupy") @skip_if_cudadevrt_missing @@ -49,10 +52,10 @@ def test_ex_laplace(self): # Middle element is made very hot data[500] = 10000 - buf_0 = cuda.to_device(data) + buf_0 = cp.asarray(data) # This extra array is used for synchronization purposes - buf_1 = cuda.device_array_like(buf_0) + buf_1 = cp.zeros_like(buf_0) niter = 10000 # ex_laplace.allocate.end @@ -63,7 +66,7 @@ def test_ex_laplace(self): fig, ax = plt.subplots(figsize=(16 * 0.66, 9 * 0.66)) plt.plot( np.arange(len(buf_0)), - buf_0.copy_to_host(), + buf_0.get(), lw=3, marker="*", color="black", @@ -128,7 +131,7 @@ def solve_heat_equation(buf_0, buf_1, timesteps, k): solve_heat_equation.forall(len(data))(buf_0, buf_1, niter, 0.25) # ex_laplace.launch.end - results = buf_1.copy_to_host() + results = buf_1.get() if plot: fig, ax = plt.subplots(figsize=(16 * 0.66, 9 * 0.66)) plt.plot( diff --git a/numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py b/numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py index f68fde4bd..3b0ad46ff 100644 --- a/numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +++ b/numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py @@ -10,8 +10,18 @@ "magictoken" is used for markers as beginning and ending of example text. 
""" -from numba.cuda.testing import skip_on_cudasim +import unittest + +from numba.cuda.testing import ( + CUDATestCase, + skip_if_cupy_unavailable, + skip_on_cudasim, +) from numba.cuda.tests.support import captured_stdout +import pytest + + +cp = pytest.importorskip("cupy") @skip_on_cudasim("cudasim doesn't support cuda import at non-top-level") @@ -44,9 +54,9 @@ def matmul(A, B, C): y_h = np.ones([4, 4]) z_h = np.zeros([4, 4]) - x_d = cuda.to_device(x_h) - y_d = cuda.to_device(y_h) - z_d = cuda.to_device(z_h) + x_d = cp.asarray(x_h) + y_d = cp.asarray(y_h) + z_d = cp.asarray(z_h) threadsperblock = (16, 16) blockspergrid_x = math.ceil(z_h.shape[0] / threadsperblock[0]) @@ -54,7 +64,7 @@ def matmul(A, B, C): blockspergrid = (blockspergrid_x, blockspergrid_y) matmul[blockspergrid, threadsperblock](x_d, y_d, z_d) - z_h = z_d.copy_to_host() + z_h = z_d.get() print(z_h) print(x_h @ y_h) # magictoken.ex_run_matmul.end @@ -114,9 +124,9 @@ def fast_matmul(A, B, C): y_h = np.ones([4, 4]) z_h = np.zeros([4, 4]) - x_d = cuda.to_device(x_h) - y_d = cuda.to_device(y_h) - z_d = cuda.to_device(z_h) + x_d = cp.asarray(x_h) + y_d = cp.asarray(y_h) + z_d = cp.asarray(z_h) threadsperblock = (TPB, TPB) blockspergrid_x = math.ceil(z_h.shape[0] / threadsperblock[0]) @@ -124,7 +134,7 @@ def fast_matmul(A, B, C): blockspergrid = (blockspergrid_x, blockspergrid_y) fast_matmul[blockspergrid, threadsperblock](x_d, y_d, z_d) - z_h = z_d.copy_to_host() + z_h = z_d.get() print(z_h) print(x_h @ y_h) # magictoken.ex_run_fast_matmul.end @@ -139,9 +149,9 @@ def fast_matmul(A, B, C): y_h = np.ones([23, 7]) z_h = np.zeros([5, 7]) - x_d = cuda.to_device(x_h) - y_d = cuda.to_device(y_h) - z_d = cuda.to_device(z_h) + x_d = cp.asarray(x_h) + y_d = cp.asarray(y_h) + z_d = cp.asarray(z_h) threadsperblock = (TPB, TPB) grid_y_max = max(x_h.shape[0], y_h.shape[0]) @@ -151,7 +161,7 @@ def fast_matmul(A, B, C): blockspergrid = (blockspergrid_x, blockspergrid_y) fast_matmul[blockspergrid, threadsperblock](x_d, 
y_d, z_d) - z_h = z_d.copy_to_host() + z_h = z_d.get() print(z_h) print(x_h @ y_h) # magictoken.ex_run_nonsquare.end diff --git a/numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py b/numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py index 67ad2bc88..d7ddb9f27 100644 --- a/numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +++ b/numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py @@ -5,11 +5,17 @@ from numba.cuda.testing import ( CUDATestCase, + skip_if_cupy_unavailable, skip_on_cudasim, skip_on_standalone_numba_cuda, ) from numba.cuda.tests.support import captured_stdout +try: + import cupy as cp +except ImportError: + cp = None + @skip_on_cudasim("cudasim doesn't support cuda import at non-top-level") class TestMonteCarlo(CUDATestCase): @@ -29,6 +35,7 @@ def tearDown(self): super().tearDown() @skip_on_standalone_numba_cuda + @skip_if_cupy_unavailable def test_ex_montecarlo(self): # ex_montecarlo.import.begin import numba @@ -80,7 +87,7 @@ def mc_integrate(lower_lim, upper_lim, nsamps): approximate the definite integral of `func` from `lower_lim` to `upper_lim` """ - out = cuda.to_device(np.zeros(nsamps, dtype="float32")) + out = cp.zeros(nsamps, dtype="float32") rng_states = create_xoroshiro128p_states(nsamps, seed=42) # jit the function for use in CUDA kernels diff --git a/numba_cuda/numba/cuda/tests/doc_examples/test_random.py b/numba_cuda/numba/cuda/tests/doc_examples/test_random.py index f8c198a2c..f34553fe6 100644 --- a/numba_cuda/numba/cuda/tests/doc_examples/test_random.py +++ b/numba_cuda/numba/cuda/tests/doc_examples/test_random.py @@ -5,7 +5,11 @@ # "magictoken" is used for markers as beginning and ending of example text. 
import unittest + from numba.cuda.testing import CUDATestCase, skip_on_cudasim +import pytest + +cp = pytest.importorskip("cupy") @skip_on_cudasim("cudasim doesn't support cuda import at non-top-level") @@ -50,12 +54,12 @@ def random_3d(arr, rng_states): rng_states = create_xoroshiro128p_states(nthreads, seed=1) # Generate random numbers - arr = cuda.device_array((X, Y, Z), dtype=np.float32) + arr = cp.zeros((X, Y, Z), dtype=np.float32) random_3d[(gx, gy, gz), (bx, by, bz)](arr, rng_states) # magictoken.ex_3d_grid.end # Some basic tests of the randomly-generated numbers - host_arr = arr.copy_to_host() + host_arr = arr.get() self.assertGreater(np.mean(host_arr), 0.49) self.assertLess(np.mean(host_arr), 0.51) self.assertTrue(np.all(host_arr <= 1.0)) diff --git a/numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py b/numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py index dbb9dc079..024d5713a 100644 --- a/numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +++ b/numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py @@ -3,8 +3,15 @@ import unittest -from numba.cuda.testing import CUDATestCase, skip_on_cudasim +from numba.cuda.testing import ( + CUDATestCase, + skip_if_cupy_unavailable, + skip_on_cudasim, +) from numba.cuda.tests.support import captured_stdout +import pytest + +cp = pytest.importorskip("cupy") @skip_on_cudasim("cudasim doesn't support cuda import at non-top-level") @@ -33,7 +40,7 @@ def test_ex_reduction(self): # ex_reduction.allocate.begin # generate data - a = cuda.to_device(np.arange(1024)) + a = cp.asarray(np.arange(1024)) nelem = len(a) # ex_reduction.allocate.end @@ -69,11 +76,11 @@ def array_sum(data): # ex_reduction.launch.begin array_sum[1, nelem](a) - print(a[0]) # 523776 + print(a.get()[0]) # 523776 print(sum(np.arange(1024))) # 523776 # ex_reduction.launch.end - np.testing.assert_equal(a[0], sum(np.arange(1024))) + np.testing.assert_equal(a.get()[0], sum(np.arange(1024))) if __name__ == "__main__": diff --git 
a/numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py b/numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py index c22bebf76..1b165b3f5 100644 --- a/numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +++ b/numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py @@ -6,11 +6,17 @@ from numba.cuda.testing import ( CUDATestCase, skip_if_cudadevrt_missing, + skip_if_cupy_unavailable, skip_on_cudasim, skip_unless_cc_60, ) from numba.cuda.tests.support import captured_stdout +try: + import cupy as cp +except ImportError: + cp = None + @skip_if_cudadevrt_missing @skip_unless_cc_60 @@ -31,6 +37,7 @@ def tearDown(self): self._captured_stdout.__exit__(None, None, None) super().tearDown() + @skip_if_cupy_unavailable def test_ex_sessionize(self): # ex_sessionize.import.begin import numpy as np @@ -42,41 +49,39 @@ def test_ex_sessionize(self): # ex_sessionize.allocate.begin # Generate data - ids = cuda.to_device( - np.array( - [ - 1, - 1, - 1, - 1, - 1, - 1, - 2, - 2, - 2, - 3, - 3, - 3, - 3, - 3, - 3, - 3, - 3, - 3, - 3, - 4, - 4, - 4, - 4, - 4, - 4, - 4, - 4, - 4, - ] - ) + ids = cp.array( + [ + 1, + 1, + 1, + 1, + 1, + 1, + 2, + 2, + 2, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 4, + 4, + 4, + 4, + 4, + 4, + 4, + 4, + 4, + ] ) - sec = cuda.to_device( + sec = cp.asarray( np.array( [ 1, @@ -109,10 +114,11 @@ def test_ex_sessionize(self): 25003, ], dtype="datetime64[ns]", - ).astype("int64") # Cast to int64 for compatibility + ).astype("int64") ) + # Create a vector to hold the results - results = cuda.to_device(np.zeros(len(ids))) + results = cp.zeros(len(ids)) # ex_sessionize.allocate.end # ex_sessionize.kernel.begin @@ -161,7 +167,7 @@ def sessionize(user_id, timestamp, results): # ex_sessionize.launch.begin sessionize.forall(len(ids))(ids, sec, results) - print(results.copy_to_host()) + print(results.get()) # array([ 0., 0., 0., 3., 3., 3., # 6., 6., 6., 9., 9., 11., # 11., 13., 13., 13., 13., 17., @@ -199,7 +205,7 @@ def 
sessionize(user_id, timestamp, results): 24, 24, ] - np.testing.assert_equal(expect, results.copy_to_host()) + np.testing.assert_equal(expect, results.get()) if __name__ == "__main__": diff --git a/numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py b/numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py index 3172298fe..1dd5aad85 100644 --- a/numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +++ b/numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py @@ -5,6 +5,9 @@ from numba.cuda.testing import CUDATestCase, skip_on_cudasim from numba.cuda.tests.support import captured_stdout +import pytest + +cp = pytest.importorskip("cupy") @skip_on_cudasim("cudasim doesn't support cuda import at non-top-level") @@ -48,14 +51,14 @@ def f(a, b, c): # ex_vecadd.allocate.begin N = 100000 - a = cuda.to_device(np.random.random(N)) - b = cuda.to_device(np.random.random(N)) - c = cuda.device_array_like(a) + a = cp.asarray(np.random.random(N)) + b = cp.asarray(np.random.random(N)) + c = cp.empty(a.shape) # ex_vecadd.allocate.end # ex_vecadd.forall.begin f.forall(len(a))(a, b, c) - print(c.copy_to_host()) + print(c.get()) # ex_vecadd.forall.end # ex_vecadd.launch.begin @@ -64,12 +67,10 @@ def f(a, b, c): # Enough blocks to cover the entire vector depending on its length nblocks = (len(a) // nthreads) + 1 f[nblocks, nthreads](a, b, c) - print(c.copy_to_host()) + print(c.get()) # ex_vecadd.launch.end - np.testing.assert_equal( - c.copy_to_host(), a.copy_to_host() + b.copy_to_host() - ) + np.testing.assert_equal(c.get(), a.get() + b.get()) if __name__ == "__main__": diff --git a/numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py b/numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py index 5a27f2b87..05f926a36 100644 --- a/numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +++ b/numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py @@ -5,7 +5,7 @@ import itertools import numpy as np from numba.cuda.cudadrv.dummyarray import Array -from numba.cuda.testing import 
skip_on_cudasim +from numba.cuda.testing import skip_on_cudasim, DeprecatedDeviceArrayApiTest @skip_on_cudasim("Tests internals of the CUDA driver device array") @@ -421,8 +421,9 @@ def test_empty_array_flags(self): self.assertTrue(arr.flags["F_CONTIGUOUS"]) +# Typing of DeviceNDarray is deprecated @skip_on_cudasim("Tests CUDA device array type inference") -class TestEmptyArrayTypeInference(unittest.TestCase): +class TestEmptyArrayTypeInference(DeprecatedDeviceArrayApiTest): def test_empty_array_typeof(self): from numba import cuda, typeof diff --git a/numba_cuda/numba/cuda/tests/nrt/test_nrt.py b/numba_cuda/numba/cuda/tests/nrt/test_nrt.py index 459032d71..0d2011f8e 100644 --- a/numba_cuda/numba/cuda/tests/nrt/test_nrt.py +++ b/numba_cuda/numba/cuda/tests/nrt/test_nrt.py @@ -6,7 +6,11 @@ import numpy as np import unittest -from numba.cuda.testing import CUDATestCase, skip_on_cudasim +from numba.cuda.testing import ( + CUDATestCase, + skip_on_cudasim, + skip_if_cupy_unavailable, +) from numba.cuda.tests.support import run_in_subprocess, override_config from numba.cuda import get_current_device from numba.cuda.cudadrv.nvrtc import compile @@ -24,6 +28,14 @@ Object, ) +if config.ENABLE_CUDASIM: + import numpy as cp +else: + try: + import cupy as cp + except ImportError: + cp = None + TEST_BIN_DIR = os.getenv("NUMBA_CUDA_TEST_BIN_DIR") if not config.ENABLE_CUDASIM: @@ -382,6 +394,7 @@ def foo(): self.assertEqual(stats.free, stats_free) self.assertEqual(stats.mi_free, stats_mi_free) + @skip_if_cupy_unavailable def test_nrt_toggle_enabled(self): def array_reshape1d(arr, newshape, got): y = arr.reshape(newshape) @@ -398,7 +411,7 @@ def kernel(out): out = out.reshape(out.shape) out[0] = 1 - out = cuda.to_device(np.zeros(1, dtype=np.float64)) + out = cp.zeros(1, dtype=np.float64) kernel[1, 1](out) with override_config("CUDA_ENABLE_NRT", False): diff --git a/numba_cuda/numba/cuda/vectorizers.py b/numba_cuda/numba/cuda/vectorizers.py index 5bc53c335..2ef068233 100644 --- 
a/numba_cuda/numba/cuda/vectorizers.py +++ b/numba_cuda/numba/cuda/vectorizers.py @@ -9,6 +9,9 @@ GeneralizedUFunc, GUFuncCallSteps, ) +from numba.cuda import _api +import warnings +from numba.cuda.cudadrv.devicearray import DeprecatedDeviceArrayApiWarning class CUDAUFuncDispatcher: @@ -54,7 +57,7 @@ def reduce(self, arg, stream=0): if cuda.cudadrv.devicearray.is_cuda_ndarray(arg): mem = arg else: - mem = cuda.to_device(arg, stream) + mem = cuda._api._to_device(arg, stream) # do reduction out = self.__reduce(mem, gpu_mems, stream) # use a small buffer to store the result element @@ -66,7 +69,11 @@ def reduce(self, arg, stream=0): def __reduce(self, mem, gpu_mems, stream): n = mem.shape[0] if n % 2 != 0: # odd? - fatcut, thincut = mem.split(n - 1) + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", category=DeprecatedDeviceArrayApiWarning + ) + fatcut, thincut = mem.split(n - 1) # prevent freeing during async mode gpu_mems.append(fatcut) gpu_mems.append(thincut) @@ -75,7 +82,11 @@ def __reduce(self, mem, gpu_mems, stream): gpu_mems.append(out) return self(out, thincut, out=out, stream=stream) else: # even? - left, right = mem.split(n // 2) + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", category=DeprecatedDeviceArrayApiWarning + ) + left, right = mem.split(n // 2) # prevent freeing during async mode gpu_mems.append(left) gpu_mems.append(right) @@ -97,7 +108,7 @@ def __init__(self, nin, nout, args, kwargs): self._stream = kwargs.get("stream", 0) def is_device_array(self, obj): - return cuda.is_cuda_array(obj) + return _api._is_cuda_array(obj) def as_device_array(self, obj): # We don't want to call as_cuda_array on objects that are already Numba @@ -107,17 +118,19 @@ def as_device_array(self, obj): # When we have a Numba device array, we can simply return it. 
if cuda.cudadrv.devicearray.is_cuda_ndarray(obj): return obj - return cuda.as_cuda_array(obj) + return cuda._api._as_cuda_array(obj) def to_device(self, hostary): - return cuda.to_device(hostary, stream=self._stream) + return _api._to_device(hostary, stream=self._stream) def to_host(self, devary, hostary): out = devary.copy_to_host(hostary, stream=self._stream) return out def allocate_device_array(self, shape, dtype): - return cuda.device_array(shape=shape, dtype=dtype, stream=self._stream) + return cuda._api._device_array( + shape=shape, dtype=dtype, stream=self._stream + ) def launch_kernel(self, kernel, nelem, args): kernel.forall(nelem, stream=self._stream)(*args) @@ -133,7 +146,7 @@ def _call_steps(self): return _CUDAGUFuncCallSteps def _broadcast_scalar_input(self, ary, shape): - return cuda.cudadrv.devicearray.DeviceNDArray( + return cuda.cudadrv.devicearray.DeviceNDArray._create_nowarn( shape=shape, strides=(0,), dtype=ary.dtype, gpu_data=ary.gpu_data ) @@ -141,7 +154,7 @@ def _broadcast_add_axis(self, ary, newshape): newax = len(newshape) - len(ary.shape) # Add 0 strides for missing dimension newstrides = (0,) * newax + ary.strides - return cuda.cudadrv.devicearray.DeviceNDArray( + return cuda.cudadrv.devicearray.DeviceNDArray._create_nowarn( shape=newshape, strides=newstrides, dtype=ary.dtype, @@ -160,7 +173,7 @@ def launch(self, func, count, stream, args): func.forall(count, stream=stream)(*args) def is_device_array(self, obj): - return cuda.is_cuda_array(obj) + return cuda._api._is_cuda_array(obj) def as_device_array(self, obj): # We don't want to call as_cuda_array on objects that are already Numba @@ -170,16 +183,18 @@ def as_device_array(self, obj): # When we have a Numba device array, we can simply return it. 
if cuda.cudadrv.devicearray.is_cuda_ndarray(obj): return obj - return cuda.as_cuda_array(obj) + return _api._as_cuda_array(obj) def to_device(self, hostary, stream): - return cuda.to_device(hostary, stream=stream) + return _api._to_device(hostary, stream=stream) def to_host(self, devary, stream): return devary.copy_to_host(stream=stream) def allocate_device_array(self, shape, dtype, stream): - return cuda.device_array(shape=shape, dtype=dtype, stream=stream) + # want to return a deprecated DeviceNDArray without warning + # + return _api._device_array(shape=shape, dtype=dtype, stream=stream) def broadcast_device(self, ary, shape): ax_differs = [ @@ -194,7 +209,7 @@ def broadcast_device(self, ary, shape): for ax in ax_differs: strides[ax] = 0 - return cuda.cudadrv.devicearray.DeviceNDArray( + return cuda.cudadrv.devicearray.DeviceNDArray._create_nowarn( shape=shape, strides=strides, dtype=ary.dtype, gpu_data=ary.gpu_data ) diff --git a/pixi.lock b/pixi.lock index 0a2ceb9fb..e6a1d87d6 100644 --- a/pixi.lock +++ b/pixi.lock @@ -13,23 +13,44 @@ environments: - conda: https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-2_gnu.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-hda65f42_8.conda - conda: https://conda.anaconda.org/conda-forge/noarch/ca-certificates-2026.1.4-hbd8a1cb_0.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/cffi-2.0.0-py314h6fefde3_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/cffi-2.0.0-py314h4a8dc5f_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/cfgv-3.5.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-cccl_linux-64-13.1.115-ha770c72_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-cudart-dev_linux-64-13.1.80-h376f20c_0.conda + - conda: 
https://conda.anaconda.org/conda-forge/noarch/cuda-cudart-static_linux-64-13.1.80-h376f20c_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-cudart_linux-64-13.1.80-h376f20c_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/cuda-nvrtc-13.1.115-hecca717_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-13.1-h2ff5cdb_3.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-13.6.0-py314h972ecce_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-core-13.6.0-py314h3ed1f13_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/fastrlock-0.8.3-py314h8c728da_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/icu-78.2-h33c6efd_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/identify-1.2.2-py_0.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.3.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.45-default_hbd61a6d_105.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libblas-3.11.0-5_h4a7cf45_openblas.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libcblas-3.11.0-5_h0358290_openblas.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libcublas-13.2.1.1-h676940d_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libcufft-12.1.0.78-hecca717_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libcurand-10.4.1.81-h676940d_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libcusolver-12.0.9.81-h676940d_0.conda + - conda: 
https://conda.anaconda.org/conda-forge/linux-64/libcusparse-12.7.3.1-hecca717_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.7.3-hecca717_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libffi-3.5.2-h9ec8514_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libgcc-15.2.0-he0feb66_16.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libgfortran-15.2.0-h69a702a_16.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libgfortran5-15.2.0-h68bc16d_16.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libgomp-15.2.0-he0feb66_16.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/liblzma-5.8.1-hb9d3cd8_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/liblapack-3.11.0-5_h47877c9_openblas.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/liblzma-5.8.2-hb03c661_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libmpdec-4.0.0-hb9d3cd8_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libnvjitlink-13.1.115-hecca717_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libopenblas-0.3.30-pthreads_h94d23a6_4.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.51.2-hf4e2dac_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-15.2.0-h934c35e_16.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libuuid-2.41.3-h5347b49_0.conda @@ -37,20 +58,21 @@ environments: - conda: https://conda.anaconda.org/conda-forge/linux-64/make-4.4.1-hb9d3cd8_2.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.5-h2d0b736_3.conda - conda: https://conda.anaconda.org/conda-forge/noarch/nodeenv-1.10.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/numpy-2.4.2-py314h2b28147_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/openssl-3.6.0-h26f9b46_0.conda - conda: 
https://conda.anaconda.org/conda-forge/noarch/packaging-25.0-pyh29332c3_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/platformdirs-4.5.1-pyhcf101f3_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/pluggy-1.6.0-pyhf9edf01_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/pre-commit-4.5.1-pyha770c72_0.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/psutil-7.2.1-py314h3f2afee_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/psutil-7.2.2-py314h0f05182_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/py-cpuinfo-9.0.0-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/pycparser-2.22-pyh29332c3_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/pygments-2.19.2-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/pytest-8.4.2-pyhcf101f3_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/pytest-benchmark-5.2.3-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/pytest-xdist-3.8.0-pyhd8ed1ab_0.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/python-3.14.2-he1279bd_0_cp314t.conda - - conda: https://conda.anaconda.org/conda-forge/noarch/python_abi-3.14-8_cp314t.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/python-3.14.2-h32b2ec7_101_cp314.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/python_abi-3.14-8_cp314.conda - conda: https://conda.anaconda.org/conda-forge/noarch/pyyaml-6.0.3-pyh7db6752_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/readline-8.3-h853b02a_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/setuptools-80.9.0-pyhff2d567_0.conda @@ -62,29 +84,50 @@ environments: - conda: https://conda.anaconda.org/conda-forge/linux-64/yaml-0.2.5-h280c20c_3.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.7-hb78ec9c_6.conda - pypi: 
https://files.pythonhosted.org/packages/5c/40/69ca9ea803303e14301fff9d4931b6d080b9603e134df0419c55e9764df4/filecheck-1.0.3-py3-none-any.whl - - pypi: https://files.pythonhosted.org/packages/79/2b/a826ba18d2179a56e144aef69e57fb2ab7c464ef0b2111940ee8a3a223a2/ml_dtypes-0.5.4-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl - - pypi: https://files.pythonhosted.org/packages/10/a7/cfbe475c35371cae1358e61f20c5f075badc18c4797ab4354140e1d283cf/numpy-2.4.1-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl + - pypi: https://files.pythonhosted.org/packages/c6/bb/82c7dcf38070b46172a517e2334e665c5bf374a262f99a283ea454bece7c/ml_dtypes-0.5.4-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl linux-aarch64: - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/_openmp_mutex-4.5-2_gnu.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/noarch/arm-variant-1.2.0-sbsa.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/bzip2-1.0.8-h4777abc_8.conda - conda: https://conda.anaconda.org/conda-forge/noarch/ca-certificates-2026.1.4-hbd8a1cb_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cffi-2.0.0-py314h0bd77cf_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/cfgv-3.5.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-cccl_linux-aarch64-13.1.115-h579c4fd_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-cudart-dev_linux-aarch64-13.1.80-h8f3c8d4_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-cudart-static_linux-aarch64-13.1.80-h8f3c8d4_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-cudart_linux-aarch64-13.1.80-h8f3c8d4_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cuda-nvrtc-13.1.115-h8f3c8d4_0.conda + - conda: 
https://conda.anaconda.org/conda-forge/noarch/cuda-version-13.1-h2ff5cdb_3.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-13.6.0-py314h3ec1dcb_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-core-13.6.0-py314heaf0aa5_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/fastrlock-0.8.3-py314h3642cf7_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/icu-78.2-hb1525cb_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/identify-2.6.15-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.3.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/ld_impl_linux-aarch64-2.45-default_h1979696_105.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libblas-3.11.0-5_haddc8a3_openblas.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcblas-3.11.0-5_hd72aa62_openblas.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcublas-13.2.1.1-he38c790_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcufft-12.1.0.78-h8f3c8d4_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcurand-10.4.1.81-he38c790_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcusolver-12.0.9.81-he38c790_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcusparse-12.7.3.1-h8f3c8d4_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libexpat-2.7.3-hfae3067_0.conda - conda: 
https://conda.anaconda.org/conda-forge/linux-aarch64/libffi-3.5.2-hd65408f_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libgcc-15.2.0-h8acb6b2_16.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libgfortran-15.2.0-he9431aa_16.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libgfortran5-15.2.0-h1b7bec0_16.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libgomp-15.2.0-h8acb6b2_16.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/liblapack-3.11.0-5_h88aeb00_openblas.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/liblzma-5.8.1-h86ecc28_2.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libmpdec-4.0.0-h86ecc28_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libnvjitlink-13.1.115-h8f3c8d4_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libopenblas-0.3.30-pthreads_h9d3fd7e_4.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libsqlite-3.51.2-h10b116e_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libstdcxx-15.2.0-hef695bb_16.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libuuid-2.41.3-h1022ec0_0.conda @@ -92,6 +135,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/make-4.4.1-h2a6d0cb_2.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/ncurses-6.5-ha32ae93_3.conda - conda: https://conda.anaconda.org/conda-forge/noarch/nodeenv-1.10.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/numpy-2.4.2-py314haac167e_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/openssl-3.6.0-h8e36d6e_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/packaging-25.0-pyh29332c3_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/platformdirs-4.5.1-pyhcf101f3_0.conda @@ -119,7 +163,6 @@ environments: - conda: 
https://conda.anaconda.org/conda-forge/linux-aarch64/zstd-1.5.7-h85ac4a6_6.conda - pypi: https://files.pythonhosted.org/packages/5c/40/69ca9ea803303e14301fff9d4931b6d080b9603e134df0419c55e9764df4/filecheck-1.0.3-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/04/f9/067b84365c7e83bda15bba2b06c6ca250ce27b20630b1128c435fb7a09aa/ml_dtypes-0.5.4-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl - - pypi: https://files.pythonhosted.org/packages/c0/c4/2e7908915c0e32ca636b92e4e4a3bdec4cb1e7eb0f8aedf1ed3c68a0d8cd/numpy-2.4.1-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl win-64: - conda: https://conda.anaconda.org/conda-forge/win-64/_openmp_mutex-4.5-2_gnu.conda - conda: https://conda.anaconda.org/conda-forge/win-64/bzip2-1.0.8-h0ad9c76_8.conda @@ -127,23 +170,49 @@ environments: - conda: https://conda.anaconda.org/conda-forge/win-64/cffi-2.0.0-py314h5a2d7ad_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/cfgv-3.5.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-cccl_win-64-13.1.115-h57928b3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-cudart-dev_win-64-13.1.80-hac47afa_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-cudart-static_win-64-13.1.80-hac47afa_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-cudart_win-64-13.1.80-hac47afa_0.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/cuda-nvrtc-13.1.115-hac47afa_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-13.1-h2ff5cdb_3.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/cupy-13.6.0-py314h59d4d8c_2.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/cupy-core-13.6.0-py314hc101868_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda - conda: 
https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/fastrlock-0.8.3-py314h8b4fd5f_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/icu-78.2-h637d24d_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/identify-2.6.15-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.3.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/libblas-3.11.0-5_hf2e6a31_mkl.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/libcblas-3.11.0-5_h2a3cdd5_mkl.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/libcublas-13.2.1.1-hac47afa_0.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/libcufft-12.1.0.78-hac47afa_0.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/libcurand-10.4.1.81-hac47afa_0.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/libcusolver-12.0.9.81-hac47afa_0.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/libcusparse-12.7.3.1-hac47afa_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/libexpat-2.7.3-hac47afa_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/libffi-3.5.2-h52bdfb6_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/libgcc-15.2.0-h8ee18e1_16.conda - conda: https://conda.anaconda.org/conda-forge/win-64/libgomp-15.2.0-h8ee18e1_16.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/libhwloc-2.12.2-default_h4379cf1_1000.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/libiconv-1.18-hc1393d2_2.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/liblapack-3.11.0-5_hf9ab0e9_mkl.conda - conda: https://conda.anaconda.org/conda-forge/win-64/liblzma-5.8.1-h2466b09_2.conda - 
conda: https://conda.anaconda.org/conda-forge/win-64/libmpdec-4.0.0-h2466b09_0.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/libnvjitlink-13.1.115-hac47afa_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/libsqlite-3.51.2-hf5d6505_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/libwinpthread-12.0.0.r4.gg4f2fc60ca-h57928b3_10.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/libxml2-16-2.15.1-h3cfd58e_1.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/libxml2-2.15.1-h779ef1b_1.conda - conda: https://conda.anaconda.org/conda-forge/win-64/libzlib-1.3.1-h2466b09_2.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/llvm-openmp-21.1.8-h4fa8253_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/make-4.4.1-h0e40799_2.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/mkl-2025.3.0-hac47afa_455.conda - conda: https://conda.anaconda.org/conda-forge/noarch/nodeenv-1.10.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/numpy-2.4.2-py314h06c3c77_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/openssl-3.6.0-h725018a_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/packaging-25.0-pyh29332c3_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/platformdirs-4.5.1-pyhcf101f3_0.conda @@ -160,6 +229,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/python_abi-3.14-8_cp314.conda - conda: https://conda.anaconda.org/conda-forge/noarch/pyyaml-6.0.3-pyh7db6752_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/setuptools-80.9.0-pyhff2d567_0.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/tbb-2022.3.0-h3155e25_2.conda - conda: https://conda.anaconda.org/conda-forge/win-64/tk-8.6.13-h2c6b04d_3.conda - conda: https://conda.anaconda.org/conda-forge/noarch/tomli-2.4.0-pyhcf101f3_0.conda - conda: 
https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.15.0-pyhcf101f3_0.conda @@ -174,7 +244,6 @@ environments: - conda: https://conda.anaconda.org/conda-forge/win-64/zstd-1.5.7-h534d264_6.conda - pypi: https://files.pythonhosted.org/packages/5c/40/69ca9ea803303e14301fff9d4931b6d080b9603e134df0419c55e9764df4/filecheck-1.0.3-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/e9/93/2bfed22d2498c468f6bcd0d9f56b033eaa19f33320389314c19ef6766413/ml_dtypes-0.5.4-cp314-cp314-win_amd64.whl - - pypi: https://files.pythonhosted.org/packages/7e/bb/c6513edcce5a831810e2dddc0d3452ce84d208af92405a0c2e58fd8e7881/numpy-2.4.1-cp314-cp314-win_amd64.whl cu-12-0-py310: channels: - url: https://conda.anaconda.org/conda-forge/ @@ -215,9 +284,12 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-pathfinder-1.3.3-pyhcf101f3_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-12.9.5-pyh698daf1_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-12.0-hffde075_3.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-13.6.0-py310h8c3aed4_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-core-13.6.0-py310hbc0d89f_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/fastrlock-0.8.3-py310h25320af_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/gcc_impl_linux-64-12.4.0-h26ba24d_2.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/gcc_linux-64-12.4.0-h6b7512a_10.conda @@ -230,8 +302,12 @@ environments: - conda: 
https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.45-default_hbd61a6d_105.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libblas-3.11.0-5_h4a7cf45_openblas.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libcblas-3.11.0-5_h0358290_openblas.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libcublas-12.0.1.189-hd3aeb46_3.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libcufft-11.0.0.21-hd3aeb46_2.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libcufile-1.5.0.59-hd3aeb46_1.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libcurand-10.3.1.50-hd3aeb46_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libcusolver-11.4.2.57-hd3aeb46_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libcusparse-12.0.0.76-hd3aeb46_2.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.7.3-hecca717_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libffi-3.5.2-h9ec8514_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libgcc-15.2.0-he0feb66_16.conda @@ -321,9 +397,12 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-pathfinder-1.3.3-pyhcf101f3_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-12.9.5-pyh698daf1_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-12.0-hffde075_3.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-13.6.0-py310h556c47b_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-core-13.6.0-py310h967c7ba_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda + - conda: 
https://conda.anaconda.org/conda-forge/linux-aarch64/fastrlock-0.8.3-py310heccc163_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/gcc_impl_linux-aarch64-12.4.0-h628656a_2.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/gcc_linux-aarch64-12.4.0-heb3b579_10.conda @@ -336,7 +415,11 @@ environments: - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/ld_impl_linux-aarch64-2.45-default_h1979696_105.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libblas-3.11.0-5_haddc8a3_openblas.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcblas-3.11.0-5_hd72aa62_openblas.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcublas-12.0.1.189-hac28a21_3.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcufft-11.0.0.21-hac28a21_2.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcurand-10.3.1.50-hac28a21_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcusolver-11.4.2.57-hac28a21_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcusparse-12.0.0.76-hac28a21_2.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libexpat-2.7.3-hfae3067_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libffi-3.5.2-hd65408f_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libgcc-15.2.0-h8acb6b2_16.conda @@ -422,16 +505,23 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-pathfinder-1.3.3-pyhcf101f3_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-12.9.5-pyh698daf1_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-12.0-hffde075_3.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/cupy-13.6.0-py310h9349102_2.conda + - conda: 
https://conda.anaconda.org/conda-forge/win-64/cupy-core-13.6.0-py310h867cfc4_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/fastrlock-0.8.3-py310h699e580_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/icu-78.2-h637d24d_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/identify-2.6.15-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.3.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/libblas-3.11.0-5_hf2e6a31_mkl.conda - conda: https://conda.anaconda.org/conda-forge/win-64/libcblas-3.11.0-5_h2a3cdd5_mkl.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/libcublas-12.0.1.189-h63175ca_3.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/libcufft-11.0.0.21-h63175ca_2.conda - conda: https://conda.anaconda.org/conda-forge/win-64/libcurand-10.3.1.50-h63175ca_1.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/libcusolver-11.4.2.57-h63175ca_2.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/libcusparse-12.0.0.76-h63175ca_2.conda - conda: https://conda.anaconda.org/conda-forge/win-64/libexpat-2.7.3-hac47afa_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/libffi-3.5.2-h52bdfb6_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/libgcc-15.2.0-h8ee18e1_16.conda @@ -485,7 +575,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/win-64/yaml-0.2.5-h6a83c73_3.conda - conda: https://conda.anaconda.org/conda-forge/win-64/zstd-1.5.7-h534d264_6.conda - conda: . 
- build: py310hf0cc224_0 + build: py310h5d23e43_0 - pypi: https://files.pythonhosted.org/packages/5c/40/69ca9ea803303e14301fff9d4931b6d080b9603e134df0419c55e9764df4/filecheck-1.0.3-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/c7/a3/51886727bd16e2f47587997b802dd56398692ce8c6c03c2e5bb32ecafe26/ml_dtypes-0.5.4-cp310-cp310-win_amd64.whl cu-12-0-py311: @@ -528,9 +618,12 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-pathfinder-1.3.3-pyhcf101f3_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-12.9.5-pyh698daf1_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-12.0-hffde075_3.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-13.6.0-py311h72da3fd_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-core-13.6.0-py311he30c881_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/fastrlock-0.8.3-py311hc665b79_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/gcc_impl_linux-64-12.4.0-h26ba24d_2.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/gcc_linux-64-12.4.0-h6b7512a_10.conda @@ -543,8 +636,12 @@ environments: - conda: https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.45-default_hbd61a6d_105.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libblas-3.11.0-5_h4a7cf45_openblas.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libcblas-3.11.0-5_h0358290_openblas.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libcublas-12.0.1.189-hd3aeb46_3.conda + - conda: 
https://conda.anaconda.org/conda-forge/linux-64/libcufft-11.0.0.21-hd3aeb46_2.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libcufile-1.5.0.59-hd3aeb46_1.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libcurand-10.3.1.50-hd3aeb46_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libcusolver-11.4.2.57-hd3aeb46_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libcusparse-12.0.0.76-hd3aeb46_2.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.7.3-hecca717_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libffi-3.5.2-h9ec8514_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libgcc-15.2.0-he0feb66_16.conda @@ -634,9 +731,12 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-pathfinder-1.3.3-pyhcf101f3_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-12.9.5-pyh698daf1_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-12.0-hffde075_3.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-13.6.0-py311h1f68eda_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-core-13.6.0-py311h6a7bbfe_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/fastrlock-0.8.3-py311h8e4e6a5_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/gcc_impl_linux-aarch64-12.4.0-h628656a_2.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/gcc_linux-aarch64-12.4.0-heb3b579_10.conda @@ -649,7 +749,11 @@ environments: - conda: 
https://conda.anaconda.org/conda-forge/linux-aarch64/ld_impl_linux-aarch64-2.45-default_h1979696_105.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libblas-3.11.0-5_haddc8a3_openblas.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcblas-3.11.0-5_hd72aa62_openblas.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcublas-12.0.1.189-hac28a21_3.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcufft-11.0.0.21-hac28a21_2.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcurand-10.3.1.50-hac28a21_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcusolver-11.4.2.57-hac28a21_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcusparse-12.0.0.76-hac28a21_2.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libexpat-2.7.3-hfae3067_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libffi-3.5.2-hd65408f_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libgcc-15.2.0-h8acb6b2_16.conda @@ -735,16 +839,23 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-pathfinder-1.3.3-pyhcf101f3_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-12.9.5-pyh698daf1_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-12.0-hffde075_3.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/cupy-13.6.0-py311h3856ebc_2.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/cupy-core-13.6.0-py311h3f47771_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/fastrlock-0.8.3-py311h5dfdfe8_2.conda - conda: 
https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/icu-78.2-h637d24d_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/identify-2.6.15-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.3.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/libblas-3.11.0-5_hf2e6a31_mkl.conda - conda: https://conda.anaconda.org/conda-forge/win-64/libcblas-3.11.0-5_h2a3cdd5_mkl.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/libcublas-12.0.1.189-h63175ca_3.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/libcufft-11.0.0.21-h63175ca_2.conda - conda: https://conda.anaconda.org/conda-forge/win-64/libcurand-10.3.1.50-h63175ca_1.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/libcusolver-11.4.2.57-h63175ca_2.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/libcusparse-12.0.0.76-h63175ca_2.conda - conda: https://conda.anaconda.org/conda-forge/win-64/libexpat-2.7.3-hac47afa_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/libffi-3.5.2-h52bdfb6_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/libgcc-15.2.0-h8ee18e1_16.conda @@ -798,7 +909,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/win-64/yaml-0.2.5-h6a83c73_3.conda - conda: https://conda.anaconda.org/conda-forge/win-64/zstd-1.5.7-h534d264_6.conda - conda: . 
- build: py311h17f48b4_0 + build: py311hb9e802a_0 - pypi: https://files.pythonhosted.org/packages/5c/40/69ca9ea803303e14301fff9d4931b6d080b9603e134df0419c55e9764df4/filecheck-1.0.3-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/b4/24/70bd59276883fdd91600ca20040b41efd4902a923283c4d6edcb1de128d2/ml_dtypes-0.5.4-cp311-cp311-win_amd64.whl cu-12-2-py311: @@ -848,9 +959,12 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-pathfinder-1.3.3-pyhcf101f3_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-12.9.5-pyh698daf1_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-12.2-he2b69de_3.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-13.6.0-py311h72da3fd_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-core-13.6.0-py311he30c881_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/fastrlock-0.8.3-py311hc665b79_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/gcc_impl_linux-64-12.4.0-h26ba24d_2.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/gcc_linux-64-12.4.0-h6b7512a_10.conda @@ -863,8 +977,12 @@ environments: - conda: https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.45-default_hbd61a6d_105.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libblas-3.11.0-5_h4a7cf45_openblas.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libcblas-3.11.0-5_h0358290_openblas.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libcublas-12.2.5.6-hd3aeb46_0.conda + - conda: 
https://conda.anaconda.org/conda-forge/linux-64/libcufft-11.0.8.103-hd3aeb46_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libcufile-1.7.2.10-hd3aeb46_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libcurand-10.3.3.141-hd3aeb46_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libcusolver-11.5.2.141-hd3aeb46_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libcusparse-12.1.2.141-hd3aeb46_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.7.3-hecca717_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libffi-3.5.2-h9ec8514_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libgcc-15.2.0-he0feb66_16.conda @@ -961,9 +1079,12 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-pathfinder-1.3.3-pyhcf101f3_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-12.9.5-pyh698daf1_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-12.2-he2b69de_3.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-13.6.0-py311h1f68eda_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-core-13.6.0-py311h6a7bbfe_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/fastrlock-0.8.3-py311h8e4e6a5_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/gcc_impl_linux-aarch64-12.4.0-h628656a_2.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/gcc_linux-aarch64-12.4.0-heb3b579_10.conda @@ -976,8 +1097,12 @@ environments: - conda: 
https://conda.anaconda.org/conda-forge/linux-aarch64/ld_impl_linux-aarch64-2.45-default_h1979696_105.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libblas-3.11.0-5_haddc8a3_openblas.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcblas-3.11.0-5_hd72aa62_openblas.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcublas-12.2.5.6-hac28a21_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcufft-11.0.8.103-hac28a21_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcufile-1.7.2.10-hac28a21_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcurand-10.3.3.141-hac28a21_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcusolver-11.5.2.141-hac28a21_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcusparse-12.1.2.141-hac28a21_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libexpat-2.7.3-hfae3067_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libffi-3.5.2-hd65408f_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libgcc-15.2.0-h8acb6b2_16.conda @@ -1070,16 +1195,23 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-pathfinder-1.3.3-pyhcf101f3_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-12.9.5-pyh698daf1_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-12.2-he2b69de_3.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/cupy-13.6.0-py311h3856ebc_2.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/cupy-core-13.6.0-py311h3f47771_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda + - conda: 
https://conda.anaconda.org/conda-forge/win-64/fastrlock-0.8.3-py311h5dfdfe8_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/icu-78.2-h637d24d_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/identify-2.6.15-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.3.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/libblas-3.11.0-5_hf2e6a31_mkl.conda - conda: https://conda.anaconda.org/conda-forge/win-64/libcblas-3.11.0-5_h2a3cdd5_mkl.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/libcublas-12.2.5.6-h63175ca_0.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/libcufft-11.0.8.103-h63175ca_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/libcurand-10.3.3.141-h63175ca_0.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/libcusolver-11.5.2.141-h63175ca_0.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/libcusparse-12.1.2.141-h63175ca_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/libexpat-2.7.3-hac47afa_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/libffi-3.5.2-h52bdfb6_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/libgcc-15.2.0-h8ee18e1_16.conda @@ -1133,7 +1265,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/win-64/yaml-0.2.5-h6a83c73_3.conda - conda: https://conda.anaconda.org/conda-forge/win-64/zstd-1.5.7-h534d264_6.conda - conda: . 
- build: py311h17f48b4_0 + build: py311hb9e802a_0 - pypi: https://files.pythonhosted.org/packages/5c/40/69ca9ea803303e14301fff9d4931b6d080b9603e134df0419c55e9764df4/filecheck-1.0.3-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/b4/24/70bd59276883fdd91600ca20040b41efd4902a923283c4d6edcb1de128d2/ml_dtypes-0.5.4-cp311-cp311-win_amd64.whl cu-12-8-py310: @@ -1187,9 +1319,12 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-12.9.5-pyh698daf1_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-runtime-12.8.1-ha804496_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-12.8-h5d125a7_3.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-13.6.0-py310h8c3aed4_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-core-13.6.0-py310hbc0d89f_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/fastrlock-0.8.3-py310h25320af_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/gcc_impl_linux-64-14.3.0-he8b2097_16.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/gcc_linux-64-14.3.0-h298d278_17.conda @@ -1316,9 +1451,12 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-12.9.5-pyh698daf1_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-runtime-12.8.1-ha804496_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-12.8-h5d125a7_3.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-13.6.0-py310h556c47b_2.conda + - conda: 
https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-core-13.6.0-py310h967c7ba_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/fastrlock-0.8.3-py310heccc163_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/gcc_impl_linux-aarch64-14.3.0-hda29b82_16.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/gcc_linux-aarch64-14.3.0-h118592a_17.conda @@ -1439,9 +1577,12 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-12.9.5-pyh698daf1_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-runtime-12.8.1-h7428d3b_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-12.8-h5d125a7_3.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/cupy-13.6.0-py310h9349102_2.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/cupy-core-13.6.0-py310h867cfc4_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/fastrlock-0.8.3-py310h699e580_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/icu-78.2-h637d24d_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/identify-2.6.15-pyhd8ed1ab_0.conda @@ -1511,7 +1652,7 @@ environments: - conda: 
https://conda.anaconda.org/conda-forge/win-64/yaml-0.2.5-h6a83c73_3.conda - conda: https://conda.anaconda.org/conda-forge/win-64/zstd-1.5.7-h534d264_6.conda - conda: . - build: py310hf0cc224_0 + build: py310h5d23e43_0 - pypi: https://files.pythonhosted.org/packages/5c/40/69ca9ea803303e14301fff9d4931b6d080b9603e134df0419c55e9764df4/filecheck-1.0.3-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/c7/a3/51886727bd16e2f47587997b802dd56398692ce8c6c03c2e5bb32ecafe26/ml_dtypes-0.5.4-cp310-cp310-win_amd64.whl cu-12-8-py311: @@ -1565,9 +1706,12 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-12.9.5-pyh698daf1_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-runtime-12.8.1-ha804496_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-12.8-h5d125a7_3.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-13.6.0-py311h72da3fd_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-core-13.6.0-py311he30c881_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/fastrlock-0.8.3-py311hc665b79_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/gcc_impl_linux-64-14.3.0-he8b2097_16.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/gcc_linux-64-14.3.0-h298d278_17.conda @@ -1694,9 +1838,12 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-12.9.5-pyh698daf1_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-runtime-12.8.1-ha804496_0.conda - conda: 
https://conda.anaconda.org/conda-forge/noarch/cuda-version-12.8-h5d125a7_3.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-13.6.0-py311h1f68eda_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-core-13.6.0-py311h6a7bbfe_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/fastrlock-0.8.3-py311h8e4e6a5_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/gcc_impl_linux-aarch64-14.3.0-hda29b82_16.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/gcc_linux-aarch64-14.3.0-h118592a_17.conda @@ -1817,9 +1964,12 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-12.9.5-pyh698daf1_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-runtime-12.8.1-h7428d3b_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-12.8-h5d125a7_3.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/cupy-13.6.0-py311h3856ebc_2.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/cupy-core-13.6.0-py311h3f47771_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/fastrlock-0.8.3-py311h5dfdfe8_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/icu-78.2-h637d24d_0.conda - 
conda: https://conda.anaconda.org/conda-forge/noarch/identify-2.6.15-pyhd8ed1ab_0.conda @@ -1889,7 +2039,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/win-64/yaml-0.2.5-h6a83c73_3.conda - conda: https://conda.anaconda.org/conda-forge/win-64/zstd-1.5.7-h534d264_6.conda - conda: . - build: py311h17f48b4_0 + build: py311hb9e802a_0 - pypi: https://files.pythonhosted.org/packages/5c/40/69ca9ea803303e14301fff9d4931b6d080b9603e134df0419c55e9764df4/filecheck-1.0.3-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/b4/24/70bd59276883fdd91600ca20040b41efd4902a923283c4d6edcb1de128d2/ml_dtypes-0.5.4-cp311-cp311-win_amd64.whl cu-12-8-py312: @@ -1943,9 +2093,12 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-12.9.5-pyh698daf1_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-runtime-12.8.1-ha804496_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-12.8-h5d125a7_3.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-13.6.0-py312h0317cef_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-core-13.6.0-py312h16a6543_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/fastrlock-0.8.3-py312h8285ef7_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/gcc_impl_linux-64-14.3.0-he8b2097_16.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/gcc_linux-64-14.3.0-h298d278_17.conda @@ -2072,9 +2225,12 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-12.9.5-pyh698daf1_0.conda - conda: 
https://conda.anaconda.org/conda-forge/noarch/cuda-runtime-12.8.1-ha804496_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-12.8-h5d125a7_3.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-13.6.0-py312h500e0d2_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-core-13.6.0-py312hdcd7d0a_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/fastrlock-0.8.3-py312hf55c4e8_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/gcc_impl_linux-aarch64-14.3.0-hda29b82_16.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/gcc_linux-aarch64-14.3.0-h118592a_17.conda @@ -2195,9 +2351,12 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-12.9.5-pyh698daf1_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-runtime-12.8.1-h7428d3b_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-12.8-h5d125a7_3.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/cupy-13.6.0-py312hf676df9_2.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/cupy-core-13.6.0-py312hc3434b0_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/fastrlock-0.8.3-py312ha1a9051_2.conda - conda: 
https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/icu-78.2-h637d24d_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/identify-2.6.15-pyhd8ed1ab_0.conda @@ -2267,7 +2426,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/win-64/yaml-0.2.5-h6a83c73_3.conda - conda: https://conda.anaconda.org/conda-forge/win-64/zstd-1.5.7-h534d264_6.conda - conda: . - build: py312h61be6c2_0 + build: py312ha067a5a_0 - pypi: https://files.pythonhosted.org/packages/5c/40/69ca9ea803303e14301fff9d4931b6d080b9603e134df0419c55e9764df4/filecheck-1.0.3-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/f5/f0/0cfadd537c5470378b1b32bd859cf2824972174b51b873c9d95cfd7475a5/ml_dtypes-0.5.4-cp312-cp312-win_amd64.whl cu-12-8-py313: @@ -2321,9 +2480,12 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-12.9.5-pyh698daf1_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-runtime-12.8.1-ha804496_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-12.8-h5d125a7_3.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-13.6.0-py313h586c94b_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-core-13.6.0-py313h28b6081_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/fastrlock-0.8.3-py313h5d5ffb9_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/gcc_impl_linux-64-14.3.0-he8b2097_16.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/gcc_linux-64-14.3.0-h298d278_17.conda @@ 
-2448,9 +2610,12 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-12.9.5-pyh698daf1_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-runtime-12.8.1-ha804496_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-12.8-h5d125a7_3.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-13.6.0-py313h7988abe_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-core-13.6.0-py313h6b3a76b_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/fastrlock-0.8.3-py313h59403f9_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/gcc_impl_linux-aarch64-14.3.0-hda29b82_16.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/gcc_linux-aarch64-14.3.0-h118592a_17.conda @@ -2570,9 +2735,12 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-12.9.5-pyh698daf1_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-runtime-12.8.1-h7428d3b_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-12.8-h5d125a7_3.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/cupy-13.6.0-py313h5dfe2c3_2.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/cupy-core-13.6.0-py313ha16128a_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda + - conda: 
https://conda.anaconda.org/conda-forge/win-64/fastrlock-0.8.3-py313h927ade5_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/icu-78.2-h637d24d_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/identify-2.6.15-pyhd8ed1ab_0.conda @@ -2643,7 +2811,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/win-64/yaml-0.2.5-h6a83c73_3.conda - conda: https://conda.anaconda.org/conda-forge/win-64/zstd-1.5.7-h534d264_6.conda - conda: . - build: py313h96b86a2_0 + build: py313he80dd91_0 - pypi: https://files.pythonhosted.org/packages/5c/40/69ca9ea803303e14301fff9d4931b6d080b9603e134df0419c55e9764df4/filecheck-1.0.3-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/e1/8b/200088c6859d8221454825959df35b5244fa9bdf263fd0249ac5fb75e281/ml_dtypes-0.5.4-cp313-cp313-win_amd64.whl cu-12-9-py312: @@ -3134,7 +3302,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/win-64/yaml-0.2.5-h6a83c73_3.conda - conda: https://conda.anaconda.org/conda-forge/win-64/zstd-1.5.7-h534d264_6.conda - conda: . 
- build: py312h61be6c2_0 + build: py312ha067a5a_0 - pypi: https://files.pythonhosted.org/packages/5c/40/69ca9ea803303e14301fff9d4931b6d080b9603e134df0419c55e9764df4/filecheck-1.0.3-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/f5/f0/0cfadd537c5470378b1b32bd859cf2824972174b51b873c9d95cfd7475a5/ml_dtypes-0.5.4-cp312-cp312-win_amd64.whl cu-13-0-py312: @@ -3188,9 +3356,12 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-13.1.1-pyhc455866_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-runtime-13.0.2-ha804496_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-13.0-hc7b4dd1_3.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-13.6.0-py312h045ee1a_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-core-13.6.0-py312h1a70bb2_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/fastrlock-0.8.3-py312h8285ef7_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/gcc_impl_linux-64-15.2.0-hc5723f1_16.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/gcc_linux-64-15.2.0-h862fb80_17.conda @@ -3319,9 +3490,12 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-13.1.1-pyhc455866_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-runtime-13.0.2-ha804496_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-13.0-hc7b4dd1_3.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-13.6.0-py312h63ce5a7_2.conda + - conda: 
https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-core-13.6.0-py312hc495b10_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/fastrlock-0.8.3-py312hf55c4e8_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/gcc_impl_linux-aarch64-15.2.0-habb1d5c_16.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/gcc_linux-aarch64-15.2.0-h0139441_17.conda @@ -3444,9 +3618,12 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-13.1.1-pyhc455866_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-runtime-13.0.2-h7428d3b_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-13.0-hc7b4dd1_3.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/cupy-13.6.0-py312h050d4bf_2.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/cupy-core-13.6.0-py312h7babc83_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/fastrlock-0.8.3-py312ha1a9051_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/icu-78.2-h637d24d_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/identify-2.6.15-pyhd8ed1ab_0.conda @@ -3518,7 +3695,7 @@ environments: - conda: 
https://conda.anaconda.org/conda-forge/win-64/yaml-0.2.5-h6a83c73_3.conda - conda: https://conda.anaconda.org/conda-forge/win-64/zstd-1.5.7-h534d264_6.conda - conda: . - build: py312h61be6c2_0 + build: py312ha067a5a_0 - pypi: https://files.pythonhosted.org/packages/5c/40/69ca9ea803303e14301fff9d4931b6d080b9603e134df0419c55e9764df4/filecheck-1.0.3-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/f5/f0/0cfadd537c5470378b1b32bd859cf2824972174b51b873c9d95cfd7475a5/ml_dtypes-0.5.4-cp312-cp312-win_amd64.whl cu-13-0-py313: @@ -3572,9 +3749,12 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-13.1.1-pyhc455866_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-runtime-13.0.2-ha804496_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-13.0-hc7b4dd1_3.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-13.6.0-py313h727d180_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-core-13.6.0-py313h0630d88_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/fastrlock-0.8.3-py313h5d5ffb9_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/gcc_impl_linux-64-15.2.0-hc5723f1_16.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/gcc_linux-64-15.2.0-h862fb80_17.conda @@ -3701,9 +3881,12 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-13.1.1-pyhc455866_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-runtime-13.0.2-ha804496_0.conda - conda: 
https://conda.anaconda.org/conda-forge/noarch/cuda-version-13.0-hc7b4dd1_3.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-13.6.0-py313h1bad292_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-core-13.6.0-py313h407dc6c_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/fastrlock-0.8.3-py313h59403f9_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/gcc_impl_linux-aarch64-15.2.0-habb1d5c_16.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/gcc_linux-aarch64-15.2.0-h0139441_17.conda @@ -3825,9 +4008,12 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-13.1.1-pyhc455866_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-runtime-13.0.2-h7428d3b_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-13.0-hc7b4dd1_3.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/cupy-13.6.0-py313h670e13b_2.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/cupy-core-13.6.0-py313haef2af9_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/fastrlock-0.8.3-py313h927ade5_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/icu-78.2-h637d24d_0.conda - 
conda: https://conda.anaconda.org/conda-forge/noarch/identify-2.6.15-pyhd8ed1ab_0.conda @@ -3900,7 +4086,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/win-64/yaml-0.2.5-h6a83c73_3.conda - conda: https://conda.anaconda.org/conda-forge/win-64/zstd-1.5.7-h534d264_6.conda - conda: . - build: py313h96b86a2_0 + build: py313he80dd91_0 - pypi: https://files.pythonhosted.org/packages/5c/40/69ca9ea803303e14301fff9d4931b6d080b9603e134df0419c55e9764df4/filecheck-1.0.3-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/e1/8b/200088c6859d8221454825959df35b5244fa9bdf263fd0249ac5fb75e281/ml_dtypes-0.5.4-cp313-cp313-win_amd64.whl cu-13-0-py314: @@ -3954,9 +4140,12 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-13.1.1-pyhc455866_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-runtime-13.0.2-ha804496_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-13.0-hc7b4dd1_3.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-13.6.0-py314h972ecce_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-core-13.6.0-py314h3ed1f13_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/fastrlock-0.8.3-py314h8c728da_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/gcc_impl_linux-64-15.2.0-hc5723f1_16.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/gcc_linux-64-15.2.0-h862fb80_17.conda @@ -4083,9 +4272,12 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-13.1.1-pyhc455866_1.conda - conda: 
https://conda.anaconda.org/conda-forge/noarch/cuda-runtime-13.0.2-ha804496_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-13.0-hc7b4dd1_3.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-13.6.0-py314h3ec1dcb_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-core-13.6.0-py314heaf0aa5_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/fastrlock-0.8.3-py314h3642cf7_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/gcc_impl_linux-aarch64-15.2.0-habb1d5c_16.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/gcc_linux-aarch64-15.2.0-h0139441_17.conda @@ -4207,9 +4399,12 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-13.1.1-pyhc455866_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-runtime-13.0.2-h7428d3b_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-13.0-hc7b4dd1_3.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/cupy-13.6.0-py314h59d4d8c_2.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/cupy-core-13.6.0-py314hc101868_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/fastrlock-0.8.3-py314h8b4fd5f_2.conda - conda: 
https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/icu-78.2-h637d24d_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/identify-2.6.15-pyhd8ed1ab_0.conda @@ -4282,7 +4477,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/win-64/yaml-0.2.5-h6a83c73_3.conda - conda: https://conda.anaconda.org/conda-forge/win-64/zstd-1.5.7-h534d264_6.conda - conda: . - build: py314h3be3d12_0 + build: py314h625260f_0 - pypi: https://files.pythonhosted.org/packages/5c/40/69ca9ea803303e14301fff9d4931b6d080b9603e134df0419c55e9764df4/filecheck-1.0.3-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/e9/93/2bfed22d2498c468f6bcd0d9f56b033eaa19f33320389314c19ef6766413/ml_dtypes-0.5.4-cp314-cp314-win_amd64.whl cu-13-1-py314: @@ -4336,9 +4531,12 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-13.1.1-pyhc455866_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-runtime-13.1.0-ha804496_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-13.1-h2ff5cdb_3.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-13.6.0-py314h972ecce_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-core-13.6.0-py314h3ed1f13_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/fastrlock-0.8.3-py314h8c728da_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/gcc_impl_linux-64-15.2.0-hc5723f1_16.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/gcc_linux-64-15.2.0-h862fb80_17.conda @@ 
-4465,9 +4663,12 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-13.1.1-pyhc455866_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-runtime-13.1.0-ha804496_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-13.1-h2ff5cdb_3.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-13.6.0-py314h3ec1dcb_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-core-13.6.0-py314heaf0aa5_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/fastrlock-0.8.3-py314h3642cf7_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/gcc_impl_linux-aarch64-15.2.0-habb1d5c_16.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/gcc_linux-aarch64-15.2.0-h0139441_17.conda @@ -4589,9 +4790,12 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-13.1.1-pyhc455866_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-runtime-13.1.0-h7428d3b_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-13.1-h2ff5cdb_3.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/cupy-13.6.0-py314h59d4d8c_2.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/cupy-core-13.6.0-py314hc101868_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda + - conda: 
https://conda.anaconda.org/conda-forge/win-64/fastrlock-0.8.3-py314h8b4fd5f_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/icu-78.2-h637d24d_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/identify-2.6.15-pyhd8ed1ab_0.conda @@ -4664,7 +4868,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/win-64/yaml-0.2.5-h6a83c73_3.conda - conda: https://conda.anaconda.org/conda-forge/win-64/zstd-1.5.7-h534d264_6.conda - conda: . - build: py314h3be3d12_0 + build: py314h625260f_0 - pypi: https://files.pythonhosted.org/packages/5c/40/69ca9ea803303e14301fff9d4931b6d080b9603e134df0419c55e9764df4/filecheck-1.0.3-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/e9/93/2bfed22d2498c468f6bcd0d9f56b033eaa19f33320389314c19ef6766413/ml_dtypes-0.5.4-cp314-cp314-win_amd64.whl default: @@ -4718,9 +4922,12 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-13.1.1-pyhc455866_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-runtime-13.1.0-ha804496_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-13.1-h2ff5cdb_3.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-13.6.0-py314h972ecce_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-core-13.6.0-py314h3ed1f13_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/fastrlock-0.8.3-py314h8c728da_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/gcc_impl_linux-64-15.2.0-hc5723f1_16.conda - conda: 
https://conda.anaconda.org/conda-forge/linux-64/gcc_linux-64-15.2.0-h862fb80_17.conda @@ -4847,9 +5054,12 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-13.1.1-pyhc455866_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-runtime-13.1.0-ha804496_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-13.1-h2ff5cdb_3.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-13.6.0-py314h3ec1dcb_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-core-13.6.0-py314heaf0aa5_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/fastrlock-0.8.3-py314h3642cf7_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/gcc_impl_linux-aarch64-15.2.0-habb1d5c_16.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/gcc_linux-aarch64-15.2.0-h0139441_17.conda @@ -4971,9 +5181,12 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-python-13.1.1-pyhc455866_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-runtime-13.1.0-h7428d3b_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/cuda-version-13.1-h2ff5cdb_3.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/cupy-13.6.0-py314h59d4d8c_2.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/cupy-core-13.6.0-py314hc101868_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.3.1-pyhd8ed1ab_0.conda - conda: 
https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.2-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/fastrlock-0.8.3-py314h8b4fd5f_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/filelock-3.20.3-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/icu-78.2-h637d24d_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/identify-2.6.16-pyhd8ed1ab_0.conda @@ -5046,7 +5259,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/win-64/yaml-0.2.5-h6a83c73_3.conda - conda: https://conda.anaconda.org/conda-forge/win-64/zstd-1.5.7-h534d264_6.conda - conda: . - build: py314h3be3d12_0 + build: py314h625260f_0 - pypi: https://files.pythonhosted.org/packages/5c/40/69ca9ea803303e14301fff9d4931b6d080b9603e134df0419c55e9764df4/filecheck-1.0.3-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/e9/93/2bfed22d2498c468f6bcd0d9f56b033eaa19f33320389314c19ef6766413/ml_dtypes-0.5.4-cp314-cp314-win_amd64.whl dev: @@ -5416,7 +5629,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/win_inet_pton-1.1.0-pyh7428d3b_8.conda - conda: https://conda.anaconda.org/conda-forge/win-64/zstd-1.5.7-h534d264_6.conda - conda: . 
- build: py314h3be3d12_0 + build: py314h625260f_0 - pypi: https://files.pythonhosted.org/packages/8d/3f/95338030883d8c8b91223b4e21744b04d11b161a3ef117295d8241f50ab4/accessible_pygments-0.0.5-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/1a/39/47f9197bdd44df24d67ac8893641e16f386c984a0619ef2ee4c51fbbc019/beautifulsoup4-4.14.3-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/8c/79/017fab2f7167a9a9795665f894d04f77aafceca80821b51589bb4b23ff5c/nvidia_sphinx_theme-0.0.9.post1-py3-none-any.whl @@ -5947,22 +6160,6 @@ packages: - pkg:pypi/cffi?source=hash-mapping size: 300271 timestamp: 1761203085220 -- conda: https://conda.anaconda.org/conda-forge/linux-64/cffi-2.0.0-py314h6fefde3_1.conda - sha256: b5214aa3e0853240f41c79b6cbdb3b3c6b6bfc384713ffad1b41b1442d44737a - md5: 1598bfc06ced45b100bca5117c9b3a4b - depends: - - __glibc >=2.17,<3.0.a0 - - libffi >=3.5.2,<3.6.0a0 - - libgcc >=14 - - pycparser - - python >=3.14,<3.15.0a0 - - python_abi 3.14.* *_cp314t - license: MIT - license_family: MIT - purls: - - pkg:pypi/cffi?source=hash-mapping - size: 304412 - timestamp: 1761202966547 - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cffi-2.0.0-py310h0826a50_1.conda sha256: 63458040026be843a189e319190a0622486017c92ef251d4dff7ec847f9a8418 md5: 152a5ba791642d8a81fe02d134ab3839 @@ -11588,6 +11785,18 @@ packages: purls: [] size: 68354405 timestamp: 1757018387981 +- conda: https://conda.anaconda.org/conda-forge/linux-64/cuda-nvrtc-13.1.115-hecca717_0.conda + sha256: 9cc4f9df70c02eea5121cdb0e865207b04cd52591f57ebcac2ba44fada10eb5b + md5: df16c9049d882cdaf4f83a5b90079589 + depends: + - __glibc >=2.17,<3.0.a0 + - cuda-version >=13.1,<13.2.0a0 + - libgcc >=14 + - libstdcxx >=14 + license: LicenseRef-NVIDIA-End-User-License-Agreement + purls: [] + size: 35339417 + timestamp: 1768272955912 - conda: https://conda.anaconda.org/conda-forge/linux-64/cuda-nvrtc-13.1.80-hecca717_0.conda sha256: 
d6b326bdbf6fa7bfa0fa617dda547dc585159816b8f130f2535740c4e53fd12c md5: 7ef874b2dc4ca388ecef3b3893305459 @@ -11663,6 +11872,18 @@ packages: purls: [] size: 32555050 timestamp: 1757018424779 +- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cuda-nvrtc-13.1.115-h8f3c8d4_0.conda + sha256: a1ec61512cecb093797e00590ad381ecd5852d2a32440ff22b34f78c743f3d5a + md5: 34da2ff2c64054d65eb8f04d76c40cca + depends: + - arm-variant * sbsa + - cuda-version >=13.1,<13.2.0a0 + - libgcc >=14 + - libstdcxx >=14 + license: LicenseRef-NVIDIA-End-User-License-Agreement + purls: [] + size: 33616576 + timestamp: 1768272976976 - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cuda-nvrtc-13.1.80-h8f3c8d4_0.conda sha256: 5e10ce4dd84c22c73e58a9f8359fb1e5ef4596afd3a0bc12b9fbde73b388ec0d md5: 0473ebdb01f2f4024177b024fc19fa72 @@ -11735,6 +11956,18 @@ packages: purls: [] size: 59235886 timestamp: 1757018672897 +- conda: https://conda.anaconda.org/conda-forge/win-64/cuda-nvrtc-13.1.115-hac47afa_0.conda + sha256: a8869b7d997722f90b9f8a602dc0b1d0d497f2a6f3561dc89383aeb2cd379a66 + md5: 372d3c612a832d5f87d8dd9702d487b2 + depends: + - cuda-version >=13.1,<13.2.0a0 + - ucrt >=10.0.20348.0 + - vc >=14.3,<15 + - vc14_runtime >=14.44.35208 + license: LicenseRef-NVIDIA-End-User-License-Agreement + purls: [] + size: 31006920 + timestamp: 1768273107962 - conda: https://conda.anaconda.org/conda-forge/win-64/cuda-nvrtc-13.1.80-hac47afa_0.conda sha256: 3f67de8a9eb182fa20bbc80bda7185afb676cfe8894f6a0549173bd752a7d2f4 md5: 7b42337a35cd887ec3eed254b5ed606f @@ -12881,6 +13114,46 @@ packages: purls: [] size: 19915 timestamp: 1762823943653 +- conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-13.6.0-py310h8c3aed4_2.conda + sha256: bab72866e713729c4824323aa4ff9346a48d0c74dff21d2cebb49331c9c58f57 + md5: 9e5f2f1fc83026ad80f0660895ea3994 + depends: + - cuda-cudart-dev_linux-64 + - cuda-nvrtc + - cuda-version >=12,<13.0a0 + - cupy-core 13.6.0 py310hbc0d89f_2 + - libcublas + - libcufft + - 
libcurand + - libcusolver + - libcusparse + - python >=3.10,<3.11.0a0 + - python_abi 3.10.* *_cp310 + license: MIT + license_family: MIT + purls: [] + size: 359719 + timestamp: 1757733038131 +- conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-13.6.0-py311h72da3fd_2.conda + sha256: 01f0f69dbc66ca8fe7182678258915425573f5ae5aef338efb963aceb444ef1f + md5: 7ff80f6526ae96cff25f226544e72baa + depends: + - cuda-cudart-dev_linux-64 + - cuda-nvrtc + - cuda-version >=12,<13.0a0 + - cupy-core 13.6.0 py311he30c881_2 + - libcublas + - libcufft + - libcurand + - libcusolver + - libcusparse + - python >=3.11,<3.12.0a0 + - python_abi 3.11.* *_cp311 + license: MIT + license_family: MIT + purls: [] + size: 359669 + timestamp: 1757732902729 - conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-13.6.0-py312h0317cef_2.conda sha256: 078e83045e252b7c616c4e6b580acc1c12b4ade24b4ecd71be4d5dc767387bca md5: 8cee37f4bad743e108f904e902f65df1 @@ -12901,6 +13174,126 @@ packages: purls: [] size: 359152 timestamp: 1757733115653 +- conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-13.6.0-py312h045ee1a_2.conda + sha256: 806110d9c5c6802006eec55d012e2e82dddadf8f7c9743297a25eef5800d6a25 + md5: 2be1fbddb4658b3325d531e3e8f62abe + depends: + - cuda-cudart-dev_linux-64 + - cuda-nvrtc + - cuda-version >=13,<14.0a0 + - cupy-core 13.6.0 py312h1a70bb2_2 + - libcublas + - libcufft + - libcurand + - libcusolver + - libcusparse + - python >=3.12,<3.13.0a0 + - python_abi 3.12.* *_cp312 + license: MIT + license_family: MIT + purls: [] + size: 359604 + timestamp: 1757731606512 +- conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-13.6.0-py313h586c94b_2.conda + sha256: 8e1b0bf555b5ac78d620ccfd20d70c45b717eb6f074631b1a9e962c5d8f0e484 + md5: 0685ae3980f823b2ca78552f7d8d4033 + depends: + - cuda-cudart-dev_linux-64 + - cuda-nvrtc + - cuda-version >=12,<13.0a0 + - cupy-core 13.6.0 py313h28b6081_2 + - libcublas + - libcufft + - libcurand + - libcusolver + - libcusparse + - python 
>=3.13,<3.14.0a0 + - python_abi 3.13.* *_cp313 + license: MIT + license_family: MIT + purls: [] + size: 359537 + timestamp: 1757732883343 +- conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-13.6.0-py313h727d180_2.conda + sha256: 0cf7e5f9461b144320ff2d30f1e7d74c7990e69aa15ec8211cc117f1214a9985 + md5: 9a9af89f20555cbb1892f81d096b937d + depends: + - cuda-cudart-dev_linux-64 + - cuda-nvrtc + - cuda-version >=13,<14.0a0 + - cupy-core 13.6.0 py313h0630d88_2 + - libcublas + - libcufft + - libcurand + - libcusolver + - libcusparse + - python >=3.13,<3.14.0a0 + - python_abi 3.13.* *_cp313 + license: MIT + license_family: MIT + purls: [] + size: 359195 + timestamp: 1757731600945 +- conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-13.6.0-py314h972ecce_2.conda + sha256: fc4c9e4286c943f8ce6e3f5f29e4ac750939b46cecd06ff70b00d6ba0472af02 + md5: 5efa78fb77f5f07b02dde55a66bbff24 + depends: + - cuda-cudart-dev_linux-64 + - cuda-nvrtc + - cuda-version >=13,<14.0a0 + - cupy-core 13.6.0 py314h3ed1f13_2 + - libcublas + - libcufft + - libcurand + - libcusolver + - libcusparse + - python >=3.14.0rc2,<3.15.0a0 + - python_abi 3.14.* *_cp314 + license: MIT + license_family: MIT + purls: [] + size: 359816 + timestamp: 1757731942829 +- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-13.6.0-py310h556c47b_2.conda + sha256: 3595e84792c1e36fa79348a404d71b94ad7fd2db8d0ca2551377661dbe40a9ea + md5: 6a547864445662481528190824613fef + depends: + - cuda-cudart-dev_linux-aarch64 + - cuda-nvrtc + - cuda-version >=12,<13.0a0 + - cupy-core 13.6.0 py310h967c7ba_2 + - libcublas + - libcufft + - libcurand + - libcusolver + - libcusparse + - python >=3.10,<3.11.0a0 + - python_abi 3.10.* *_cp310 + license: MIT + license_family: MIT + purls: [] + size: 359399 + timestamp: 1757733587754 +- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-13.6.0-py311h1f68eda_2.conda + sha256: 9bfa8bbc0a630e331a04359675c3a728bc9a856284807b5042e24bab4cb16f28 + md5: 
0c76272fc6fa05ff39c53ea5ea5d1154 + depends: + - cuda-cudart-dev_linux-aarch64 + - cuda-nvrtc + - cuda-version >=12,<13.0a0 + - cupy-core 13.6.0 py311h6a7bbfe_2 + - libcublas + - libcufft + - libcurand + - libcusolver + - libcusparse + - python >=3.11,<3.12.0a0 + - python_abi 3.11.* *_cp311 + license: MIT + license_family: MIT + purls: [] + size: 359395 + timestamp: 1757733506707 - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-13.6.0-py312h500e0d2_2.conda sha256: 05992a9fddee5bded2f68aeaaba937901ef3b5b246132f7f25478579cc99d48a md5: 73a45823cac7c3926192682b7a71ed94 @@ -12921,14 +13314,14 @@ packages: purls: [] size: 359411 timestamp: 1757733170501 -- conda: https://conda.anaconda.org/conda-forge/win-64/cupy-13.6.0-py312hf676df9_2.conda - sha256: 6636ac902b44dbc8f8e14d8d2593d057af2f7b722b704edbe01600a2c90c752c - md5: 270e90ae04455f4f85b8763ec1755373 +- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-13.6.0-py312h63ce5a7_2.conda + sha256: b58e3b72197504103175bfccce853f71de94716e832faa3fb69a22508242185a + md5: 6a3767487d9c694dee98bced05c7d048 depends: - - cuda-cudart-dev_win-64 + - cuda-cudart-dev_linux-aarch64 - cuda-nvrtc - - cuda-version >=12,<13.0a0 - - cupy-core 13.6.0 py312hc3434b0_2 + - cuda-version >=13,<14.0a0 + - cupy-core 13.6.0 py312hc495b10_2 - libcublas - libcufft - libcurand @@ -12939,70 +13332,735 @@ packages: license: MIT license_family: MIT purls: [] - size: 361552 - timestamp: 1757734756770 -- conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-core-13.6.0-py312h16a6543_2.conda - sha256: ebe205ad39f19067898f4513816d8c44ac8036d0c4b9f1ee5aa0233e0f5dc1d7 - md5: e0667d2bf17e4ff3bd50861f245ed961 + size: 359083 + timestamp: 1757732404821 +- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-13.6.0-py313h1bad292_2.conda + sha256: f41cf3dae5a43376eb47a172ebc684337f5ed623a16f165dc5ae1444598b5910 + md5: 38504562c74c201725045cfbf54699ee depends: - - __glibc >=2.17,<3.0.a0 - - fastrlock >=0.8.3,<0.9.0a0 - 
- libgcc >=14 - - libstdcxx >=14 - - numpy >=1.22 - - python >=3.12,<3.13.0a0 - - python_abi 3.12.* *_cp312 - constrains: - - cuda-nvrtc >=12,<13.0a0 - - libcufft >=11,<12.0a0 - - libcurand >=10,<11.0a0 - - scipy >=1.7,<1.17 - - optuna ~=3.0 - - cutensor >=2.3.1.0,<3.0a0 - - cuda-version >=12,<13.0a0 - - cupy >=13.6.0,<13.7.0a0 - - libcusparse >=12,<13.0a0 - - libcusolver >=11,<12.0a0 - - nccl >=2.27.7.1,<3.0a0 - - libcublas >=12,<13.0a0 - - __cuda >=12.0 + - cuda-cudart-dev_linux-aarch64 + - cuda-nvrtc + - cuda-version >=13,<14.0a0 + - cupy-core 13.6.0 py313h407dc6c_2 + - libcublas + - libcufft + - libcurand + - libcusolver + - libcusparse + - python >=3.13,<3.14.0a0 + - python_abi 3.13.* *_cp313 license: MIT license_family: MIT - purls: - - pkg:pypi/cupy?source=hash-mapping - size: 56720768 - timestamp: 1757733006716 -- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-core-13.6.0-py312hdcd7d0a_2.conda - sha256: bc3cf5f1f0b0b4653d573507087ee56bfa04900232133e87c9baebfe6a128612 - md5: 07720f931f710f3d2061b0bdcb808b82 + purls: [] + size: 359766 + timestamp: 1757732380354 +- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-13.6.0-py313h7988abe_2.conda + sha256: 3263457e1415b2695cbba24e45b6d200b05f98120169ce56ac266ef9b29f38b7 + md5: d378f8038cb5acfb9e24650b7b581f48 depends: - - fastrlock >=0.8.3,<0.9.0a0 - - libgcc >=14 - - libstdcxx >=14 - - numpy >=1.22 - - python >=3.12,<3.13.0a0 - - python >=3.12,<3.13.0a0 *_cpython + - cuda-cudart-dev_linux-aarch64 + - cuda-nvrtc + - cuda-version >=12,<13.0a0 + - cupy-core 13.6.0 py313h6b3a76b_2 + - libcublas + - libcufft + - libcurand + - libcusolver + - libcusparse + - python >=3.13,<3.14.0a0 + - python_abi 3.13.* *_cp313 + license: MIT + license_family: MIT + purls: [] + size: 359576 + timestamp: 1757733613485 +- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-13.6.0-py314h3ec1dcb_2.conda + sha256: 9b78e1d7c9f42ad09dcad9e8784bfe95aec35ff30c10bc0a8f7cc92033e4c11f + md5: 
ef63ce910ca3d9278fa7b411740e6064 + depends: + - cuda-cudart-dev_linux-aarch64 + - cuda-nvrtc + - cuda-version >=13,<14.0a0 + - cupy-core 13.6.0 py314heaf0aa5_2 + - libcublas + - libcufft + - libcurand + - libcusolver + - libcusparse + - python >=3.14.0rc2,<3.15.0a0 + - python_abi 3.14.* *_cp314 + license: MIT + license_family: MIT + purls: [] + size: 359844 + timestamp: 1757732501296 +- conda: https://conda.anaconda.org/conda-forge/win-64/cupy-13.6.0-py310h9349102_2.conda + sha256: a9de522e66ff07d1567b4011f7a6e6c858f573053c989bf8a3a91276cf211bdc + md5: 3f610f7dce9af31ba31ff4bc8e4cc0ef + depends: + - cuda-cudart-dev_win-64 + - cuda-nvrtc + - cuda-version >=12,<13.0a0 + - cupy-core 13.6.0 py310h867cfc4_2 + - libcublas + - libcufft + - libcurand + - libcusolver + - libcusparse + - python >=3.10,<3.11.0a0 + - python_abi 3.10.* *_cp310 + license: MIT + license_family: MIT + purls: [] + size: 361800 + timestamp: 1757734323240 +- conda: https://conda.anaconda.org/conda-forge/win-64/cupy-13.6.0-py311h3856ebc_2.conda + sha256: 7fff0c303355730c2e29386159fab97f31b3423bb5fd856e7e449ec735ef8e07 + md5: 8e5df8d8969bf8dbf85740207e354e4c + depends: + - cuda-cudart-dev_win-64 + - cuda-nvrtc + - cuda-version >=12,<13.0a0 + - cupy-core 13.6.0 py311h3f47771_2 + - libcublas + - libcufft + - libcurand + - libcusolver + - libcusparse + - python >=3.11,<3.12.0a0 + - python_abi 3.11.* *_cp311 + license: MIT + license_family: MIT + purls: [] + size: 361007 + timestamp: 1757734548861 +- conda: https://conda.anaconda.org/conda-forge/win-64/cupy-13.6.0-py312h050d4bf_2.conda + sha256: 2f80b492e9bd02d36583caabc9933db381aa4313b25ff9b98e4386f39e2d6244 + md5: 083c371b7832142e6ea9842088a96f55 + depends: + - cuda-cudart-dev_win-64 + - cuda-nvrtc + - cuda-version >=13,<14.0a0 + - cupy-core 13.6.0 py312h7babc83_2 + - libcublas + - libcufft + - libcurand + - libcusolver + - libcusparse + - python >=3.12,<3.13.0a0 - python_abi 3.12.* *_cp312 + license: MIT + license_family: MIT + purls: [] + size: 361055 
+ timestamp: 1757732736235 +- conda: https://conda.anaconda.org/conda-forge/win-64/cupy-13.6.0-py312hf676df9_2.conda + sha256: 6636ac902b44dbc8f8e14d8d2593d057af2f7b722b704edbe01600a2c90c752c + md5: 270e90ae04455f4f85b8763ec1755373 + depends: + - cuda-cudart-dev_win-64 + - cuda-nvrtc + - cuda-version >=12,<13.0a0 + - cupy-core 13.6.0 py312hc3434b0_2 + - libcublas + - libcufft + - libcurand + - libcusolver + - libcusparse + - python >=3.12,<3.13.0a0 + - python_abi 3.12.* *_cp312 + license: MIT + license_family: MIT + purls: [] + size: 361552 + timestamp: 1757734756770 +- conda: https://conda.anaconda.org/conda-forge/win-64/cupy-13.6.0-py313h5dfe2c3_2.conda + sha256: b36285a74901926ddab1b49e86936957715c1db476207c6e524338867eef9683 + md5: 01e63e587cf8c7477d53a3e98782e81d + depends: + - cuda-cudart-dev_win-64 + - cuda-nvrtc + - cuda-version >=12,<13.0a0 + - cupy-core 13.6.0 py313ha16128a_2 + - libcublas + - libcufft + - libcurand + - libcusolver + - libcusparse + - python >=3.13,<3.14.0a0 + - python_abi 3.13.* *_cp313 + license: MIT + license_family: MIT + purls: [] + size: 361341 + timestamp: 1757734712476 +- conda: https://conda.anaconda.org/conda-forge/win-64/cupy-13.6.0-py313h670e13b_2.conda + sha256: 13b870d34d8df1cd72a60892cc95f150d01e8915f4f11f92a7622602fbe847dc + md5: 1c75580206c0367647f7b23bfabb8a93 + depends: + - cuda-cudart-dev_win-64 + - cuda-nvrtc + - cuda-version >=13,<14.0a0 + - cupy-core 13.6.0 py313haef2af9_2 + - libcublas + - libcufft + - libcurand + - libcusolver + - libcusparse + - python >=3.13,<3.14.0a0 + - python_abi 3.13.* *_cp313 + license: MIT + license_family: MIT + purls: [] + size: 361420 + timestamp: 1757731939881 +- conda: https://conda.anaconda.org/conda-forge/win-64/cupy-13.6.0-py314h59d4d8c_2.conda + sha256: b0755d67f3e501cdfbf6ac6ed5a8a3a37adbb41df25bb6e2922e9b6c59919bd0 + md5: f60dd8b8db34ab07021459d9a0ad4a8a + depends: + - cuda-cudart-dev_win-64 + - cuda-nvrtc + - cuda-version >=13,<14.0a0 + - cupy-core 13.6.0 py314hc101868_2 + - 
libcublas + - libcufft + - libcurand + - libcusolver + - libcusparse + - python >=3.14.0rc2,<3.15.0a0 + - python_abi 3.14.* *_cp314 + license: MIT + license_family: MIT + purls: [] + size: 361792 + timestamp: 1757732239805 +- conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-core-13.6.0-py310hbc0d89f_2.conda + sha256: 59ee4ca6f4166e575b0f0174941bef5759035e1098abf8f3c6816cc497206c6e + md5: 54e7f3bcf179555759acc4341921f3db + depends: + - __glibc >=2.17,<3.0.a0 + - fastrlock >=0.8.3,<0.9.0a0 + - libgcc >=14 + - libstdcxx >=14 + - numpy >=1.22 + - python >=3.10,<3.11.0a0 + - python_abi 3.10.* *_cp310 + constrains: + - nccl >=2.27.7.1,<3.0a0 + - cutensor >=2.3.1.0,<3.0a0 + - cupy >=13.6.0,<13.7.0a0 + - scipy >=1.7,<1.17 + - libcusolver >=11,<12.0a0 + - libcufft >=11,<12.0a0 + - libcusparse >=12,<13.0a0 + - cuda-version >=12,<13.0a0 + - cuda-nvrtc >=12,<13.0a0 + - libcublas >=12,<13.0a0 + - optuna ~=3.0 + - libcurand >=10,<11.0a0 + - __cuda >=12.0 + license: MIT + license_family: MIT + purls: + - pkg:pypi/cupy?source=hash-mapping + size: 56537348 + timestamp: 1757732911282 +- conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-core-13.6.0-py311he30c881_2.conda + sha256: 45e67d3a56d36935e4189b17e707bf6b887d21df6411fab9d835455a10250db8 + md5: c9ca2bae852b83675f256aec6c518396 + depends: + - __glibc >=2.17,<3.0.a0 + - fastrlock >=0.8.3,<0.9.0a0 + - libgcc >=14 + - libstdcxx >=14 + - numpy >=1.22 + - python >=3.11,<3.12.0a0 + - python_abi 3.11.* *_cp311 + constrains: + - __cuda >=12.0 + - cuda-version >=12,<13.0a0 + - cupy >=13.6.0,<13.7.0a0 + - nccl >=2.27.7.1,<3.0a0 + - cuda-nvrtc >=12,<13.0a0 + - cutensor >=2.3.1.0,<3.0a0 + - libcusparse >=12,<13.0a0 + - scipy >=1.7,<1.17 + - libcufft >=11,<12.0a0 + - libcurand >=10,<11.0a0 + - optuna ~=3.0 + - libcusolver >=11,<12.0a0 + - libcublas >=12,<13.0a0 + license: MIT + license_family: MIT + purls: + - pkg:pypi/cupy?source=hash-mapping + size: 56743670 + timestamp: 1757732786905 +- conda: 
https://conda.anaconda.org/conda-forge/linux-64/cupy-core-13.6.0-py312h16a6543_2.conda + sha256: ebe205ad39f19067898f4513816d8c44ac8036d0c4b9f1ee5aa0233e0f5dc1d7 + md5: e0667d2bf17e4ff3bd50861f245ed961 + depends: + - __glibc >=2.17,<3.0.a0 + - fastrlock >=0.8.3,<0.9.0a0 + - libgcc >=14 + - libstdcxx >=14 + - numpy >=1.22 + - python >=3.12,<3.13.0a0 + - python_abi 3.12.* *_cp312 + constrains: + - cuda-nvrtc >=12,<13.0a0 + - libcufft >=11,<12.0a0 + - libcurand >=10,<11.0a0 + - scipy >=1.7,<1.17 + - optuna ~=3.0 + - cutensor >=2.3.1.0,<3.0a0 + - cuda-version >=12,<13.0a0 + - cupy >=13.6.0,<13.7.0a0 + - libcusparse >=12,<13.0a0 + - libcusolver >=11,<12.0a0 + - nccl >=2.27.7.1,<3.0a0 + - libcublas >=12,<13.0a0 + - __cuda >=12.0 + license: MIT + license_family: MIT + purls: + - pkg:pypi/cupy?source=hash-mapping + size: 56720768 + timestamp: 1757733006716 +- conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-core-13.6.0-py312h1a70bb2_2.conda + sha256: 955e08c61145c77fcafe91d88bded3fe7bfe87e46a08db2f1345980d56a5444d + md5: b7613be94326f391c4b6edd7f114d3ee + depends: + - __glibc >=2.28,<3.0.a0 + - fastrlock >=0.8.3,<0.9.0a0 + - libgcc >=14 + - libstdcxx >=14 + - numpy >=1.22 + - python >=3.12,<3.13.0a0 + - python_abi 3.12.* *_cp312 + constrains: + - optuna ~=3.0 + - libcurand >=10,<11.0a0 + - cuda-nvrtc >=13,<14.0a0 + - __cuda >=13.0 + - nccl >=2.27.7.1,<3.0a0 + - cuda-version >=13,<14.0a0 + - libcublas >=13,<14.0a0 + - libcusolver >=12,<13.0a0 + - cupy >=13.6.0,<13.7.0a0 + - cutensor >=2.3.1.0,<3.0a0 + - libcufft >=12,<13.0a0 + - scipy >=1.7,<1.17 + - libcusparse >=12,<13.0a0 + license: MIT + license_family: MIT + purls: + - pkg:pypi/cupy?source=hash-mapping + size: 31539281 + timestamp: 1757731547163 +- conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-core-13.6.0-py313h0630d88_2.conda + sha256: 82c950c3118d81368ad0dee224ab946c963b57ccad34911cacdcc52fc046d792 + md5: a2a6a0df7ef6e9ae482bae698cfd7476 + depends: + - __glibc >=2.28,<3.0.a0 + - fastrlock 
>=0.8.3,<0.9.0a0 + - libgcc >=14 + - libstdcxx >=14 + - numpy >=1.22 + - python >=3.13,<3.14.0a0 + - python_abi 3.13.* *_cp313 + constrains: + - libcusolver >=12,<13.0a0 + - cuda-nvrtc >=13,<14.0a0 + - cupy >=13.6.0,<13.7.0a0 + - libcublas >=13,<14.0a0 + - __cuda >=13.0 + - libcufft >=12,<13.0a0 + - optuna ~=3.0 + - nccl >=2.27.7.1,<3.0a0 + - cutensor >=2.3.1.0,<3.0a0 + - libcurand >=10,<11.0a0 + - scipy >=1.7,<1.17 + - cuda-version >=13,<14.0a0 + - libcusparse >=12,<13.0a0 + license: MIT + license_family: MIT + purls: + - pkg:pypi/cupy?source=hash-mapping + size: 31734692 + timestamp: 1757731531047 +- conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-core-13.6.0-py313h28b6081_2.conda + sha256: 56b2ebb09c8a74746f1dbf660bb7a50af562e9416a2f9733d8e8715503cca81a + md5: 388fb72307f756f7f2c7f5928647bc6b + depends: + - __glibc >=2.17,<3.0.a0 + - fastrlock >=0.8.3,<0.9.0a0 + - libgcc >=14 + - libstdcxx >=14 + - numpy >=1.22 + - python >=3.13,<3.14.0a0 + - python_abi 3.13.* *_cp313 + constrains: + - libcusolver >=11,<12.0a0 + - cuda-nvrtc >=12,<13.0a0 + - libcufft >=11,<12.0a0 + - libcurand >=10,<11.0a0 + - cupy >=13.6.0,<13.7.0a0 + - libcusparse >=12,<13.0a0 + - scipy >=1.7,<1.17 + - optuna ~=3.0 + - cutensor >=2.3.1.0,<3.0a0 + - __cuda >=12.0 + - cuda-version >=12,<13.0a0 + - nccl >=2.27.7.1,<3.0a0 + - libcublas >=12,<13.0a0 + license: MIT + license_family: MIT + purls: + - pkg:pypi/cupy?source=hash-mapping + size: 56733316 + timestamp: 1757732780713 +- conda: https://conda.anaconda.org/conda-forge/linux-64/cupy-core-13.6.0-py314h3ed1f13_2.conda + sha256: f377afaddccdaead2963bdabc3fa550e8d3e5d6aa6fc632cc01eadfd11442ef8 + md5: d80c89a6489cb472feb8b009c34d3c11 + depends: + - __glibc >=2.28,<3.0.a0 + - fastrlock >=0.8.3,<0.9.0a0 + - libgcc >=14 + - libstdcxx >=14 + - numpy >=1.22 + - python >=3.14.0rc2,<3.15.0a0 + - python_abi 3.14.* *_cp314 + constrains: + - libcusparse >=12,<13.0a0 + - scipy >=1.7,<1.17 + - __cuda >=13.0 + - cuda-version >=13,<14.0a0 + - cutensor 
>=2.3.1.0,<3.0a0 + - cupy >=13.6.0,<13.7.0a0 + - libcusolver >=12,<13.0a0 + - nccl >=2.27.7.1,<3.0a0 + - optuna ~=3.0 + - libcufft >=12,<13.0a0 + - cuda-nvrtc >=13,<14.0a0 + - libcublas >=13,<14.0a0 + - libcurand >=10,<11.0a0 + license: MIT + license_family: MIT + purls: + - pkg:pypi/cupy?source=hash-mapping + size: 31824862 + timestamp: 1757731889554 +- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-core-13.6.0-py310h967c7ba_2.conda + sha256: ecc9ed490591577e7a0d9a994a1c4ae3a2f0b8605cdb3c67548fd8c1aeb48c95 + md5: f77bbe8edf8f4c9e1be06aebb99bebec + depends: + - fastrlock >=0.8.3,<0.9.0a0 + - libgcc >=14 + - libstdcxx >=14 + - numpy >=1.22 + - python >=3.10,<3.11.0a0 + - python >=3.10,<3.11.0a0 *_cpython + - python_abi 3.10.* *_cp310 + constrains: + - scipy >=1.7,<1.17 + - __cuda >=12.0 + - libcufft >=11,<12.0a0 + - libcurand >=10,<11.0a0 + - libcusolver >=11,<12.0a0 + - cuda-version >=12,<13.0a0 + - libcusparse >=12,<13.0a0 + - cupy >=13.6.0,<13.7.0a0 + - cuda-nvrtc >=12,<13.0a0 + - nccl >=2.27.7.1,<3.0a0 + - libcublas >=12,<13.0a0 + - optuna ~=3.0 + - cutensor >=2.3.1.0,<3.0a0 + license: MIT + license_family: MIT + purls: + - pkg:pypi/cupy?source=hash-mapping + size: 63896600 + timestamp: 1757733496346 +- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-core-13.6.0-py311h6a7bbfe_2.conda + sha256: 8ebdcc75e0cf89d5f73b34dde93dad9387b76b883b197fe3e41cee7b116376fb + md5: 5f61a21425c550d4badcdbf96c8723f9 + depends: + - fastrlock >=0.8.3,<0.9.0a0 + - libgcc >=14 + - libstdcxx >=14 + - numpy >=1.22 + - python >=3.11,<3.12.0a0 + - python >=3.11,<3.12.0a0 *_cpython + - python_abi 3.11.* *_cp311 + constrains: + - cupy >=13.6.0,<13.7.0a0 + - libcufft >=11,<12.0a0 + - cuda-version >=12,<13.0a0 + - libcusolver >=11,<12.0a0 + - __cuda >=12.0 + - libcublas >=12,<13.0a0 + - libcurand >=10,<11.0a0 + - optuna ~=3.0 + - cuda-nvrtc >=12,<13.0a0 + - cutensor >=2.3.1.0,<3.0a0 + - scipy >=1.7,<1.17 + - libcusparse >=12,<13.0a0 + - nccl >=2.27.7.1,<3.0a0 
+ license: MIT + license_family: MIT + purls: + - pkg:pypi/cupy?source=hash-mapping + size: 64073249 + timestamp: 1757733413707 +- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-core-13.6.0-py312hc495b10_2.conda + sha256: f862a404f82ca39e331802d3373d4b75dc4e53e885d8c5e6e222dfa59feab962 + md5: 363b56bd0a936dc789f017ef904d4c75 + depends: + - __glibc >=2.28,<3.0.a0 + - fastrlock >=0.8.3,<0.9.0a0 + - libgcc >=14 + - libstdcxx >=14 + - numpy >=1.22 + - python >=3.12,<3.13.0a0 + - python >=3.12,<3.13.0a0 *_cpython + - python_abi 3.12.* *_cp312 + constrains: + - cuda-nvrtc >=13,<14.0a0 + - __cuda >=13.0 + - libcufft >=12,<13.0a0 + - nccl >=2.27.7.1,<3.0a0 + - cutensor >=2.3.1.0,<3.0a0 + - libcusolver >=12,<13.0a0 + - libcublas >=13,<14.0a0 + - libcusparse >=12,<13.0a0 + - cupy >=13.6.0,<13.7.0a0 + - cuda-version >=13,<14.0a0 + - scipy >=1.7,<1.17 + - libcurand >=10,<11.0a0 + - optuna ~=3.0 + license: MIT + license_family: MIT + purls: + - pkg:pypi/cupy?source=hash-mapping + size: 36623787 + timestamp: 1757732346566 +- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-core-13.6.0-py312hdcd7d0a_2.conda + sha256: bc3cf5f1f0b0b4653d573507087ee56bfa04900232133e87c9baebfe6a128612 + md5: 07720f931f710f3d2061b0bdcb808b82 + depends: + - fastrlock >=0.8.3,<0.9.0a0 + - libgcc >=14 + - libstdcxx >=14 + - numpy >=1.22 + - python >=3.12,<3.13.0a0 + - python >=3.12,<3.13.0a0 *_cpython + - python_abi 3.12.* *_cp312 + constrains: + - cuda-nvrtc >=12,<13.0a0 + - cupy >=13.6.0,<13.7.0a0 + - libcusparse >=12,<13.0a0 + - cuda-version >=12,<13.0a0 + - libcurand >=10,<11.0a0 + - libcufft >=11,<12.0a0 + - nccl >=2.27.7.1,<3.0a0 + - libcusolver >=11,<12.0a0 + - cutensor >=2.3.1.0,<3.0a0 + - __cuda >=12.0 + - libcublas >=12,<13.0a0 + - optuna ~=3.0 + - scipy >=1.7,<1.17 + license: MIT + license_family: MIT + purls: + - pkg:pypi/cupy?source=hash-mapping + size: 63865734 + timestamp: 1757733078190 +- conda: 
https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-core-13.6.0-py313h407dc6c_2.conda + sha256: 69a3cd03e492b76d29d66d83a533d132ffcb99f8f6831191ecb99e8372e8f76e + md5: ff191fa08a0238048035b0638e21220b + depends: + - __glibc >=2.28,<3.0.a0 + - fastrlock >=0.8.3,<0.9.0a0 + - libgcc >=14 + - libstdcxx >=14 + - numpy >=1.22 + - python >=3.13,<3.14.0a0 + - python >=3.13,<3.14.0a0 *_cp313 + - python_abi 3.13.* *_cp313 + constrains: + - cuda-nvrtc >=13,<14.0a0 + - scipy >=1.7,<1.17 + - nccl >=2.27.7.1,<3.0a0 + - libcurand >=10,<11.0a0 + - cuda-version >=13,<14.0a0 + - libcusparse >=12,<13.0a0 + - __cuda >=13.0 + - libcufft >=12,<13.0a0 + - optuna ~=3.0 + - libcublas >=13,<14.0a0 + - cupy >=13.6.0,<13.7.0a0 + - libcusolver >=12,<13.0a0 + - cutensor >=2.3.1.0,<3.0a0 + license: MIT + license_family: MIT + purls: + - pkg:pypi/cupy?source=hash-mapping + size: 36458540 + timestamp: 1757732319930 +- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-core-13.6.0-py313h6b3a76b_2.conda + sha256: 004036b2bd95684b64ae4a56663988da4026136c57be580de080e137b4e48b43 + md5: 0fb0ebad3a2eb9f4c860465c47955131 + depends: + - fastrlock >=0.8.3,<0.9.0a0 + - libgcc >=14 + - libstdcxx >=14 + - numpy >=1.22 + - python >=3.13,<3.14.0a0 + - python >=3.13,<3.14.0a0 *_cp313 + - python_abi 3.13.* *_cp313 + constrains: + - __cuda >=12.0 + - nccl >=2.27.7.1,<3.0a0 + - cupy >=13.6.0,<13.7.0a0 + - scipy >=1.7,<1.17 + - libcurand >=10,<11.0a0 + - libcusparse >=12,<13.0a0 + - cuda-nvrtc >=12,<13.0a0 + - cuda-version >=12,<13.0a0 + - libcublas >=12,<13.0a0 + - libcufft >=11,<12.0a0 + - cutensor >=2.3.1.0,<3.0a0 + - libcusolver >=11,<12.0a0 + - optuna ~=3.0 + license: MIT + license_family: MIT + purls: + - pkg:pypi/cupy?source=hash-mapping + size: 63810519 + timestamp: 1757733528386 +- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/cupy-core-13.6.0-py314heaf0aa5_2.conda + sha256: 354bc9f675b433909ff8854371e4b3606817dc304c0cd95c63d67d480ab80462 + md5: 
c37b0c4ab40f2b23de6ab31042b2476a + depends: + - __glibc >=2.28,<3.0.a0 + - fastrlock >=0.8.3,<0.9.0a0 + - libgcc >=14 + - libstdcxx >=14 + - numpy >=1.22 + - python >=3.14.0rc2,<3.15.0a0 + - python >=3.14.0rc2,<3.15.0a0 *_cp314 + - python_abi 3.14.* *_cp314 constrains: - - cuda-nvrtc >=12,<13.0a0 - - cupy >=13.6.0,<13.7.0a0 + - scipy >=1.7,<1.17 - libcusparse >=12,<13.0a0 + - libcusolver >=12,<13.0a0 + - nccl >=2.27.7.1,<3.0a0 + - cupy >=13.6.0,<13.7.0a0 + - cuda-version >=13,<14.0a0 + - cutensor >=2.3.1.0,<3.0a0 + - cuda-nvrtc >=13,<14.0a0 + - optuna ~=3.0 + - libcufft >=12,<13.0a0 + - __cuda >=13.0 + - libcublas >=13,<14.0a0 + - libcurand >=10,<11.0a0 + license: MIT + license_family: MIT + purls: + - pkg:pypi/cupy?source=hash-mapping + size: 36619576 + timestamp: 1757732433081 +- conda: https://conda.anaconda.org/conda-forge/win-64/cupy-core-13.6.0-py310h867cfc4_2.conda + sha256: 115131c370be8c410fba05e78d83f197b4c48a8b30fd8a32c3bd8d9a3ad80215 + md5: 2b22c1eb70d5f8f86c4babac37703437 + depends: + - fastrlock >=0.8.3,<0.9.0a0 + - numpy >=1.22 + - python >=3.10,<3.11.0a0 + - python_abi 3.10.* *_cp310 + - ucrt >=10.0.20348.0 + - vc >=14.2,<15 + - vc14_runtime >=14.29.30139 + constrains: - cuda-version >=12,<13.0a0 + - optuna ~=3.0 + - scipy >=1.7,<1.17 + - __cuda >=12.0 + - cupy >=13.6.0,<13.7.0a0 + - libcublas >=12,<13.0a0 - libcurand >=10,<11.0a0 - - libcufft >=11,<12.0a0 - - nccl >=2.27.7.1,<3.0a0 - libcusolver >=11,<12.0a0 - cutensor >=2.3.1.0,<3.0a0 - - __cuda >=12.0 + - libcusparse >=12,<13.0a0 + - libcufft >=11,<12.0a0 + - cuda-nvrtc >=12,<13.0a0 + license: MIT + license_family: MIT + purls: + - pkg:pypi/cupy?source=hash-mapping + size: 54351742 + timestamp: 1757734211315 +- conda: https://conda.anaconda.org/conda-forge/win-64/cupy-core-13.6.0-py311h3f47771_2.conda + sha256: c874fd562f9750b468f009dfcc4ed6de6bb41ccc3bb6d65957ab3c1498613675 + md5: 16602f6836e4e866c52a78a21feb1560 + depends: + - fastrlock >=0.8.3,<0.9.0a0 + - numpy >=1.22 + - python 
>=3.11,<3.12.0a0 + - python_abi 3.11.* *_cp311 + - ucrt >=10.0.20348.0 + - vc >=14.2,<15 + - vc14_runtime >=14.29.30139 + constrains: + - libcusolver >=11,<12.0a0 + - libcurand >=10,<11.0a0 + - libcufft >=11,<12.0a0 + - libcusparse >=12,<13.0a0 + - cuda-version >=12,<13.0a0 - libcublas >=12,<13.0a0 - optuna ~=3.0 + - cutensor >=2.3.1.0,<3.0a0 + - cuda-nvrtc >=12,<13.0a0 - scipy >=1.7,<1.17 + - __cuda >=12.0 + - cupy >=13.6.0,<13.7.0a0 license: MIT license_family: MIT purls: - pkg:pypi/cupy?source=hash-mapping - size: 63865734 - timestamp: 1757733078190 + size: 54764492 + timestamp: 1757734470749 +- conda: https://conda.anaconda.org/conda-forge/win-64/cupy-core-13.6.0-py312h7babc83_2.conda + sha256: 18582d52c9abd1e5008af3e4bd38552b8410713777066ac8024415c99d7a83e8 + md5: 33050f8f5af87ae8cbfbb4e40de61fbf + depends: + - fastrlock >=0.8.3,<0.9.0a0 + - numpy >=1.22 + - python >=3.12,<3.13.0a0 + - python_abi 3.12.* *_cp312 + - ucrt >=10.0.20348.0 + - vc >=14.2,<15 + - vc14_runtime >=14.29.30139 + constrains: + - cupy >=13.6.0,<13.7.0a0 + - libcurand >=10,<11.0a0 + - cuda-version >=13,<14.0a0 + - libcufft >=12,<13.0a0 + - libcusolver >=12,<13.0a0 + - cutensor >=2.3.1.0,<3.0a0 + - __cuda >=13.0 + - scipy >=1.7,<1.17 + - libcusparse >=12,<13.0a0 + - cuda-nvrtc >=13,<14.0a0 + - optuna ~=3.0 + - libcublas >=13,<14.0a0 + license: MIT + license_family: MIT + purls: + - pkg:pypi/cupy?source=hash-mapping + size: 29913800 + timestamp: 1757732657370 - conda: https://conda.anaconda.org/conda-forge/win-64/cupy-core-13.6.0-py312hc3434b0_2.conda sha256: 9aae135cb29962786adafa0b3bae094f9fce0b4ca386aaaa7d038ae518efcba6 md5: 9e1c32b5b8172ae6666850b583355257 @@ -13033,6 +14091,96 @@ packages: - pkg:pypi/cupy?source=hash-mapping size: 54685402 timestamp: 1757734676711 +- conda: https://conda.anaconda.org/conda-forge/win-64/cupy-core-13.6.0-py313ha16128a_2.conda + sha256: 7df3f437c45ba61754643a2c61f4e6c7c5b4be3bf58fa029d39e4fc8ddb7e54b + md5: 5a270c8af5e377ff40932ce8ec8472e3 + depends: + - 
fastrlock >=0.8.3,<0.9.0a0 + - numpy >=1.22 + - python >=3.13,<3.14.0a0 + - python_abi 3.13.* *_cp313 + - ucrt >=10.0.20348.0 + - vc >=14.2,<15 + - vc14_runtime >=14.29.30139 + constrains: + - libcusolver >=11,<12.0a0 + - cuda-version >=12,<13.0a0 + - __cuda >=12.0 + - scipy >=1.7,<1.17 + - libcublas >=12,<13.0a0 + - optuna ~=3.0 + - cuda-nvrtc >=12,<13.0a0 + - cutensor >=2.3.1.0,<3.0a0 + - libcusparse >=12,<13.0a0 + - cupy >=13.6.0,<13.7.0a0 + - libcufft >=11,<12.0a0 + - libcurand >=10,<11.0a0 + license: MIT + license_family: MIT + purls: + - pkg:pypi/cupy?source=hash-mapping + size: 54751648 + timestamp: 1757734626461 +- conda: https://conda.anaconda.org/conda-forge/win-64/cupy-core-13.6.0-py313haef2af9_2.conda + sha256: b207060087be5dcb79c533d4d160730f3a7de23d5e96253fe0770b1dc03cc124 + md5: 60df31229f6e6084a0c8a7ee07976133 + depends: + - fastrlock >=0.8.3,<0.9.0a0 + - numpy >=1.22 + - python >=3.13,<3.14.0a0 + - python_abi 3.13.* *_cp313 + - ucrt >=10.0.20348.0 + - vc >=14.2,<15 + - vc14_runtime >=14.29.30139 + constrains: + - __cuda >=13.0 + - cuda-nvrtc >=13,<14.0a0 + - scipy >=1.7,<1.17 + - libcusolver >=12,<13.0a0 + - libcublas >=13,<14.0a0 + - libcusparse >=12,<13.0a0 + - libcufft >=12,<13.0a0 + - cuda-version >=13,<14.0a0 + - cutensor >=2.3.1.0,<3.0a0 + - libcurand >=10,<11.0a0 + - cupy >=13.6.0,<13.7.0a0 + - optuna ~=3.0 + license: MIT + license_family: MIT + purls: + - pkg:pypi/cupy?source=hash-mapping + size: 29792078 + timestamp: 1757731883397 +- conda: https://conda.anaconda.org/conda-forge/win-64/cupy-core-13.6.0-py314hc101868_2.conda + sha256: f6533a698dd95c9d18efea957d3b524906f9fed4d69ba67b51b158be8aa51a64 + md5: 06b1af7b5254c0864e82e1105f9f0f2e + depends: + - fastrlock >=0.8.3,<0.9.0a0 + - numpy >=1.22 + - python >=3.14.0rc2,<3.15.0a0 + - python_abi 3.14.* *_cp314 + - ucrt >=10.0.20348.0 + - vc >=14.2,<15 + - vc14_runtime >=14.29.30139 + constrains: + - cutensor >=2.3.1.0,<3.0a0 + - libcublas >=13,<14.0a0 + - cuda-version >=13,<14.0a0 + - scipy 
>=1.7,<1.17 + - cuda-nvrtc >=13,<14.0a0 + - cupy >=13.6.0,<13.7.0a0 + - libcurand >=10,<11.0a0 + - libcusparse >=12,<13.0a0 + - optuna ~=3.0 + - libcufft >=12,<13.0a0 + - __cuda >=13.0 + - libcusolver >=12,<13.0a0 + license: MIT + license_family: MIT + purls: + - pkg:pypi/cupy?source=hash-mapping + size: 29836019 + timestamp: 1757732178441 - conda: https://conda.anaconda.org/conda-forge/noarch/distlib-0.4.0-pyhd8ed1ab_0.conda sha256: 6d977f0b2fc24fee21a9554389ab83070db341af6d6f09285360b2e09ef8b26e md5: 003b8ba0a94e2f1e117d0bd46aebc901 @@ -13076,6 +14224,38 @@ packages: - pkg:pypi/execnet?source=hash-mapping size: 39499 timestamp: 1762974150770 +- conda: https://conda.anaconda.org/conda-forge/linux-64/fastrlock-0.8.3-py310h25320af_2.conda + sha256: 95eea806cb216036e4d0446fcff724c334c8899d02be2368a430ec5361ed29a4 + md5: 8dbd4fc06661c78fdc2daedf23824bfe + depends: + - python + - libgcc >=14 + - libstdcxx >=14 + - libgcc >=14 + - __glibc >=2.17,<3.0.a0 + - python_abi 3.10.* *_cp310 + license: MIT + license_family: MIT + purls: + - pkg:pypi/fastrlock?source=hash-mapping + size: 40665 + timestamp: 1756729198132 +- conda: https://conda.anaconda.org/conda-forge/linux-64/fastrlock-0.8.3-py311hc665b79_2.conda + sha256: 5299a4aeaf04fbc2f8f46e707ae16c1f4e594905e6df18457f18ba002a886110 + md5: ac18884886449ce97b76f8906462ff27 + depends: + - python + - libgcc >=14 + - libstdcxx >=14 + - libgcc >=14 + - __glibc >=2.17,<3.0.a0 + - python_abi 3.11.* *_cp311 + license: MIT + license_family: MIT + purls: + - pkg:pypi/fastrlock?source=hash-mapping + size: 41082 + timestamp: 1756729161435 - conda: https://conda.anaconda.org/conda-forge/linux-64/fastrlock-0.8.3-py312h8285ef7_2.conda sha256: b0e5b19d2148816914920fe5c3148d5b5bf7c46bc34a2cac5124883bd1b83d05 md5: 94fb93ec1751a3614d3a6f184832fd87 @@ -13090,24 +14270,154 @@ packages: license_family: MIT purls: - pkg:pypi/fastrlock?source=hash-mapping - size: 41672 - timestamp: 1756729175159 -- conda: 
https://conda.anaconda.org/conda-forge/linux-aarch64/fastrlock-0.8.3-py312hf55c4e8_2.conda - sha256: 5c5cfaf55a0165c45ee63beb92abf4aa2ae1ef28d8064f7c884749ec4bd00a22 - md5: 7ec9d6889be02f9bf66cfb9dd3112c8b + size: 41672 + timestamp: 1756729175159 +- conda: https://conda.anaconda.org/conda-forge/linux-64/fastrlock-0.8.3-py313h5d5ffb9_2.conda + sha256: 30498ed45133f457fd9ed14d5fac6512347f05d11fe1ed89842c7dfdb516f78f + md5: 9bcbd351966dc56a24fc0c368da5ad99 + depends: + - python + - __glibc >=2.17,<3.0.a0 + - libstdcxx >=14 + - libgcc >=14 + - python_abi 3.13.* *_cp313 + license: MIT + license_family: MIT + purls: + - pkg:pypi/fastrlock?source=hash-mapping + size: 41201 + timestamp: 1756729160955 +- conda: https://conda.anaconda.org/conda-forge/linux-64/fastrlock-0.8.3-py314h8c728da_2.conda + sha256: 1ea4fd24e37d27692b04b57fa51f14fd2217ea251087ce1c0701af234c1452d9 + md5: f1f936bb0ff435f3190ca1c17fa327e7 + depends: + - python + - libstdcxx >=14 + - libgcc >=14 + - __glibc >=2.17,<3.0.a0 + - libgcc >=14 + - python_abi 3.14.* *_cp314 + license: MIT + license_family: MIT + purls: + - pkg:pypi/fastrlock?source=hash-mapping + size: 41496 + timestamp: 1756729160091 +- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/fastrlock-0.8.3-py310heccc163_2.conda + sha256: aad519c924568a72bd4dcab74c793d4b09e339dce6bd3c5c027bd498eef7ccc4 + md5: caafa6b88cc2cff22a72280c8f083a31 + depends: + - python + - python 3.10.* *_cpython + - libgcc >=14 + - libstdcxx >=14 + - libgcc >=14 + - python_abi 3.10.* *_cp310 + license: MIT + license_family: MIT + purls: + - pkg:pypi/fastrlock?source=hash-mapping + size: 44918 + timestamp: 1756729193056 +- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/fastrlock-0.8.3-py311h8e4e6a5_2.conda + sha256: f186881661b83be9fb8d47c71340997b929fa5e0673ead9070082b8e390d6a73 + md5: 9251413f2e3ea6eb586b21423f849536 + depends: + - python + - libstdcxx >=14 + - libgcc >=14 + - python 3.11.* *_cpython + - libgcc >=14 + - python_abi 3.11.* *_cp311 + 
license: MIT + license_family: MIT + purls: + - pkg:pypi/fastrlock?source=hash-mapping + size: 45171 + timestamp: 1756729186510 +- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/fastrlock-0.8.3-py312hf55c4e8_2.conda + sha256: 5c5cfaf55a0165c45ee63beb92abf4aa2ae1ef28d8064f7c884749ec4bd00a22 + md5: 7ec9d6889be02f9bf66cfb9dd3112c8b + depends: + - python + - libgcc >=14 + - python 3.12.* *_cpython + - libstdcxx >=14 + - libgcc >=14 + - python_abi 3.12.* *_cp312 + license: MIT + license_family: MIT + purls: + - pkg:pypi/fastrlock?source=hash-mapping + size: 45432 + timestamp: 1756729166837 +- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/fastrlock-0.8.3-py313h59403f9_2.conda + sha256: e28da81b99c8970e19e4f3ef7758a7a695263e0d3ff7d9fbdf232690bef6519d + md5: 59043167df894cee605e4cf470302bda + depends: + - python + - python 3.13.* *_cp313 + - libgcc >=14 + - libstdcxx >=14 + - libgcc >=14 + - python_abi 3.13.* *_cp313 + license: MIT + license_family: MIT + purls: + - pkg:pypi/fastrlock?source=hash-mapping + size: 44449 + timestamp: 1756729165562 +- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/fastrlock-0.8.3-py314h3642cf7_2.conda + sha256: 512662de1d9d4231feaf6f818014317dad4f2a60d8ef0d859f72116a69062583 + md5: 685382bf317bd1d7f174e763c91d98a1 + depends: + - python + - python 3.14.* *_cp314 + - libstdcxx >=14 + - libgcc >=14 + - python_abi 3.14.* *_cp314 + license: MIT + license_family: MIT + purls: + - pkg:pypi/fastrlock?source=hash-mapping + size: 44705 + timestamp: 1756729193250 +- conda: https://conda.anaconda.org/conda-forge/win-64/fastrlock-0.8.3-py310h699e580_2.conda + sha256: 57deb00090c09edc841a43499f23396bb35d51aa5aaa6886d4ae1d0ff969b3dd + md5: 3207527dea58c115e7e97856709465db + depends: + - python + - vc >=14.3,<15 + - vc14_runtime >=14.44.35208 + - ucrt >=10.0.20348.0 + - vc >=14.3,<15 + - vc14_runtime >=14.44.35208 + - ucrt >=10.0.20348.0 + - python_abi 3.10.* *_cp310 + license: MIT + license_family: MIT + purls: + - 
pkg:pypi/fastrlock?source=hash-mapping + size: 36960 + timestamp: 1756729187087 +- conda: https://conda.anaconda.org/conda-forge/win-64/fastrlock-0.8.3-py311h5dfdfe8_2.conda + sha256: dd0a2552a36565545aedc65739ffc11574167c263340b32ff6314ce998168e08 + md5: 4fb7d2650ac4a3967e8e57d68e801db3 depends: - python - - libgcc >=14 - - python 3.12.* *_cpython - - libstdcxx >=14 - - libgcc >=14 - - python_abi 3.12.* *_cp312 + - vc >=14.3,<15 + - vc14_runtime >=14.44.35208 + - ucrt >=10.0.20348.0 + - vc >=14.3,<15 + - vc14_runtime >=14.44.35208 + - ucrt >=10.0.20348.0 + - python_abi 3.11.* *_cp311 license: MIT license_family: MIT purls: - pkg:pypi/fastrlock?source=hash-mapping - size: 45432 - timestamp: 1756729166837 + size: 37145 + timestamp: 1756729198099 - conda: https://conda.anaconda.org/conda-forge/win-64/fastrlock-0.8.3-py312ha1a9051_2.conda sha256: bbcc17eb4acf110032fe8092d4e54a6d262b72d504597103e72a958fb248579f md5: b6ff9e7af087d51a24353f16d1a3ed06 @@ -13126,6 +14436,42 @@ packages: - pkg:pypi/fastrlock?source=hash-mapping size: 37498 timestamp: 1756729168844 +- conda: https://conda.anaconda.org/conda-forge/win-64/fastrlock-0.8.3-py313h927ade5_2.conda + sha256: 2a23cce182f04de8e522d47a9e41f9f9a85eb25a2d67d52356ce1d6522bbbe79 + md5: 1fc8d6295c7ebff653118d2ba22cf226 + depends: + - python + - vc >=14.3,<15 + - vc14_runtime >=14.44.35208 + - ucrt >=10.0.20348.0 + - vc >=14.3,<15 + - vc14_runtime >=14.44.35208 + - ucrt >=10.0.20348.0 + - python_abi 3.13.* *_cp313 + license: MIT + license_family: MIT + purls: + - pkg:pypi/fastrlock?source=hash-mapping + size: 36385 + timestamp: 1756729186432 +- conda: https://conda.anaconda.org/conda-forge/win-64/fastrlock-0.8.3-py314h8b4fd5f_2.conda + sha256: 1d341146022014b0f0d4b33630ba1757246dd6b5ecefdada0d49e6db774a18a9 + md5: ac8c973aff08071df98933eccd5a7fa5 + depends: + - python + - vc >=14.3,<15 + - vc14_runtime >=14.44.35208 + - ucrt >=10.0.20348.0 + - vc >=14.3,<15 + - vc14_runtime >=14.44.35208 + - ucrt >=10.0.20348.0 + - 
python_abi 3.14.* *_cp314 + license: MIT + license_family: MIT + purls: + - pkg:pypi/fastrlock?source=hash-mapping + size: 36661 + timestamp: 1756729190828 - pypi: https://files.pythonhosted.org/packages/5c/40/69ca9ea803303e14301fff9d4931b6d080b9603e134df0419c55e9764df4/filecheck-1.0.3-py3-none-any.whl name: filecheck version: 1.0.3 @@ -13975,6 +15321,32 @@ packages: purls: [] size: 68079 timestamp: 1765819124349 +- conda: https://conda.anaconda.org/conda-forge/linux-64/libcublas-12.0.1.189-hd3aeb46_3.conda + sha256: a3c89c1b6018d16c22fc583887f728b3065a1f50a82d8a40a793a973aac606c5 + md5: 626745031f369cf70670283436cc6742 + depends: + - __glibc >=2.17,<3.0.a0 + - cuda-nvrtc + - cuda-version >=12.0,<12.1.0a0 + - libgcc-ng >=12 + - libstdcxx-ng >=12 + license: LicenseRef-NVIDIA-End-User-License-Agreement + purls: [] + size: 233989011 + timestamp: 1701931830910 +- conda: https://conda.anaconda.org/conda-forge/linux-64/libcublas-12.2.5.6-hd3aeb46_0.conda + sha256: 7af6a21b53736b5a53c1044808ffd781a6ee1f0a66b618bf3c834a71bdb706aa + md5: c216c28589360a5acee904b480911c14 + depends: + - __glibc >=2.17,<3.0.a0 + - cuda-nvrtc + - cuda-version >=12.2,<12.3.0a0 + - libgcc-ng >=12 + - libstdcxx-ng >=12 + license: LicenseRef-NVIDIA-End-User-License-Agreement + purls: [] + size: 258710189 + timestamp: 1702976169266 - conda: https://conda.anaconda.org/conda-forge/linux-64/libcublas-12.8.4.1-h9ab20c4_1.conda sha256: 3d3f7344db000feced2f9154cf0b3f3d245a1d317a1981e43b8b15f7baaaf6f1 md5: 3ba4fd8bef181c020173d29ac67cae68 @@ -14027,6 +15399,47 @@ packages: purls: [] size: 393920044 timestamp: 1764897195935 +- conda: https://conda.anaconda.org/conda-forge/linux-64/libcublas-13.2.1.1-h676940d_0.conda + sha256: c38f5041d0a99d94cee17f26029e4c02f3247bfb39cbe12d8f2c3dcf5f656eaa + md5: f904a04f3e173de15d3c31bd3dfc21c7 + depends: + - __glibc >=2.28,<3.0.a0 + - cuda-nvrtc + - cuda-version >=13.1,<13.2.0a0 + - libgcc >=14 + - libstdcxx >=14 + license: LicenseRef-NVIDIA-End-User-License-Agreement + 
purls: [] + size: 376501341 + timestamp: 1768276465220 +- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcublas-12.0.1.189-hac28a21_3.conda + sha256: befa2389febbff1541fa2bf542c98b3b32f2c569c53fca95c439796224c0dae3 + md5: b16ccbf9d633bdce9cf5b3363a468c41 + depends: + - cuda-nvrtc + - cuda-version >=12.0,<12.1.0a0 + - libgcc-ng >=12 + - libstdcxx-ng >=12 + constrains: + - arm-variant * sbsa + license: LicenseRef-NVIDIA-End-User-License-Agreement + purls: [] + size: 233886126 + timestamp: 1701931743428 +- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcublas-12.2.5.6-hac28a21_0.conda + sha256: 52ae33e756f22f7a82038a409d58ce52fa8a9c45896417662f101d83c36139a6 + md5: b8f9003432a6b58e2bbd174910f9df84 + depends: + - cuda-nvrtc + - cuda-version >=12.2,<12.3.0a0 + - libgcc-ng >=12 + - libstdcxx-ng >=12 + constrains: + - arm-variant * sbsa + license: LicenseRef-NVIDIA-End-User-License-Agreement + purls: [] + size: 259896450 + timestamp: 1702976080471 - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcublas-12.8.4.1-hd55a8e4_1.conda sha256: 7d10a5b2750faccc39dd66d28ca5b74cb618d3445ed8c933d51736dba2b7bcc4 md5: 8d6b39fb6f62e3e1b278774c00b115ac @@ -14088,6 +15501,48 @@ packages: purls: [] size: 516220026 timestamp: 1764897082131 +- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcublas-13.2.1.1-he38c790_0.conda + sha256: ba0e73bc783f6eb34770dbd2296c437b1b4c8ea888ac76beb2fe30643eb62883 + md5: 295ab160a641ff6f42b9ba50669f7e1a + depends: + - __glibc >=2.28,<3.0.a0 + - arm-variant * sbsa + - cuda-nvrtc + - cuda-version >=13.1,<13.2.0a0 + - libgcc >=14 + - libstdcxx >=14 + constrains: + - arm-variant * sbsa + license: LicenseRef-NVIDIA-End-User-License-Agreement + purls: [] + size: 481495336 + timestamp: 1768276502914 +- conda: https://conda.anaconda.org/conda-forge/win-64/libcublas-12.0.1.189-h63175ca_3.conda + sha256: d39c6d2e01dad4e9b06707f7343150e423042fd2c65cc5772333ab82d4132bb1 + md5: c69ce5f6ea90ad064df6960636acaf15 
+ depends: + - cuda-nvrtc + - cuda-version >=12.0,<12.1.0a0 + - ucrt >=10.0.20348.0 + - vc >=14.2,<15 + - vc14_runtime >=14.29.30139 + license: LicenseRef-NVIDIA-End-User-License-Agreement + purls: [] + size: 247048799 + timestamp: 1701932385460 +- conda: https://conda.anaconda.org/conda-forge/win-64/libcublas-12.2.5.6-h63175ca_0.conda + sha256: 2e0abbb96a9aefd0e6c284df7ca6223e48ee55304cb0fea72cd7db59489eac8e + md5: d695bf389c6314948a130aa6334c58c2 + depends: + - cuda-nvrtc + - cuda-version >=12.2,<12.3.0a0 + - ucrt >=10.0.20348.0 + - vc >=14.2,<15 + - vc14_runtime >=14.29.30139 + license: LicenseRef-NVIDIA-End-User-License-Agreement + purls: [] + size: 288422075 + timestamp: 1702976743208 - conda: https://conda.anaconda.org/conda-forge/win-64/libcublas-12.8.4.1-he0c23c2_1.conda sha256: 7a4c53bbcf77c37033777acd1ff60b4664615ae67fff245718d43db422feac59 md5: 626453d0b7f7b9f3c3a92e4398314714 @@ -14140,6 +15595,19 @@ packages: purls: [] size: 388564116 timestamp: 1764897124611 +- conda: https://conda.anaconda.org/conda-forge/win-64/libcublas-13.2.1.1-hac47afa_0.conda + sha256: 0e7180aed3a41eff2c5a3df079abb3ea86612eea18f833febe858cebac0a3e96 + md5: d56da2a29117df5d879594b5e58fc3a5 + depends: + - cuda-nvrtc + - cuda-version >=13.1,<13.2.0a0 + - ucrt >=10.0.20348.0 + - vc >=14.3,<15 + - vc14_runtime >=14.44.35208 + license: LicenseRef-NVIDIA-End-User-License-Agreement + purls: [] + size: 371899218 + timestamp: 1768276556597 - conda: https://conda.anaconda.org/conda-forge/linux-64/libcudnn-9.10.2.21-hf7e9902_0.conda sha256: dc6b89e874867b2cdf08224059bd1543cbb72ed646da177c1454596469c9a4bb md5: a178a1f3642521f104ecceeefa138d01 @@ -14293,6 +15761,30 @@ packages: purls: [] size: 61127411 timestamp: 1761105599209 +- conda: https://conda.anaconda.org/conda-forge/linux-64/libcufft-11.0.0.21-hd3aeb46_2.conda + sha256: ed62279e20761c033525a550dc753327103f53aa37bf441c40db2f37950b7b50 + md5: 5dbf17a732e01fed414a22bdf89aaaad + depends: + - __glibc >=2.17,<3.0.a0 + - cuda-version 
>=12.0,<12.1.0a0 + - libgcc-ng >=12 + - libstdcxx-ng >=12 + license: LicenseRef-NVIDIA-End-User-License-Agreement + purls: [] + size: 44795345 + timestamp: 1701904310549 +- conda: https://conda.anaconda.org/conda-forge/linux-64/libcufft-11.0.8.103-hd3aeb46_0.conda + sha256: af72a643d81c2401be7e5ccb8f2eb033e8254531ccd521101e9af8609817b5bf + md5: e6ca97f313721442e41e725ce7b3b75a + depends: + - __glibc >=2.17,<3.0.a0 + - cuda-version >=12.2,<12.3.0a0 + - libgcc-ng >=12 + - libstdcxx-ng >=12 + license: LicenseRef-NVIDIA-End-User-License-Agreement + purls: [] + size: 62856006 + timestamp: 1702938780985 - conda: https://conda.anaconda.org/conda-forge/linux-64/libcufft-11.3.3.83-h5888daf_1.conda sha256: 1a38727a9666b7020ad844fd5074693b2c378d0161f58401d9f8488bdeb920a1 md5: d0d12b6842be47267e3214e7ab2b1b02 @@ -14341,6 +15833,44 @@ packages: purls: [] size: 192378644 timestamp: 1764880073980 +- conda: https://conda.anaconda.org/conda-forge/linux-64/libcufft-12.1.0.78-hecca717_0.conda + sha256: 4f8951e942210116ee6e1548c25774009afddc59e494b5eac0e5ca539196d1b5 + md5: 58a7aa38206ea03a9eb6ccbcc012901e + depends: + - __glibc >=2.17,<3.0.a0 + - cuda-version >=13.1,<13.2.0a0 + - libgcc >=14 + - libstdcxx >=14 + license: LicenseRef-NVIDIA-End-User-License-Agreement + purls: [] + size: 192379210 + timestamp: 1768273636415 +- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcufft-11.0.0.21-hac28a21_2.conda + sha256: c9647dedc5da9a60ca1d88e8f82a42e7b1837f3d2bccd294bb46b218795d498e + md5: cbd87df968670b2d4d752b22657591fe + depends: + - cuda-version >=12.0,<12.1.0a0 + - libgcc-ng >=12 + - libstdcxx-ng >=12 + constrains: + - arm-variant * sbsa + license: LicenseRef-NVIDIA-End-User-License-Agreement + purls: [] + size: 44814329 + timestamp: 1701904278310 +- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcufft-11.0.8.103-hac28a21_0.conda + sha256: ba19464e72391d1f7b45b862fa93c8e87cb0821148ae36b91cadcb3833f35b57 + md5: b7a1c44db1312dd191ff21ecd82076c5 + depends: 
+ - cuda-version >=12.2,<12.3.0a0 + - libgcc-ng >=12 + - libstdcxx-ng >=12 + constrains: + - arm-variant * sbsa + license: LicenseRef-NVIDIA-End-User-License-Agreement + purls: [] + size: 62915617 + timestamp: 1702938781901 - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcufft-11.3.3.83-h3ae8b8a_1.conda sha256: d5cb9df683d7ea22184714b5c0569a5decf0a332d81c241b60ff68599a5ccc06 md5: 093577dd6d3b9be7d3f7a6ecb01dcf01 @@ -14394,6 +15924,44 @@ packages: purls: [] size: 192843651 timestamp: 1764880098927 +- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcufft-12.1.0.78-h8f3c8d4_0.conda + sha256: 82f4715e0c6aa59080531d816bb847e3096635625645fdd8046fa6c1d248ef2e + md5: 1bd80ebee861a876bdf7860d559f4866 + depends: + - arm-variant * sbsa + - cuda-version >=13.1,<13.2.0a0 + - libgcc >=14 + - libstdcxx >=14 + constrains: + - arm-variant * sbsa + license: LicenseRef-NVIDIA-End-User-License-Agreement + purls: [] + size: 192700443 + timestamp: 1768273669731 +- conda: https://conda.anaconda.org/conda-forge/win-64/libcufft-11.0.0.21-h63175ca_2.conda + sha256: b06554c3106338de6dd85b6b697dfd27d823067adcf0e7236110fa0ea49cc6b9 + md5: 403b53342b3588579e16772a18722739 + depends: + - cuda-version >=12.0,<12.1.0a0 + - ucrt >=10.0.20348.0 + - vc >=14.2,<15 + - vc14_runtime >=14.29.30139 + license: LicenseRef-NVIDIA-End-User-License-Agreement + purls: [] + size: 44262157 + timestamp: 1701904877029 +- conda: https://conda.anaconda.org/conda-forge/win-64/libcufft-11.0.8.103-h63175ca_0.conda + sha256: af4f043218b7584fe9c1d4f0cf40edfdfd01637fedbfaf100273a8ba131dafc0 + md5: 3e0d3168dcaea961f6ffa665b0c27c40 + depends: + - cuda-version >=12.2,<12.3.0a0 + - ucrt >=10.0.20348.0 + - vc >=14.2,<15 + - vc14_runtime >=14.29.30139 + license: LicenseRef-NVIDIA-End-User-License-Agreement + purls: [] + size: 63117431 + timestamp: 1702939178613 - conda: https://conda.anaconda.org/conda-forge/win-64/libcufft-11.3.3.83-he0c23c2_1.conda sha256: 
083ba1d13f5512dae13fd7e3785336d578bc66f01c88917bbf1f53923339a5e4 md5: 6e4c0fa04966e643cbe847321bdeee54 @@ -14442,6 +16010,18 @@ packages: purls: [] size: 192328577 timestamp: 1764880153393 +- conda: https://conda.anaconda.org/conda-forge/win-64/libcufft-12.1.0.78-hac47afa_0.conda + sha256: 7bf34c7298350325e0f23b2483f53e015fff446c03dd8d75c500cc5dbb5cee62 + md5: a8ce534392102f2b3109dcee4702468a + depends: + - cuda-version >=13.1,<13.2.0a0 + - ucrt >=10.0.20348.0 + - vc >=14.3,<15 + - vc14_runtime >=14.44.35208 + license: LicenseRef-NVIDIA-End-User-License-Agreement + purls: [] + size: 192328586 + timestamp: 1768273720164 - conda: https://conda.anaconda.org/conda-forge/linux-64/libcufile-1.13.1.3-h628e99a_1.conda sha256: 213f5df6ed25d19c4390666708a32ea457b1dcda64aca121f861b94671e2ed63 md5: 9a97a35e7e63910013d638c389fa3514 @@ -14666,6 +16246,18 @@ packages: purls: [] size: 43737577 timestamp: 1764879942081 +- conda: https://conda.anaconda.org/conda-forge/linux-64/libcurand-10.4.1.81-h676940d_0.conda + sha256: bba28a650b35f221eaad9537df4a6f1d86b2fa617e52f56194ad2a959f84736c + md5: 5926fbc6df184a110130a310608cb5e8 + depends: + - __glibc >=2.28,<3.0.a0 + - cuda-version >=13.1,<13.2.0a0 + - libgcc >=14 + - libstdcxx >=14 + license: LicenseRef-NVIDIA-End-User-License-Agreement + purls: [] + size: 43775293 + timestamp: 1768273736749 - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcurand-10.3.1.50-hac28a21_1.conda sha256: 6c749658411c13e639977cce1da74dfacb693c4348fadffe09780c04fa4809b5 md5: 72936062b7c649fc03b0a52e2ba54275 @@ -14749,6 +16341,21 @@ packages: purls: [] size: 44154661 timestamp: 1764879984766 +- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcurand-10.4.1.81-he38c790_0.conda + sha256: ef4300b83ea202e459e917a4f159478074fdc10c51f3061374361e9b89b6ba04 + md5: b02eb8fbb430bd99f7a870382a91c24d + depends: + - __glibc >=2.28,<3.0.a0 + - arm-variant * sbsa + - cuda-version >=13.1,<13.2.0a0 + - libgcc >=14 + - libstdcxx >=14 + constrains: 
+ - arm-variant * sbsa + license: LicenseRef-NVIDIA-End-User-License-Agreement + purls: [] + size: 44099763 + timestamp: 1768273767993 - conda: https://conda.anaconda.org/conda-forge/win-64/libcurand-10.3.1.50-h63175ca_1.conda sha256: 3030074dcf96f4e397e4ba778d802900249a61388876cde06dc97257b2a2bc16 md5: af9c9c9ae729b884dcc5dc48b3bb205a @@ -14821,6 +16428,47 @@ packages: purls: [] size: 46140551 timestamp: 1764880079531 +- conda: https://conda.anaconda.org/conda-forge/win-64/libcurand-10.4.1.81-hac47afa_0.conda + sha256: 807515b768161a684b097a6959fabd013fad813ca595b3fd25e9b53b0c796487 + md5: 753cb0f8717a35b53215a18c009953b2 + depends: + - cuda-version >=13.1,<13.2.0a0 + - ucrt >=10.0.20348.0 + - vc >=14.3,<15 + - vc14_runtime >=14.44.35208 + license: LicenseRef-NVIDIA-End-User-License-Agreement + purls: [] + size: 46201230 + timestamp: 1768273862521 +- conda: https://conda.anaconda.org/conda-forge/linux-64/libcusolver-11.4.2.57-hd3aeb46_2.conda + sha256: 65e4acdce5c358c57f0d263c87c39346695d0954855868bff60cb066043c7632 + md5: a684e4ff8d2a6a100249377aa9d37a5c + depends: + - __glibc >=2.17,<3.0.a0 + - cuda-version >=12.0,<12.1.0a0 + - libcublas >=12.0.1.189,<12.1.0a0 + - libgcc-ng >=12 + - libnvjitlink >=12.0.76,<13.0.0a0 + - libstdcxx-ng >=12 + license: LicenseRef-NVIDIA-End-User-License-Agreement + purls: [] + size: 58748256 + timestamp: 1701944344928 +- conda: https://conda.anaconda.org/conda-forge/linux-64/libcusolver-11.5.2.141-hd3aeb46_0.conda + sha256: a83322a1ede77e652acc3330d68f0428e28b198c3f7517bd3f1aeaf577232363 + md5: 4ee6abbff18849a3036a1678771e4800 + depends: + - __glibc >=2.17,<3.0.a0 + - cuda-version >=12.2,<12.3.0a0 + - libcublas >=12.2.5.6,<12.3.0a0 + - libcusparse >=12.1.2.141,<12.2.0a0 + - libgcc-ng >=12 + - libnvjitlink >=12.2.140,<13.0.0a0 + - libstdcxx-ng >=12 + license: LicenseRef-NVIDIA-End-User-License-Agreement + purls: [] + size: 79957553 + timestamp: 1703004799401 - conda: 
https://conda.anaconda.org/conda-forge/linux-64/libcusolver-11.7.3.90-h9ab20c4_1.conda sha256: 868ba1b0b0ae15f7621ee960a459a74b9a17b69ba629c510a11bb37480e7b6df md5: 2d58a7eb9150525ea89195cf1bcfbc4c @@ -14879,8 +16527,54 @@ packages: - libstdcxx >=14 license: LicenseRef-NVIDIA-End-User-License-Agreement purls: [] - size: 161086488 - timestamp: 1764943396933 + size: 161086488 + timestamp: 1764943396933 +- conda: https://conda.anaconda.org/conda-forge/linux-64/libcusolver-12.0.9.81-h676940d_0.conda + sha256: d6181d5fe7fbc36304577fbb50add02382ae9e7c6b1b598d310945bd12272f0b + md5: 17a342e69a0821ecf76a0e79a2044288 + depends: + - __glibc >=2.28,<3.0.a0 + - cuda-version >=13.1,<13.2.0a0 + - libcublas >=13.2.1.1,<13.3.0a0 + - libcusparse >=12.7.3.1,<12.8.0a0 + - libgcc >=14 + - libnvjitlink >=13.1.115,<14.0a0 + - libstdcxx >=14 + license: LicenseRef-NVIDIA-End-User-License-Agreement + purls: [] + size: 161188241 + timestamp: 1768286542683 +- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcusolver-11.4.2.57-hac28a21_2.conda + sha256: 83e01fddb31617623fc7475aa84db9efc0498cc76aca88e42e86f71442872f6c + md5: 7fbef3231f572b4b7c3bfe8efd6fcb5c + depends: + - cuda-version >=12.0,<12.1.0a0 + - libcublas >=12.0.1.189,<12.1.0a0 + - libgcc-ng >=12 + - libnvjitlink >=12.0.76,<13.0.0a0 + - libstdcxx-ng >=12 + constrains: + - arm-variant * sbsa + license: LicenseRef-NVIDIA-End-User-License-Agreement + purls: [] + size: 58531702 + timestamp: 1701944296106 +- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcusolver-11.5.2.141-hac28a21_0.conda + sha256: 8b84ec1fcee407676bc5dee930747ee6fd4b887d8a3f9ad69d98705260c7ae2d + md5: 4b628857805683900422fea3a166cd6f + depends: + - cuda-version >=12.2,<12.3.0a0 + - libcublas >=12.2.5.6,<12.3.0a0 + - libcusparse >=12.1.2.141,<12.2.0a0 + - libgcc-ng >=12 + - libnvjitlink >=12.2.140,<13.0.0a0 + - libstdcxx-ng >=12 + constrains: + - arm-variant * sbsa + license: LicenseRef-NVIDIA-End-User-License-Agreement + purls: [] + size: 
79719976 + timestamp: 1703004749317 - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcusolver-11.7.3.90-hd55a8e4_1.conda sha256: 5016ad770146b3eb3739ee4213f82d3afed125626dbb77f0ee4b421cb9ab6d63 md5: 7b044a3b61ea805e90e91f750c0e70dd @@ -14950,6 +16644,53 @@ packages: purls: [] size: 177727995 timestamp: 1764943428002 +- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcusolver-12.0.9.81-he38c790_0.conda + sha256: ce671884833cfed45128a7be1d6102242c394524a654b4ba3921ec49a856a6e7 + md5: c1aa3d742409b794d096fcaf6aaf3c1a + depends: + - __glibc >=2.28,<3.0.a0 + - arm-variant * sbsa + - cuda-version >=13.1,<13.2.0a0 + - libcublas >=13.2.1.1,<13.3.0a0 + - libcusparse >=12.7.3.1,<12.8.0a0 + - libgcc >=14 + - libnvjitlink >=13.1.115,<14.0a0 + - libstdcxx >=14 + constrains: + - arm-variant * sbsa + license: LicenseRef-NVIDIA-End-User-License-Agreement + purls: [] + size: 177825017 + timestamp: 1768286571769 +- conda: https://conda.anaconda.org/conda-forge/win-64/libcusolver-11.4.2.57-h63175ca_2.conda + sha256: 1486f5ced304b13ec1e8fb2af3e4134aeb8c1bc98d5c13c864c48c2f9e42cfa6 + md5: 11f11b1971bd9a2e39eade3206c6e63a + depends: + - cuda-version >=12.0,<12.1.0a0 + - libcublas >=12.0.1.189,<12.1.0a0 + - libnvjitlink >=12.0.76,<13.0.0a0 + - ucrt >=10.0.20348.0 + - vc >=14.2,<15 + - vc14_runtime >=14.29.30139 + license: LicenseRef-NVIDIA-End-User-License-Agreement + purls: [] + size: 56552114 + timestamp: 1701944947700 +- conda: https://conda.anaconda.org/conda-forge/win-64/libcusolver-11.5.2.141-h63175ca_0.conda + sha256: 7073d934f6d2dd607a4f987efa2c2d16b0e68340db7637b8e98ff4a1004d3ca3 + md5: 0ff5423da121b524f887e8f24c6a55df + depends: + - cuda-version >=12.2,<12.3.0a0 + - libcublas >=12.2.5.6,<12.3.0a0 + - libcusparse >=12.1.2.141,<12.2.0a0 + - libnvjitlink >=12.2.140,<13.0.0a0 + - ucrt >=10.0.20348.0 + - vc >=14.2,<15 + - vc14_runtime >=14.29.30139 + license: LicenseRef-NVIDIA-End-User-License-Agreement + purls: [] + size: 77792716 + timestamp: 
1703005402425 - conda: https://conda.anaconda.org/conda-forge/win-64/libcusolver-11.7.3.90-he0c23c2_1.conda sha256: c967651aab88a4a9a761be0b027b460c36850a9cd9df03890ce5bf833cef8c9f md5: 830a8909cfd5427f57b93ca6e468c1dd @@ -15010,6 +16751,47 @@ packages: purls: [] size: 156777611 timestamp: 1764943590003 +- conda: https://conda.anaconda.org/conda-forge/win-64/libcusolver-12.0.9.81-hac47afa_0.conda + sha256: 660e6b88a56b9b125e9f3e44975baf75249bee32505960b7906c1e8ba84bc9e3 + md5: 79dca8cbbf9f76e1b298f3538c6c3bb8 + depends: + - cuda-version >=13.1,<13.2.0a0 + - libcublas >=13.2.1.1,<13.3.0a0 + - libcusparse >=12.7.3.1,<12.8.0a0 + - libnvjitlink >=13.1.115,<14.0a0 + - ucrt >=10.0.20348.0 + - vc >=14.3,<15 + - vc14_runtime >=14.44.35208 + license: LicenseRef-NVIDIA-End-User-License-Agreement + purls: [] + size: 156887400 + timestamp: 1768286696520 +- conda: https://conda.anaconda.org/conda-forge/linux-64/libcusparse-12.0.0.76-hd3aeb46_2.conda + sha256: def44b0e57a59bc060bc69fb1c79c39cf281efe8980cd78840cb092ada5eda19 + md5: 91072eaa64ea11a9f804547806dbacf0 + depends: + - __glibc >=2.17,<3.0.a0 + - cuda-version >=12.0,<12.1.0a0 + - libgcc-ng >=12 + - libnvjitlink >=12.0.76,<13.0.0a0 + - libstdcxx-ng >=12 + license: LicenseRef-NVIDIA-End-User-License-Agreement + purls: [] + size: 98176542 + timestamp: 1701931152417 +- conda: https://conda.anaconda.org/conda-forge/linux-64/libcusparse-12.1.2.141-hd3aeb46_0.conda + sha256: 48ab25898ae3315a9dce7f5a5ad2c1d5bce84c78c757f54dce4a43c65d436af4 + md5: 3b4528c647c041ec53a883023ef4f054 + depends: + - __glibc >=2.17,<3.0.a0 + - cuda-version >=12.2,<12.3.0a0 + - libgcc-ng >=12 + - libnvjitlink >=12.2.140,<13.0.0a0 + - libstdcxx-ng >=12 + license: LicenseRef-NVIDIA-End-User-License-Agreement + purls: [] + size: 112121082 + timestamp: 1702970684025 - conda: https://conda.anaconda.org/conda-forge/linux-64/libcusparse-12.5.10.65-hecca717_2.conda sha256: 7b511549a22df408d36dadbeabdfd9c35b124d9d6f000b29ffcbe4b38b7faeb7 md5: 
890ebfaad48c887d3d82847ec9d6bc79 @@ -15062,6 +16844,47 @@ packages: purls: [] size: 144184696 timestamp: 1764886592758 +- conda: https://conda.anaconda.org/conda-forge/linux-64/libcusparse-12.7.3.1-hecca717_0.conda + sha256: 86b31339206cb44c2cddeea4684de748d39ecc89c45c884a92e653f0af2986c6 + md5: 915b747d67493ba94a0d9b79095cc06d + depends: + - __glibc >=2.17,<3.0.a0 + - cuda-version >=13.1,<13.2.0a0 + - libgcc >=14 + - libnvjitlink >=13.1.115,<14.0a0 + - libstdcxx >=14 + license: LicenseRef-NVIDIA-End-User-License-Agreement + purls: [] + size: 145513192 + timestamp: 1768280223267 +- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcusparse-12.0.0.76-hac28a21_2.conda + sha256: fa7b204b0b25ab4a61db98ef8c0d8ccc7d5fc158bcc89f95eedd4286af67ba9b + md5: 2d5bbfce1a53628178df9d711445cd60 + depends: + - cuda-version >=12.0,<12.1.0a0 + - libgcc-ng >=12 + - libnvjitlink >=12.0.76,<13.0.0a0 + - libstdcxx-ng >=12 + constrains: + - arm-variant * sbsa + license: LicenseRef-NVIDIA-End-User-License-Agreement + purls: [] + size: 98122549 + timestamp: 1701931113993 +- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcusparse-12.1.2.141-hac28a21_0.conda + sha256: 50b0c4c09aa576dce40ae62fe45253b244fd01c4024b0efbc34bec3532db8ded + md5: 4cfd7e21691a81d22e483b08f384b594 + depends: + - cuda-version >=12.2,<12.3.0a0 + - libgcc-ng >=12 + - libnvjitlink >=12.2.140,<13.0.0a0 + - libstdcxx-ng >=12 + constrains: + - arm-variant * sbsa + license: LicenseRef-NVIDIA-End-User-License-Agreement + purls: [] + size: 112124005 + timestamp: 1702970635167 - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcusparse-12.5.10.65-h8f3c8d4_2.conda sha256: 9dbee8f1bfa9a876d24b12a34d4a022f33e584669c59bf93368b79d0bf55cd2f md5: 1e0731f3e9f303e6106a8fdd359a272e @@ -15120,6 +16943,47 @@ packages: purls: [] size: 160004278 timestamp: 1764886666561 +- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcusparse-12.7.3.1-h8f3c8d4_0.conda + sha256: 
85f20536cc261bf285ca2d9730d2b27669d862a38fa70a54a236d574be913f7b + md5: 73816ec8be4d675a1933cd0dc382382a + depends: + - arm-variant * sbsa + - cuda-version >=13.1,<13.2.0a0 + - libgcc >=14 + - libnvjitlink >=13.1.115,<14.0a0 + - libstdcxx >=14 + constrains: + - arm-variant * sbsa + license: LicenseRef-NVIDIA-End-User-License-Agreement + purls: [] + size: 161564363 + timestamp: 1768280242337 +- conda: https://conda.anaconda.org/conda-forge/win-64/libcusparse-12.0.0.76-h63175ca_2.conda + sha256: 7ac8438172e0712ae6e2ebe790f4a9117b1764a6a30f29513b0b4c6a36ae9211 + md5: 18a3190fb1e98ce0765dca19a880997a + depends: + - cuda-version >=12.0,<12.1.0a0 + - libnvjitlink >=12.0.76,<13.0.0a0 + - ucrt >=10.0.20348.0 + - vc >=14.2,<15 + - vc14_runtime >=14.29.30139 + license: LicenseRef-NVIDIA-End-User-License-Agreement + purls: [] + size: 97602376 + timestamp: 1701931624725 +- conda: https://conda.anaconda.org/conda-forge/win-64/libcusparse-12.1.2.141-h63175ca_0.conda + sha256: d58adb5b76459c082c0c903ce798c9057b6c6e284b60117efc811b46b39abf96 + md5: c689031410d83ceefe2c2299040f9de6 + depends: + - cuda-version >=12.2,<12.3.0a0 + - libnvjitlink >=12.2.140,<13.0.0a0 + - ucrt >=10.0.20348.0 + - vc >=14.2,<15 + - vc14_runtime >=14.29.30139 + license: LicenseRef-NVIDIA-End-User-License-Agreement + purls: [] + size: 111548772 + timestamp: 1702971058166 - conda: https://conda.anaconda.org/conda-forge/win-64/libcusparse-12.5.10.65-hac47afa_2.conda sha256: fc911af27ab28af77d4b7203c6c9ebb15f4ddf27af8e8331d9a9983f4dd96483 md5: 4e84a8282a9c1802ec4f516090164228 @@ -15172,6 +17036,19 @@ packages: purls: [] size: 142426523 timestamp: 1764886657256 +- conda: https://conda.anaconda.org/conda-forge/win-64/libcusparse-12.7.3.1-hac47afa_0.conda + sha256: 1ac52f373db5c5e00c1978f0bc6b2c2c576c80fba8801086ccb142d46eff0a4e + md5: 36a861ab5d2c5fd0a63395bbd6bab7d2 + depends: + - cuda-version >=13.1,<13.2.0a0 + - libnvjitlink >=13.1.115,<14.0a0 + - ucrt >=10.0.20348.0 + - vc >=14.3,<15 + - vc14_runtime 
>=14.44.35208 + license: LicenseRef-NVIDIA-End-User-License-Agreement + purls: [] + size: 143956601 + timestamp: 1768280260283 - conda: https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.7.3-hecca717_0.conda sha256: 1e1b08f6211629cbc2efe7a5bca5953f8f6b3cae0eeb04ca4dacee1bd4e2db2f md5: 8b09ae86839581147ef2e5c5e229d164 @@ -15578,6 +17455,7 @@ packages: constrains: - xz 5.8.2.* license: 0BSD + purls: [] size: 113207 timestamp: 1768752626120 - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/liblzma-5.8.1-h86ecc28_2.conda @@ -16096,6 +17974,18 @@ packages: purls: [] size: 31218311 timestamp: 1757021832026 +- conda: https://conda.anaconda.org/conda-forge/linux-64/libnvjitlink-13.1.115-hecca717_0.conda + sha256: 1ce8ac2f6fb3aaab065599f74b1e1bc68affc0804a081da239ab2c727abdc1cb + md5: 6cd0aefa03c679824ee5047ed39b0a09 + depends: + - __glibc >=2.17,<3.0.a0 + - cuda-version >=13,<13.2.0a0 + - libgcc >=14 + - libstdcxx >=14 + license: LicenseRef-NVIDIA-End-User-License-Agreement + purls: [] + size: 31331227 + timestamp: 1768274146966 - conda: https://conda.anaconda.org/conda-forge/linux-64/libnvjitlink-13.1.80-hecca717_0.conda sha256: 1ccfcadcd096e225a4d3a10c7d35363fa3ef02e97b54efb6ef50c8849aec4804 md5: 12c045632ae898f40024b7a1d61fc100 @@ -16147,6 +18037,20 @@ packages: purls: [] size: 29710724 timestamp: 1757021907780 +- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libnvjitlink-13.1.115-h8f3c8d4_0.conda + sha256: 49ff65205602d2535586e646008ff0577a92bf6f16de9c4cc6a10473caf3d700 + md5: e211b0e0846d538f23296214de1d35a6 + depends: + - arm-variant * sbsa + - cuda-version >=13,<13.2.0a0 + - libgcc >=14 + - libstdcxx >=14 + constrains: + - arm-variant * sbsa + license: LicenseRef-NVIDIA-End-User-License-Agreement + purls: [] + size: 29775481 + timestamp: 1768274109937 - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libnvjitlink-13.1.80-h8f3c8d4_0.conda sha256: 3ffb88544e6407cad2b82a9e6b405a28ba6d56d600f8f58c3b6cda62d844f94e md5: 
d69b83167de6fd594dcf3b93ef82cf90 @@ -16197,6 +18101,18 @@ packages: purls: [] size: 27704690 timestamp: 1757021910611 +- conda: https://conda.anaconda.org/conda-forge/win-64/libnvjitlink-13.1.115-hac47afa_0.conda + sha256: 7a07c089f3d58552caad6151a0aaa6366231078f4dec4c6b4bd15aa06490daf6 + md5: 27d92a3cc46bebee72ad41931c8442f5 + depends: + - cuda-version >=13,<13.2.0a0 + - ucrt >=10.0.20348.0 + - vc >=14.3,<15 + - vc14_runtime >=14.44.35208 + license: LicenseRef-NVIDIA-End-User-License-Agreement + purls: [] + size: 28186019 + timestamp: 1768274186462 - conda: https://conda.anaconda.org/conda-forge/win-64/libnvjitlink-13.1.80-hac47afa_0.conda sha256: e83551c06b6594ad5bc3eeeed09ead80607b422dee660657262e77fa26648d51 md5: 792c82dd2a996b65970ec5789c43840f @@ -17776,22 +19692,6 @@ packages: - pylint>=2.6.0 ; extra == 'dev' - pyink ; extra == 'dev' requires_python: '>=3.9' -- pypi: https://files.pythonhosted.org/packages/79/2b/a826ba18d2179a56e144aef69e57fb2ab7c464ef0b2111940ee8a3a223a2/ml_dtypes-0.5.4-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl - name: ml-dtypes - version: 0.5.4 - sha256: 0d2ffd05a2575b1519dc928c0b93c06339eb67173ff53acb00724502cda231cf - requires_dist: - - numpy>=1.21 - - numpy>=1.21.2 ; python_full_version >= '3.10' - - numpy>=1.23.3 ; python_full_version >= '3.11' - - numpy>=1.26.0 ; python_full_version >= '3.12' - - numpy>=2.1.0 ; python_full_version >= '3.13' - - absl-py ; extra == 'dev' - - pytest ; extra == 'dev' - - pytest-xdist ; extra == 'dev' - - pylint>=2.6.0 ; extra == 'dev' - - pyink ; extra == 'dev' - requires_python: '>=3.9' - pypi: https://files.pythonhosted.org/packages/a9/80/19189ea605017473660e43762dc853d2797984b3c7bf30ce656099add30c/ml_dtypes-0.5.4-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl name: ml-dtypes version: 0.5.4 @@ -18462,7 +20362,7 @@ packages: timestamp: 1765466860567 - conda: . 
name: numba-cuda - version: 0.25.0 + version: 0.24.0 build: py310h04c9772_0 subdir: linux-64 variants: @@ -18484,7 +20384,7 @@ packages: license: BSD-2-Clause - conda: . name: numba-cuda - version: 0.25.0 + version: 0.24.0 build: py310h3ca6f64_0 subdir: linux-aarch64 variants: @@ -18506,11 +20406,10 @@ packages: license: BSD-2-Clause - conda: . name: numba-cuda - version: 0.25.0 - build: py310hf0cc224_0 + version: 0.24.0 + build: py310h5d23e43_0 subdir: win-64 variants: - cxx_compiler: vs2022 python: 3.10.* target_platform: win-64 depends: @@ -18522,21 +20421,19 @@ packages: - cuda-python >=12.9,<14 - cuda-pathfinder >=1.3.1,<2 - cuda-cudart - - vc >=14.3,<15 - - vc14_runtime >=14.44.35208 - - ucrt >=10.0.20348.0 + - vc >=14.1,<15 + - vc14_runtime >=14.16.27033 - python_abi 3.10.* *_cp310 - numpy >=1.21,<3 license: BSD-2-Clause - conda: . name: numba-cuda - version: 0.25.0 - build: py311h17f48b4_0 - subdir: win-64 + version: 0.24.0 + build: py311h2894be0_0 + subdir: linux-aarch64 variants: - cxx_compiler: vs2022 python: 3.11.* - target_platform: win-64 + target_platform: linux-aarch64 depends: - python - packaging @@ -18546,20 +20443,19 @@ packages: - cuda-python >=12.9,<14 - cuda-pathfinder >=1.3.1,<2 - cuda-cudart - - vc >=14.3,<15 - - vc14_runtime >=14.44.35208 - - ucrt >=10.0.20348.0 + - libstdcxx >=14 + - libgcc >=14 - python_abi 3.11.* *_cp311 - numpy >=1.23,<3 license: BSD-2-Clause - conda: . name: numba-cuda - version: 0.25.0 - build: py311h2894be0_0 - subdir: linux-aarch64 + version: 0.24.0 + build: py311hb9e802a_0 + subdir: win-64 variants: python: 3.11.* - target_platform: linux-aarch64 + target_platform: win-64 depends: - python - packaging @@ -18569,14 +20465,14 @@ packages: - cuda-python >=12.9,<14 - cuda-pathfinder >=1.3.1,<2 - cuda-cudart - - libstdcxx >=14 - - libgcc >=14 + - vc >=14.1,<15 + - vc14_runtime >=14.16.27033 - python_abi 3.11.* *_cp311 - numpy >=1.23,<3 license: BSD-2-Clause - conda: . 
name: numba-cuda - version: 0.25.0 + version: 0.24.0 build: py311he8c1319_0 subdir: linux-64 variants: @@ -18598,7 +20494,7 @@ packages: license: BSD-2-Clause - conda: . name: numba-cuda - version: 0.25.0 + version: 0.24.0 build: py312h3eebbd5_0 subdir: linux-64 variants: @@ -18620,13 +20516,12 @@ packages: license: BSD-2-Clause - conda: . name: numba-cuda - version: 0.25.0 - build: py312h61be6c2_0 - subdir: win-64 + version: 0.24.0 + build: py312h8e85db0_0 + subdir: linux-aarch64 variants: - cxx_compiler: vs2022 python: 3.12.* - target_platform: win-64 + target_platform: linux-aarch64 depends: - python - packaging @@ -18636,20 +20531,19 @@ packages: - cuda-python >=12.9,<14 - cuda-pathfinder >=1.3.1,<2 - cuda-cudart - - vc >=14.3,<15 - - vc14_runtime >=14.44.35208 - - ucrt >=10.0.20348.0 + - libstdcxx >=14 + - libgcc >=14 - python_abi 3.12.* *_cp312 - numpy >=1.23,<3 license: BSD-2-Clause - conda: . name: numba-cuda - version: 0.25.0 - build: py312h8e85db0_0 - subdir: linux-aarch64 + version: 0.24.0 + build: py312ha067a5a_0 + subdir: win-64 variants: python: 3.12.* - target_platform: linux-aarch64 + target_platform: win-64 depends: - python - packaging @@ -18659,14 +20553,14 @@ packages: - cuda-python >=12.9,<14 - cuda-pathfinder >=1.3.1,<2 - cuda-cudart - - libstdcxx >=14 - - libgcc >=14 + - vc >=14.1,<15 + - vc14_runtime >=14.16.27033 - python_abi 3.12.* *_cp312 - numpy >=1.23,<3 license: BSD-2-Clause - conda: . name: numba-cuda - version: 0.25.0 + version: 0.24.0 build: py313h66129c8_0 subdir: linux-aarch64 variants: @@ -18688,11 +20582,10 @@ packages: license: BSD-2-Clause - conda: . 
name: numba-cuda - version: 0.25.0 - build: py313h96b86a2_0 + version: 0.24.0 + build: py313he80dd91_0 subdir: win-64 variants: - cxx_compiler: vs2022 python: 3.13.* target_platform: win-64 depends: @@ -18704,15 +20597,14 @@ packages: - cuda-python >=12.9,<14 - cuda-pathfinder >=1.3.1,<2 - cuda-cudart - - vc >=14.3,<15 - - vc14_runtime >=14.44.35208 - - ucrt >=10.0.20348.0 + - vc >=14.1,<15 + - vc14_runtime >=14.16.27033 - python_abi 3.13.* *_cp313 - numpy >=1.23,<3 license: BSD-2-Clause - conda: . name: numba-cuda - version: 0.25.0 + version: 0.24.0 build: py313hf75ce08_0 subdir: linux-64 variants: @@ -18734,13 +20626,12 @@ packages: license: BSD-2-Clause - conda: . name: numba-cuda - version: 0.25.0 - build: py314h3be3d12_0 - subdir: win-64 + version: 0.24.0 + build: py314h59f3c06_0 + subdir: linux-64 variants: - cxx_compiler: vs2022 python: 3.14.* - target_platform: win-64 + target_platform: linux-64 depends: - python - packaging @@ -18750,20 +20641,19 @@ packages: - cuda-python >=12.9,<14 - cuda-pathfinder >=1.3.1,<2 - cuda-cudart - - vc >=14.3,<15 - - vc14_runtime >=14.44.35208 - - ucrt >=10.0.20348.0 + - libstdcxx >=15 + - libgcc >=15 - python_abi 3.14.* *_cp314 - numpy >=1.23,<3 license: BSD-2-Clause - conda: . name: numba-cuda - version: 0.25.0 - build: py314h59f3c06_0 - subdir: linux-64 + version: 0.24.0 + build: py314h625260f_0 + subdir: win-64 variants: python: 3.14.* - target_platform: linux-64 + target_platform: win-64 depends: - python - packaging @@ -18773,14 +20663,14 @@ packages: - cuda-python >=12.9,<14 - cuda-pathfinder >=1.3.1,<2 - cuda-cudart - - libstdcxx >=15 - - libgcc >=15 + - vc >=14.1,<15 + - vc14_runtime >=14.16.27033 - python_abi 3.14.* *_cp314 - numpy >=1.23,<3 license: BSD-2-Clause - conda: . 
name: numba-cuda - version: 0.25.0 + version: 0.24.0 build: py314ha479ada_0 subdir: linux-aarch64 variants: @@ -18800,21 +20690,6 @@ packages: - python_abi 3.14.* *_cp314 - numpy >=1.23,<3 license: BSD-2-Clause -- pypi: https://files.pythonhosted.org/packages/10/a7/cfbe475c35371cae1358e61f20c5f075badc18c4797ab4354140e1d283cf/numpy-2.4.1-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl - name: numpy - version: 2.4.1 - sha256: 52b5f61bdb323b566b528899cc7db2ba5d1015bda7ea811a8bcf3c89c331fa42 - requires_python: '>=3.11' -- pypi: https://files.pythonhosted.org/packages/7e/bb/c6513edcce5a831810e2dddc0d3452ce84d208af92405a0c2e58fd8e7881/numpy-2.4.1-cp314-cp314-win_amd64.whl - name: numpy - version: 2.4.1 - sha256: 7d5d7999df434a038d75a748275cd6c0094b0ecdb0837342b332a82defc4dc4d - requires_python: '>=3.11' -- pypi: https://files.pythonhosted.org/packages/c0/c4/2e7908915c0e32ca636b92e4e4a3bdec4cb1e7eb0f8aedf1ed3c68a0d8cd/numpy-2.4.1-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl - name: numpy - version: 2.4.1 - sha256: 5d558123217a83b2d1ba316b986e9248a1ed1971ad495963d555ccd75dcb1556 - requires_python: '>=3.11' - conda: https://conda.anaconda.org/conda-forge/linux-64/numpy-2.2.6-py310hefbff90_0.conda sha256: 0ba94a61f91d67413e60fa8daa85627a8f299b5054b0eff8f93d26da83ec755e md5: b0cea2c364bf65cd19e023040eeab05d @@ -18915,6 +20790,25 @@ packages: - pkg:pypi/numpy?source=hash-mapping size: 8983076 timestamp: 1766383421113 +- conda: https://conda.anaconda.org/conda-forge/linux-64/numpy-2.4.2-py314h2b28147_0.conda + sha256: 80a1929df6252fe9a32f383c50f9ad18c38377843580902f9fb2906cd552ece1 + md5: e5ff0b238f18fd2a2aca8ca068794df6 + depends: + - python + - libgcc >=14 + - libstdcxx >=14 + - __glibc >=2.17,<3.0.a0 + - python_abi 3.14.* *_cp314 + - liblapack >=3.9.0,<4.0a0 + - libblas >=3.9.0,<4.0a0 + - libcblas >=3.9.0,<4.0a0 + constrains: + - numpy-base <0a0 + license: BSD-3-Clause + purls: + - pkg:pypi/numpy?source=hash-mapping + size: 8927060 + timestamp: 
1770020140979 - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/numpy-2.2.6-py310h6e5608f_0.conda sha256: d7234b9c45e4863c7d4c5221c1e91d69b0e0009464bf361c3fea47e64dc4adc2 md5: 9e9f1f279eb02c41bda162a42861adc0 @@ -19015,6 +20909,25 @@ packages: - pkg:pypi/numpy?source=hash-mapping size: 7815157 timestamp: 1766383452981 +- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/numpy-2.4.2-py314haac167e_0.conda + sha256: 65206193d2f348a715ca190580bbca1917dcf1f3a2a56124584636b7ed6c3caa + md5: 02e3507a8fa2f34d5e44004d5e0f6423 + depends: + - python + - libgcc >=14 + - libstdcxx >=14 + - python 3.14.* *_cp314 + - libcblas >=3.9.0,<4.0a0 + - python_abi 3.14.* *_cp314 + - libblas >=3.9.0,<4.0a0 + - liblapack >=3.9.0,<4.0a0 + constrains: + - numpy-base <0a0 + license: BSD-3-Clause + purls: + - pkg:pypi/numpy?source=hash-mapping + size: 8006748 + timestamp: 1770020188593 - conda: https://conda.anaconda.org/conda-forge/win-64/numpy-2.2.6-py310h4987827_0.conda sha256: 6f628e51763b86a535a723664e3aa1e38cb7147a2697f80b75c1980c1ed52f3e md5: d2596785ac2cf5bab04e2ee9e5d04041 @@ -19115,6 +21028,25 @@ packages: - pkg:pypi/numpy?source=compressed-mapping size: 7584934 timestamp: 1766383321713 +- conda: https://conda.anaconda.org/conda-forge/win-64/numpy-2.4.2-py314h06c3c77_0.conda + sha256: a0e6059c41fd1fb6b991ba965cb6bee7514da40c1664ecddfeb09516f926e281 + md5: a5691a20b4523bacc0947b3051f3fddb + depends: + - python + - vc >=14.3,<15 + - vc14_runtime >=14.44.35208 + - ucrt >=10.0.20348.0 + - liblapack >=3.9.0,<4.0a0 + - libblas >=3.9.0,<4.0a0 + - python_abi 3.14.* *_cp314 + - libcblas >=3.9.0,<4.0a0 + constrains: + - numpy-base <0a0 + license: BSD-3-Clause + purls: + - pkg:pypi/numpy?source=hash-mapping + size: 7309039 + timestamp: 1770020044956 - conda: https://conda.anaconda.org/conda-forge/noarch/numpydoc-1.10.0-pyhcf101f3_0.conda sha256: 482d94fce136c4352b18c6397b9faf0a3149bfb12499ab1ffebad8db0cb6678f md5: 3aa4b625f20f55cf68e92df5e5bf3c39 @@ -19379,20 +21311,20 @@ 
packages: - pkg:pypi/psutil?source=hash-mapping size: 228170 timestamp: 1767012382363 -- conda: https://conda.anaconda.org/conda-forge/linux-64/psutil-7.2.1-py314h3f2afee_0.conda - sha256: 3571148467c49837027099ec5c4bbb5473202917d66279a317f05896bd7586e7 - md5: b2911c190fb8f5eb62be3a60adb105db +- conda: https://conda.anaconda.org/conda-forge/linux-64/psutil-7.2.2-py314h0f05182_0.conda + sha256: f15574ed6c8c8ed8c15a0c5a00102b1efe8b867c0bd286b498cd98d95bd69ae5 + md5: 4f225a966cfee267a79c5cb6382bd121 depends: - python - libgcc >=14 - __glibc >=2.17,<3.0.a0 - - python_abi 3.14.* *_cp314t + - python_abi 3.14.* *_cp314 license: BSD-3-Clause license_family: BSD purls: - - pkg:pypi/psutil?source=hash-mapping - size: 228971 - timestamp: 1767012384426 + - pkg:pypi/psutil?source=compressed-mapping + size: 231303 + timestamp: 1769678156552 - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/psutil-7.2.1-py310hef25091_0.conda sha256: d6deeea23c1c40be15d8ac4171f00ebac2a2028bb09152151ec3d0a479018f31 md5: 6d96240ee0dcba494ab8ed1b8517bdf5 @@ -19890,9 +21822,10 @@ packages: size: 36790521 timestamp: 1765021515427 python_site_packages_path: lib/python3.14/site-packages -- conda: https://conda.anaconda.org/conda-forge/linux-64/python-3.14.2-he1279bd_0_cp314t.conda - sha256: 79a4be7901d977858bdf1b0024b30360d8448e30fe38bece903f855b21b88cf6 - md5: 08a2a24f4e6907bea0ebfe22eecae6be +- conda: https://conda.anaconda.org/conda-forge/linux-64/python-3.14.2-h32b2ec7_101_cp314.conda + build_number: 101 + sha256: 24719868a471dd94041aa9873c6f87adf3b86c07878ad4e242ac97228f9e6460 + md5: 051f60a9d1e3aae7160d173aeb7029f8 depends: - __glibc >=2.17,<3.0.a0 - bzip2 >=1.0.8,<2.0a0 @@ -19900,25 +21833,23 @@ packages: - libexpat >=2.7.3,<3.0a0 - libffi >=3.5.2,<3.6.0a0 - libgcc >=14 - - liblzma >=5.8.1,<6.0a0 + - liblzma >=5.8.2,<6.0a0 - libmpdec >=4.0.0,<5.0a0 - - libsqlite >=3.51.1,<4.0a0 - - libuuid >=2.41.2,<3.0a0 + - libsqlite >=3.51.2,<4.0a0 + - libuuid >=2.41.3,<3.0a0 - libzlib 
>=1.3.1,<2.0a0 - ncurses >=6.5,<7.0a0 - openssl >=3.5.4,<4.0a0 - - python_abi 3.14.* *_cp314t - - readline >=8.2,<9.0a0 + - python_abi 3.14.* *_cp314 + - readline >=8.3,<9.0a0 - tk >=8.6.13,<8.7.0a0 - tzdata - zstd >=1.5.7,<1.6.0a0 - track_features: - - py_freethreading license: Python-2.0 purls: [] - size: 47658766 - timestamp: 1765021403755 - python_site_packages_path: lib/python3.14t/site-packages + size: 36833080 + timestamp: 1769458770373 + python_site_packages_path: lib/python3.14/site-packages - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/python-3.10.19-h28be5d3_2_cpython.conda build_number: 2 sha256: 9bdbc749cd9ee99ae4d72116aad5140e908fdf1215a417375f5e351f96372c77 @@ -20226,17 +22157,6 @@ packages: purls: [] size: 6989 timestamp: 1752805904792 -- conda: https://conda.anaconda.org/conda-forge/noarch/python_abi-3.14-8_cp314t.conda - build_number: 8 - sha256: d9ed2538fba61265a330ee1b1afe99a4bb23ace706172b9464546c7e01259d63 - md5: 3251796e09870c978e0f69fa05e38fb6 - constrains: - - python 3.14.* *_cp314t - license: BSD-3-Clause - license_family: BSD - purls: [] - size: 7020 - timestamp: 1752805919426 - conda: https://conda.anaconda.org/conda-forge/linux-64/pytorch-2.9.1-cuda129_mkl_py312_h2ff76c1_302.conda sha256: 3f27e7b4da22d6a6c61f16f3bf82b4766e128d8339380eca1ecb769a85a975de md5: 062a64a99d83ebf707bc5ee5fa32ff50 diff --git a/pixi.toml b/pixi.toml index 0480032ae..d67064179 100644 --- a/pixi.toml +++ b/pixi.toml @@ -77,6 +77,7 @@ cffi = ">=1" pytest = ">=8,<9" pytest-xdist = ">=3.8" pytest-benchmark = ">=5.1" +cupy = "*" [feature.test.pypi-dependencies] ml_dtypes = "*" @@ -107,6 +108,23 @@ numpydoc = ">=1.9.0" [feature.docs.pypi-dependencies] nvidia-sphinx-theme = "*" +[feature.test-cupy.dependencies] +cupy = "*" + +[feature.test-sim.dependencies] +# Simulator testing - same as test feature but without cupy +make = "*" +pre-commit = ">=4.3" +psutil = ">=6" +cffi = ">=1" +pytest = ">=8,<9" +pytest-xdist = ">=3.8" +pytest-benchmark = ">=5.1" + 
+[feature.test-sim.pypi-dependencies] +ml_dtypes = "*" +filecheck = "*" + [environments] default = { features = ["cu-13-1", "test", "cu", "cu-13", "cu-rt", "nvvm", "py314"], solve-group = "default" } dev = { features = ["ruff"], no-default-feature = true } @@ -118,6 +136,7 @@ cu-12-0-py310 = { features = [ "cu", "cu-12", "py310", + "test-cupy", ], solve-group = "cu-12-0-py310" } cu-12-0-py311 = { features = [ "cu-12-0", @@ -125,6 +144,7 @@ cu-12-0-py311 = { features = [ "cu", "cu-12", "py311", + "test-cupy", ], solve-group = "cu-12-0-py311" } cu-12-2-py311 = { features = [ "cu-12-2", @@ -133,6 +153,7 @@ cu-12-2-py311 = { features = [ "cu-12", "nvvm", "py311", + "test-cupy", ], solve-group = "cu-12-2-py311" } cu-12-8-py310 = { features = [ "cu-12-8", @@ -142,6 +163,7 @@ cu-12-8-py310 = { features = [ "cu-rt", "nvvm", "py310", + "test-cupy", ], solve-group = "cu-12-8-py310" } cu-12-8-py311 = { features = [ "cu-12-8", @@ -151,6 +173,7 @@ cu-12-8-py311 = { features = [ "cu-rt", "nvvm", "py311", + "test-cupy", ], solve-group = "cu-12-8-py311" } cu-12-8-py312 = { features = [ "cu-12-8", @@ -160,6 +183,7 @@ cu-12-8-py312 = { features = [ "cu-rt", "nvvm", "py312", + "test-cupy", ], solve-group = "cu-12-8-py312" } cu-12-8-py313 = { features = [ "cu-12-8", @@ -169,6 +193,7 @@ cu-12-8-py313 = { features = [ "cu-rt", "nvvm", "py313", + "test-cupy", ], solve-group = "cu-12-8-py313" } cu-12-9-py312 = { features = [ "cu-12-9", @@ -179,6 +204,7 @@ cu-12-9-py312 = { features = [ "cu-rt", "nvvm", "py312", + "test-cupy", ], solve-group = "cu-12-9-py312" } # CUDA 13 cu-13-0-py312 = { features = [ @@ -189,6 +215,7 @@ cu-13-0-py312 = { features = [ "cu-rt", "nvvm", "py312", + "test-cupy", ], solve-group = "cu-13-0-py312" } cu-13-0-py313 = { features = [ "cu-13-0", @@ -198,6 +225,7 @@ cu-13-0-py313 = { features = [ "cu-rt", "nvvm", "py313", + "test-cupy", ], solve-group = "cu-13-0-py313" } cu-13-0-py314 = { features = [ "cu-13-0", @@ -217,6 +245,16 @@ cu-13-1-py314 = { features = [ 
"nvvm", "py314", ], solve-group = "cu-13-1-py314" } +# Simulator (no CUDA required, no CuPy) +sim-cu-12-8-py312 = { features = [ + "cu-12-8", + "test-sim", + "cu", + "cu-12", + "cu-rt", + "nvvm", + "py312", +], solve-group = "sim-cu-12-8-py312" } docs = { features = ["docs"], solve-group = "docs" } @@ -283,7 +321,7 @@ NUMBA_ENABLE_CUDASIM = "1" [package] name = "numba-cuda" -version = "0.26.0" +version = "0.24.0" [package.build] backend = { name = "pixi-build-python", version = "*" } diff --git a/pyproject.toml b/pyproject.toml index 00ffce12f..2e56f5903 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,8 +46,16 @@ test = [ "ml_dtypes", "statistics", ] -test-cu12 = ["cuda-toolkit[curand]==12.*", { include-group = "test" }] -test-cu13 = ["cuda-toolkit[curand]==13.*", { include-group = "test" }] +test-cu12 = [ + "cuda-toolkit[curand]==12.*", + "cupy-cuda12x; python_version<'3.14'", + { include-group = "test" } +] +test-cu13 = [ + "cuda-toolkit[curand]==13.*", + "cupy-cuda13x; python_version<'3.14'", + { include-group = "test" } +] [project.urls] Homepage = "https://nvidia.github.io/numba-cuda/" diff --git a/testing/pytest.ini b/testing/pytest.ini index 5815ecb45..cbdb7defe 100644 --- a/testing/pytest.ini +++ b/testing/pytest.ini @@ -26,4 +26,5 @@ filterwarnings = ignore:overflow encountered in scalar .+:RuntimeWarning ignore:.*Host array used in CUDA kernel will incur copy overhead.*:numba.cuda.core.errors.NumbaPerformanceWarning ignore:NVRTC log messages.*Architectures prior to.*are deprecated.*:UserWarning + ignore:CUDA path could not be detected.*:UserWarning:cupy._environment ignore:Benchmark machine_info is different:pytest_benchmark.logger.PytestBenchmarkWarning