Skip to content
Merged
29 changes: 26 additions & 3 deletions ci/test-matrix.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,39 +20,62 @@ linux:
pull-request:
- { ARCH: 'amd64', PY_VER: '3.10', CUDA_VER: '12.9.1', LOCAL_CTK: '1', GPU: 'v100', DRIVER: 'latest' }
- { ARCH: 'amd64', PY_VER: '3.10', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'l4', DRIVER: 'latest' }
- { ARCH: 'amd64', PY_VER: '3.10', CUDA_VER: '13.1.0', LOCAL_CTK: '0', GPU: 'l4', DRIVER: 'latest' }
- { ARCH: 'amd64', PY_VER: '3.11', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'rtxpro6000', DRIVER: 'latest' }
- { ARCH: 'amd64', PY_VER: '3.11', CUDA_VER: '13.0.2', LOCAL_CTK: '1', GPU: 'l4', DRIVER: 'latest' }
- { ARCH: 'amd64', PY_VER: '3.11', CUDA_VER: '13.1.0', LOCAL_CTK: '1', GPU: 'l4', DRIVER: 'latest' }
- { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.9.1', LOCAL_CTK: '1', GPU: 'l4', DRIVER: 'latest' }
- { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'l4', DRIVER: 'latest' }
- { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.1.0', LOCAL_CTK: '0', GPU: 'l4', DRIVER: 'latest' }
- { ARCH: 'amd64', PY_VER: '3.13', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'v100', DRIVER: 'latest' }
- { ARCH: 'amd64', PY_VER: '3.13', CUDA_VER: '13.0.2', LOCAL_CTK: '1', GPU: 'rtxpro6000', DRIVER: 'latest' }
- { ARCH: 'amd64', PY_VER: '3.13', CUDA_VER: '13.1.0', LOCAL_CTK: '1', GPU: 'rtxpro6000', DRIVER: 'latest' }
- { ARCH: 'amd64', PY_VER: '3.14', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'v100', DRIVER: 'latest' }
- { ARCH: 'amd64', PY_VER: '3.14', CUDA_VER: '13.0.2', LOCAL_CTK: '1', GPU: 'rtxpro6000', DRIVER: 'latest' }
- { ARCH: 'amd64', PY_VER: '3.14', CUDA_VER: '13.0.2', LOCAL_CTK: '1', GPU: 'l4', DRIVER: 'latest' }
- { ARCH: 'amd64', PY_VER: '3.14', CUDA_VER: '13.1.0', LOCAL_CTK: '1', GPU: 'l4', DRIVER: 'latest' }
# - { ARCH: 'amd64', PY_VER: '3.14t', CUDA_VER: '13.0.2', LOCAL_CTK: '1', GPU: 'l4', DRIVER: 'latest' }
# - { ARCH: 'amd64', PY_VER: '3.14t', CUDA_VER: '13.1.0', LOCAL_CTK: '1', GPU: 'l4', DRIVER: 'latest' }
- { ARCH: 'arm64', PY_VER: '3.10', CUDA_VER: '12.9.1', LOCAL_CTK: '1', GPU: 'a100', DRIVER: 'latest' }
- { ARCH: 'arm64', PY_VER: '3.10', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'a100', DRIVER: 'latest' }
- { ARCH: 'arm64', PY_VER: '3.10', CUDA_VER: '13.1.0', LOCAL_CTK: '0', GPU: 'a100', DRIVER: 'latest' }
- { ARCH: 'arm64', PY_VER: '3.11', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'a100', DRIVER: 'latest' }
- { ARCH: 'arm64', PY_VER: '3.11', CUDA_VER: '13.0.2', LOCAL_CTK: '1', GPU: 'a100', DRIVER: 'latest' }
- { ARCH: 'arm64', PY_VER: '3.11', CUDA_VER: '13.1.0', LOCAL_CTK: '1', GPU: 'a100', DRIVER: 'latest' }
- { ARCH: 'arm64', PY_VER: '3.12', CUDA_VER: '12.9.1', LOCAL_CTK: '1', GPU: 'a100', DRIVER: 'latest' }
- { ARCH: 'arm64', PY_VER: '3.12', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'a100', DRIVER: 'latest' }
- { ARCH: 'arm64', PY_VER: '3.12', CUDA_VER: '13.1.0', LOCAL_CTK: '0', GPU: 'a100', DRIVER: 'latest' }
- { ARCH: 'arm64', PY_VER: '3.13', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'a100', DRIVER: 'latest' }
- { ARCH: 'arm64', PY_VER: '3.13', CUDA_VER: '13.0.2', LOCAL_CTK: '1', GPU: 'a100', DRIVER: 'latest' }
- { ARCH: 'arm64', PY_VER: '3.13', CUDA_VER: '13.1.0', LOCAL_CTK: '1', GPU: 'a100', DRIVER: 'latest' }
- { ARCH: 'arm64', PY_VER: '3.14', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'a100', DRIVER: 'latest' }
- { ARCH: 'arm64', PY_VER: '3.14', CUDA_VER: '13.0.2', LOCAL_CTK: '1', GPU: 'a100', DRIVER: 'latest' }
- { ARCH: 'arm64', PY_VER: '3.14', CUDA_VER: '13.1.0', LOCAL_CTK: '1', GPU: 'a100', DRIVER: 'latest' }
# - { ARCH: 'arm64', PY_VER: '3.14t', CUDA_VER: '13.0.2', LOCAL_CTK: '1', GPU: 'a100', DRIVER: 'latest' }
# - { ARCH: 'arm64', PY_VER: '3.14t', CUDA_VER: '13.1.0', LOCAL_CTK: '1', GPU: 'a100', DRIVER: 'latest' }
nightly: []
special_runners:
amd64:
- { ARCH: 'amd64', PY_VER: '3.13', CUDA_VER: '13.0.2', LOCAL_CTK: '1', GPU: 'H100', DRIVER: 'latest' }
- { ARCH: 'amd64', PY_VER: '3.13', CUDA_VER: '13.1.0', LOCAL_CTK: '1', GPU: 'H100', DRIVER: 'latest' }

windows:
pull-request:
- { ARCH: 'amd64', PY_VER: '3.10', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'rtx2080', DRIVER: 'latest', DRIVER_MODE: 'WDDM' }
- { ARCH: 'amd64', PY_VER: '3.10', CUDA_VER: '13.0.2', LOCAL_CTK: '1', GPU: 'rtxpro6000', DRIVER: 'latest', DRIVER_MODE: 'TCC' }
- { ARCH: 'amd64', PY_VER: '3.10', CUDA_VER: '13.1.0', LOCAL_CTK: '1', GPU: 'rtxpro6000', DRIVER: 'latest', DRIVER_MODE: 'TCC' }
- { ARCH: 'amd64', PY_VER: '3.11', CUDA_VER: '12.9.1', LOCAL_CTK: '1', GPU: 'v100', DRIVER: 'latest', DRIVER_MODE: 'MCDM' }
- { ARCH: 'amd64', PY_VER: '3.11', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'rtx4090', DRIVER: 'latest', DRIVER_MODE: 'WDDM' }
- { ARCH: 'amd64', PY_VER: '3.11', CUDA_VER: '13.1.0', LOCAL_CTK: '0', GPU: 'rtx4090', DRIVER: 'latest', DRIVER_MODE: 'WDDM' }
- { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'l4', DRIVER: 'latest', DRIVER_MODE: 'MCDM' }
- { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.0.2', LOCAL_CTK: '1', GPU: 'a100', DRIVER: 'latest', DRIVER_MODE: 'TCC' }
- { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.1.0', LOCAL_CTK: '1', GPU: 'a100', DRIVER: 'latest', DRIVER_MODE: 'TCC' }
- { ARCH: 'amd64', PY_VER: '3.13', CUDA_VER: '12.9.1', LOCAL_CTK: '1', GPU: 'l4', DRIVER: 'latest', DRIVER_MODE: 'TCC' }
- { ARCH: 'amd64', PY_VER: '3.13', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'rtxpro6000', DRIVER: 'latest', DRIVER_MODE: 'MCDM' }
- { ARCH: 'amd64', PY_VER: '3.14', CUDA_VER: '12.9.1', LOCAL_CTK: '1', GPU: 'l4', DRIVER: 'latest', DRIVER_MODE: 'TCC' }
- { ARCH: 'amd64', PY_VER: '3.14', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'rtxpro6000', DRIVER: 'latest', DRIVER_MODE: 'MCDM' }
- { ARCH: 'amd64', PY_VER: '3.13', CUDA_VER: '13.1.0', LOCAL_CTK: '0', GPU: 'rtxpro6000', DRIVER: 'latest', DRIVER_MODE: 'MCDM' }
- { ARCH: 'amd64', PY_VER: '3.14', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'v100', DRIVER: 'latest', DRIVER_MODE: 'TCC' }
- { ARCH: 'amd64', PY_VER: '3.14', CUDA_VER: '13.0.2', LOCAL_CTK: '1', GPU: 'l4', DRIVER: 'latest', DRIVER_MODE: 'MCDM' }
- { ARCH: 'amd64', PY_VER: '3.14', CUDA_VER: '13.1.0', LOCAL_CTK: '1', GPU: 'l4', DRIVER: 'latest', DRIVER_MODE: 'MCDM' }
# - { ARCH: 'amd64', PY_VER: '3.14t', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'v100', DRIVER: 'latest', DRIVER_MODE: 'TCC' }
# - { ARCH: 'amd64', PY_VER: '3.14t', CUDA_VER: '13.0.2', LOCAL_CTK: '1', GPU: 'a100', DRIVER: 'latest', DRIVER_MODE: 'MCDM' }
# - { ARCH: 'amd64', PY_VER: '3.14t', CUDA_VER: '13.1.0', LOCAL_CTK: '1', GPU: 'a100', DRIVER: 'latest', DRIVER_MODE: 'MCDM' }
nightly: []
5 changes: 5 additions & 0 deletions ci/versions.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@

cuda:
build:
# Note: The build version is intentionally kept at 13.0.2 (the lowest CUDA 13.x
# version we test against) so that it is the version used to generate test
# artifacts. We need to link with nvjitlink 13.0.2, and generating code with a
# higher version such as 13.1.0 while linking with nvjitlink 13.0.2 is not a
# supported use case: either use matching versions, or always use the latest
# nvjitlink within a CUDA major version.
version: "13.0.2"
prev_build:
version: "12.9.1"
39 changes: 39 additions & 0 deletions numba_cuda/numba/cuda/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -331,6 +331,45 @@ def skip_if_nvjitlink_missing(reason):
return unittest.skipIf(not driver._have_nvjitlink(), reason)


def _is_nvjitlink_13_1_and_sm_120():
    """Report whether nvjitlink 13.1.x is paired with an sm_120 device.

    "sm_120" means compute capability 12.0, which the current device exposes
    as the tuple ``(12, 0)``.

    Returns:
        ``True`` only when the nvjitlink binding reports major version 13 and
        minor version 1 *and* the current context's device has compute
        capability ``(12, 0)``. ``False`` in every other case, including when
        the CUDA simulator is enabled or when the probe itself fails with one
        of the handled exceptions.
    """
    # No real linker or device is involved under the simulator.
    if config.ENABLE_CUDASIM:
        return False

    try:
        from cuda.bindings import nvjitlink

        linker_version = nvjitlink.version()
        # Only touch the device context once the linker version matches
        # 13.1.x; the major component is tested first.
        if linker_version[0] == 13 and linker_version[1] == 1:
            capability = devices.get_context().device.compute_capability
            return capability == (12, 0)
        return False
    except (ImportError, AttributeError, RuntimeError):
        # Treated as "condition not met" rather than an error:
        #   ImportError    - nvjitlink bindings are not available
        #   AttributeError - the bindings lack version()
        #   RuntimeError   - the device context could not be queried
        return False


def skip_on_nvjitlink_13_1_sm_120(reason):
    """Build a decorator that skips a test on nvjitlink 13.1 + sm_120.

    Intended for tests that fail at link time with nvjitlink 13.1 on sm_120
    GPUs (for example tests that call sum, mean, etc. inside numba kernels).

    The skip condition is evaluated once, when this function is called to
    create the decorator, not each time the decorated test runs.

    Args:
        reason: Message reported for the skipped test; must be a ``str``.

    Returns:
        The decorator produced by ``unittest.skipIf`` for that condition.
    """
    assert isinstance(reason, str)
    should_skip = _is_nvjitlink_13_1_and_sm_120()
    return unittest.skipIf(should_skip, reason)


class ForeignArray(object):
"""
Class for emulating an array coming from another library through the CUDA
Expand Down
17 changes: 16 additions & 1 deletion numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

from numba.cuda.tests.support import TestCase, MemoryLeakMixin
from numba import cuda
from numba.cuda.testing import skip_on_cudasim
from numba.cuda.testing import skip_on_cudasim, skip_on_nvjitlink_13_1_sm_120
from numba.cuda.misc.special import literal_unroll
from numba.cuda import config

Expand Down Expand Up @@ -72,6 +72,9 @@ def kernel(out):
kernel[1, 1](out)
self.assertPreciseEqual(expected, out.copy_to_host())

@skip_on_nvjitlink_13_1_sm_120(
"sum fails at link time on sm_120 + CUDA 13.1"
)
def test_sum_basic(self):
arrays = (
np.float64([1.0, 2.0, 0.0, -0.0, 1.0, -1.5]),
Expand Down Expand Up @@ -99,6 +102,9 @@ def kernel(out):
kernel[1, 1](out)
self.assertPreciseEqual(expected, out.copy_to_host())

@skip_on_nvjitlink_13_1_sm_120(
"mean fails at link time on sm_120 + CUDA 13.1"
)
def test_mean_basic(self):
arrays = (
np.float64([1.0, 2.0, 0.0, -0.0, 1.0, -1.5]),
Expand Down Expand Up @@ -284,6 +290,9 @@ def kernel(out):
kernel[1, 1](out)
self.assertPreciseEqual(expected, out.copy_to_host())

@skip_on_nvjitlink_13_1_sm_120(
"nanmean fails at link time on sm_120 + CUDA 13.1"
)
def test_nanmean_basic(self):
arrays = (
np.float64([1.0, 2.0, 0.0, -0.0, 1.0, -1.5]),
Expand All @@ -309,6 +318,9 @@ def kernel(out):
kernel[1, 1](out)
self.assertPreciseEqual(expected, out.copy_to_host())

@skip_on_nvjitlink_13_1_sm_120(
"nansum fails at link time on sm_120 + CUDA 13.1"
)
def test_nansum_basic(self):
arrays = (
np.float64([1.0, 2.0, 0.0, -0.0, 1.0, -1.5]),
Expand All @@ -334,6 +346,9 @@ def kernel(out):
kernel[1, 1](out)
self.assertPreciseEqual(expected, out.copy_to_host())

@skip_on_nvjitlink_13_1_sm_120(
"nanprod fails at link time on sm_120 + CUDA 13.1"
)
def test_nanprod_basic(self):
arrays = (
np.float64([1.0, 2.0, 0.0, -0.0, 1.0, -1.5]),
Expand Down
Loading