diff --git a/ci/test-matrix.yml b/ci/test-matrix.yml index f96f86c27..2acae33ff 100644 --- a/ci/test-matrix.yml +++ b/ci/test-matrix.yml @@ -20,39 +20,62 @@ linux: pull-request: - { ARCH: 'amd64', PY_VER: '3.10', CUDA_VER: '12.9.1', LOCAL_CTK: '1', GPU: 'v100', DRIVER: 'latest' } - { ARCH: 'amd64', PY_VER: '3.10', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'l4', DRIVER: 'latest' } + - { ARCH: 'amd64', PY_VER: '3.10', CUDA_VER: '13.1.0', LOCAL_CTK: '0', GPU: 'l4', DRIVER: 'latest' } - { ARCH: 'amd64', PY_VER: '3.11', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'rtxpro6000', DRIVER: 'latest' } - { ARCH: 'amd64', PY_VER: '3.11', CUDA_VER: '13.0.2', LOCAL_CTK: '1', GPU: 'l4', DRIVER: 'latest' } + - { ARCH: 'amd64', PY_VER: '3.11', CUDA_VER: '13.1.0', LOCAL_CTK: '1', GPU: 'l4', DRIVER: 'latest' } - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.9.1', LOCAL_CTK: '1', GPU: 'l4', DRIVER: 'latest' } - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'l4', DRIVER: 'latest' } + - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.1.0', LOCAL_CTK: '0', GPU: 'l4', DRIVER: 'latest' } - { ARCH: 'amd64', PY_VER: '3.13', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'v100', DRIVER: 'latest' } - { ARCH: 'amd64', PY_VER: '3.13', CUDA_VER: '13.0.2', LOCAL_CTK: '1', GPU: 'rtxpro6000', DRIVER: 'latest' } + - { ARCH: 'amd64', PY_VER: '3.13', CUDA_VER: '13.1.0', LOCAL_CTK: '1', GPU: 'rtxpro6000', DRIVER: 'latest' } - { ARCH: 'amd64', PY_VER: '3.14', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'v100', DRIVER: 'latest' } - - { ARCH: 'amd64', PY_VER: '3.14', CUDA_VER: '13.0.2', LOCAL_CTK: '1', GPU: 'rtxpro6000', DRIVER: 'latest' } + - { ARCH: 'amd64', PY_VER: '3.14', CUDA_VER: '13.0.2', LOCAL_CTK: '1', GPU: 'l4', DRIVER: 'latest' } + - { ARCH: 'amd64', PY_VER: '3.14', CUDA_VER: '13.1.0', LOCAL_CTK: '1', GPU: 'l4', DRIVER: 'latest' } + # - { ARCH: 'amd64', PY_VER: '3.14t', CUDA_VER: '13.0.2', LOCAL_CTK: '1', GPU: 'l4', DRIVER: 'latest' } + # - { ARCH: 'amd64', PY_VER: '3.14t', CUDA_VER: '13.1.0', LOCAL_CTK: '1', GPU: 'l4', DRIVER: 'latest' } - { ARCH: 'arm64', PY_VER: '3.10', CUDA_VER: '12.9.1', LOCAL_CTK: '1', GPU: 'a100', DRIVER: 'latest' } - { ARCH: 'arm64', PY_VER: '3.10', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'a100', DRIVER: 'latest' } + - { ARCH: 'arm64', PY_VER: '3.10', CUDA_VER: '13.1.0', LOCAL_CTK: '0', GPU: 'a100', DRIVER: 'latest' } - { ARCH: 'arm64', PY_VER: '3.11', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'a100', DRIVER: 'latest' } - { ARCH: 'arm64', PY_VER: '3.11', CUDA_VER: '13.0.2', LOCAL_CTK: '1', GPU: 'a100', DRIVER: 'latest' } + - { ARCH: 'arm64', PY_VER: '3.11', CUDA_VER: '13.1.0', LOCAL_CTK: '1', GPU: 'a100', DRIVER: 'latest' } - { ARCH: 'arm64', PY_VER: '3.12', CUDA_VER: '12.9.1', LOCAL_CTK: '1', GPU: 'a100', DRIVER: 'latest' } - { ARCH: 'arm64', PY_VER: '3.12', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'a100', DRIVER: 'latest' } + - { ARCH: 'arm64', PY_VER: '3.12', CUDA_VER: '13.1.0', LOCAL_CTK: '0', GPU: 'a100', DRIVER: 'latest' } - { ARCH: 'arm64', PY_VER: '3.13', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'a100', DRIVER: 'latest' } - { ARCH: 'arm64', PY_VER: '3.13', CUDA_VER: '13.0.2', LOCAL_CTK: '1', GPU: 'a100', DRIVER: 'latest' } + - { ARCH: 'arm64', PY_VER: '3.13', CUDA_VER: '13.1.0', LOCAL_CTK: '1', GPU: 'a100', DRIVER: 'latest' } - { ARCH: 'arm64', PY_VER: '3.14', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'a100', DRIVER: 'latest' } - { ARCH: 'arm64', PY_VER: '3.14', CUDA_VER: '13.0.2', LOCAL_CTK: '1', GPU: 'a100', DRIVER: 'latest' } + - { ARCH: 'arm64', PY_VER: '3.14', CUDA_VER: '13.1.0', LOCAL_CTK: '1', GPU: 'a100', DRIVER: 'latest' } + # - { ARCH: 'arm64', PY_VER: '3.14t', CUDA_VER: '13.0.2', LOCAL_CTK: '1', GPU: 'a100', DRIVER: 'latest' } + # - { ARCH: 'arm64', PY_VER: '3.14t', CUDA_VER: '13.1.0', LOCAL_CTK: '1', GPU: 'a100', DRIVER: 'latest' } nightly: [] special_runners: amd64: - { ARCH: 'amd64', PY_VER: '3.13', CUDA_VER: '13.0.2', LOCAL_CTK: '1', GPU: 'H100', DRIVER: 'latest' } + - { ARCH: 'amd64', PY_VER: '3.13', CUDA_VER: '13.1.0', LOCAL_CTK: '1', GPU: 'H100', DRIVER: 'latest' } windows: pull-request: - { ARCH: 'amd64', PY_VER: '3.10', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'rtx2080', DRIVER: 'latest', DRIVER_MODE: 'WDDM' } - { ARCH: 'amd64', PY_VER: '3.10', CUDA_VER: '13.0.2', LOCAL_CTK: '1', GPU: 'rtxpro6000', DRIVER: 'latest', DRIVER_MODE: 'TCC' } + - { ARCH: 'amd64', PY_VER: '3.10', CUDA_VER: '13.1.0', LOCAL_CTK: '1', GPU: 'rtxpro6000', DRIVER: 'latest', DRIVER_MODE: 'TCC' } - { ARCH: 'amd64', PY_VER: '3.11', CUDA_VER: '12.9.1', LOCAL_CTK: '1', GPU: 'v100', DRIVER: 'latest', DRIVER_MODE: 'MCDM' } - { ARCH: 'amd64', PY_VER: '3.11', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'rtx4090', DRIVER: 'latest', DRIVER_MODE: 'WDDM' } + - { ARCH: 'amd64', PY_VER: '3.11', CUDA_VER: '13.1.0', LOCAL_CTK: '0', GPU: 'rtx4090', DRIVER: 'latest', DRIVER_MODE: 'WDDM' } - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'l4', DRIVER: 'latest', DRIVER_MODE: 'MCDM' } - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.0.2', LOCAL_CTK: '1', GPU: 'a100', DRIVER: 'latest', DRIVER_MODE: 'TCC' } + - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.1.0', LOCAL_CTK: '1', GPU: 'a100', DRIVER: 'latest', DRIVER_MODE: 'TCC' } - { ARCH: 'amd64', PY_VER: '3.13', CUDA_VER: '12.9.1', LOCAL_CTK: '1', GPU: 'l4', DRIVER: 'latest', DRIVER_MODE: 'TCC' } - { ARCH: 'amd64', PY_VER: '3.13', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'rtxpro6000', DRIVER: 'latest', DRIVER_MODE: 'MCDM' } - - { ARCH: 'amd64', PY_VER: '3.14', CUDA_VER: '12.9.1', LOCAL_CTK: '1', GPU: 'l4', DRIVER: 'latest', DRIVER_MODE: 'TCC' } - - { ARCH: 'amd64', PY_VER: '3.14', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'rtxpro6000', DRIVER: 'latest', DRIVER_MODE: 'MCDM' } + - { ARCH: 'amd64', PY_VER: '3.13', CUDA_VER: '13.1.0', LOCAL_CTK: '0', GPU: 'rtxpro6000', DRIVER: 'latest', DRIVER_MODE: 'MCDM' } + - { ARCH: 'amd64', PY_VER: '3.14', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'v100', DRIVER: 'latest', DRIVER_MODE: 'TCC' } + - { ARCH: 'amd64', PY_VER: '3.14', CUDA_VER: '13.0.2', LOCAL_CTK: '1', GPU: 'l4', DRIVER: 'latest', DRIVER_MODE: 'MCDM' } + - { ARCH: 'amd64', PY_VER: '3.14', CUDA_VER: '13.1.0', LOCAL_CTK: '1', GPU: 'l4', DRIVER: 'latest', DRIVER_MODE: 'MCDM' } + # - { ARCH: 'amd64', PY_VER: '3.14t', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'v100', DRIVER: 'latest', DRIVER_MODE: 'TCC' } + # - { ARCH: 'amd64', PY_VER: '3.14t', CUDA_VER: '13.0.2', LOCAL_CTK: '1', GPU: 'a100', DRIVER: 'latest', DRIVER_MODE: 'MCDM' } + # - { ARCH: 'amd64', PY_VER: '3.14t', CUDA_VER: '13.1.0', LOCAL_CTK: '1', GPU: 'a100', DRIVER: 'latest', DRIVER_MODE: 'MCDM' } nightly: [] diff --git a/ci/versions.yml b/ci/versions.yml index 2780fbe87..e0531689d 100644 --- a/ci/versions.yml +++ b/ci/versions.yml @@ -3,6 +3,11 @@ cuda: build: + # Note: This is intentionally kept at 13.0.2 (the lowest 13.0 version) to generate + # test artifacts. We need to link with nvjitlink 13.0.2, and using a higher version + # like 13.1.0 to generate code while linking with nvjitlink 13.0.2 is not a supported + # use case (must use matching versions or always use the latest nvjitlink within a + # CUDA major version). version: "13.0.2" prev_build: version: "12.9.1" diff --git a/numba_cuda/numba/cuda/testing.py b/numba_cuda/numba/cuda/testing.py index 0858be48a..7906af27b 100644 --- a/numba_cuda/numba/cuda/testing.py +++ b/numba_cuda/numba/cuda/testing.py @@ -331,6 +331,45 @@ def skip_if_nvjitlink_missing(reason): return unittest.skipIf(not driver._have_nvjitlink(), reason) +def _is_nvjitlink_13_1_and_sm_120(): + """Check if nvjitlink version is 13.1 and compute capability is 120 (sm_120). + + sm_120 refers to compute capability 12.0, represented as the tuple (12, 0). + """ + if config.ENABLE_CUDASIM: + return False + + try: + from cuda.bindings import nvjitlink + + nvjitlink_ver = nvjitlink.version() + # Check if nvjitlink version is 13.1.x + if nvjitlink_ver[0] != 13 or nvjitlink_ver[1] != 1: + return False + + # Check if compute capability is 12.0 (sm_120) + cc = devices.get_context().device.compute_capability + if cc != (12, 0): + return False + + return True + except (ImportError, AttributeError, RuntimeError): + # ImportError: nvjitlink not available + # AttributeError: version() method missing + # RuntimeError: device context issues + return False + + +def skip_on_nvjitlink_13_1_sm_120(reason): + """Skip test when nvjitlink version is 13.1 and compute capability is sm_120. + + This is used to skip tests that fail at link time with nvjitlink 13.1 on sm_120 + GPUs (e.g., tests calling sum, mean, etc. in numba kernels). + """ + assert isinstance(reason, str) + return unittest.skipIf(_is_nvjitlink_13_1_and_sm_120(), reason) + + class ForeignArray(object): """ Class for emulating an array coming from another library through the CUDA diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py b/numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py index caee1ba61..dddcbfd4b 100644 --- a/numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py +++ b/numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py @@ -4,7 +4,7 @@ from numba.cuda.tests.support import TestCase, MemoryLeakMixin from numba import cuda -from numba.cuda.testing import skip_on_cudasim +from numba.cuda.testing import skip_on_cudasim, skip_on_nvjitlink_13_1_sm_120 from numba.cuda.misc.special import literal_unroll from numba.cuda import config @@ -72,6 +72,9 @@ def kernel(out): kernel[1, 1](out) self.assertPreciseEqual(expected, out.copy_to_host()) + @skip_on_nvjitlink_13_1_sm_120( + "sum fails at link time on sm_120 + CUDA 13.1" + ) def test_sum_basic(self): arrays = ( np.float64([1.0, 2.0, 0.0, -0.0, 1.0, -1.5]), @@ -99,6 +102,9 @@ def kernel(out): kernel[1, 1](out) self.assertPreciseEqual(expected, out.copy_to_host()) + @skip_on_nvjitlink_13_1_sm_120( + "mean fails at link time on sm_120 + CUDA 13.1" + ) def test_mean_basic(self): arrays = ( np.float64([1.0, 2.0, 0.0, -0.0, 1.0, -1.5]), @@ -284,6 +290,9 @@ def kernel(out): kernel[1, 1](out) self.assertPreciseEqual(expected, out.copy_to_host()) + @skip_on_nvjitlink_13_1_sm_120( + "nanmean fails at link time on sm_120 + CUDA 13.1" + ) def test_nanmean_basic(self): arrays = ( np.float64([1.0, 2.0, 0.0, -0.0, 1.0, -1.5]), @@ -309,6 +318,9 @@ def kernel(out): kernel[1, 1](out) self.assertPreciseEqual(expected, out.copy_to_host()) + @skip_on_nvjitlink_13_1_sm_120( + "nansum fails at link time on sm_120 + CUDA 13.1" + ) def test_nansum_basic(self): arrays = ( np.float64([1.0, 2.0, 0.0, -0.0, 1.0, -1.5]), @@ -334,6 +346,9 @@ def kernel(out): kernel[1, 1](out) self.assertPreciseEqual(expected, out.copy_to_host()) + @skip_on_nvjitlink_13_1_sm_120( + "nanprod fails at link time on sm_120 + CUDA 13.1" + ) def test_nanprod_basic(self): arrays = ( np.float64([1.0, 2.0, 0.0, -0.0, 1.0, -1.5]),