Merged. Changes from all commits (22 commits):
7748ef6  Rename test-thirdparty to test-thirdparty-cudf (gmarkall, Nov 10, 2025)
307e23a  Add nvmath-python tests (gmarkall, Nov 10, 2025)
9924815  Attempt to fix nvmath test script (gmarkall, Nov 10, 2025)
ce12ec6  There is no conda prefix when there's no conda (gmarkall, Nov 10, 2025)
78b8afe  Try installing nvidia-cutlass (gmarkall, Nov 11, 2025)
e23de8d  Correct typo in quotes (gmarkall, Nov 11, 2025)
a6899c5  Fail fast on nvmath tests and use native traceback while debuging (gmarkall, Nov 11, 2025)
f28dfe2  Generate Numba signature objects when Numba is in use (gmarkall, Nov 12, 2025)
4b513c3  Consider core Numba number domain and struct model in array type check (gmarkall, Nov 12, 2025)
cfc81d1  Install dx tests requirements too (gmarkall, Nov 12, 2025)
dc1ae2b  Add Dim3 and GridGroup to types module (gmarkall, Nov 12, 2025)
6ab470b  Try only installing nvidia-mthdx directly (gmarkall, Nov 12, 2025)
f9e0af7  Add thirdparty tests of Awkward Array (gmarkall, Nov 12, 2025)
f7f7164  Use correct wheel for CuPy (gmarkall, Nov 12, 2025)
baf15cd  Correct typo in awkward Github URL (gmarkall, Nov 12, 2025)
7897334  Disable benchmarks for awkward tests (gmarkall, Nov 12, 2025)
e85e8d7  Don't run awkward tests with -v because it makes the log huge (gmarkall, Nov 12, 2025)
06e3eac  Merge remote-tracking branch 'NVIDIA/main' into thirdparty-tests (gmarkall, Nov 17, 2025)
fa92cb2  Only run nvmath tests on push to main (gmarkall, Nov 17, 2025)
9e5c87e  Attempt to make pr job succeed when nvmath-python tests skipped (gmarkall, Nov 18, 2025)
ed99309  Add in a dedicated workflow for merged commits (cryos, Nov 19, 2025)
ae4f2d1  Merge remote-tracking branch 'NVIDIA/main' into thirdparty-tests (gmarkall, Nov 20, 2025)
148 changes: 148 additions & 0 deletions .github/workflows/ci.yaml
@@ -0,0 +1,148 @@
# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: BSD-2-Clause

name: ci

on:
  push:
    branches:
      - "main"

jobs:
  pre-commit:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v5
      - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065
      - uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd
        with:
          extra_args: --all-files --show-diff-on-failure
  compute-matrix:
    runs-on: ubuntu-latest
    outputs:
      BUILD_MATRIX: ${{ steps.compute-matrix.outputs.BUILD_MATRIX }}
      SIMULATOR_MATRIX: ${{ steps.compute-matrix.outputs.SIMULATOR_MATRIX }}
      TEST_MATRIX: ${{ steps.compute-matrix.outputs.TEST_MATRIX }}
    steps:
      - uses: actions/checkout@v5
      - name: Compute Matrices
        id: compute-matrix
        run: |
          BUILD_MATRIX="$(yq '.build-matrix' ci/matrix.yml)"
          SIMULATOR_MATRIX="$(yq '.simulator-matrix' ci/matrix.yml)"
          TEST_MATRIX="$(yq '.test-matrix' ci/matrix.yml)"
          {
            echo 'BUILD_MATRIX<<EOF'
            echo "${BUILD_MATRIX}"
            echo 'EOF'
            echo 'SIMULATOR_MATRIX<<EOF'
            echo "${SIMULATOR_MATRIX}"
            echo 'EOF'
            echo 'TEST_MATRIX<<EOF'
            echo "${TEST_MATRIX}"
            echo 'EOF'
          } >> "${GITHUB_OUTPUT}"
  build-conda:
    needs:
      - compute-matrix
    uses: ./.github/workflows/conda-python-build.yaml
    with:
      build_type: pull-request
      script: "ci/build_conda.sh"
      matrix: ${{ needs.compute-matrix.outputs.BUILD_MATRIX }}
  test-conda:
    needs:
      - compute-matrix
    uses: ./.github/workflows/conda-python-tests.yaml
    with:
      build_type: pull-request
      script: "ci/test_conda.sh"
      run_codecov: false
      matrix: ${{ needs.compute-matrix.outputs.TEST_MATRIX }}
  test-simulator:
    needs:
      - compute-matrix
    uses: ./.github/workflows/simulator-test.yaml
    with:
      build_type: pull-request
      script: "ci/test_simulator.sh"
      matrix: ${{ needs.compute-matrix.outputs.SIMULATOR_MATRIX }}
  build-wheels:
    needs:
      - compute-matrix
    uses: ./.github/workflows/wheels-build.yaml
    with:
      build_type: pull-request
      script: "ci/build_wheel.sh"
      matrix: ${{ needs.compute-matrix.outputs.BUILD_MATRIX }}
  build-wheels-windows:
    uses: ./.github/workflows/wheel-windows-build.yaml
  test-wheels-windows:
    needs:
      - build-wheels-windows
    uses: ./.github/workflows/wheel-windows-tests.yaml
  test-wheels:
    needs:
      - build-wheels
      - compute-matrix
    uses: ./.github/workflows/wheels-test.yaml
    with:
      build_type: pull-request
      script: "ci/test_wheel.sh false"
      matrix: ${{ needs.compute-matrix.outputs.TEST_MATRIX }}
  test-wheels-deps-wheels:
    needs:
      - build-wheels
      - compute-matrix
    uses: ./.github/workflows/wheels-test.yaml
    with:
      build_type: pull-request
      script: "ci/test_wheel_deps_wheels.sh"
      matrix: ${{ needs.compute-matrix.outputs.TEST_MATRIX }}
      matrix_filter: map(select(.ARCH == "amd64" and (.CUDA_VER | split(".") | .[0] | tonumber >= 12))) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
  test-thirdparty-cudf:
    needs:
      - build-wheels
      - compute-matrix
    uses: ./.github/workflows/wheels-test.yaml
    with:
      build_type: pull-request
      script: "ci/test_thirdparty_cudf.sh"
      matrix: ${{ needs.compute-matrix.outputs.TEST_MATRIX }}
      # TODO: Enable for CUDA 13 when a supporting version of cuDF is available
      matrix_filter: map(select(.ARCH == "amd64" and (.CUDA_VER | split(".") | .[0] | tonumber == 12))) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
  test-thirdparty-nvmath:
    needs:
      - build-wheels
      - compute-matrix
    uses: ./.github/workflows/wheels-test.yaml
    with:
      build_type: pull-request
      script: "ci/test_thirdparty_nvmath.sh"
      matrix: ${{ needs.compute-matrix.outputs.TEST_MATRIX }}
      # TODO: Enable for CUDA 13 when a supporting version of nvmath-python is available
      matrix_filter: map(select(.ARCH == "amd64" and (.CUDA_VER | split(".") | .[0] | tonumber == 12))) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
  test-thirdparty-awkward:
    needs:
      - build-wheels
      - compute-matrix
    uses: ./.github/workflows/wheels-test.yaml
    with:
      build_type: pull-request
      script: "ci/test_thirdparty_awkward.sh"
      matrix: ${{ needs.compute-matrix.outputs.TEST_MATRIX }}
      # TODO: Enable for CUDA 13 in future
      matrix_filter: map(select(.ARCH == "amd64" and (.CUDA_VER | split(".") | .[0] | tonumber == 12))) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))

  build-docs:
    uses: ./.github/workflows/docs-build.yaml
  coverage-report:
    needs:
      - build-wheels
      - compute-matrix
    uses: ./.github/workflows/wheels-test.yaml
    with:
      build_type: pull-request
      script: "ci/coverage_report.sh"
      matrix: ${{ needs.compute-matrix.outputs.TEST_MATRIX }}
      matrix_filter: 'map(select(.ARCH == "amd64" and .CUDA_VER == "12.9.1" and .PY_VER == "3.12")) | .[0:1]'
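For reference, here is a sketch of what the matrix_filter expressions above do. The input matrix below is hypothetical (it is not taken from the project's ci/matrix.yml); the filter keeps the amd64 entries for the requested CUDA major version, groups them by that major version, and retains only the highest Python/CUDA combination in each group:

# Illustrative input only; assumes jq is available on the runner.
echo '[{"ARCH":"amd64","CUDA_VER":"12.9.1","PY_VER":"3.12"},
       {"ARCH":"amd64","CUDA_VER":"12.9.1","PY_VER":"3.13"},
       {"ARCH":"arm64","CUDA_VER":"12.9.1","PY_VER":"3.13"},
       {"ARCH":"amd64","CUDA_VER":"13.0.0","PY_VER":"3.13"}]' |
  jq -c 'map(select(.ARCH == "amd64" and (.CUDA_VER | split(".") | .[0] | tonumber == 12)))
         | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0])
         | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))'
# Prints: [{"ARCH":"amd64","CUDA_VER":"12.9.1","PY_VER":"3.13"}]
# i.e. only the newest Python on the newest CUDA 12 entry runs the third-party jobs.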
20 changes: 16 additions & 4 deletions .github/workflows/pr.yaml
@@ -7,7 +7,6 @@ on:
  push:
    branches:
      - "pull-request/[0-9]+"
      - "main"

concurrency:
  group: >-
@@ -31,7 +30,8 @@ jobs:
      - test-wheels-windows
      - test-wheels
      - test-wheels-deps-wheels
      - test-thirdparty
      - test-thirdparty-cudf
      - test-thirdparty-awkward
      - build-docs
      - coverage-report
    secrets: inherit
@@ -127,17 +127,29 @@ jobs:
      script: "ci/test_wheel_deps_wheels.sh"
      matrix: ${{ needs.compute-matrix.outputs.TEST_MATRIX }}
      matrix_filter: map(select(.ARCH == "amd64" and (.CUDA_VER | split(".") | .[0] | tonumber >= 12))) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
  test-thirdparty:
  test-thirdparty-cudf:
    needs:
      - build-wheels
      - compute-matrix
    uses: ./.github/workflows/wheels-test.yaml
    with:
      build_type: pull-request
      script: "ci/test_thirdparty.sh"
      script: "ci/test_thirdparty_cudf.sh"
      matrix: ${{ needs.compute-matrix.outputs.TEST_MATRIX }}
      # TODO: Enable for CUDA 13 when a supporting version of cuDF is available
      matrix_filter: map(select(.ARCH == "amd64" and (.CUDA_VER | split(".") | .[0] | tonumber == 12))) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
  test-thirdparty-awkward:
    needs:
      - build-wheels
      - compute-matrix
    uses: ./.github/workflows/wheels-test.yaml
    with:
      build_type: pull-request
      script: "ci/test_thirdparty_awkward.sh"
      matrix: ${{ needs.compute-matrix.outputs.TEST_MATRIX }}
      # TODO: Enable for CUDA 13 in future
      matrix_filter: map(select(.ARCH == "amd64" and (.CUDA_VER | split(".") | .[0] | tonumber == 12))) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))

  build-docs:
    uses: ./.github/workflows/docs-build.yaml
  coverage-report:
42 changes: 42 additions & 0 deletions ci/test_thirdparty_awkward.sh
@@ -0,0 +1,42 @@
#!/bin/bash
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: BSD-2-Clause

set -euo pipefail

CUDA_VER_MAJOR_MINOR=${CUDA_VER%.*}
AWKWARD_VERSION="2.8.10"

rapids-logger "Install awkward and related libraries"

pip install awkward==${AWKWARD_VERSION} cupy-cuda12x pyarrow pandas nox

rapids-logger "Install wheel with test dependencies"
package=$(realpath wheel/numba_cuda*.whl)
echo "Package path: ${package}"
python -m pip install \
    "${package}" \
    "cuda-python==${CUDA_VER_MAJOR_MINOR%.*}.*" \
    "cuda-core==0.3.*" \
    "nvidia-nvjitlink-cu12" \
    --group test


rapids-logger "Clone awkward repository"
git clone --recursive https://github.com/scikit-hep/awkward.git
pushd awkward
git checkout v${AWKWARD_VERSION}

rapids-logger "Generate awkward tests"
nox -s prepare -- --tests

rapids-logger "Check GPU usage"
nvidia-smi

rapids-logger "Show Numba system info"
python -m numba --sysinfo

rapids-logger "Run Awkward CUDA tests"
python -m pytest -n auto --benchmark-disable tests-cuda tests-cuda-kernels tests-cuda-kernels-explicit

popd
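The workflow currently gates this script to CUDA 12 (see the matrix_filter TODOs above), and the script pins cupy-cuda12x accordingly. If the CUDA 13 jobs are enabled later, the CuPy wheel would also need to track the CUDA major version. A possible sketch, with the caveat that the CUDA 13 package name is an assumption based on CuPy's cupy-cudaNNx naming convention and is not verified here:

# Hypothetical wheel selection by CUDA major version; verify package names before use.
CUDA_MAJOR=${CUDA_VER%%.*}
case "${CUDA_MAJOR}" in
  12) CUPY_PACKAGE="cupy-cuda12x" ;;
  13) CUPY_PACKAGE="cupy-cuda13x" ;;  # assumed name; check CuPy's published wheels
  *)  echo "Unsupported CUDA major version: ${CUDA_MAJOR}" >&2; exit 1 ;;
esac
pip install awkward==${AWKWARD_VERSION} "${CUPY_PACKAGE}" pyarrow pandas nox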
File renamed without changes.
55 changes: 55 additions & 0 deletions ci/test_thirdparty_nvmath.sh
@@ -0,0 +1,55 @@
#!/bin/bash
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: BSD-2-Clause

set -euo pipefail

CUDA_VER_MAJOR_MINOR=${CUDA_VER%.*}

NVMATH_PYTHON_VERSION="0.6.0"
# The commit on Github corresponding to 0.6.0
NVMATH_PYTHON_SHA="6bddfa71c39c07804127adeb23f5b0d2168ae38c"

rapids-logger "Install nvmath-python"

pip install nvmath-python[cu12,dx]==${NVMATH_PYTHON_VERSION}

rapids-logger "Remove Extraneous numba-cuda"
pip uninstall -y numba-cuda

rapids-logger "Install wheel with test dependencies"
package=$(realpath wheel/numba_cuda*.whl)
echo "Package path: ${package}"
python -m pip install \
    "${package}" \
    "cuda-python==${CUDA_VER_MAJOR_MINOR%.*}.*" \
    "cuda-core==0.3.*" \
    "nvidia-nvjitlink-cu12" \
    --group test


rapids-logger "Shallow clone nvmath-python repository"
git clone https://github.com/NVIDIA/nvmath-python.git
pushd nvmath-python
git checkout ${NVMATH_PYTHON_SHA}

rapids-logger "Install nvmath-python test dependencies"
pip install -r requirements/pip/tests.txt
pip install nvidia-mathdx
pip install nvidia-cutlass

rapids-logger "Check GPU usage"
nvidia-smi

rapids-logger "Show Numba system info"
python -m numba --sysinfo

rapids-logger "Run nvmath-python device tests"
pushd tests
# Required for nvmath-python to locate the pip-installed MathDx
export SYS_PREFIX=`python -c "import sys; print(sys.prefix)"`
export MATHDX_HOME=${SYS_PREFIX}/lib/python3.13/site-packages/nvidia/mathdx
python -m pytest nvmath_tests/device --tb=native -x
Review comment (Contributor): RE: clogging up CI, did you try scaling out with multiple pytest workers?

Reply (Contributor, author): I did, but the tests use a lot of memory so it's not easy to find a sweet spot that speeds things up and doesn't OOM.

popd
popd
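Relating to the review thread above about worker count versus memory: one middle ground between -n auto and a single worker is a small fixed worker count. This is a sketch only; the value of 2 is an illustrative guess, and pytest-xdist is assumed to be available in the test environment (the script above does not install it explicitly):

# Hypothetical compromise: cap pytest-xdist workers to bound peak memory use
# while keeping some parallelism; the count should be tuned per runner.
python -m pytest nvmath_tests/device --tb=native -x -n 2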
16 changes: 13 additions & 3 deletions numba_cuda/numba/cuda/cudaimpl.py
@@ -11,7 +11,7 @@

from numba.cuda.core.imputils import Registry
from numba.cuda.typing.npydecl import parse_dtype
from numba.cuda.datamodel import models
from numba.cuda.datamodel.models import StructModel
from numba.cuda import types
from numba.cuda import cgutils
from numba.cuda.np import ufunc_db
@@ -21,6 +21,10 @@
from numba.cuda import nvvmutils, stubs
from numba.cuda.types.ext_types import dim3, CUDADispatcher

if cuda.HAS_NUMBA:
    from numba.core.datamodel.models import StructModel as CoreStructModel
    from numba.core import types as core_types

registry = Registry("cudaimpl")
lower = registry.lower
lower_attr = registry.lower_getattr
@@ -880,13 +884,19 @@ def _generic_array(
        raise ValueError("array length <= 0")

    # Check that we support the requested dtype
    number_domain = types.number_domain
    struct_model_types = (StructModel,)
    if cuda.HAS_NUMBA:
        number_domain |= core_types.number_domain
        struct_model_types = (StructModel, CoreStructModel)

    data_model = context.data_model_manager[dtype]
    other_supported_type = (
        isinstance(dtype, (types.Record, types.Boolean))
        or isinstance(data_model, models.StructModel)
        or isinstance(data_model, struct_model_types)
        or dtype == types.float16
    )
    if dtype not in types.number_domain and not other_supported_type:
    if dtype not in number_domain and not other_supported_type:
        raise TypeError("unsupported type: %s" % dtype)

    lldtype = context.get_data_type(dtype)
4 changes: 3 additions & 1 deletion numba_cuda/numba/cuda/types/__init__.py
@@ -14,7 +14,7 @@
from .npytypes import *
from .scalars import *
from .function_type import *
from .ext_types import bfloat16, dim3, grid_group
from .ext_types import bfloat16, dim3, grid_group, GridGroup, Dim3

numpy_version = tuple(map(int, np.__version__.split(".")[:2]))

@@ -222,6 +222,8 @@
bfloat16
dim3
grid_group
Dim3
GridGroup
"""


6 changes: 5 additions & 1 deletion numba_cuda/numba/cuda/typing/templates.py
@@ -200,7 +200,11 @@ def generic(self):
def signature(return_type, *args, **kws):
    recvr = kws.pop("recvr", None)
    assert not kws
    return Signature(return_type, args, recvr=recvr)
    if HAS_NUMBA:
        signature_class = CoreSignature
    else:
        signature_class = Signature
    return signature_class(return_type, args, recvr=recvr)


def fold_arguments(