Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
45 commits
Select commit Hold shift + click to select a range
c1f1cec
copy/paste minimal CI infra from cuda-python
leofang Nov 20, 2025
85566c7
tailor for numba-cuda
leofang Nov 20, 2025
1b939c3
enable cibuildwheel support
leofang Nov 20, 2025
950078e
skip py314 and py314t for now
leofang Nov 20, 2025
6fa44fc
nit: fix typo
leofang Nov 20, 2025
593fcf3
make Linux executables discoverable
leofang Nov 21, 2025
f183ebe
include cuobjdump
leofang Nov 21, 2025
72da422
update makefile to accommodate for the new CI env
leofang Nov 21, 2025
a973726
suppress NVRTC warnings on V100 + CUDA 12
leofang Dec 1, 2025
df8f583
libcudadevrt.a is expected to be in lib64 :(
leofang Dec 1, 2025
861eede
ensure tests that need cuobjdump can be skipped
leofang Dec 1, 2025
6910ebf
Merge branch 'main' into new_ci
leofang Dec 1, 2025
f40af54
cosmetic changes
leofang Dec 1, 2025
55658b4
Merge branch 'main' into new_ci
leofang Dec 5, 2025
d724e26
cover sm120 in tests
leofang Dec 5, 2025
38c4bec
nits
leofang Dec 5, 2025
b752720
fix
leofang Dec 5, 2025
9f498d1
fix dll patching point
leofang Dec 6, 2025
e17ef86
Merge branch 'main' into new_ci
leofang Dec 8, 2025
4536f80
chore: bump pixi lock file
cpcloud Dec 8, 2025
da0ac2b
expose minor ver as env var
leofang Dec 10, 2025
4b0d8a1
Constrain wheel major.minor vers
leofang Dec 10, 2025
c9723d5
ensure cuda-toolkit can be used to constrain version
leofang Dec 10, 2025
568de03
Merge branch 'main' into new_ci
brandon-b-miller Dec 12, 2025
61bafec
small fixes
brandon-b-miller Dec 12, 2025
ea2c4f5
fix UB
brandon-b-miller Dec 16, 2025
d63e022
Merge branch 'main' into new_ci
brandon-b-miller Dec 16, 2025
b101eda
restore lockfile
brandon-b-miller Dec 16, 2025
4c4c743
regenerate pixi lockfile
brandon-b-miller Dec 16, 2025
5b6af61
don't use sccache when targeting .a
brandon-b-miller Dec 16, 2025
78817cb
Merge branch 'main' into new_ci
brandon-b-miller Dec 16, 2025
8a0dc9a
test changes from PR 591
brandon-b-miller Dec 16, 2025
606cc7a
small fix
brandon-b-miller Dec 16, 2025
6f24a30
Revert "test changes from PR 591"
brandon-b-miller Dec 16, 2025
28e51a1
Revert "small fix"
brandon-b-miller Dec 16, 2025
e2f8e0c
Merge branch 'main' into new_ci
brandon-b-miller Dec 16, 2025
506d6b8
skip cc>10, cuda12 atomics tests
brandon-b-miller Dec 17, 2025
8e96a09
Merge branch 'main' into new_ci
brandon-b-miller Dec 17, 2025
64068b7
reason
brandon-b-miller Dec 17, 2025
48cca14
10->12
brandon-b-miller Dec 17, 2025
a6b513d
test skip
brandon-b-miller Dec 17, 2025
ccd8382
Merge branch 'main' into new_ci
brandon-b-miller Dec 17, 2025
476082b
fix skip condition
brandon-b-miller Dec 17, 2025
822b37d
complex atomic skips
brandon-b-miller Dec 17, 2025
cafdf68
Merge branch 'main' into new_ci
brandon-b-miller Dec 17, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
193 changes: 193 additions & 0 deletions .github/actions/fetch_ctk/action.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,193 @@
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0

name: Fetch mini CTK

description: Fetch (or create) a mini CUDA Toolkit from cache

# Every input now carries a description, as the action metadata syntax asks for.
inputs:
  host-platform:
    description: "Platform to fetch the CTK for. The steps of this action recognize 'linux-64', 'linux-aarch64' and 'win-64'."
    required: true
    type: string
  cuda-version:
    description: "CUDA Toolkit version string. It selects the redistrib_<version>.json manifest, is part of the cache key, and its first dot-separated field is used as the major version."
    required: true
    type: string
  cuda-components:
    description: "A list of the CTK components to install as a comma-separated list. e.g. 'cuda_nvcc,cuda_nvrtc,cuda_cudart'"
    required: false
    type: string
    default: "cuda_nvcc,cuda_cudart,cuda_crt,libnvvm,cuda_nvrtc,cuda_profiler_api,cuda_cccl,libnvjitlink,libcufile"
  cuda-path:
    description: "where the CTK components will be installed to, relative to $PWD"
    required: false
    type: string
    default: "./cuda_toolkit"

runs:
  using: composite
  steps:
- name: Set up CTK cache variable
  shell: bash --noprofile --norc -xeuo pipefail {0}
  run: |
    # Exports CTK_CACHE_KEY, CTK_CACHE_FILENAME and CTK_CACHE_COMPONENTS to
    # $GITHUB_ENV. The key embeds a hash of the *effective* component list, so
    # the list is normalized first to ensure hash uniqueness.
    CUDA_MAJOR_VER="$(cut -d '.' -f 1 <<< "${{ inputs.cuda-version }}")"

    # Rebuild the list item by item instead of deleting substrings in place.
    # In-place deletion followed by a single ",," -> "," pass still left ",,"
    # behind when two adjacent items were removed, left a trailing comma when
    # the last item was removed, and could clip a longer name containing one of
    # the removed names. Exact matching avoids all three.
    # NOTE: for inputs that used to produce stray commas the hash (and thus the
    # cache key) differs from before, so those entries are rebuilt once.
    CTK_CACHE_COMPONENTS=""
    for COMPONENT in $(tr ',' ' ' <<< "${{ inputs.cuda-components }}"); do
      case "$COMPONENT" in
        libnvjitlink)
          # Conditionally strip out libnvjitlink for CUDA versions < 12
          if [[ "$CUDA_MAJOR_VER" -lt 12 ]]; then continue; fi
          ;;
        cuda_crt|libnvvm)
          # Conditionally strip out cuda_crt and libnvvm for CUDA versions < 13
          if [[ "$CUDA_MAJOR_VER" -lt 13 ]]; then continue; fi
          ;;
        libcufile)
          # Conditionally strip out libcufile since it does not support Windows
          if [[ "${{ inputs.host-platform }}" == win-* ]]; then continue; fi
          ;;
      esac
      # ${VAR:+...} inserts the separator only once the list is non-empty
      CTK_CACHE_COMPONENTS="${CTK_CACHE_COMPONENTS:+${CTK_CACHE_COMPONENTS},}${COMPONENT}"
    done

    HASH=$(echo -n "${CTK_CACHE_COMPONENTS}" | sha256sum | awk '{print $1}')
    echo "CTK_CACHE_KEY=mini-ctk-${{ inputs.cuda-version }}-${{ inputs.host-platform }}-$HASH" >> "$GITHUB_ENV"
    echo "CTK_CACHE_FILENAME=mini-ctk-${{ inputs.cuda-version }}-${{ inputs.host-platform }}-$HASH.tar.gz" >> "$GITHUB_ENV"
    echo "CTK_CACHE_COMPONENTS=${CTK_CACHE_COMPONENTS}" >> "$GITHUB_ENV"

- name: Install dependencies
  # Delegates to the repository-local install_unix_deps action: it looks up the
  # executables in `dependent_exes` and installs the packages in `dependencies`
  # only when at least one of them is missing.
  uses: ./.github/actions/install_unix_deps
  with:
    dependent_exes: 'zstd curl xz'
    dependencies: 'zstd curl xz-utils'
  # A failure here fails the job.
  continue-on-error: false

- id: ctk-get-cache
  name: Download CTK cache
  uses: actions/cache/restore@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
  with:
    # Key and archive name were exported to the environment by the
    # "Set up CTK cache variable" step.
    path: ./${{ env.CTK_CACHE_FILENAME }}
    key: ${{ env.CTK_CACHE_KEY }}
    # A miss is not an error: later steps branch on this step's `cache-hit`
    # output and build the archive from scratch when it is not 'true'.
    fail-on-cache-miss: false
  # A failing restore must not fail the job either.
  continue-on-error: true

- name: Get CUDA components
  if: ${{ steps.ctk-get-cache.outputs.cache-hit != 'true' }}
  shell: bash --noprofile --norc -xeuo pipefail {0}
  run: |
    # Cache miss: download every component listed in $CTK_CACHE_COMPONENTS,
    # merge them into one tree, pack that tree into $CTK_CACHE_FILENAME (picked
    # up by the cache upload step) and populate ./cuda_toolkit from it.

    # Everything under this folder is packed and stored in the GitHub Cache space,
    # and unpacked after retrieving from the cache.
    CACHE_TMP_DIR="./cache_tmp_dir"
    rm -rf "$CACHE_TMP_DIR"
    mkdir "$CACHE_TMP_DIR"

    # The binary archives (redist) are guaranteed to be updated as part of the release posting.
    # No trailing slash: paths are joined with an explicit "/" below.
    CTK_BASE_URL="https://developer.download.nvidia.com/compute/cuda/redist"
    CTK_JSON_URL="$CTK_BASE_URL/redistrib_${{ inputs.cuda-version }}.json"
    # Kept outside $CACHE_TMP_DIR so the manifest is not packed into the cache.
    CTK_JSON_FILE="./ctk_redistrib_manifest.json"

    # Map the host platform to the redist sub-directory name. Unknown platforms
    # fail here with a clear message instead of later with an unbound variable
    # or a missing function.
    case "${{ inputs.host-platform }}" in
      linux-64)
        CTK_SUBDIR="linux-x86_64"
        ;;
      linux-aarch64)
        CTK_SUBDIR="linux-sbsa"
        ;;
      win-64)
        CTK_SUBDIR="windows-x86_64"
        ;;
      *)
        echo "Unsupported host-platform: ${{ inputs.host-platform }}" >&2
        exit 1
        ;;
    esac

    # extract <archive>: unpack one component archive into $CACHE_TMP_DIR,
    # dropping the archive's top-level directory.
    if [[ "$CTK_SUBDIR" == linux-* ]]; then
      function extract() {
        tar -xvf "$1" -C "$CACHE_TMP_DIR" --strip-components=1
      }
    else
      function extract() {
        _TEMP_DIR_=$(mktemp -d)
        unzip "$1" -d "$_TEMP_DIR_"
        cp -r "$_TEMP_DIR_"/*/* "$CACHE_TMP_DIR"
        rm -rf "$_TEMP_DIR_"
        # see commit NVIDIA/cuda-python@69410f1d9228e775845ef6c8b4a9c7f37ffc68a5
        chmod 644 "$CACHE_TMP_DIR/LICENSE"
      }
    fi

    # download <url> <output file>
    # TLS verification stays enabled (no -k): these archives are cached and
    # reused by later runs, so they must come from the genuine server.
    # -f turns an HTTP error into a non-zero exit instead of saving the error page.
    function download() {
      curl -fLSs "$1" -o "$2"
    }

    # Fetch the manifest once; it is the same for every component.
    download "$CTK_JSON_URL" "$CTK_JSON_FILE"

    function populate_cuda_path() {
      # take the component name as a argument
      CTK_COMPONENT=$1
      CTK_COMPONENT_REL_PATH="$(python -c "import sys, json; print(json.load(sys.stdin)['${CTK_COMPONENT}']['${CTK_SUBDIR}']['relative_path'])" < "$CTK_JSON_FILE")"
      CTK_COMPONENT_URL="${CTK_BASE_URL}/${CTK_COMPONENT_REL_PATH}"
      CTK_COMPONENT_COMPONENT_FILENAME="$(basename "$CTK_COMPONENT_REL_PATH")"
      download "$CTK_COMPONENT_URL" "$CTK_COMPONENT_COMPONENT_FILENAME"
      extract "$CTK_COMPONENT_COMPONENT_FILENAME"
      rm "$CTK_COMPONENT_COMPONENT_FILENAME"
    }

    # Get headers and shared libraries in place
    for item in $(echo "$CTK_CACHE_COMPONENTS" | tr ',' ' '); do
      populate_cuda_path "$item"
    done
    rm -f "$CTK_JSON_FILE"
    # TODO: check Windows
    if [[ "${{ inputs.host-platform }}" == linux* ]]; then
      mv "$CACHE_TMP_DIR/lib" "$CACHE_TMP_DIR/lib64"
    fi
    ls -l "$CACHE_TMP_DIR"

    # Prepare the cache
    # Note: try to escape | and > ...
    tar -czvf "${CTK_CACHE_FILENAME}" "${CACHE_TMP_DIR}"

    # "Move" files from temp dir to CUDA_PATH
    CUDA_PATH="./cuda_toolkit"
    mkdir -p "$CUDA_PATH"
    # Unfortunately we cannot use "rsync -av $CACHE_TMP_DIR/ $CUDA_PATH" because
    # not all runners have rsync pre-installed (or even installable, such as
    # Git Bash). We do it in the dumb way.
    cp -r "$CACHE_TMP_DIR"/* "$CUDA_PATH"
    rm -rf "$CACHE_TMP_DIR"
    ls -l "$CUDA_PATH"

- name: Upload CTK cache
  # Save the archive built by "Get CUDA components" unless the run was
  # cancelled; skipped when the archive came from the cache in the first place.
  if: ${{ !cancelled() && steps.ctk-get-cache.outputs.cache-hit != 'true' }}
  uses: actions/cache/save@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
  with:
    path: ./${{ env.CTK_CACHE_FILENAME }}
    key: ${{ env.CTK_CACHE_KEY }}

- name: Restore CTK cache
  if: ${{ steps.ctk-get-cache.outputs.cache-hit == 'true' }}
  shell: bash --noprofile --norc -xeuo pipefail {0}
  run: |
    # Cache hit: unpack the restored archive (it contains ./cache_tmp_dir) and
    # copy its contents into ./cuda_toolkit.
    ls -l
    CUDA_PATH="./cuda_toolkit"
    CACHE_TMP_DIR="./cache_tmp_dir"
    tar -xzvf $CTK_CACHE_FILENAME
    mkdir -p $CUDA_PATH
    # Can't use rsync here: not all runners have it available
    cp -r $CACHE_TMP_DIR/* $CUDA_PATH
    rm -rf $CACHE_TMP_DIR $CTK_CACHE_FILENAME
    ls -l $CUDA_PATH
    # Sanity check: fail the step when the restored tree has no include/ folder
    [ -d "$CUDA_PATH/include" ] || exit 1

- name: Move CTK to the specified location
  if: ${{ inputs.cuda-path != './cuda_toolkit' }}
  shell: bash --noprofile --norc -xeuo pipefail {0}
  run: |
    # Relocate ./cuda_toolkit to the caller-requested path.
    DEST="${{ inputs.cuda-path }}"
    # The destination's parent may not exist yet; mv does not create it.
    mkdir -p "$(dirname "$DEST")"
    # The `if:` above is a plain string comparison, so equivalent spellings of
    # the default (e.g. "cuda_toolkit" or "./cuda_toolkit/") still reach this
    # point. Moving a directory onto itself fails, so skip the move then.
    if [[ "$(realpath "$DEST")" == "$(realpath ./cuda_toolkit)" ]]; then
      echo "cuda-path already refers to ./cuda_toolkit; nothing to move."
      exit 0
    fi
    mv ./cuda_toolkit "$DEST"

- name: Set output environment variables
  shell: bash --noprofile --norc -xeuo pipefail {0}
  run: |
    # mimics actual CTK installation: puts <cuda-path>/bin on PATH and exports
    # CUDA_PATH / CUDA_HOME (plus LD_LIBRARY_PATH on Linux) for later steps.
    if [[ "${{ inputs.host-platform }}" == linux* ]]; then
      CUDA_PATH=$(realpath "${{ inputs.cuda-path }}")
      echo "${CUDA_PATH}/bin" >> $GITHUB_PATH
      # Append the previous value only when it is non-empty. A trailing ":"
      # would add an empty entry, which the dynamic loader treats as the
      # current working directory.
      echo "LD_LIBRARY_PATH=${CUDA_PATH}/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}" >> $GITHUB_ENV
    elif [[ "${{ inputs.host-platform }}" == win* ]]; then
      # Convert to a Windows-style path and double every backslash
      function normpath() {
        echo "$(echo $(cygpath -w $1) | sed 's/\\/\\\\/g')"
      }
      CUDA_PATH=$(normpath $(realpath "${{ inputs.cuda-path }}"))
      echo "$(normpath ${CUDA_PATH}/bin)" >> $GITHUB_PATH
    else
      # Without this branch CUDA_PATH would be unset below and the step would
      # die with an "unbound variable" error that hides the real cause.
      echo "Unsupported host-platform: ${{ inputs.host-platform }}" >&2
      exit 1
    fi
    echo "CUDA_PATH=${CUDA_PATH}" >> $GITHUB_ENV
    echo "CUDA_HOME=${CUDA_PATH}" >> $GITHUB_ENV
58 changes: 58 additions & 0 deletions .github/actions/get_pr_number/action.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0

name: Get the PR number

description: >-
  Get the PR number without relying on the pull_request* event triggers.

# Composite action without inputs. Its steps export PR_NUMBER to $GITHUB_ENV,
# together with BUILD_PREVIEW=1 (non-main, non-release refs) or BUILD_LATEST=1
# ('main' and 'release/*' refs).
runs:
  using: composite
  steps:
- id: get-pr-info
  name: Get PR info (non-main, non-release branch)
  if: ${{ !(github.ref_name == 'main' || startsWith(github.ref_name, 'release/')) }}
  # NOTE(review): referenced by branch name, whereas the other third-party
  # actions in this repository are pinned to a commit SHA; consider pinning.
  uses: nv-gha-runners/get-pr-info@main

- name: Extract PR number (non-main, non-release branch)
  if: ${{ !(github.ref_name == 'main' || startsWith(github.ref_name, 'release/')) }}
  shell: bash --noprofile --norc -xeuo pipefail {0}
  run: |
    # Report the failing line before bailing out on any error
    trap 'echo "Error at line $LINENO"; exit 1' ERR
    # The `pr-info` output of the previous step is JSON; take its `number` field
    PR_NUMBER="${{ fromJSON(steps.get-pr-info.outputs.pr-info).number }}"
    [[ -n "$PR_NUMBER" ]] || {
      echo "Cannot extract PR number for ref: ${{ github.ref_name }}"
      exit 1
    }
    # Make the number available to later steps and flag this as a preview build
    {
      echo "PR_NUMBER=$PR_NUMBER"
      echo "BUILD_PREVIEW=1"
    } >> $GITHUB_ENV

- id: get-pr-data
  name: Get PR data (main or release/* branch)
  if: ${{ github.ref_name == 'main' || startsWith(github.ref_name, 'release/') }}
  uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
  with:
    script: |
      // Look up the pull requests associated with the commit being built and
      // return the first one; fail the step when there is none.
      const { data: associatedPrs } = await github.rest.repos.listPullRequestsAssociatedWithCommit({
        owner: context.repo.owner,
        repo: context.repo.repo,
        commit_sha: context.sha,
      });
      if (associatedPrs.length) {
        return associatedPrs[0];
      }
      core.setFailed("No PR associated with this commit on 'main' or 'release/*'.");

- name: Extract PR number (main or release/* branch)
  if: ${{ github.ref_name == 'main' || startsWith(github.ref_name, 'release/') }}
  shell: bash --noprofile --norc -xeuo pipefail {0}
  run: |
    # Report the failing line before bailing out on any error
    trap 'echo "Error at line $LINENO"; exit 1' ERR
    # The script step's `result` output is JSON; take its `number` field
    PR_NUMBER="${{ fromJSON(steps.get-pr-data.outputs.result).number }}"
    [[ -n "$PR_NUMBER" ]] || {
      echo "No associated PR found for the commit in 'main' or 'release/*'."
      exit 1
    }
    # Make the number available to later steps and flag this as a latest build
    {
      echo "PR_NUMBER=$PR_NUMBER"
      echo "BUILD_LATEST=1"
    } >> $GITHUB_ENV
49 changes: 49 additions & 0 deletions .github/actions/install_unix_deps/action.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0

name: Install dependencies on Ubuntu

description: Install needed dependencies, regardless if using GitHub- or self- hosted runners, container, sudo or not.

# Every input now carries a description, as the action metadata syntax asks for.
inputs:
  dependencies:
    description: "Space-separated list of apt packages to install when any executable listed in dependent_exes is missing."
    required: true
    type: string
  dependent_exes:
    description: "Space-separated list of executables to look up on PATH. When all of them are found, nothing is installed."
    required: true
    type: string

runs:
  using: composite
  steps:
- name: Install dependencies
  shell: bash --noprofile --norc -xeuo pipefail {0}
  run: |
    # Install the requested apt packages, but only when at least one of the
    # expected executables is missing. Uses sudo when available, runs the
    # commands directly when already root, and fails otherwise.
    dependencies=(${{ inputs.dependencies }})
    dependent_exes=(${{ inputs.dependent_exes }})

    not_found=0
    for dep in "${dependent_exes[@]}"; do
      # Redirect stdout first, then point stderr at it, so both are silenced
      if ! command -v "$dep" >/dev/null 2>&1; then
        not_found=1
        break
      fi
    done
    if [[ $not_found == 0 ]]; then
      echo "All dependencies are found. Do nothing."
      exit 0
    fi

    # A plain variable replaces the former alias: it needs no
    # `shopt -s expand_aliases` and does not depend on when bash parses the
    # lines that use it. It is expanded unquoted on purpose so that the empty
    # value disappears from the command line.
    if command -v sudo >/dev/null 2>&1; then
      SUDO="sudo"
    elif [[ $EUID == 0 ]]; then
      SUDO=""
    else
      echo "The following operations require root access."
      exit 1
    fi
    # apt-get rather than apt: apt's own documentation advises against using
    # the `apt` front end in scripts.
    $SUDO apt-get update
    $SUDO apt-get install -y "${dependencies[@]}"
Loading
Loading