diff --git a/.github/actions/fetch_ctk/action.yml b/.github/actions/fetch_ctk/action.yml new file mode 100644 index 000000000..b6da52c2a --- /dev/null +++ b/.github/actions/fetch_ctk/action.yml @@ -0,0 +1,193 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +name: Fetch mini CTK + +description: Fetch (or create) a mini CUDA Toolkit from cache + +inputs: + host-platform: + required: true + type: string + cuda-version: + required: true + type: string + cuda-components: + description: "A list of the CTK components to install as a comma-separated list. e.g. 'cuda_nvcc,cuda_nvrtc,cuda_cudart'" + required: false + type: string + default: "cuda_nvcc,cuda_cudart,cuda_crt,libnvvm,cuda_nvrtc,cuda_profiler_api,cuda_cccl,libnvjitlink,libcufile" + cuda-path: + description: "where the CTK components will be installed to, relative to $PWD" + required: false + type: string + default: "./cuda_toolkit" + +runs: + using: composite + steps: + - name: Set up CTK cache variable + shell: bash --noprofile --norc -xeuo pipefail {0} + run: | + # Pre-process the component list to ensure hash uniqueness + CTK_CACHE_COMPONENTS=${{ inputs.cuda-components }} + # Conditionally strip out libnvjitlink for CUDA versions < 12 + CUDA_MAJOR_VER="$(cut -d '.' -f 1 <<< ${{ inputs.cuda-version }})" + if [[ "$CUDA_MAJOR_VER" -lt 12 ]]; then + CTK_CACHE_COMPONENTS="${CTK_CACHE_COMPONENTS//libnvjitlink/}" + fi + # Conditionally strip out cuda_crt and libnvvm for CUDA versions < 13 + CUDA_MAJOR_VER="$(cut -d '.' -f 1 <<< ${{ inputs.cuda-version }})" + if [[ "$CUDA_MAJOR_VER" -lt 13 ]]; then + CTK_CACHE_COMPONENTS="${CTK_CACHE_COMPONENTS//cuda_crt/}" + CTK_CACHE_COMPONENTS="${CTK_CACHE_COMPONENTS//libnvvm/}" + fi + # Conditionally strip out libcufile since it does not support Windows + if [[ "${{ inputs.host-platform }}" == win-* ]]; then + CTK_CACHE_COMPONENTS="${CTK_CACHE_COMPONENTS//libcufile/}" + fi + # Cleanup stray commas after removing components + CTK_CACHE_COMPONENTS="${CTK_CACHE_COMPONENTS//,,/,}" + + HASH=$(echo -n "${CTK_CACHE_COMPONENTS}" | sha256sum | awk '{print $1}') + echo "CTK_CACHE_KEY=mini-ctk-${{ inputs.cuda-version }}-${{ inputs.host-platform }}-$HASH" >> $GITHUB_ENV + echo "CTK_CACHE_FILENAME=mini-ctk-${{ inputs.cuda-version }}-${{ inputs.host-platform }}-$HASH.tar.gz" >> $GITHUB_ENV + echo "CTK_CACHE_COMPONENTS=${CTK_CACHE_COMPONENTS}" >> $GITHUB_ENV + + - name: Install dependencies + uses: ./.github/actions/install_unix_deps + continue-on-error: false + with: + dependencies: "zstd curl xz-utils" + dependent_exes: "zstd curl xz" + + - name: Download CTK cache + id: ctk-get-cache + uses: actions/cache/restore@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3 + continue-on-error: true + with: + key: ${{ env.CTK_CACHE_KEY }} + path: ./${{ env.CTK_CACHE_FILENAME }} + fail-on-cache-miss: false + + - name: Get CUDA components + if: ${{ steps.ctk-get-cache.outputs.cache-hit != 'true' }} + shell: bash --noprofile --norc -xeuo pipefail {0} + run: | + # Everything under this folder is packed and stored in the GitHub Cache space, + # and unpacked after retrieving from the cache. + CACHE_TMP_DIR="./cache_tmp_dir" + rm -rf $CACHE_TMP_DIR + mkdir $CACHE_TMP_DIR + + # The binary archives (redist) are guaranteed to be updated as part of the release posting. 
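# Illustration only (comments, not executed by this step): each component in the
# redistrib JSON maps to per-platform entries whose 'relative_path' points at the
# archive to download. Resolving cuda_nvcc for linux-x86_64 could look roughly like
# this sketch, reusing the same inline-python lookup that populate_cuda_path uses below:
#
#   curl -s "https://developer.download.nvidia.com/compute/cuda/redist/redistrib_12.9.1.json" \
#     | python -c "import sys, json; print(json.load(sys.stdin)['cuda_nvcc']['linux-x86_64']['relative_path'])"
#
# populate_cuda_path performs the real per-component lookup for the requested platform.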
+ CTK_BASE_URL="https://developer.download.nvidia.com/compute/cuda/redist/" + CTK_JSON_URL="$CTK_BASE_URL/redistrib_${{ inputs.cuda-version }}.json" + if [[ "${{ inputs.host-platform }}" == linux* ]]; then + if [[ "${{ inputs.host-platform }}" == "linux-64" ]]; then + CTK_SUBDIR="linux-x86_64" + elif [[ "${{ inputs.host-platform }}" == "linux-aarch64" ]]; then + CTK_SUBDIR="linux-sbsa" + fi + function extract() { + tar -xvf $1 -C $CACHE_TMP_DIR --strip-components=1 + } + elif [[ "${{ inputs.host-platform }}" == "win-64" ]]; then + CTK_SUBDIR="windows-x86_64" + function extract() { + _TEMP_DIR_=$(mktemp -d) + unzip $1 -d $_TEMP_DIR_ + cp -r $_TEMP_DIR_/*/* $CACHE_TMP_DIR + rm -rf $_TEMP_DIR_ + # see commit NVIDIA/cuda-python@69410f1d9228e775845ef6c8b4a9c7f37ffc68a5 + chmod 644 $CACHE_TMP_DIR/LICENSE + } + fi + function populate_cuda_path() { + # take the component name as a argument + function download() { + curl -kLSs $1 -o $2 + } + CTK_COMPONENT=$1 + CTK_COMPONENT_REL_PATH="$(curl -s $CTK_JSON_URL | + python -c "import sys, json; print(json.load(sys.stdin)['${CTK_COMPONENT}']['${CTK_SUBDIR}']['relative_path'])")" + CTK_COMPONENT_URL="${CTK_BASE_URL}/${CTK_COMPONENT_REL_PATH}" + CTK_COMPONENT_COMPONENT_FILENAME="$(basename $CTK_COMPONENT_REL_PATH)" + download $CTK_COMPONENT_URL $CTK_COMPONENT_COMPONENT_FILENAME + extract $CTK_COMPONENT_COMPONENT_FILENAME + rm $CTK_COMPONENT_COMPONENT_FILENAME + } + + # Get headers and shared libraries in place + for item in $(echo $CTK_CACHE_COMPONENTS | tr ',' ' '); do + populate_cuda_path "$item" + done + # TODO: check Windows + if [[ "${{ inputs.host-platform }}" == linux* ]]; then + mv $CACHE_TMP_DIR/lib $CACHE_TMP_DIR/lib64 + fi + ls -l $CACHE_TMP_DIR + + # Prepare the cache + # Note: try to escape | and > ... + tar -czvf ${CTK_CACHE_FILENAME} ${CACHE_TMP_DIR} + + # "Move" files from temp dir to CUDA_PATH + CUDA_PATH="./cuda_toolkit" + mkdir -p $CUDA_PATH + # Unfortunately we cannot use "rsync -av $CACHE_TMP_DIR/ $CUDA_PATH" because + # not all runners have rsync pre-installed (or even installable, such as + # Git Bash). We do it in the dumb way. + cp -r $CACHE_TMP_DIR/* $CUDA_PATH + rm -rf $CACHE_TMP_DIR + ls -l $CUDA_PATH + + - name: Upload CTK cache + if: ${{ !cancelled() && + steps.ctk-get-cache.outputs.cache-hit != 'true' }} + uses: actions/cache/save@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3 + with: + key: ${{ env.CTK_CACHE_KEY }} + path: ./${{ env.CTK_CACHE_FILENAME }} + + - name: Restore CTK cache + if: ${{ steps.ctk-get-cache.outputs.cache-hit == 'true' }} + shell: bash --noprofile --norc -xeuo pipefail {0} + run: | + ls -l + CACHE_TMP_DIR="./cache_tmp_dir" + CUDA_PATH="./cuda_toolkit" + mkdir -p $CUDA_PATH + tar -xzvf $CTK_CACHE_FILENAME + # Can't use rsync here, see above + cp -r $CACHE_TMP_DIR/* $CUDA_PATH + rm -rf $CACHE_TMP_DIR $CTK_CACHE_FILENAME + ls -l $CUDA_PATH + if [ ! 
-d "$CUDA_PATH/include" ]; then + exit 1 + fi + + - name: Move CTK to the specified location + if: ${{ inputs.cuda-path != './cuda_toolkit' }} + shell: bash --noprofile --norc -xeuo pipefail {0} + run: | + mv ./cuda_toolkit ${{ inputs.cuda-path }} + + - name: Set output environment variables + shell: bash --noprofile --norc -xeuo pipefail {0} + run: | + # mimics actual CTK installation + if [[ "${{ inputs.host-platform }}" == linux* ]]; then + CUDA_PATH=$(realpath "${{ inputs.cuda-path }}") + echo "${CUDA_PATH}/bin" >> $GITHUB_PATH + echo "LD_LIBRARY_PATH=${CUDA_PATH}/lib64:${LD_LIBRARY_PATH:-}" >> $GITHUB_ENV + elif [[ "${{ inputs.host-platform }}" == win* ]]; then + function normpath() { + echo "$(echo $(cygpath -w $1) | sed 's/\\/\\\\/g')" + } + CUDA_PATH=$(normpath $(realpath "${{ inputs.cuda-path }}")) + echo "$(normpath ${CUDA_PATH}/bin)" >> $GITHUB_PATH + fi + echo "CUDA_PATH=${CUDA_PATH}" >> $GITHUB_ENV + echo "CUDA_HOME=${CUDA_PATH}" >> $GITHUB_ENV diff --git a/.github/actions/get_pr_number/action.yml b/.github/actions/get_pr_number/action.yml new file mode 100644 index 000000000..1641f8068 --- /dev/null +++ b/.github/actions/get_pr_number/action.yml @@ -0,0 +1,58 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +name: Get the PR number + +description: Get the PR number without relying on the pull_request* event triggers. + +runs: + using: composite + steps: + - name: Get PR info (non-main, non-release branch) + if: ${{ github.ref_name != 'main' && !startsWith(github.ref_name, 'release/') }} + uses: nv-gha-runners/get-pr-info@main + id: get-pr-info + + - name: Extract PR number (non-main, non-release branch) + if: ${{ github.ref_name != 'main' && !startsWith(github.ref_name, 'release/') }} + shell: bash --noprofile --norc -xeuo pipefail {0} + run: | + trap 'echo "Error at line $LINENO"; exit 1' ERR + PR_NUMBER="${{ fromJSON(steps.get-pr-info.outputs.pr-info).number }}" + if [[ -z "$PR_NUMBER" ]]; then + echo "Cannot extract PR number for ref: ${{ github.ref_name }}" + exit 1 + fi + echo "PR_NUMBER=$PR_NUMBER" >> $GITHUB_ENV + echo "BUILD_PREVIEW=1" >> $GITHUB_ENV + + - name: Get PR data (main or release/* branch) + if: ${{ github.ref_name == 'main' || startsWith(github.ref_name, 'release/') }} + uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 + id: get-pr-data + with: + script: | + const prs = await github.rest.repos.listPullRequestsAssociatedWithCommit({ + commit_sha: context.sha, + owner: context.repo.owner, + repo: context.repo.repo, + }); + if (!prs.data.length) { + core.setFailed("No PR associated with this commit on 'main' or 'release/*'."); + } else { + return prs.data[0]; + } + + - name: Extract PR number (main or release/* branch) + if: ${{ github.ref_name == 'main' || startsWith(github.ref_name, 'release/') }} + shell: bash --noprofile --norc -xeuo pipefail {0} + run: | + trap 'echo "Error at line $LINENO"; exit 1' ERR + PR_NUMBER="${{ fromJSON(steps.get-pr-data.outputs.result).number }}" + if [[ -z "$PR_NUMBER" ]]; then + echo "No associated PR found for the commit in 'main' or 'release/*'." 
+ exit 1 + fi + echo "PR_NUMBER=$PR_NUMBER" >> $GITHUB_ENV + echo "BUILD_LATEST=1" >> $GITHUB_ENV diff --git a/.github/actions/install_unix_deps/action.yml b/.github/actions/install_unix_deps/action.yml new file mode 100644 index 000000000..6289541c9 --- /dev/null +++ b/.github/actions/install_unix_deps/action.yml @@ -0,0 +1,49 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +name: Install dependencies on Ubuntu + +description: Install needed dependencies, regardless if using GitHub- or self- hosted runners, container, sudo or not. + +inputs: + dependencies: + required: true + type: string + dependent_exes: + required: true + type: string + +runs: + using: composite + steps: + - name: Install dependencies + shell: bash --noprofile --norc -xeuo pipefail {0} + run: | + dependencies=(${{ inputs.dependencies }}) + dependent_exes=(${{ inputs.dependent_exes }}) + + not_found=0 + for dep in ${dependent_exes[@]}; do + if ! (command -v $dep 2>&1 >/dev/null); then + not_found=1 + break + fi + done + if [[ $not_found == 0 ]]; then + echo "All dependencies are found. Do nothing." + exit 0 + fi + if ! (command -v sudo 2>&1 >/dev/null); then + if [[ $EUID == 0 ]]; then + alias SUDO="" + else + echo "The following oprations require root access." + exit 1 + fi + else + alias SUDO="sudo" + fi + shopt -s expand_aliases + SUDO apt update + SUDO apt install -y ${dependencies[@]} diff --git a/.github/workflows/build-wheel.yml b/.github/workflows/build-wheel.yml new file mode 100644 index 000000000..f5d6aae12 --- /dev/null +++ b/.github/workflows/build-wheel.yml @@ -0,0 +1,255 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +on: + workflow_call: + inputs: + host-platform: + required: true + type: string + cuda-version: + required: true + type: string + prev-cuda-version: + required: true + type: string + +defaults: + run: + shell: bash --noprofile --norc -xeuo pipefail {0} + +permissions: + contents: read # This is required for actions/checkout + +jobs: + build: + strategy: + fail-fast: false + matrix: + python-version: + - "3.10" + - "3.11" + - "3.12" + - "3.13" + # - "3.14" + # - "3.14t" + name: py${{ matrix.python-version }} + runs-on: ${{ (inputs.host-platform == 'linux-64' && 'linux-amd64-cpu8') || + (inputs.host-platform == 'linux-aarch64' && 'linux-arm64-cpu8') || + (inputs.host-platform == 'win-64' && 'windows-2022') }} + steps: + - name: Checkout ${{ github.event.repository.name }} + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + with: + fetch-depth: 0 + + # The env vars ACTIONS_CACHE_SERVICE_V2, ACTIONS_RESULTS_URL, and ACTIONS_RUNTIME_TOKEN + # are exposed by this action. + - name: Enable sccache + if: ${{ startsWith(inputs.host-platform, 'linux') }} + uses: mozilla-actions/sccache-action@7d986dd989559c6ecdb630a3fd2557667be217ad # 0.0.9 + + # xref: https://github.com/orgs/community/discussions/42856#discussioncomment-7678867 + - name: Adding addtional GHA cache-related env vars + uses: actions/github-script@v7 + with: + script: | + core.exportVariable('ACTIONS_CACHE_URL', process.env['ACTIONS_CACHE_URL']) + core.exportVariable('ACTIONS_RUNTIME_URL', process.env['ACTIONS_RUNTIME_URL']) + + - name: Setup proxy cache + uses: nv-gha-runners/setup-proxy-cache@main + continue-on-error: true + # Skip the cache on Windows nodes outside of our org. 
+ if: ${{ inputs.host-platform != 'win-64' }} + + - name: Set up Python + id: setup-python1 + uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0 + with: + # WAR: setup-python is not relocatable, and cibuildwheel hard-wires to 3.12... + # see https://github.com/actions/setup-python/issues/871 + python-version: "3.12" + + - name: Set up MSVC + if: ${{ startsWith(inputs.host-platform, 'win') }} + uses: ilammy/msvc-dev-cmd@v1 # TODO: ask admin to allow pinning commits + + - name: Set environment variables + env: + HOST_PLATFORM: ${{ inputs.host-platform }} + PY_VER: ${{ matrix.python-version }} + SHA: ${{ github.sha }} + run: ./ci/tools/env-vars build + + - name: Dump environment + run: | + env + + - name: Build numba-cuda wheel + uses: pypa/cibuildwheel@9c00cb4f6b517705a3794b22395aedc36257242c # v3.2.1 + with: + package-dir: . + output-dir: ${{ env.NUMBA_CUDA_ARTIFACTS_DIR }} + env: + CIBW_BUILD: ${{ env.CIBW_BUILD }} + # CIBW mounts the host filesystem under /host + CIBW_ENVIRONMENT_LINUX: > + CC="/host/${{ env.SCCACHE_PATH }} cc" + CXX="/host/${{ env.SCCACHE_PATH }} c++" + SCCACHE_GHA_ENABLED=true + ACTIONS_RUNTIME_TOKEN=${{ env.ACTIONS_RUNTIME_TOKEN }} + ACTIONS_RUNTIME_URL=${{ env.ACTIONS_RUNTIME_URL }} + ACTIONS_RESULTS_URL=${{ env.ACTIONS_RESULTS_URL }} + ACTIONS_CACHE_URL=${{ env.ACTIONS_CACHE_URL }} + ACTIONS_CACHE_SERVICE_V2=${{ env.ACTIONS_CACHE_SERVICE_V2 }} + SCCACHE_DIR=/host/${{ env.SCCACHE_DIR }} + SCCACHE_CACHE_SIZE=${{ env.SCCACHE_CACHE_SIZE }} + # check cache stats before leaving cibuildwheel + CIBW_BEFORE_TEST_LINUX: > + "/host/${{ env.SCCACHE_PATH }}" --show-stats + # force the test stage to be run (so that before-test is not skipped) + # TODO: we might want to think twice on adding this, it does a lot of + # things before reaching this command. + CIBW_TEST_COMMAND_LINUX: > + echo "ok!" + + - name: List the numba-cuda artifacts directory + run: | + if [[ "${{ inputs.host-platform }}" == win* ]]; then + export CHOWN=chown + else + export CHOWN="sudo chown" + fi + $CHOWN -R $(whoami) ${{ env.NUMBA_CUDA_ARTIFACTS_DIR }} + ls -lahR ${{ env.NUMBA_CUDA_ARTIFACTS_DIR }} + + - name: Upload numba-cuda build artifacts + uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 + with: + name: ${{ env.NUMBA_CUDA_ARTIFACT_NAME }} + path: ${{ env.NUMBA_CUDA_ARTIFACTS_DIR }}/*.whl + if-no-files-found: error + + build-tests: + needs: + - build + strategy: + fail-fast: false + matrix: + # We just need 1 Python version because the artifacts are Python agnostic. + python-version: + - "3.10" + cuda-version: + - ${{ inputs.cuda-version }} + - ${{ inputs.prev-cuda-version }} + name: py${{ matrix.python-version }} CUDA ${{ matrix.cuda-version }} + runs-on: ${{ (inputs.host-platform == 'linux-64' && 'linux-amd64-cpu8') || + (inputs.host-platform == 'linux-aarch64' && 'linux-arm64-cpu8') || + (inputs.host-platform == 'win-64' && 'windows-2022') }} + steps: + - name: Checkout ${{ github.event.repository.name }} + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + with: + fetch-depth: 0 + + # The env vars ACTIONS_CACHE_SERVICE_V2, ACTIONS_RESULTS_URL, and ACTIONS_RUNTIME_TOKEN + # are exposed by this action. 
+ - name: Enable sccache + if: ${{ startsWith(inputs.host-platform, 'linux') }} + uses: mozilla-actions/sccache-action@7d986dd989559c6ecdb630a3fd2557667be217ad # 0.0.9 + + # xref: https://github.com/orgs/community/discussions/42856#discussioncomment-7678867 + - name: Adding addtional GHA cache-related env vars + uses: actions/github-script@v7 + with: + script: | + core.exportVariable('ACTIONS_CACHE_URL', process.env['ACTIONS_CACHE_URL']) + core.exportVariable('ACTIONS_RUNTIME_URL', process.env['ACTIONS_RUNTIME_URL']) + + - name: Setup proxy cache + uses: nv-gha-runners/setup-proxy-cache@main + continue-on-error: true + # Skip the cache on Windows nodes outside of our org. + if: ${{ inputs.host-platform != 'win-64' }} + + - name: Set up Python + id: setup-python + uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0 + with: + python-version: ${{ matrix.python-version }} + + - name: Set up MSVC + if: ${{ startsWith(inputs.host-platform, 'win') }} + uses: ilammy/msvc-dev-cmd@v1 # TODO: ask admin to allow pinning commits + + - name: Set environment variables + env: + HOST_PLATFORM: ${{ inputs.host-platform }} + PY_VER: ${{ matrix.python-version }} + SHA: ${{ github.sha }} + run: | + ./ci/tools/env-vars build + CUDA_MAJOR="$(cut -d '.' -f 1 <<< ${{ matrix.cuda-version }})" + echo "CUDA_MAJOR=${CUDA_MAJOR}" >> ${GITHUB_ENV} + + - name: Dump environment + run: | + env + + - name: Download numba-cuda build artifacts + if: ${{ env.SKIP_NUMBA_CUDA_TEST == '0'}} + uses: actions/download-artifact@634f93cb2916e3fdff6788551b99b062d0335ce0 # v5.0.0 + with: + name: ${{ env.NUMBA_CUDA_ARTIFACT_NAME }} + path: ${{ env.NUMBA_CUDA_ARTIFACTS_DIR }} + + - name: Display structure of downloaded numba-cuda artifacts + run: | + pwd + ls -lahR ${NUMBA_CUDA_ARTIFACTS_DIR} + + - name: Install numba-cuda + run: | + # used in testing/Makefile + pip install ${{ env.NUMBA_CUDA_ARTIFACTS_DIR }}/*.whl "cuda-bindings==${CUDA_MAJOR}.*" + + - name: Set up mini CTK ${{ matrix.cuda-version }} + uses: ./.github/actions/fetch_ctk + continue-on-error: false + with: + host-platform: ${{ inputs.host-platform }} + cuda-version: ${{ matrix.cuda-version }} + cuda-components: "cuda_nvcc,cuda_cudart,cuda_crt,libnvvm,cuda_nvrtc,cuda_cccl,libnvjitlink,cuda_cuobjdump" + + - name: Build numba-cuda test artifacts aginst CUDA ${{ matrix.cuda-version }} + run: | + pushd testing + if [[ "${{ inputs.host-platform }}" == linux* ]]; then + PATH=$(dirname ${SCCACHE_PATH}):${PATH} + SCCACHE_GHA_ENABLED=true + fi + + nvcc --version + + # TODO: move this list to json + if [[ "${CUDA_MAJOR}" == 12 ]]; then + CC_LIST=(70 75 80 86 89 90 120) + elif [[ "${CUDA_MAJOR}" == 13 ]]; then + CC_LIST=(75 80 86 89 90 120) + fi + + for cc in ${CC_LIST[*]}; do + make -j $(nproc) GPU_CC=${cc} + mkdir cu${CUDA_MAJOR}_cc${cc} + mv *.cubin *.fatbin *.ptx *.o *.a *.ltoir cu${CUDA_MAJOR}_cc${cc} + done + popd + + - name: Upload numba-cuda test artifacts + uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 + with: + name: ${{ env.NUMBA_CUDA_TEST_ARTIFACT_NAME }}-cu${{ env.CUDA_MAJOR }} + path: testing/cu* + if-no-files-found: error diff --git a/.github/workflows/ci-new.yaml b/.github/workflows/ci-new.yaml new file mode 100644 index 000000000..c41164521 --- /dev/null +++ b/.github/workflows/ci-new.yaml @@ -0,0 +1,206 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# +# SPDX-License-Identifier: Apache-2.0 + +# Note: This name is referred to in the test job, so make sure any changes are sync'd up! +# Further this is referencing a run in the backport branch to fetch old bindings. +name: "CI" + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }}-${{ github.event_name }} + cancel-in-progress: true + +on: + push: + branches: + - "pull-request/[0-9]+" + - "main" + +jobs: + ci-vars: + runs-on: ubuntu-latest + outputs: + CUDA_BUILD_VER: ${{ steps.get-vars.outputs.cuda_build_ver }} + CUDA_PREV_BUILD_VER: ${{ steps.get-vars.outputs.cuda_prev_build_ver }} + steps: + - name: Checkout repository + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + with: + fetch-depth: 0 + + - name: Get CUDA build versions + id: get-vars + run: | + cuda_build_ver=$(jq -r .cuda.build.version ci/versions.json) + echo "cuda_build_ver=$cuda_build_ver" >> $GITHUB_OUTPUT + cuda_prev_build_ver=$(jq -r .cuda.prev_build.version ci/versions.json) + echo "cuda_prev_build_ver=$cuda_prev_build_ver" >> $GITHUB_OUTPUT + + should-skip: + runs-on: ubuntu-latest + outputs: + skip: ${{ steps.get-should-skip.outputs.skip }} + steps: + - name: Checkout repository + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - name: Compute whether to skip builds and tests + id: get-should-skip + env: + GH_TOKEN: ${{ github.token }} + run: | + set -euxo pipefail + if ${{ startsWith(github.ref_name, 'pull-request/') }}; then + skip="$(gh pr view "$(grep -Po '(\d+)$' <<< '${{ github.ref_name }}')" --json title --jq '.title | contains("[no-ci]")')" + else + skip=false + fi + echo "skip=${skip}" >> "$GITHUB_OUTPUT" + + # WARNING: make sure all of the build jobs are in sync + build-linux-64: + needs: + - ci-vars + - should-skip + strategy: + fail-fast: false + matrix: + host-platform: + - linux-64 + name: Build ${{ matrix.host-platform }} + if: ${{ github.repository_owner == 'nvidia' && !fromJSON(needs.should-skip.outputs.skip) }} + secrets: inherit + uses: ./.github/workflows/build-wheel.yml + with: + host-platform: ${{ matrix.host-platform }} + cuda-version: ${{ needs.ci-vars.outputs.CUDA_BUILD_VER }} + prev-cuda-version: ${{ needs.ci-vars.outputs.CUDA_PREV_BUILD_VER }} + + # WARNING: make sure all of the build jobs are in sync + build-linux-aarch64: + needs: + - ci-vars + - should-skip + strategy: + fail-fast: false + matrix: + host-platform: + - linux-aarch64 + name: Build ${{ matrix.host-platform }} + if: ${{ github.repository_owner == 'nvidia' && !fromJSON(needs.should-skip.outputs.skip) }} + secrets: inherit + uses: ./.github/workflows/build-wheel.yml + with: + host-platform: ${{ matrix.host-platform }} + cuda-version: ${{ needs.ci-vars.outputs.CUDA_BUILD_VER }} + prev-cuda-version: ${{ needs.ci-vars.outputs.CUDA_PREV_BUILD_VER }} + + # WARNING: make sure all of the build jobs are in sync + build-windows: + needs: + - ci-vars + - should-skip + strategy: + fail-fast: false + matrix: + host-platform: + - win-64 + name: Build ${{ matrix.host-platform }} + if: ${{ github.repository_owner == 'nvidia' && !fromJSON(needs.should-skip.outputs.skip) }} + secrets: inherit + uses: ./.github/workflows/build-wheel.yml + with: + host-platform: ${{ matrix.host-platform }} + cuda-version: ${{ needs.ci-vars.outputs.CUDA_BUILD_VER }} + prev-cuda-version: ${{ needs.ci-vars.outputs.CUDA_PREV_BUILD_VER }} + + # WARNING: make sure both Linux test jobs are in sync + test-linux-64: + strategy: + fail-fast: false + matrix: + host-platform: + - linux-64 + name: Test ${{ 
matrix.host-platform }} + if: ${{ github.repository_owner == 'nvidia' }} + permissions: + contents: read # This is required for actions/checkout + needs: + - build-linux-64 + secrets: inherit + uses: ./.github/workflows/test-wheel-linux.yml + with: + build-type: pull-request + host-platform: ${{ matrix.host-platform }} + + # WARNING: make sure both Linux test jobs are in sync + test-linux-aarch64: + strategy: + fail-fast: false + matrix: + host-platform: + - linux-aarch64 + name: Test ${{ matrix.host-platform }} + if: ${{ github.repository_owner == 'nvidia' }} + permissions: + contents: read # This is required for actions/checkout + needs: + - build-linux-aarch64 + secrets: inherit + uses: ./.github/workflows/test-wheel-linux.yml + with: + build-type: pull-request + host-platform: ${{ matrix.host-platform }} + + test-windows: + strategy: + fail-fast: false + matrix: + host-platform: + - win-64 + name: Test ${{ matrix.host-platform }} + if: ${{ github.repository_owner == 'nvidia' }} + permissions: + contents: read # This is required for actions/checkout + needs: + - build-windows + secrets: inherit + uses: ./.github/workflows/test-wheel-windows.yml + with: + build-type: pull-request + host-platform: ${{ matrix.host-platform }} + + checks: + name: Check job status + if: always() + runs-on: ubuntu-latest + needs: + - test-linux-64 + - test-linux-aarch64 + - test-windows + steps: + - name: Exit + run: | + # if any dependencies were cancelled, that's a failure + # + # see https://docs.github.com/en/actions/reference/workflows-and-actions/expressions#always + # and https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/collaborating-on-repositories-with-code-quality-features/troubleshooting-required-status-checks#handling-skipped-but-required-checks + # for why this cannot be encoded in the job-level `if:` field + # + # TL; DR: `$REASONS` + # + # The intersection of skipped-as-success and required status checks + # creates a scenario where if you DON'T `always()` run this job, the + # status check UI will block merging and if you DO `always()` run and + # a dependency is _cancelled_ (due to a critical failure, which is + # somehow not considered a failure ¯\_(ツ)_/¯) then the critically + # failing job(s) will timeout causing a cancellation here and the + # build to succeed which we don't want (originally this was just + # 'exit 0') + if ${{ needs.test-linux-64.result == 'cancelled' || + needs.test-linux-aarch64.result == 'cancelled' || + needs.test-windows.result == 'cancelled' || + needs.doc.result == 'cancelled' }}; then + exit 1 + else + exit 0 + fi diff --git a/.github/workflows/test-wheel-linux.yml b/.github/workflows/test-wheel-linux.yml new file mode 100644 index 000000000..be8aa739f --- /dev/null +++ b/.github/workflows/test-wheel-linux.yml @@ -0,0 +1,177 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +name: "CI: Test wheels" + +on: + workflow_call: + inputs: + build-type: + type: string + required: true + host-platform: + type: string + required: true + matrix_filter: + type: string + default: "." 
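# Note on `matrix_filter` (illustration only): it is a jq expression spliced into the
# matrix computation below, so a caller passing e.g. 'map(select(.PY_VER == "3.12"))'
# narrows the test matrix. The effective shell command then looks roughly like this
# sketch, where $TEST_MATRIX holds the entries read from ci/test-matrix.json:
#
#   jq -c 'map(select(.PY_VER == "3.12")) | if (. | length) > 0 then {include: .} else "Error: Empty matrix\n" | halt_error(1) end' <<< "$TEST_MATRIX"
#
# The default value "." keeps every matching entry.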
+ +defaults: + run: + shell: bash --noprofile --norc -xeuo pipefail {0} + +jobs: + compute-matrix: + runs-on: ubuntu-latest + env: + BUILD_TYPE: ${{ inputs.build-type }} + ARCH: ${{ (inputs.host-platform == 'linux-64' && 'amd64') || + (inputs.host-platform == 'linux-aarch64' && 'arm64') }} + outputs: + MATRIX: ${{ steps.compute-matrix.outputs.MATRIX }} + steps: + - name: Checkout ${{ github.event.repository.name }} + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + + - name: Validate Test Type + run: | + if [[ "$BUILD_TYPE" != "pull-request" ]] && [[ "$BUILD_TYPE" != "nightly" ]] && [[ "$BUILD_TYPE" != "branch" ]]; then + echo "Invalid build type! Must be one of 'nightly', 'pull-request', or 'branch'." + exit 1 + fi + + - name: Compute Python Test Matrix + id: compute-matrix + run: | + # Use the nightly matrix for branch tests + MATRIX_TYPE="${BUILD_TYPE}" + if [[ "${MATRIX_TYPE}" == "branch" ]]; then + MATRIX_TYPE="nightly" + fi + + # Read base matrix from JSON file for the specific architecture + TEST_MATRIX=$(jq --arg arch "$ARCH" --arg matrix_type "$MATRIX_TYPE" ' + .linux[$matrix_type] | + map(select(.ARCH == $arch)) + ' ci/test-matrix.json) + + # Add special runner for amd64 if applicable + if [[ "${ARCH}" == "amd64" ]]; then + SPECIAL_RUNNERS=$(jq ' + .linux.special_runners.amd64 + ' ci/test-matrix.json) + TEST_MATRIX=$(jq --argjson special "$SPECIAL_RUNNERS" '. + $special' <<< "$TEST_MATRIX") + fi + + MATRIX="$( + jq -c '${{ inputs.matrix_filter }} | if (. | length) > 0 then {include: .} else "Error: Empty matrix\n" | halt_error(1) end' <<< "$TEST_MATRIX" + )" + + echo "MATRIX=${MATRIX}" | tee --append "${GITHUB_OUTPUT}" + + test: + name: py${{ matrix.PY_VER }}, ${{ matrix.CUDA_VER }}, ${{ (matrix.LOCAL_CTK == '1' && 'local') || 'wheels' }}, ${{ matrix.GPU }} + needs: compute-matrix + strategy: + fail-fast: false + matrix: ${{ fromJSON(needs.compute-matrix.outputs.MATRIX) }} + runs-on: "linux-${{ matrix.ARCH }}-gpu-${{ matrix.GPU }}-${{ matrix.DRIVER }}-1" + # The build stage could fail but we want the CI to keep moving. 
+ if: ${{ github.repository_owner == 'nvidia' && !cancelled() }} + # Our self-hosted runners require a container + container: + options: -u root --security-opt seccomp=unconfined --shm-size 16g + image: ubuntu:22.04 + env: + NVIDIA_VISIBLE_DEVICES: ${{ env.NVIDIA_VISIBLE_DEVICES }} + steps: + - name: Ensure GPU is working + run: nvidia-smi + + - name: Checkout ${{ github.event.repository.name }} + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + + - name: Setup proxy cache + uses: nv-gha-runners/setup-proxy-cache@main + continue-on-error: true + + - name: Install dependencies + uses: ./.github/actions/install_unix_deps + continue-on-error: false + with: + # for artifact fetching, graphics libs + dependencies: "jq wget libgl1 libegl1" + dependent_exes: "jq wget" + + - name: Set environment variables + env: + CUDA_VER: ${{ matrix.CUDA_VER }} + HOST_PLATFORM: ${{ inputs.host-platform }} + LOCAL_CTK: ${{ matrix.LOCAL_CTK }} + PY_VER: ${{ matrix.PY_VER }} + SHA: ${{ github.sha }} + run: ./ci/tools/env-vars test + + - name: Download numba-cuda build artifacts + if: ${{ env.SKIP_NUMBA_CUDA_TEST == '0'}} + uses: actions/download-artifact@634f93cb2916e3fdff6788551b99b062d0335ce0 # v5.0.0 + with: + name: ${{ env.NUMBA_CUDA_ARTIFACT_NAME }} + path: ${{ env.NUMBA_CUDA_ARTIFACTS_DIR }} + + - name: Display structure of downloaded numba-cuda artifacts + run: | + pwd + ls -lahR $NUMBA_CUDA_ARTIFACTS_DIR + + - name: Download numba-cuda test artifacts + uses: actions/download-artifact@634f93cb2916e3fdff6788551b99b062d0335ce0 # v5.0.0 + with: + name: ${{ env.NUMBA_CUDA_TEST_ARTIFACT_NAME }}-cu${{ env.TEST_CUDA_MAJOR }} + path: testing/ + + - name: Display structure of downloaded numba-cuda test artifacts + run: | + pwd + ls -lahR testing/ + + - name: Set up Python ${{ matrix.PY_VER }} + uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0 + with: + python-version: ${{ matrix.PY_VER }} + env: + # we use self-hosted runners on which setup-python behaves weirdly... + AGENT_TOOLSDIRECTORY: "/opt/hostedtoolcache" + + - name: Set up mini CTK + if: ${{ matrix.LOCAL_CTK == '1' }} + uses: ./.github/actions/fetch_ctk + continue-on-error: false + with: + host-platform: ${{ inputs.host-platform }} + cuda-version: ${{ matrix.CUDA_VER }} + cuda-components: "cuda_nvcc,cuda_cudart,cuda_crt,libnvvm,cuda_nvrtc,cuda_cccl,libnvjitlink,cuda_cuobjdump" + +# - name: Set up latest cuda_sanitizer_api +# if: ${{ env.SETUP_SANITIZER == '1' }} +# uses: ./.github/actions/fetch_ctk +# continue-on-error: false +# with: +# host-platform: ${{ inputs.host-platform }} +# cuda-version: ${{ env.LATEST_CUDA_VERSION }} +# cuda-components: "cuda_sanitizer_api" +# +# - name: Set up compute-sanitizer +# run: setup-sanitizer + + - name: Run numba-cuda tests + if: ${{ env.SKIP_NUMBA_CUDA_TEST == '0' }} + env: + CUDA_VER: ${{ matrix.CUDA_VER }} + LOCAL_CTK: ${{ matrix.LOCAL_CTK }} + run: | + if [[ "${LOCAL_CTK}" != 1 ]]; then + export NUMBA_CUDA_TEST_WHEEL_ONLY=1 + fi + run-tests diff --git a/.github/workflows/test-wheel-windows.yml b/.github/workflows/test-wheel-windows.yml new file mode 100644 index 000000000..3588dbca1 --- /dev/null +++ b/.github/workflows/test-wheel-windows.yml @@ -0,0 +1,155 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# +# SPDX-License-Identifier: Apache-2.0 + +name: "CI: Test wheels" + +on: + workflow_call: + inputs: + build-type: + type: string + required: true + host-platform: + type: string + required: true + matrix_filter: + type: string + default: "." + +jobs: + compute-matrix: + runs-on: ubuntu-latest + defaults: + run: + shell: bash --noprofile --norc -xeuo pipefail {0} + env: + BUILD_TYPE: ${{ inputs.build-type }} + ARCH: ${{ (inputs.host-platform == 'win-64' && 'amd64') }} + outputs: + MATRIX: ${{ steps.compute-matrix.outputs.MATRIX }} + steps: + - name: Checkout ${{ github.event.repository.name }} + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + + - name: Validate Test Type + run: | + if [[ "$BUILD_TYPE" != "pull-request" ]] && [[ "$BUILD_TYPE" != "nightly" ]] && [[ "$BUILD_TYPE" != "branch" ]]; then + echo "Invalid build type! Must be one of 'nightly', 'pull-request', or 'branch'." + exit 1 + fi + - name: Compute Python Test Matrix + id: compute-matrix + run: | + # Use the nightly matrix for branch tests + MATRIX_TYPE="${BUILD_TYPE}" + if [[ "${MATRIX_TYPE}" == "branch" ]]; then + MATRIX_TYPE="nightly" + fi + + # Read base matrix from JSON file for the specific architecture + TEST_MATRIX=$(jq --arg arch "$ARCH" --arg matrix_type "$MATRIX_TYPE" ' + .windows[$matrix_type] | + map(select(.ARCH == $arch)) + ' ci/test-matrix.json) + + MATRIX="$( + jq -c '${{ inputs.matrix_filter }} | if (. | length) > 0 then {include: .} else "Error: Empty matrix\n" | halt_error(1) end' <<< "$TEST_MATRIX" + )" + + echo "MATRIX=${MATRIX}" | tee --append "${GITHUB_OUTPUT}" + + test: + name: py${{ matrix.PY_VER }}, ${{ matrix.CUDA_VER }}, ${{ (matrix.LOCAL_CTK == '1' && 'local') || 'wheels' }}, ${{ matrix.GPU }} (${{ matrix.DRIVER_MODE }}) + # The build stage could fail but we want the CI to keep moving. + needs: compute-matrix + strategy: + fail-fast: false + matrix: ${{ fromJSON(needs.compute-matrix.outputs.MATRIX) }} + if: ${{ github.repository_owner == 'nvidia' && !cancelled() }} + runs-on: "windows-${{ matrix.ARCH }}-gpu-${{ matrix.GPU }}-${{ matrix.DRIVER }}-1" + steps: + - name: Checkout ${{ github.event.repository.name }} + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + + - name: Setup proxy cache + uses: nv-gha-runners/setup-proxy-cache@main + continue-on-error: true + + - name: Update driver + env: + DRIVER_MODE: ${{ matrix.DRIVER_MODE }} + GPU_TYPE: ${{ matrix.GPU }} + run: | + ci/tools/install_gpu_driver.ps1 + + - name: Ensure GPU is working + run: | + nvidia-smi + + $mode_output = nvidia-smi | Select-String -Pattern "${{ matrix.DRIVER_MODE }}" + Write-Output "Driver mode check: $mode_output" + if ("$mode_output" -eq "") { + Write-Error "Switching to driver mode ${{ matrix.DRIVER_MODE }} failed!" 
+ exit 1 + } + Write-Output "Driver mode verified: ${{ matrix.DRIVER_MODE }}" + + - name: Set environment variables + env: + CUDA_VER: ${{ matrix.CUDA_VER }} + HOST_PLATFORM: ${{ inputs.host-platform }} + LOCAL_CTK: ${{ matrix.LOCAL_CTK }} + PY_VER: ${{ matrix.PY_VER }} + SHA: ${{ github.sha }} + shell: bash --noprofile --norc -xeuo pipefail {0} + run: ./ci/tools/env-vars test + + - name: Download numba-cuda build artifacts + if: ${{ env.SKIP_NUMBA_CUDA_TEST == '0'}} + uses: actions/download-artifact@634f93cb2916e3fdff6788551b99b062d0335ce0 # v5.0.0 + with: + name: ${{ env.NUMBA_CUDA_ARTIFACT_NAME }} + path: ${{ env.NUMBA_CUDA_ARTIFACTS_DIR }} + + - name: Display structure of downloaded numba-cuda artifacts + run: | + Get-Location + Get-ChildItem -Recurse -Force $env:NUMBA_CUDA_ARTIFACTS_DIR | Select-Object Mode, LastWriteTime, Length, FullName + + - name: Download numba-cuda test artifacts + uses: actions/download-artifact@634f93cb2916e3fdff6788551b99b062d0335ce0 # v5.0.0 + with: + name: ${{ env.NUMBA_CUDA_TEST_ARTIFACT_NAME }}-cu${{ env.TEST_CUDA_MAJOR }} + path: testing/ + + - name: Display structure of downloaded numba-cuda test artifacts + run: | + Get-Location + Get-ChildItem -Recurse -Force testing/ | Select-Object Mode, LastWriteTime, Length, FullName + + - name: Set up Python ${{ matrix.PY_VER }} + uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0 + with: + python-version: ${{ matrix.PY_VER }} + + - name: Set up mini CTK + if: ${{ matrix.LOCAL_CTK == '1' }} + uses: ./.github/actions/fetch_ctk + continue-on-error: false + with: + host-platform: ${{ inputs.host-platform }} + cuda-version: ${{ matrix.CUDA_VER }} + cuda-components: "cuda_nvcc,cuda_cudart,cuda_crt,libnvvm,cuda_nvrtc,cuda_cccl,libnvjitlink,cuda_cuobjdump" + + - name: Run numba-cuda tests + if: ${{ env.SKIP_NUMBA_CUDA_TEST == '0' }} + env: + CUDA_VER: ${{ matrix.CUDA_VER }} + LOCAL_CTK: ${{ matrix.LOCAL_CTK }} + shell: bash --noprofile --norc -xeuo pipefail {0} + run: | + if [[ "${LOCAL_CTK}" != 1 ]]; then + export NUMBA_CUDA_TEST_WHEEL_ONLY=1 + fi + run-tests diff --git a/ci/test-matrix.json b/ci/test-matrix.json new file mode 100644 index 000000000..6f3ad6944 --- /dev/null +++ b/ci/test-matrix.json @@ -0,0 +1,44 @@ +{ + "_description": "Test matrix configurations for CUDA Python CI workflows. This file consolidates the test matrices that were previously hardcoded in the workflow files. All GPU and ARCH values are hard-coded for each architecture: l4 GPU for amd64, a100 GPU for arm64.", + "_sorted_by": "Please keep matrices sorted in ascending order by [ARCH, PY_VER, CUDA_VER, LOCAL_CTK, GPU, DRIVER]. 
Windows entries also include DRIVER_MODE.", + "_notes": "DRIVER: 'earliest' does not work with CUDA 12.9.1 and LOCAL_CTK: 0 does not work with CUDA 12.0.1", + "linux": { + "pull-request": [ + { "ARCH": "amd64", "PY_VER": "3.10", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "v100", "DRIVER": "latest" }, + { "ARCH": "amd64", "PY_VER": "3.10", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest" }, + { "ARCH": "amd64", "PY_VER": "3.11", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "rtxpro6000", "DRIVER": "latest" }, + { "ARCH": "amd64", "PY_VER": "3.11", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" }, + { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" }, + { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest" }, + { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "v100", "DRIVER": "latest" }, + { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "rtxpro6000", "DRIVER": "latest" }, + { "ARCH": "arm64", "PY_VER": "3.10", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" }, + { "ARCH": "arm64", "PY_VER": "3.10", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "latest" }, + { "ARCH": "arm64", "PY_VER": "3.11", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "latest" }, + { "ARCH": "arm64", "PY_VER": "3.11", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" }, + { "ARCH": "arm64", "PY_VER": "3.12", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" }, + { "ARCH": "arm64", "PY_VER": "3.12", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "latest" }, + { "ARCH": "arm64", "PY_VER": "3.13", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "latest" }, + { "ARCH": "arm64", "PY_VER": "3.13", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" } + ], + "nightly": [], + "special_runners": { + "amd64": [ + { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "H100", "DRIVER": "latest" } + ] + } + }, + "windows": { + "pull-request": [ + { "ARCH": "amd64", "PY_VER": "3.10", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "rtx2080", "DRIVER": "latest", "DRIVER_MODE": "WDDM" }, + { "ARCH": "amd64", "PY_VER": "3.10", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "rtxpro6000", "DRIVER": "latest", "DRIVER_MODE": "TCC" }, + { "ARCH": "amd64", "PY_VER": "3.11", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "v100", "DRIVER": "latest", "DRIVER_MODE": "MCDM" }, + { "ARCH": "amd64", "PY_VER": "3.11", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "rtx4090", "DRIVER": "latest", "DRIVER_MODE": "WDDM" }, + { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest", "DRIVER_MODE": "MCDM" }, + { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest", "DRIVER_MODE": "TCC" }, + { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest", "DRIVER_MODE": "TCC" }, + { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "rtxpro6000", "DRIVER": "latest", "DRIVER_MODE": "MCDM" } + ], + "nightly": [] + } +} diff --git a/ci/tools/download-wheels b/ci/tools/download-wheels new file mode 100755 index 000000000..8081966c0 --- /dev/null +++ 
b/ci/tools/download-wheels @@ -0,0 +1,75 @@ +#!/usr/bin/env bash + +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +# A utility script to download component wheels from GitHub Actions artifacts. +# This script reuses the same logic that was in release.yml to maintain consistency. + +set -euo pipefail + +# Check required arguments +if [[ $# -lt 3 ]]; then + echo "Usage: $0 [output-dir]" >&2 + echo " run-id: The GitHub Actions run ID containing the artifacts" >&2 + echo " component: The component name pattern to download (e.g., cuda-core, cuda-bindings)" >&2 + echo " repository: The GitHub repository (e.g., NVIDIA/cuda-python)" >&2 + echo " output-dir: Optional output directory (default: ./dist)" >&2 + exit 1 +fi + +RUN_ID="$1" +COMPONENT="$2" +REPOSITORY="$3" +OUTPUT_DIR="${4:-./dist}" + +# Ensure we have a GitHub token +if [[ -z "${GH_TOKEN:-}" ]]; then + echo "Error: GH_TOKEN environment variable is required" + exit 1 +fi + +echo "Downloading wheels for component: $COMPONENT from run: $RUN_ID" + +# Download component wheels using the same logic as release.yml +if [[ "$COMPONENT" == "all" ]]; then + # Download all component patterns + gh run download "$RUN_ID" -p "numba*" -R "$REPOSITORY" +else + gh run download "$RUN_ID" -p "${COMPONENT}*" -R "$REPOSITORY" +fi + +# Create output directory +mkdir -p "$OUTPUT_DIR" + +# Process downloaded artifacts +for p in numba* +do + if [[ ! -d "$p" ]]; then + continue + fi + + # exclude cython test artifacts + if [[ "${p}" == *-tests ]]; then + echo "Skipping test artifact: $p" + continue + fi + + # If we're not downloading "all", only process matching component + if [[ "$COMPONENT" != "all" && "$p" != ${COMPONENT}* ]]; then + continue + fi + + echo "Processing artifact: $p" + # Move wheel files to output directory + if [[ -d "$p" ]]; then + find "$p" -name "*.whl" -exec mv {} "$OUTPUT_DIR/" \; + fi +done + +# Clean up artifact directories +rm -rf numba* + +echo "Downloaded wheels to: $OUTPUT_DIR" +ls -la "$OUTPUT_DIR" diff --git a/ci/tools/env-vars b/ci/tools/env-vars new file mode 100755 index 000000000..83f1145a1 --- /dev/null +++ b/ci/tools/env-vars @@ -0,0 +1,70 @@ +#!/usr/bin/env bash + +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +# A utility script to set up the GitHub environment variables for the CI. + +set -euo pipefail + +# Check if the script was called with exactly 1 argument +if [[ ${#} -ne 1 ]]; then + echo "Error: This script requires exactly 1 argument (the build mode). 
You provided ${#}" + echo "Usage: ${0} build_mode[build or test]" + exit 1 +fi + +PYTHON_VERSION_FORMATTED=$(echo "${PY_VER}" | tr -d '.') + +if [[ "${HOST_PLATFORM}" == linux* ]]; then + REPO_DIR=$(pwd) + TOOLS_PATH="${REPO_DIR}/ci/tools" +elif [[ "${HOST_PLATFORM}" == win* ]]; then + PWD=$(pwd) + REPO_DIR=$(cygpath -w ${PWD}) + TOOLS_PATH=$(cygpath -w ${PWD}/ci/tools) +fi + +echo "${TOOLS_PATH}" >> $GITHUB_PATH +{ + echo "PYTHON_VERSION_FORMATTED=${PYTHON_VERSION_FORMATTED}" +} >> $GITHUB_ENV + +if [[ "${1}" == "build" ]]; then + # platform is handled by the default value of platform (`auto`) in cibuildwheel + # here we only need to specify the python version we want + echo "CIBW_BUILD=cp${PYTHON_VERSION_FORMATTED}-*" >> $GITHUB_ENV + NUMBA_CUDA_ARTIFACT_BASENAME="numba-cuda-python${PYTHON_VERSION_FORMATTED}-${HOST_PLATFORM}" + # Enforce an explicit cache dir so that we can reuse this path later + echo "SCCACHE_DIR=${HOME}/.cache/sccache" >> $GITHUB_ENV + echo "SCCACHE_CACHE_SIZE=1G" >> $GITHUB_ENV +elif [[ "${1}" == "test" ]]; then + TEST_CUDA_MAJOR="$(cut -d '.' -f 1 <<< ${CUDA_VER})" + TEST_CUDA_MINOR="$(cut -d '.' -f 2 <<< ${CUDA_VER})" + NUMBA_CUDA_ARTIFACT_BASENAME="numba-cuda-python${PYTHON_VERSION_FORMATTED}-${HOST_PLATFORM}" +# # We don't test compute-sanitizer on CTK<12 because backporting fixes is too much effort +# # We only test compute-sanitizer on python 3.12 arbitrarily; we don't need to use sanitizer on the entire matrix +# # Only local ctk installs have compute-sanitizer; there is no wheel for it +# if [[ "${PY_VER}" == "3.12" && "${CUDA_VER}" != "11.8.0" && "${LOCAL_CTK}" == 1 && "${HOST_PLATFORM}" == linux* ]]; then +# echo "LATEST_CUDA_VERSION=$(bash .github/workflows/guess_latest.sh $TEST_CUDA_MAJOR)" >> $GITHUB_ENV +# SETUP_SANITIZER=1 +# else +# SETUP_SANITIZER=0 +# echo "SANITIZER_CMD=" >> $GITHUB_ENV +# fi + { +# echo "SETUP_SANITIZER=${SETUP_SANITIZER}" +# echo "SKIP_NUMBA_CUDA_TEST=${SKIP_NUMBA_CUDA_TEST}" + echo "SANITIZER_CMD=" + echo "TEST_CUDA_MAJOR=${TEST_CUDA_MAJOR}" + echo "TEST_CUDA_MINOR=${TEST_CUDA_MINOR}" + } >> $GITHUB_ENV +fi + +{ + echo "NUMBA_CUDA_ARTIFACT_BASENAME=${NUMBA_CUDA_ARTIFACT_BASENAME}" + echo "NUMBA_CUDA_ARTIFACT_NAME=${NUMBA_CUDA_ARTIFACT_BASENAME}-${SHA}" + echo "NUMBA_CUDA_TEST_ARTIFACT_NAME=numba-cuda-${HOST_PLATFORM}-${SHA}-test" + echo "NUMBA_CUDA_ARTIFACTS_DIR=$(realpath "${REPO_DIR}/dist")" +} >> $GITHUB_ENV diff --git a/ci/tools/install_gpu_driver.ps1 b/ci/tools/install_gpu_driver.ps1 new file mode 100644 index 000000000..5602eeb48 --- /dev/null +++ b/ci/tools/install_gpu_driver.ps1 @@ -0,0 +1,82 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# +# SPDX-License-Identifier: Apache-2.0 + +# Install the driver +function Install-Driver { + + # Set the correct URL, filename, and arguments to the installer + # This driver is picked to support Windows 11 & CUDA 13.0 + $version = '581.15' + + # Get GPU type from environment variable + $gpu_type = $env:GPU_TYPE + + $data_center_gpus = @('a100', 'h100', 'l4', 't4', 'v100', 'rtxa6000', 'rtx6000ada') + $desktop_gpus = @('rtx2080', 'rtx4090', 'rtxpro6000') + + if ($data_center_gpus -contains $gpu_type) { + Write-Output "Data center GPU detected: $gpu_type" + $filename="$version-data-center-tesla-desktop-winserver-2022-2025-dch-international.exe" + $server_path="tesla/$version" + } elseif ($desktop_gpus -contains $gpu_type) { + Write-Output "Desktop GPU detected: $gpu_type" + $filename="$version-desktop-win10-win11-64bit-international-dch-whql.exe" + $server_path="Windows/$version" + } else { + Write-Output "Unknown GPU type: $gpu_type" + exit 1 + } + + $url="https://us.download.nvidia.com/$server_path/$filename" + $filepath="C:\NVIDIA-Driver\$filename" + + Write-Output "Installing NVIDIA driver version $version for GPU type $gpu_type" + Write-Output "Download URL: $url" + + # Silent install arguments + $install_args = '/s /noeula /noreboot'; + + # Create the folder for the driver download + if (!(Test-Path -Path 'C:\NVIDIA-Driver')) { + New-Item -Path 'C:\' -Name 'NVIDIA-Driver' -ItemType 'directory' | Out-Null + } + + # Download the file to a specified directory + # Disabling progress bar due to https://github.com/GoogleCloudPlatform/compute-gpu-installation/issues/29 + $ProgressPreference_tmp = $ProgressPreference + $ProgressPreference = 'SilentlyContinue' + Write-Output 'Downloading the driver installer...' + Invoke-WebRequest $url -OutFile $filepath + $ProgressPreference = $ProgressPreference_tmp + Write-Output 'Download complete!' + + # Install the file with the specified path from earlier + Write-Output 'Running the driver installer...' + Start-Process -FilePath $filepath -ArgumentList $install_args -Wait + Write-Output 'Done!' + + # Handle driver mode configuration + # This assumes we have the prior knowledge on which GPU can use which mode. + $driver_mode = $env:DRIVER_MODE + if ($driver_mode -eq "WDDM") { + Write-Output "Setting driver mode to WDDM..." + nvidia-smi -fdm 0 + } elseif ($driver_mode -eq "TCC") { + Write-Output "Setting driver mode to TCC..." + nvidia-smi -fdm 1 + } elseif ($driver_mode -eq "MCDM") { + Write-Output "Setting driver mode to MCDM..." + nvidia-smi -fdm 2 + } else { + Write-Output "Unknown driver mode: $driver_mode" + exit 1 + } + pnputil /disable-device /class Display + pnputil /enable-device /class Display + # Give it a minute to settle: + Start-Sleep -Seconds 5 +} + +# Run the functions +Install-Driver diff --git a/ci/tools/lookup-run-id b/ci/tools/lookup-run-id new file mode 100755 index 000000000..db2f84b79 --- /dev/null +++ b/ci/tools/lookup-run-id @@ -0,0 +1,99 @@ +#!/usr/bin/env bash + +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +# A utility script to find the GitHub Actions workflow run ID for a given git tag. +# This script looks for the CI workflow run that corresponds to the commit of the given tag. 
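#
# Example (a sketch of intended usage; the tag and repository below are placeholders,
# and GH_TOKEN is assumed to be exported):
#
#   RUN_ID="$(ci/tools/lookup-run-id v0.1.0 NVIDIA/numba-cuda CI)"
#   ci/tools/download-wheels "$RUN_ID" all NVIDIA/numba-cuda ./dist
#
# Only the run ID is written to stdout; all progress messages go to stderr, so the
# output can be captured directly as shown above.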
+ +set -euo pipefail + +# Check required arguments +if [[ $# -lt 2 ]]; then + echo "Usage: $0 [workflow-name]" >&2 + echo " git-tag: The git tag to find the corresponding workflow run for" >&2 + echo " repository: The GitHub repository (e.g., NVIDIA/cuda-python)" >&2 + echo " workflow-name: Optional workflow name to filter by (default: CI)" >&2 + echo "" >&2 + echo "Examples:" >&2 + echo " $0 v13.0.1 NVIDIA/cuda-python" >&2 + echo " $0 v13.0.1 NVIDIA/cuda-python \"CI\"" >&2 + exit 1 +fi + +GIT_TAG="${1}" +REPOSITORY="${2}" +WORKFLOW_NAME="${3:-CI}" + +# Ensure we have required tools +if [[ -z "${GH_TOKEN:-}" ]]; then + echo "Error: GH_TOKEN environment variable is required" >&2 + exit 1 +fi + +if ! command -v jq >/dev/null 2>&1; then + echo "Error: jq is required but not installed" >&2 + exit 1 +fi + +if ! command -v gh >/dev/null 2>&1; then + echo "Error: GitHub CLI (gh) is required but not installed" >&2 + exit 1 +fi + +echo "Looking up run ID for tag: ${GIT_TAG} in repository: ${REPOSITORY}" >&2 + +# Resolve git tag to commit SHA +if ! COMMIT_SHA=$(git rev-parse "${GIT_TAG}"); then + echo "Error: Could not resolve git tag '${GIT_TAG}' to a commit SHA" >&2 + echo "Make sure the tag exists and you have fetched it" >&2 + exit 1 +fi + +echo "Resolved tag '${GIT_TAG}' to commit: ${COMMIT_SHA}" >&2 + +# Find workflow runs for this commit +echo "Searching for '${WORKFLOW_NAME}' workflow runs for commit: ${COMMIT_SHA}" >&2 + +# Get workflow runs for the commit, filter by workflow name and successful status +RUN_DATA=$(gh run list \ + --repo "${REPOSITORY}" \ + --commit "${COMMIT_SHA}" \ + --workflow "${WORKFLOW_NAME}" \ + --status completed \ + --json databaseId,workflowName,status,conclusion,headSha \ + --limit 10) + +if [[ -z "${RUN_DATA}" || "${RUN_DATA}" == "[]" ]]; then + echo "Error: No completed '${WORKFLOW_NAME}' workflow runs found for commit ${COMMIT_SHA}" >&2 + echo "Available workflow runs for this commit:" >&2 + gh run list --repo "${REPOSITORY}" --commit "${COMMIT_SHA}" --limit 10 || true + exit 1 +fi + +# Filter for successful runs (conclusion = success) and extract the run ID from the first one +RUN_ID=$(echo "${RUN_DATA}" | jq -r '.[] | select(.conclusion == "success") | .databaseId' | head -1) + +if [[ -z "${RUN_ID}" || "${RUN_ID}" == "null" ]]; then + echo "Error: No successful '${WORKFLOW_NAME}' workflow runs found for commit ${COMMIT_SHA}" >&2 + echo "Available workflow runs for this commit:" >&2 + gh run list --repo "$REPOSITORY" --commit "${COMMIT_SHA}" --limit 10 || true + echo "" >&2 + echo "Completed runs with their conclusions:" >&2 + echo "${RUN_DATA}" | jq -r '.[] | "\(.databaseId): \(.conclusion)"' >&2 + exit 1 +fi + +echo "Found workflow run ID: ${RUN_ID} for tag '${GIT_TAG}'" >&2 + +# Verify the run has the expected artifacts by checking if there are any artifacts +echo "Verifying artifacts exist for run ${RUN_ID}..." >&2 +ARTIFACT_LIST=$(gh run view "${RUN_ID}" --repo "${REPOSITORY}" --json url || echo "") + +if [[ -z "${ARTIFACT_LIST}" ]]; then + echo "Warning: Could not verify artifacts for workflow run ${RUN_ID}" >&2 +fi + +# Output the run ID (this is what gets used by calling scripts) +echo "${RUN_ID}" diff --git a/ci/tools/run-tests b/ci/tools/run-tests new file mode 100755 index 000000000..59eb1055e --- /dev/null +++ b/ci/tools/run-tests @@ -0,0 +1,22 @@ +#!/usr/bin/env bash + +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# +# SPDX-License-Identifier: Apache-2.0 + +# A utility script to install the correct packages and run the tests. + +set -euo pipefail + +echo "Installing numba-cuda wheel" +if [[ "${LOCAL_CTK}" == 1 ]]; then + pip install "${NUMBA_CUDA_ARTIFACTS_DIR}"/*.whl "cuda-bindings==${TEST_CUDA_MAJOR}.*" --group test +else + pip install $(ls "${NUMBA_CUDA_ARTIFACTS_DIR}"/*.whl)["cu${TEST_CUDA_MAJOR}"] "cuda-toolkit==${TEST_CUDA_MAJOR}.${TEST_CUDA_MINOR}.*" --group test +fi +echo "Running numba-cuda tests" +export NUMBA_CUDA_TEST_BIN_DIR=`pwd`/testing +pushd $NUMBA_CUDA_TEST_BIN_DIR +GPU_CC=$(nvidia-smi --query-gpu=compute_cap --format=csv | grep -v compute_cap | head -n 1 | sed 's/\.//') +mv cu${TEST_CUDA_MAJOR}_cc${GPU_CC}/* . +${SANITIZER_CMD} pytest -rxXs -v diff --git a/ci/versions.json b/ci/versions.json new file mode 100644 index 000000000..32b869833 --- /dev/null +++ b/ci/versions.json @@ -0,0 +1,10 @@ +{ + "cuda": { + "build": { + "version": "13.0.2" + }, + "prev_build": { + "version": "12.9.1" + } + } +} diff --git a/numba_cuda/numba/cuda/__init__.py b/numba_cuda/numba/cuda/__init__.py index d0ff4ba55..9f887a2ba 100644 --- a/numba_cuda/numba/cuda/__init__.py +++ b/numba_cuda/numba/cuda/__init__.py @@ -1,6 +1,8 @@ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: BSD-2-Clause +# delvewheel: patch + import importlib from numba.cuda.core import config from .utils import _readenv @@ -23,7 +25,8 @@ ): raise ImportError( "NVIDIA CUDA Python bindings not found. Install the 'cuda' package " - "(e.g. pip install nvidia-cuda-python or numba-cuda[cuXY])." + '(e.g. pip install "cuda-bindings==XY.*" or "numba-cuda[cuXY]", ' + "with XY=12 or XY=13)." ) if config.ENABLE_CUDASIM: diff --git a/numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py b/numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py index 4838ce0e0..ff51db4f1 100644 --- a/numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +++ b/numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py @@ -43,6 +43,12 @@ TEST_BIN_DIR, "test_device_functions.ltoir" ) + require_cuobjdump = ( + test_device_functions_fatbin_multi, + test_device_functions_fatbin, + test_device_functions_o, + ) + @unittest.skipIf( not TEST_BIN_DIR or not _have_nvjitlink(), @@ -127,14 +133,22 @@ def tearDown(self): super().tearDown() def test_nvjitlink_jit_with_linkable_code_lto_dump_assembly(self): - files = [ + files = ( test_device_functions_cu, test_device_functions_ltoir, test_device_functions_fatbin_multi, - ] + ) for file in files: with self.subTest(file=file): + if ( + file in require_cuobjdump + and os.getenv("NUMBA_CUDA_TEST_WHEEL_ONLY") is not None + ): + self.skipTest( + "wheel-only environments do not have cuobjdump" + ) + f = io.StringIO() with contextlib.redirect_stdout(f): sig = "uint32(uint32, uint32)" @@ -151,16 +165,24 @@ def kernel(result): self.assertTrue("ASSEMBLY (AFTER LTO)" in f.getvalue()) def test_nvjitlink_jit_with_linkable_code_lto_dump_assembly_warn(self): - files = [ + files = ( test_device_functions_a, test_device_functions_cubin, test_device_functions_fatbin, test_device_functions_o, test_device_functions_ptx, - ] + ) for file in files: with self.subTest(file=file): + if ( + file in require_cuobjdump + and os.getenv("NUMBA_CUDA_TEST_WHEEL_ONLY") is not None + ): + self.skipTest( + "wheel-only environments do not have cuobjdump" + ) + sig = "uint32(uint32, uint32)" add_from_numba = cuda.declare_device("add_from_numba", sig) diff --git 
a/numba_cuda/numba/cuda/tests/cudapy/test_atomics.py b/numba_cuda/numba/cuda/tests/cudapy/test_atomics.py index e7d30dc2c..2de769670 100644 --- a/numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +++ b/numba_cuda/numba/cuda/tests/cudapy/test_atomics.py @@ -592,6 +592,12 @@ def atomic_cas_2dim(res, old, ary, fill_val): old[gid] = cuda.atomic.cas(res, gid, fill_val, ary[gid]) +@unittest.skipIf( + not config.ENABLE_CUDASIM + and cuda.get_current_device().compute_capability >= (12, 0) + and cuda.cudadrv.runtime.get_version()[0] == 12, + reason="NVVM 12.9 Bugged on CC 10+", +) class TestCudaAtomics(CUDATestCase): def setUp(self): super().setUp() diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_complex.py b/numba_cuda/numba/cuda/tests/cudapy/test_complex.py index 2437c9ace..d793f5604 100644 --- a/numba_cuda/numba/cuda/tests/cudapy/test_complex.py +++ b/numba_cuda/numba/cuda/tests/cudapy/test_complex.py @@ -9,6 +9,7 @@ from numba.cuda.testing import unittest, CUDATestCase from numba.cuda import types from numba import cuda +from numba.cuda import config from numba.cuda.tests.cudapy.complex_usecases import ( real_usecase, imag_usecase, @@ -333,6 +334,12 @@ def test_tanh(self): self.check_unary_func(tanh_usecase, ulps=2, ignore_sign_on_zero=True) +@unittest.skipIf( + not config.ENABLE_CUDASIM + and cuda.get_current_device().compute_capability >= (12, 0) + and cuda.cudadrv.runtime.get_version()[0] == 12, + reason="NVVM 12.9 Bugged on CC 10+", +) class TestAtomicOnComplexComponents(CUDATestCase): # Based on the reproducer from Issue #8309. array.real and array.imag could # not be used because they required returning an array from a generated diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py b/numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py index f69ab496d..d86b9c35b 100644 --- a/numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +++ b/numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py @@ -287,16 +287,17 @@ def use_vote_sync_all_with_mask(mask, predicate, result): valid_cases = [ # mask: unsigned/signed integer # predicate: unsigned/signed integer, boolean - ("void(uint32[:], uint32[:], int32[:])", np.uint32, np.uint32, 1), - ("void(int64[:], int64[:], int32[:])", np.int64, np.int64, 1), - ("void(uint64[:], uint64[:], int32[:])", np.uint64, np.uint64, 1), - ("void(int32[:], int32[:], int32[:])", np.int32, np.int32, 1), - ("void(uint32[:], boolean[:], int32[:])", np.uint32, np.bool_, 1), - ("void(uint64[:], boolean[:], int32[:])", np.uint64, np.bool_, 1), + ("void(uint32[:], uint32[:], int32[:])", np.uint32, np.uint32), + ("void(int64[:], int64[:], int32[:])", np.int64, np.int64), + ("void(uint64[:], uint64[:], int32[:])", np.uint64, np.uint64), + ("void(int32[:], int32[:], int32[:])", np.int32, np.int32), + ("void(uint32[:], boolean[:], int32[:])", np.uint32, np.bool_), + ("void(uint64[:], boolean[:], int32[:])", np.uint64, np.bool_), ] - for sig, mask_dtype, pred_dtype, mask_val in valid_cases: + for sig, mask_dtype, pred_dtype in valid_cases: with self.subTest(sig=sig): + mask_val = (~np.array(0, dtype=mask_dtype)).item() compiled = cuda.jit(sig)(use_vote_sync_all_with_mask) ary_mask = np.full(nelem, mask_val, dtype=mask_dtype) ary_pred = np.ones(nelem, dtype=pred_dtype) diff --git a/pixi.lock b/pixi.lock index 29ea8ddc6..6f5167fd0 100644 --- a/pixi.lock +++ b/pixi.lock @@ -15430,7 +15430,7 @@ packages: - numpy >=1.21,<3 license: BSD-2-Clause input: - hash: 374ed0f53cec9900fe88055c53fd85b4bb401a28b0f2e81241682223da95fed2 + hash: 
f4f870026faa6c5b05f9ae3b9ddcb8500d569eeafd6477b27702c588535c3418 globs: - pyproject.toml - conda: . @@ -15453,7 +15453,7 @@ packages: - numpy >=1.21,<3 license: BSD-2-Clause input: - hash: 374ed0f53cec9900fe88055c53fd85b4bb401a28b0f2e81241682223da95fed2 + hash: f4f870026faa6c5b05f9ae3b9ddcb8500d569eeafd6477b27702c588535c3418 globs: - pyproject.toml - conda: . @@ -15476,7 +15476,7 @@ packages: - numpy >=1.21,<3 license: BSD-2-Clause input: - hash: 374ed0f53cec9900fe88055c53fd85b4bb401a28b0f2e81241682223da95fed2 + hash: f4f870026faa6c5b05f9ae3b9ddcb8500d569eeafd6477b27702c588535c3418 globs: - pyproject.toml - conda: . @@ -15499,7 +15499,7 @@ packages: - numpy >=1.23,<3 license: BSD-2-Clause input: - hash: 374ed0f53cec9900fe88055c53fd85b4bb401a28b0f2e81241682223da95fed2 + hash: f4f870026faa6c5b05f9ae3b9ddcb8500d569eeafd6477b27702c588535c3418 globs: - pyproject.toml - conda: . @@ -15522,7 +15522,7 @@ packages: - numpy >=1.23,<3 license: BSD-2-Clause input: - hash: 374ed0f53cec9900fe88055c53fd85b4bb401a28b0f2e81241682223da95fed2 + hash: f4f870026faa6c5b05f9ae3b9ddcb8500d569eeafd6477b27702c588535c3418 globs: - pyproject.toml - conda: . @@ -15545,7 +15545,7 @@ packages: - numpy >=1.23,<3 license: BSD-2-Clause input: - hash: 374ed0f53cec9900fe88055c53fd85b4bb401a28b0f2e81241682223da95fed2 + hash: f4f870026faa6c5b05f9ae3b9ddcb8500d569eeafd6477b27702c588535c3418 globs: - pyproject.toml - conda: . @@ -15568,7 +15568,7 @@ packages: - numpy >=1.23,<3 license: BSD-2-Clause input: - hash: 374ed0f53cec9900fe88055c53fd85b4bb401a28b0f2e81241682223da95fed2 + hash: f4f870026faa6c5b05f9ae3b9ddcb8500d569eeafd6477b27702c588535c3418 globs: - pyproject.toml - conda: . @@ -15591,7 +15591,7 @@ packages: - numpy >=1.23,<3 license: BSD-2-Clause input: - hash: 374ed0f53cec9900fe88055c53fd85b4bb401a28b0f2e81241682223da95fed2 + hash: f4f870026faa6c5b05f9ae3b9ddcb8500d569eeafd6477b27702c588535c3418 globs: - pyproject.toml - conda: . @@ -15614,7 +15614,7 @@ packages: - numpy >=1.23,<3 license: BSD-2-Clause input: - hash: 374ed0f53cec9900fe88055c53fd85b4bb401a28b0f2e81241682223da95fed2 + hash: f4f870026faa6c5b05f9ae3b9ddcb8500d569eeafd6477b27702c588535c3418 globs: - pyproject.toml - conda: . @@ -15637,7 +15637,7 @@ packages: - numpy >=1.23,<3 license: BSD-2-Clause input: - hash: 374ed0f53cec9900fe88055c53fd85b4bb401a28b0f2e81241682223da95fed2 + hash: f4f870026faa6c5b05f9ae3b9ddcb8500d569eeafd6477b27702c588535c3418 globs: - pyproject.toml - conda: . @@ -15660,7 +15660,7 @@ packages: - numpy >=1.23,<3 license: BSD-2-Clause input: - hash: 374ed0f53cec9900fe88055c53fd85b4bb401a28b0f2e81241682223da95fed2 + hash: f4f870026faa6c5b05f9ae3b9ddcb8500d569eeafd6477b27702c588535c3418 globs: - pyproject.toml - conda: . @@ -15683,7 +15683,7 @@ packages: - numpy >=1.23,<3 license: BSD-2-Clause input: - hash: 374ed0f53cec9900fe88055c53fd85b4bb401a28b0f2e81241682223da95fed2 + hash: f4f870026faa6c5b05f9ae3b9ddcb8500d569eeafd6477b27702c588535c3418 globs: - pyproject.toml - pypi: https://files.pythonhosted.org/packages/5e/a6/9ca0eecc489640615642a6cbc0ca9e10df70df38c4d43f5a928ff18d8827/numpy-2.3.5-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl diff --git a/pyproject.toml b/pyproject.toml index 42e83bf8d..f49a8c2dc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,23 +27,13 @@ dependencies = ["numba>=0.60.0", "cuda-bindings>=12.9.1,<14.0.0", "cuda-core>=0. 
cu12 = [ "cuda-bindings>=12.9.1,<13.0.0", "cuda-core>=0.3.0,<1.0.0", - "cuda-python==12.9.*", # supports all CTK 12.x - "nvidia-cuda-nvcc-cu12", # for libNVVM - "nvidia-cuda-runtime-cu12", - "nvidia-cuda-nvrtc-cu12", - "nvidia-nvjitlink-cu12", - "nvidia-cuda-cccl-cu12", + # install nvcc for libNVVM + "cuda-toolkit[cudart,nvcc,nvrtc,nvjitlink,cccl]==12.*", ] -# TODO: Use cuda-toolkit package dependencies - e.g. cuda-toolkit[curand,nvvm,nvrtc]=13.* cu13 = [ "cuda-bindings==13.*", "cuda-core>=0.3.2,<1.0.0", - "cuda-python==13.*", - "nvidia-nvvm==13.*", - "nvidia-cuda-runtime==13.*", - "nvidia-cuda-nvrtc==13.*", - "nvidia-nvjitlink==13.*", - "nvidia-cuda-cccl==13.*", + "cuda-toolkit[cudart,nvvm,nvrtc,nvjitlink,cccl]==13.*", ] [dependency-groups] @@ -156,3 +146,24 @@ exclude = [ [tool.pyrefly] search-path = ["./numba_cuda"] + +[tool.cibuildwheel] +skip = "*-musllinux_*" +enable = "cpython-freethreading" +build-verbosity = 1 + +[tool.cibuildwheel.linux] +archs = "native" +before-build = "pip install twine" +repair-wheel-command = [ + "auditwheel repair -w {dest_dir} {wheel}", + "twine check --strict {dest_dir}/*", +] + +[tool.cibuildwheel.windows] +archs = "AMD64" +before-build = "pip install delvewheel twine" +repair-wheel-command = [ + "delvewheel repair --custom-patch -w {dest_dir} {wheel}", + "twine check --strict {dest_dir}/*", +] diff --git a/testing/Makefile b/testing/Makefile index 2d9c0d138..be015e962 100644 --- a/testing/Makefile +++ b/testing/Makefile @@ -31,9 +31,16 @@ MULTI_FATBIN_GENCODE := $(MULTI_GENCODE) -gencode arch=compute_$(ALT_CC),code=[s # LTO-IR tests need to generate for the LTO "architecture" instead LTOIR_GENCODE := -gencode arch=lto_$(GPU_CC),code=lto_$(GPU_CC) +# In CI we use sccache. Note that sccache does not support generating fatbin or ltoir. 
+ifeq ($(shell command -v sccache 2>&1 >/dev/null; echo $$?),0) + SCCACHE := sccache +else + SCCACHE := +endif + # Compile with optimization; use relocatable device code to preserve device # functions in the final output -NVCC_FLAGS := -O3 -rdc true +NVCC_FLAGS := -O3 -rdc true -std=c++17 # Flags specific to output type CUBIN_FLAGS := $(GENCODE) --cubin @@ -46,13 +53,13 @@ LTOIR_FLAGS := $(LTOIR_GENCODE) -dc OUTPUT_DIR := ./ -NRT_INCLUDE_DIR := $(shell python -c "from numba.cuda.memory_management.nrt import get_include; print(get_include())") +NRT_INCLUDE_DIR := "$(shell python -c "from numba.cuda.memory_management.nrt import get_include; print(get_include())")" $(OUTPUT_DIR)/undefined_extern.cubin: undefined_extern.cu - nvcc $(NVCC_FLAGS) $(CUBIN_FLAGS) -o $@ $< + $(SCCACHE) nvcc $(NVCC_FLAGS) $(CUBIN_FLAGS) -o $@ $< $(OUTPUT_DIR)/test_device_functions.cubin: test_device_functions.cu - nvcc $(NVCC_FLAGS) $(CUBIN_FLAGS) -o $@ $< + $(SCCACHE) nvcc $(NVCC_FLAGS) $(CUBIN_FLAGS) -o $@ $< $(OUTPUT_DIR)/test_device_functions.fatbin: test_device_functions.cu nvcc $(NVCC_FLAGS) $(FATBIN_FLAGS) -o $@ $< @@ -61,10 +68,10 @@ $(OUTPUT_DIR)/test_device_functions_multi.fatbin: test_device_functions.cu nvcc $(NVCC_FLAGS) $(MULTI_FATBIN_FLAGS) -o $@ $< $(OUTPUT_DIR)/test_device_functions.ptx: test_device_functions.cu - nvcc $(NVCC_FLAGS) $(PTX_FLAGS) -o $@ $< + $(SCCACHE) nvcc $(NVCC_FLAGS) $(PTX_FLAGS) -o $@ $< $(OUTPUT_DIR)/test_device_functions.o: test_device_functions.cu - nvcc $(NVCC_FLAGS) $(OBJECT_FLAGS) -o $@ $< + $(SCCACHE) nvcc $(NVCC_FLAGS) $(OBJECT_FLAGS) -o $@ $< $(OUTPUT_DIR)/test_device_functions.a: test_device_functions.cu nvcc $(NVCC_FLAGS) $(LIBRARY_FLAGS) -o $@ $< @@ -86,7 +93,7 @@ test_device_functions: $(OUTPUT_DIR)/test_device_functions.cubin \ $(OUTPUT_DIR)/test_device_functions.ltoir $(OUTPUT_DIR)/nrt_extern.cubin: nrt_extern.cu - nvcc $(NVCC_FLAGS) $(CUBIN_FLAGS) -o $@ $< -I$(NRT_INCLUDE_DIR) + $(SCCACHE) nvcc $(NVCC_FLAGS) $(CUBIN_FLAGS) -o $@ $< -I$(NRT_INCLUDE_DIR) $(OUTPUT_DIR)/nrt_extern.fatbin: nrt_extern.cu nvcc $(NVCC_FLAGS) $(FATBIN_FLAGS) -o $@ $< -I$(NRT_INCLUDE_DIR) @@ -95,10 +102,10 @@ $(OUTPUT_DIR)/nrt_extern_multi.fatbin: nrt_extern.cu nvcc $(NVCC_FLAGS) $(MULTI_FATBIN_FLAGS) -o $@ $< -I$(NRT_INCLUDE_DIR) $(OUTPUT_DIR)/nrt_extern.ptx: nrt_extern.cu - nvcc $(NVCC_FLAGS) $(PTX_FLAGS) -o $@ $< -I$(NRT_INCLUDE_DIR) + $(SCCACHE) nvcc $(NVCC_FLAGS) $(PTX_FLAGS) -o $@ $< -I$(NRT_INCLUDE_DIR) $(OUTPUT_DIR)/nrt_extern.o: nrt_extern.cu - nvcc $(NVCC_FLAGS) $(OBJECT_FLAGS) -o $@ $< -I$(NRT_INCLUDE_DIR) + $(SCCACHE) nvcc $(NVCC_FLAGS) $(OBJECT_FLAGS) -o $@ $< -I$(NRT_INCLUDE_DIR) $(OUTPUT_DIR)/nrt_extern.a: nrt_extern.cu nvcc $(NVCC_FLAGS) $(LIBRARY_FLAGS) -o $@ $< -I$(NRT_INCLUDE_DIR) diff --git a/testing/generate_raw_ltoir.py b/testing/generate_raw_ltoir.py index 104aeaeaf..a40ea7b27 100644 --- a/testing/generate_raw_ltoir.py +++ b/testing/generate_raw_ltoir.py @@ -105,6 +105,7 @@ def get_ltoir(source, name, arch): "-dlto", "-rdc", "true", + "-std=c++17", *cuda_include_flags, ] options = [o.encode() for o in options] diff --git a/testing/pytest.ini b/testing/pytest.ini index d1050355e..847e3f159 100644 --- a/testing/pytest.ini +++ b/testing/pytest.ini @@ -24,4 +24,5 @@ filterwarnings = ignore:\nCompilation is falling back to object mode WITHOUT looplifting enabled.*:numba.core.errors.NumbaWarning ignore:overflow encountered in scalar .+:RuntimeWarning ignore:.*Host array used in CUDA kernel will incur copy overhead.*:numba.cuda.core.errors.NumbaPerformanceWarning + 
ignore:NVRTC log messages.*Architectures prior to.*are deprecated.*:UserWarning ignore:Benchmark machine_info is different:pytest_benchmark.logger.PytestBenchmarkWarning
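Note on the test_warp_ops.py change above: the hard-coded mask value of 1 is replaced by an all-ones bit pattern derived from each mask dtype, so every warp lane is set regardless of the integer width or signedness. A minimal illustration of what that expression evaluates to (not part of the patch):

```python
import numpy as np

# Same expression as in the updated test: bitwise-NOT of zero in the mask
# dtype, converted to a Python scalar.
for mask_dtype in (np.uint32, np.int32, np.uint64, np.int64):
    nbits = np.dtype(mask_dtype).itemsize * 8
    mask_val = (~np.array(0, dtype=mask_dtype)).item()
    # Masking with (1 << nbits) - 1 exposes the underlying bit pattern,
    # which is all ones for both signed and unsigned dtypes.
    print(np.dtype(mask_dtype).name, mask_val, hex(mask_val & ((1 << nbits) - 1)))
```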
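The new ci/versions.json pins the CUDA version used for builds (13.0.2) alongside the previous major series' build (12.9.1). This diff does not show how those values are consumed; a hypothetical reader, with illustrative names only, could look like:

```python
import json

# Hypothetical helper (not part of the patch): read the build and
# previous-build CUDA versions recorded in ci/versions.json.
with open("ci/versions.json") as f:
    cuda = json.load(f)["cuda"]

build_version = cuda["build"]["version"]            # e.g. "13.0.2"
prev_build_version = cuda["prev_build"]["version"]  # e.g. "12.9.1"
build_major, build_minor, _ = build_version.split(".")
print(build_version, prev_build_version, build_major, build_minor)
```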
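For the skipIf guards added to test_atomics.py and test_complex.py: the tests are skipped only when running on hardware (not the simulator) whose compute capability is 12.0 or higher while the installed CUDA runtime is a 12.x release. A standalone restatement of that predicate (illustrative only; the decorators in the patch query cuda.get_current_device() and cuda.cudadrv.runtime directly):

```python
# Illustrative re-statement of the new skip condition.
def skipped_by_nvvm_guard(compute_capability, cuda_runtime_major, cudasim=False):
    return (
        not cudasim
        and compute_capability >= (12, 0)
        and cuda_runtime_major == 12
    )

for cc in [(9, 0), (10, 0), (12, 0), (12, 1)]:
    for major in (12, 13):
        print(cc, major, skipped_by_nvvm_guard(cc, major))
```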