Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
126 changes: 126 additions & 0 deletions .github/workflows/linux_cuda_plugin_ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
# CUDA Plugin Linux CI
#
# Builds ONNX Runtime with the CUDA execution provider compiled as a plugin
# (onnxruntime_BUILD_CUDA_EP_AS_PLUGIN=ON) through the shared reusable Linux
# build workflow, then installs the produced wheel inside the CUDA build
# container on a GPU runner and runs test_cuda_plugin_ep.py against it.
name: CUDA Plugin Linux CI

on:
  push:
    branches: [main, 'rel-*']
  pull_request:
    branches: [main, 'rel-*']
  workflow_dispatch:

# Pull requests share one group per ref, so a newer push cancels the older
# run; every other event is keyed by commit SHA and therefore never collides.
concurrency:
  group: ${{ github.workflow }}-${{ github.event_name == 'pull_request' && github.ref || github.sha }}
  cancel-in-progress: true

# NOTE(review): the write scopes are presumably required by the reusable
# build workflow called below - confirm before narrowing them.
permissions:
  contents: read
  packages: write
  attestations: write
  id-token: write

jobs:
  # Build only (run_tests: false); the build output is uploaded so the GPU
  # job below can test it.
  build-linux-cuda-plugin-x64-release:
    name: Build Linux CUDA Plugin EP x64 Release
    uses: ./.github/workflows/reusable_linux_build.yml
    with:
      pool_name: "onnxruntime-github-Ubuntu2204-AMD-CPU"
      build_config: Release
      architecture: x64
      dockerfile_path: tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cuda
      docker_build_args: '--build-arg BASEIMAGE=onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_almalinux8_gcc14:20251017.1'
      docker_image_repo: onnxruntimecuda12manylinuxbuild
      # Folded scalar: the flags below are joined into a single line.
      extra_build_flags: >-
        --use_binskim_compliant_compile_flags
        --build_wheel
        --parallel
        --nvcc_threads 1
        --cuda_version=12.8
        --cuda_home=/usr/local/cuda-12.8
        --cudnn_home=/usr/local/cuda-12.8
        --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=86
        --cmake_extra_defines onnxruntime_BUILD_CUDA_EP_AS_PLUGIN=ON
      python_path_prefix: 'PATH=/opt/python/cp312-cp312/bin:$PATH'
      run_tests: false
      upload_build_output: true
      execution_providers: 'cuda'
      job_identifier: build-linux-cuda-plugin-x64-release
    secrets:
      GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}

  test-linux-cuda-plugin-x64-release:
    name: Test Linux CUDA Plugin EP x64 Release
    needs: build-linux-cuda-plugin-x64-release
    # Bound the job so a hung GPU test cannot hold the runner for the
    # 360-minute default; same limit as the Windows CUDA plugin test job.
    timeout-minutes: 120
    runs-on:
      - self-hosted
      - "1ES.Pool=onnxruntime-github-linux-a10"
      - "JobId=test-linux-cuda-plugin-x64-release-${{ github.run_id }}-${{ github.run_number }}-${{ github.run_attempt }}"
    permissions:
      contents: read
      packages: read
    steps:
      - name: Checkout code
        uses: actions/checkout@v6

      # Same Dockerfile and base image as the build job, so the tests run in
      # the environment the binaries were built in.
      - uses: microsoft/onnxruntime-github-actions/build-docker-image@8bad63a3c05d448311dfa8e5f531171c97471aa1 # v0.0.12
        id: build_docker_image_step
        with:
          dockerfile: ${{ github.workspace }}/tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cuda
          image-name: ghcr.io/microsoft/onnxruntime/onnxruntimecuda12manylinuxbuild
          build-args: '--build-arg BASEIMAGE=onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_almalinux8_gcc14:20251017.1'
          push: true
          azure-container-registry-name: onnxruntimebuildcache
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

      # --- Download Build Artifact to Runner Temp Directory ---
      # NOTE(review): the artifact name is presumably fixed by the reusable
      # build workflow - confirm it matches if that workflow changes.
      - name: Download Build Artifact
        uses: actions/download-artifact@v7
        with:
          name: build-output-x64-Release
          path: ${{ runner.temp }}/Release

      # --- Restore Permissions in the Temp Directory ---
      # perms.txt (one path per line, relative to the artifact root) lists the
      # files to mark executable again after the artifact round trip.
      - name: Restore Executable Permissions
        if: success()
        working-directory: ${{ runner.temp }}/Release
        run: |
          if [ -f perms.txt ]; then
            echo "Restoring executable permissions in ${{ runner.temp }}/Release ..."
            while IFS= read -r file; do
              if [ -f "$file" ]; then
                chmod +x "$file"
              else
                echo "Warning: File '$file' listed in perms.txt not found."
              fi
            done < perms.txt
            echo "Permissions restored."
          else
            echo "Warning: perms.txt not found in artifact."
          fi

      # --- Install the ORT wheel and run CUDA plugin EP tests ---
      # The checkout is mounted at /onnxruntime_src and the downloaded build
      # output at /build/Release. `\$` defers expansion to the shell inside
      # the container; host-side paths are quoted so they survive word
      # splitting.
      - name: Run CUDA Plugin EP Python Tests
        run: |
          docker run --rm --gpus all \
            -v "${{ github.workspace }}:/onnxruntime_src" \
            -v "${{ runner.temp }}/Release:/build/Release" \
            -e NVIDIA_VISIBLE_DEVICES=all \
            "${{ steps.build_docker_image_step.outputs.full-image-name }}" \
            bash -c "
              set -ex
              export PATH=/opt/python/cp312-cp312/bin:\$PATH

              # Install the ORT wheel
              python -m pip install /build/Release/Release/dist/onnxruntime*.whl

              # Install test dependencies
              python -m pip install numpy onnx
              python -m pip install torch --index-url https://download.pytorch.org/whl/cpu

              # Set plugin path and run tests
              export ORT_CUDA_PLUGIN_PATH=/build/Release/Release/libonnxruntime_providers_cuda_plugin.so
              echo \"ORT_CUDA_PLUGIN_PATH=\$ORT_CUDA_PLUGIN_PATH\"
              # Fails the step (set -e) when the plugin library is missing.
              ls -la \"\$ORT_CUDA_PLUGIN_PATH\"

              cd /onnxruntime_src/onnxruntime/test/python/transformers
              python test_cuda_plugin_ep.py
            "
207 changes: 207 additions & 0 deletions .github/workflows/windows_cuda_plugin.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,207 @@
# CUDA Plugin Windows CI
#
# Builds ONNX Runtime with the CUDA execution provider compiled as a plugin
# (onnxruntime_BUILD_CUDA_EP_AS_PLUGIN=ON) on a Windows build runner, uploads
# the trimmed build tree as an artifact, then installs the wheel on a GPU
# runner and runs test_cuda_plugin_ep.py against the plugin DLL.
#
# NOTE(review): this workflow declares no `permissions:` block, so jobs get
# the repository's default GITHUB_TOKEN scopes. Consider a least-privilege
# `contents: read` once the local composite actions are confirmed not to
# need more.
name: CUDA Plugin Windows CI

on:
  push:
    branches:
      - main
      - rel-*
  pull_request:
    branches:
      - main
      - rel-*
  workflow_dispatch:

# Pull requests share one group per ref, so a newer push cancels the older
# run; every other event is keyed by commit SHA and therefore never collides.
concurrency:
  group: ${{ github.workflow }}-${{ github.event_name == 'pull_request' && github.ref || github.sha }}
  cancel-in-progress: true

jobs:
  build:
    name: Windows CUDA Plugin EP Build
    runs-on:
      - self-hosted
      - "1ES.Pool=onnxruntime-github-vs2022-latest"
      - "JobId=windows-cuda-plugin-${{ github.run_id }}-${{ github.run_number }}-${{ github.run_attempt }}"
    # Job-wide environment; the AZCOPY_* values configure the azcopy download
    # of the CUDA SDK below. Boolean-looking values are quoted because
    # environment variables are always strings.
    env:
      DOTNET_SKIP_FIRST_TIME_EXPERIENCE: 'true'
      setVcvars: 'true'
      ALLOW_RELEASED_ONNX_OPSET_ONLY: '0'
      ONNXRUNTIME_TEST_GPU_DEVICE_ID: '0'
      AZCOPY_AUTO_LOGIN_TYPE: MSI
      AZCOPY_MSI_CLIENT_ID: 63b63039-6328-442f-954b-5a64d124e5b4
    steps:
      - uses: actions/checkout@v6
        with:
          fetch-depth: 0
          submodules: 'none'

      - uses: actions/setup-python@v6
        with:
          python-version: '3.14'
          architecture: x64

      - name: Locate vcvarsall and Setup Env
        uses: ./.github/actions/locate-vcvarsall-and-setup-env
        with:
          architecture: x64

      - name: Install python modules
        run: python -m pip install -r .\tools\ci_build\github\windows\python\requirements.txt
        working-directory: ${{ github.workspace }}
        shell: cmd

      # Copies the SDK into %RUNNER_TEMP%\v12.8, which later steps reference.
      - name: Download CUDA SDK v12.8
        working-directory: ${{ runner.temp }}
        run: |
          azcopy.exe cp --recursive "https://lotusscus.blob.core.windows.net/models/cuda_sdk/v12.8" .
          dir
        shell: pwsh

      # pwsh (PowerShell 7) writes UTF-8 by default, which is what the runner
      # expects in the GITHUB_PATH file; Windows PowerShell 5.1 does not.
      - name: Add CUDA to PATH
        shell: pwsh
        run: |
          Write-Host "Adding CUDA to PATH"
          Write-Host "CUDA Path: $env:RUNNER_TEMP\v12.8\bin"
          Add-Content -Path $env:GITHUB_PATH -Value "$env:RUNNER_TEMP\v12.8\bin"
          Add-Content -Path $env:GITHUB_PATH -Value "$env:RUNNER_TEMP\v12.8\extras\CUPTI\lib64"

      # Uses the RUNNER_TEMP variable instead of splicing the path into the
      # script text, so the value is never re-parsed by PowerShell.
      - name: Set OnnxRuntimeBuildDirectory
        shell: pwsh
        run: |
          $buildDir = Join-Path $env:RUNNER_TEMP "build"
          echo "OnnxRuntimeBuildDirectory=$buildDir" >> $env:GITHUB_ENV

      # Runs from RUNNER_TEMP, so `--build_dir build` resolves to
      # %RUNNER_TEMP%\build - the directory uploaded below.
      - name: Build ONNX Runtime with CUDA Plugin EP
        working-directory: ${{ runner.temp }}
        run: |
          python.exe ${{ github.workspace }}\tools\ci_build\build.py `
            --update --build --config Release `
            --build_dir build `
            --skip_submodule_sync `
            --parallel `
            --nvcc_threads 1 `
            --use_binskim_compliant_compile_flags `
            --cmake_generator "Visual Studio 17 2022" `
            --build_shared_lib `
            --build_wheel `
            --use_cuda `
            --cuda_home="$env:RUNNER_TEMP\v12.8" `
            --skip_tests `
            --use_vcpkg `
            --use_vcpkg_ms_internal_asset_cache `
            --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=86 `
            --cmake_extra_defines onnxruntime_BUILD_CUDA_EP_AS_PLUGIN=ON

          if ($lastExitCode -ne 0) {
            exit $lastExitCode
          }

          # Clean up intermediate files before uploading artifacts
          $outputDir = "${{ runner.temp }}\build\Release"
          Write-Host "Cleaning up files from $outputDir..."

          Remove-Item -Path "$outputDir\onnxruntime" -Recurse -Force -ErrorAction SilentlyContinue
          Remove-Item -Path "$outputDir\pybind11" -Recurse -Force -ErrorAction SilentlyContinue
          Remove-Item -Path "$outputDir\models" -Recurse -Force -ErrorAction SilentlyContinue
          Remove-Item -Path "$outputDir\vcpkg_installed" -Recurse -Force -ErrorAction SilentlyContinue
          Remove-Item -Path "$outputDir\_deps" -Recurse -Force -ErrorAction SilentlyContinue
          Remove-Item -Path "$outputDir\CMakeCache.txt" -Force -ErrorAction SilentlyContinue
          Remove-Item -Path "$outputDir\CMakeFiles" -Recurse -Force -ErrorAction SilentlyContinue
          # Remove-Item's -Recurse combined with -Include is documented as
          # unreliable, so enumerate the object files first and pipe them in.
          Get-ChildItem -Path $outputDir -Include "*.obj" -Recurse -File | Remove-Item
        shell: pwsh

      - name: Upload build artifacts
        uses: actions/upload-artifact@v6
        with:
          name: cuda-plugin-build-artifacts
          path: ${{ runner.temp }}\build

  test:
    name: Windows CUDA Plugin EP Test
    needs: build
    timeout-minutes: 120
    runs-on:
      - self-hosted
      - "1ES.Pool=onnxruntime-github-Win2022-GPU-A10"
      - "JobId=windows-cuda-plugin-test-${{ github.run_id }}-${{ github.run_number }}-${{ github.run_attempt }}"
    # Same job-wide environment as the build job.
    env:
      DOTNET_SKIP_FIRST_TIME_EXPERIENCE: 'true'
      setVcvars: 'true'
      ALLOW_RELEASED_ONNX_OPSET_ONLY: '0'
      ONNXRUNTIME_TEST_GPU_DEVICE_ID: '0'
      AZCOPY_AUTO_LOGIN_TYPE: MSI
      AZCOPY_MSI_CLIENT_ID: 63b63039-6328-442f-954b-5a64d124e5b4
    steps:
      - uses: actions/checkout@v6
        with:
          fetch-depth: 0
          submodules: 'none'

      # Restores the build tree to the same %RUNNER_TEMP%\build location the
      # build job produced it in.
      - name: Download build artifacts
        uses: actions/download-artifact@v7
        with:
          name: cuda-plugin-build-artifacts
          path: ${{ runner.temp }}\build

      - uses: actions/setup-python@v6
        with:
          python-version: '3.14'
          architecture: x64

      - name: Locate vcvarsall and Setup Env
        uses: ./.github/actions/locate-vcvarsall-and-setup-env
        with:
          architecture: x64

      - name: Install python modules
        run: python -m pip install -r .\tools\ci_build\github\windows\python\requirements.txt
        working-directory: ${{ github.workspace }}
        shell: cmd

      - name: Install torch for CPU only
        run: python -m pip install torch
        working-directory: ${{ github.workspace }}
        shell: cmd

      - name: Download CUDA SDK v12.8
        working-directory: ${{ runner.temp }}
        run: |
          azcopy.exe cp --recursive "https://lotusscus.blob.core.windows.net/models/cuda_sdk/v12.8" .
          dir
        shell: pwsh

      # pwsh (PowerShell 7) writes UTF-8 by default, which is what the runner
      # expects in the GITHUB_PATH file; Windows PowerShell 5.1 does not.
      - name: Add CUDA to PATH
        shell: pwsh
        run: |
          Write-Host "Adding CUDA to PATH"
          Write-Host "CUDA Path: $env:RUNNER_TEMP\v12.8\bin"
          Add-Content -Path $env:GITHUB_PATH -Value "$env:RUNNER_TEMP\v12.8\bin"
          Add-Content -Path $env:GITHUB_PATH -Value "$env:RUNNER_TEMP\v12.8\extras\CUPTI\lib64"

      - name: Set OnnxRuntimeBuildDirectory
        shell: pwsh
        run: |
          $buildDir = Join-Path $env:RUNNER_TEMP "build"
          echo "OnnxRuntimeBuildDirectory=$buildDir" >> $env:GITHUB_ENV

      - name: Install ONNX Runtime Wheel
        uses: ./.github/actions/install-onnxruntime-wheel
        with:
          whl-directory: ${{ runner.temp }}\build\Release\Release\dist

      # Points the test at the freshly built plugin DLL and fails early with
      # a clear message when the DLL is missing from the artifact.
      - name: Run CUDA Plugin EP Python Tests
        working-directory: ${{ github.workspace }}\onnxruntime\test\python\transformers
        shell: pwsh
        run: |
          $env:ORT_CUDA_PLUGIN_PATH = "${{ runner.temp }}\build\Release\Release\onnxruntime_providers_cuda_plugin.dll"
          Write-Host "ORT_CUDA_PLUGIN_PATH=$env:ORT_CUDA_PLUGIN_PATH"
          if (-not (Test-Path $env:ORT_CUDA_PLUGIN_PATH)) {
            Write-Error "CUDA plugin EP library not found at $env:ORT_CUDA_PLUGIN_PATH"
            exit 1
          }
          python test_cuda_plugin_ep.py
          if ($lastExitCode -ne 0) {
            exit $lastExitCode
          }
16 changes: 14 additions & 2 deletions cmake/onnxruntime_providers_cuda_plugin.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -143,10 +143,22 @@ target_compile_options(onnxruntime_providers_cuda_plugin PRIVATE
"$<$<COMPILE_LANGUAGE:CUDA>:SHELL:--std c++20>"
"$<$<COMPILE_LANGUAGE:CUDA>:--expt-relaxed-constexpr;-Xcudafe;--diag_suppress=550>"
"$<$<COMPILE_LANGUAGE:CUDA>:SHELL:-Xcudafe --diag_suppress=2810>"
"$<$<COMPILE_LANGUAGE:CXX>:-include;${REPO_ROOT}/include/onnxruntime/ep/adapters.h>"
"$<$<COMPILE_LANGUAGE:CXX>:SHELL:-include ${CUDA_PLUGIN_EP_DIR}/cuda_kernel_adapter.h>"
)

# Force-include adapter headers for CXX files.
# MSVC uses /FI; GCC/Clang use -include.
# Both branches inject the same two headers, in the same order: adapters.h
# first, then cuda_kernel_adapter.h. The $<COMPILE_LANGUAGE:CXX> guard applies
# the flags to C++ sources only. Each flag is wrapped in "SHELL:" so that the
# flag and its path stay together as one option group; without it CMake's
# option de-duplication could drop the repeated /FI or -include token.
if (MSVC)
# Paths are wrapped in escaped quotes in this branch only.
target_compile_options(onnxruntime_providers_cuda_plugin PRIVATE
"$<$<COMPILE_LANGUAGE:CXX>:SHELL:/FI \"${REPO_ROOT}/include/onnxruntime/ep/adapters.h\">"
"$<$<COMPILE_LANGUAGE:CXX>:SHELL:/FI \"${CUDA_PLUGIN_EP_DIR}/cuda_kernel_adapter.h\">"
)
else()
# NOTE(review): paths are unquoted here, unlike the MSVC branch - confirm
# whether a path containing spaces needs to be supported.
target_compile_options(onnxruntime_providers_cuda_plugin PRIVATE
"$<$<COMPILE_LANGUAGE:CXX>:SHELL:-include ${REPO_ROOT}/include/onnxruntime/ep/adapters.h>"
"$<$<COMPILE_LANGUAGE:CXX>:SHELL:-include ${CUDA_PLUGIN_EP_DIR}/cuda_kernel_adapter.h>"
)
endif()

if (MSVC)
target_compile_options(onnxruntime_providers_cuda_plugin PRIVATE
"$<$<COMPILE_LANGUAGE:CUDA>:SHELL:-Xcompiler /permissive>"
Expand Down
Loading
Loading