283 changes: 283 additions & 0 deletions .github/workflows/windows_tensorrt_rtx.yml
@@ -0,0 +1,283 @@
name: Windows GPU TensorRT RTX CI Pipeline

on:
push:
branches:
- main
- rel-*
pull_request:
branches:
- main
- rel-*
workflow_dispatch:

concurrency:
group: ${{ github.workflow }}-${{ github.event_name == 'pull_request' && github.ref || github.sha }}
cancel-in-progress: true

# TODO: enable --build_nodejs
jobs:
build:
name: Windows GPU TensorRT RTX CI Pipeline
runs-on: ["self-hosted", "1ES.Pool=onnxruntime-github-vs2022-latest"]
steps:
- uses: actions/checkout@v5
with:
fetch-depth: 0
submodules: 'none'

- uses: actions/setup-python@v6
with:
python-version: '3.12'
architecture: x64

- name: Locate vcvarsall and Setup Env
uses: ./.github/actions/locate-vcvarsall-and-setup-env
with:
architecture: x64

- name: Install python modules
run: python -m pip install -r .\tools\ci_build\github\windows\python\requirements.txt
working-directory: ${{ github.workspace }}
shell: cmd

- name: Download CUDA SDK v12.8
working-directory: ${{ runner.temp }}
run: |
azcopy.exe cp --recursive "https://lotusscus.blob.core.windows.net/models/cuda_sdk/v12.8" .
dir
shell: pwsh

- name: Download TensorRT-RTX-1.1.1.26.Windows.win10.cuda-12.9
run: 'azcopy.exe cp --recursive "https://lotusscus.blob.core.windows.net/models/local/TensorRT-RTX-1.1.1.26.Windows.win10.cuda-12.9" ${{ runner.temp }}'
shell: pwsh

- name: Add CUDA to PATH
shell: powershell
run: |
Write-Host "Adding CUDA to PATH"
Write-Host "CUDA Path: $env:RUNNER_TEMP\v12.8\bin"
Add-Content -Path $env:GITHUB_PATH -Value "$env:RUNNER_TEMP\v12.8\bin"
Add-Content -Path $env:GITHUB_PATH -Value "$env:RUNNER_TEMP\v12.8\extras\CUPTI\lib64"
Add-Content -Path $env:GITHUB_PATH -Value "$env:RUNNER_TEMP\TensorRT-RTX-1.1.1.26.Windows.win10.cuda-12.9\lib"

- uses: actions/setup-node@v5
with:
node-version: '20.x'

- uses: actions/setup-java@v5
with:
distribution: 'temurin'
java-version: '17'
architecture: x64

- uses: actions/cache@v4
id: onnx-node-tests-cache
with:
path: ${{ github.workspace }}/js/test/
key: onnxnodetests-${{ hashFiles('js/scripts/prepare-onnx-node-tests.ts') }}

- name: API Documentation Check and Generate
run: |
set ORT_DOXY_SRC=${{ github.workspace }}
set ORT_DOXY_OUT=${{ runner.temp }}\build\RelWithDebInfo\RelWithDebInfo
mkdir %ORT_DOXY_SRC%
mkdir %ORT_DOXY_OUT%
"C:\Program Files\doxygen\bin\doxygen.exe" ${{ github.workspace }}\tools\ci_build\github\Doxyfile_csharp.cfg
working-directory: ${{ github.workspace }}
shell: cmd

- uses: actions/setup-dotnet@v5
env:
PROCESSOR_ARCHITECTURE: x64
with:
dotnet-version: '8.x'

- name: Use NuGet 6.x
uses: nuget/setup-nuget@v2
with:
nuget-version: '6.x'

- name: NuGet restore
run: nuget restore ${{ github.workspace }}\packages.config -ConfigFile ${{ github.workspace }}\NuGet.config -PackagesDirectory ${{ runner.temp }}\build\RelWithDebInfo
shell: cmd

- name: Set OnnxRuntimeBuildDirectory
shell: pwsh
run: |
$buildDir = Join-Path ${{ runner.temp }} "build"
echo "OnnxRuntimeBuildDirectory=$buildDir" >> $env:GITHUB_ENV

- name: Build and Clean Binaries
working-directory: ${{ runner.temp }}
run: |
npm install -g typescript
if ($lastExitCode -ne 0) {
exit $lastExitCode
}
# Execute the build process
python ${{ github.workspace }}\tools\ci_build\build.py --config RelWithDebInfo --parallel --use_binskim_compliant_compile_flags --build_dir build --skip_submodule_sync --build_shared_lib --build --update --cmake_generator "Visual Studio 17 2022" --build_wheel --enable_onnx_tests --use_cuda --use_nv_tensorrt_rtx --tensorrt_rtx_home="${{ runner.temp }}\TensorRT-RTX-1.1.1.26.Windows.win10.cuda-12.9" --cuda_home="${{ runner.temp }}\v12.8" --use_vcpkg --use_vcpkg_ms_internal_asset_cache --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=86
if ($lastExitCode -ne 0) {
exit $lastExitCode
}

# Clean up the output directory before uploading artifacts
$outputDir = "${{ runner.temp }}\build\RelWithDebInfo"
Write-Host "Cleaning up files from $outputDir..."

Remove-Item -Path "$outputDir\onnxruntime" -Recurse -Force -ErrorAction SilentlyContinue
Remove-Item -Path "$outputDir\pybind11" -Recurse -Force -ErrorAction SilentlyContinue
Remove-Item -Path "$outputDir\models" -Recurse -Force -ErrorAction SilentlyContinue
Remove-Item -Path "$outputDir\vcpkg_installed" -Recurse -Force -ErrorAction SilentlyContinue
Remove-Item -Path "$outputDir\_deps" -Recurse -Force -ErrorAction SilentlyContinue
Remove-Item -Path "$outputDir\CMakeCache.txt" -Force -ErrorAction SilentlyContinue
Remove-Item -Path "$outputDir\CMakeFiles" -Recurse -Force -ErrorAction SilentlyContinue
# Remove intermediate object files as in the original script
Remove-Item -Path "$outputDir\*" -Include "*.obj" -Recurse
shell: pwsh

- name: Upload build artifacts
uses: actions/upload-artifact@v4
with:
name: build-artifacts
path: ${{ runner.temp }}\build
env:
OrtPackageId: Microsoft.ML.OnnxRuntime.Gpu
DOTNET_SKIP_FIRST_TIME_EXPERIENCE: true
setVcvars: true
ALLOW_RELEASED_ONNX_OPSET_ONLY: '0'
DocUpdateNeeded: false
ONNXRUNTIME_TEST_GPU_DEVICE_ID: '0'
AZCOPY_AUTO_LOGIN_TYPE: MSI
AZCOPY_MSI_CLIENT_ID: 63b63039-6328-442f-954b-5a64d124e5b4
ORT_UNIT_TEST_MAIN_LOG_LEVEL: 0

test:
name: Windows GPU TensorRT RTX CI Pipeline Test Job
needs: build
timeout-minutes: 300
runs-on: ["self-hosted", "1ES.Pool=onnxruntime-github-Win2022-GPU-A10"]
steps:
- uses: actions/checkout@v5
with:
fetch-depth: 0
submodules: 'none'

- name: Download build artifacts
uses: actions/download-artifact@v5
with:
name: build-artifacts
path: ${{ runner.temp }}\build

- uses: actions/setup-python@v6
with:
python-version: '3.12'
architecture: x64

- uses: actions/setup-node@v5
with:
node-version: '20.x'

- uses: actions/setup-java@v5
with:
distribution: 'temurin'
java-version: '17'
architecture: x64

- name: Locate vcvarsall and Setup Env
uses: ./.github/actions/locate-vcvarsall-and-setup-env
with:
architecture: x64

- name: Install python modules
run: python -m pip install -r .\tools\ci_build\github\windows\python\requirements.txt
working-directory: ${{ github.workspace }}
shell: cmd

- name: Download CUDA SDK v12.8
working-directory: ${{ runner.temp }}
run: |
azcopy.exe cp --recursive "https://lotusscus.blob.core.windows.net/models/cuda_sdk/v12.8" .
dir
shell: pwsh

- name: Download TensorRT-RTX-1.1.1.26.Windows.win10.cuda-12.9
run: 'azcopy.exe cp --recursive "https://lotusscus.blob.core.windows.net/models/local/TensorRT-RTX-1.1.1.26.Windows.win10.cuda-12.9" ${{ runner.temp }}'
shell: pwsh

- name: Add CUDA to PATH
shell: powershell
run: |
Write-Host "Adding CUDA to PATH"
Write-Host "CUDA Path: $env:RUNNER_TEMP\v12.8\bin"
Add-Content -Path $env:GITHUB_PATH -Value "$env:RUNNER_TEMP\v12.8\bin"
Add-Content -Path $env:GITHUB_PATH -Value "$env:RUNNER_TEMP\v12.8\extras\CUPTI\lib64"
Add-Content -Path $env:GITHUB_PATH -Value "$env:RUNNER_TEMP\TensorRT-RTX-1.1.1.26.Windows.win10.cuda-12.9\lib"

- name: Set OnnxRuntimeBuildDirectory
shell: pwsh
run: |
$buildDir = Join-Path ${{ runner.temp }} "build"
echo "OnnxRuntimeBuildDirectory=$buildDir" >> $env:GITHUB_ENV

- name: Install ONNX Runtime Wheel
uses: ./.github/actions/install-onnxruntime-wheel
with:
whl-directory: ${{ runner.temp }}\build\RelWithDebInfo\RelWithDebInfo\dist

- name: Show total physical memory
run: |
$ram = (Get-CimInstance Win32_ComputerSystem).TotalPhysicalMemory
$ramGB = [math]::Round($ram / 1GB, 2)
Write-Host "Total RAM: $ramGB GB"
shell: pwsh

- name: nvidia-smi
shell: powershell
run: |
nvidia-smi.exe
nvidia-smi.exe --query-gpu=memory.total,memory.used,memory.free --format=csv

- name: Run Tests
working-directory: ${{ runner.temp }}
run: |
npm install -g typescript
if ($lastExitCode -ne 0) {
exit $lastExitCode
}

python.exe ${{ github.workspace }}\tools\python\update_ctest_path.py "${{ runner.temp }}\build\RelWithDebInfo\CTestTestfile.cmake" "${{ runner.temp }}\build\RelWithDebInfo"
if ($lastExitCode -ne 0) {
exit $lastExitCode
}

python ${{ github.workspace }}\tools\ci_build\build.py --config RelWithDebInfo --parallel --use_binskim_compliant_compile_flags --build_dir build --skip_submodule_sync --build_shared_lib --test --cmake_generator "Visual Studio 17 2022" --build_wheel --enable_onnx_tests --use_nv_tensorrt_rtx --tensorrt_rtx_home="${{ runner.temp }}\TensorRT-RTX-1.1.1.26.Windows.win10.cuda-12.9" --cuda_home="${{ runner.temp }}\v12.8" --use_vcpkg --use_vcpkg_ms_internal_asset_cache --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=86
if ($lastExitCode -ne 0) {
exit $lastExitCode
}
shell: pwsh

- name: Check GPU memory usage and fail if >90%
shell: powershell
run: |
$csv = nvidia-smi --query-gpu=memory.used,memory.total --format=csv,noheader
$parts = $csv -split ','
$used = [int]($parts[0] -replace ' MiB','')
$total = [int]($parts[1] -replace ' MiB','')
$percent = ($used / $total) * 100
Write-Host "GPU memory used: $used MiB / $total MiB ($percent %)"
if ($percent -gt 90) { throw "GPU memory usage exceeded threshold ($percent%)" }

- name: Validate C# native delegates
run: python tools\ValidateNativeDelegateAttributes.py
working-directory: ${{ github.workspace }}\csharp
shell: cmd
env:
OrtPackageId: Microsoft.ML.OnnxRuntime.Gpu
DOTNET_SKIP_FIRST_TIME_EXPERIENCE: true
setVcvars: true
ALLOW_RELEASED_ONNX_OPSET_ONLY: '0'
DocUpdateNeeded: false
ONNXRUNTIME_TEST_GPU_DEVICE_ID: '0'
AZCOPY_AUTO_LOGIN_TYPE: MSI
AZCOPY_MSI_CLIENT_ID: 63b63039-6328-442f-954b-5a64d124e5b4
3 changes: 2 additions & 1 deletion cmake/onnxruntime_providers_nv.cmake
@@ -2,7 +2,8 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# Licensed under the MIT License.
find_package(CUDAToolkit REQUIRED 12.8)
enable_language(CUDA)
# No need to call this here when using the onnxruntime build script: enable_language(CUDA) is already called in CMakeLists.txt when the CUDA EP is enabled.
# enable_language(CUDA)
if(onnxruntime_DISABLE_CONTRIB_OPS)
message( FATAL_ERROR "To compile TensorRT execution provider contrib ops have to be enabled to dump an engine using com.microsoft:EPContext node." )
endif()
@@ -1736,7 +1736,9 @@ SubGraphCollection_t NvExecutionProvider::GetSupportedList(SubGraphCollection_t
TensorrtLogger& trt_logger = GetTensorrtLogger(detailed_build_log_);
auto trt_builder = GetBuilder(trt_logger);
auto network_flags = 1U << static_cast<uint32_t>(nvinfer1::NetworkDefinitionCreationFlag::kSTRONGLY_TYPED);
LOGS_DEFAULT(INFO) << "[NvTensorRTRTX EP] Before createNetworkV2()";
auto trt_network = std::unique_ptr<nvinfer1::INetworkDefinition>(trt_builder->createNetworkV2(network_flags));
LOGS_DEFAULT(INFO) << "[NvTensorRTRTX EP] After createNetworkV2()";

bool is_model_supported = false;

@@ -1755,8 +1757,10 @@ SubGraphCollection_t NvExecutionProvider::GetSupportedList(SubGraphCollection_t
ORT_THROW("'nv_use_external_data_initializer' is only supported on TensorRT RTX 1.1.x.x and above.");
#endif
} else {
LOGS_DEFAULT(INFO) << "[NvTensorRTRTX EP] Before supportsModelV2()";
is_model_supported = trt_parser->supportsModelV2(string_buf.data(), string_buf.size(), model_path_);
}
LOGS_DEFAULT(INFO) << "[NvTensorRTRTX EP] After supportsModelV2()";

// Note: Calling getNbSubgraphs or getSubgraphNodes before calling supportsModelV2 results in undefined behavior.
auto num_subgraphs = trt_parser->getNbSubgraphs();
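The note above warns that getNbSubgraphs() and getSubgraphNodes() are undefined before supportsModelV2() has run. A minimal sketch of the required ordering, assuming the nvonnxparser::IParser interface used in this hunk (the helper function itself is hypothetical):

```cpp
// Hypothetical helper illustrating the required call order:
// supportsModelV2() must complete before any subgraph query, since
// getNbSubgraphs()/getSubgraphNodes() are undefined until then.
#include <cstddef>
#include <cstdint>
#include "NvOnnxParser.h"

int64_t CountSubgraphsAfterSupportCheck(nvonnxparser::IParser& parser,
                                        const void* model_bytes,
                                        size_t num_bytes,
                                        const char* model_path) {
  // A false result means the model is only partially supported; the
  // subgraph queries below are still valid after this call returns.
  parser.supportsModelV2(model_bytes, num_bytes, model_path);
  return parser.getNbSubgraphs();  // Safe only after supportsModelV2().
}
```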
@@ -276,15 +276,16 @@ onnxruntime::NodeArg& AddGroupQueryAttention(
void CreateLargeLLMModel(const PathString& model_path, const PathString& external_data_path) {
// Model parameters (example: 1 layer, 128 hidden dim, 8 attention heads, 1 KV head => GQA)
int batch_size = 1;
int num_layers = 32;
int hidden_dim = 2048;
int num_layers = 1;
int hidden_dim = 128;
int q_num_heads = 8;
int kv_num_heads = 1; // GQA: q_num_heads > kv_num_heads and divisible by it.
int seq_length = 128; // Short, for demonstration.
int vocab_size = 32000;
auto dtype = ONNX_NAMESPACE::TensorProto_DataType_FLOAT16;

// Set up model/graph
DefaultLoggingManager().SetDefaultLoggerSeverity(onnxruntime::logging::Severity::kVERBOSE);
onnxruntime::Model model("LLM_With_GQA", false, DefaultLoggingManager().DefaultLogger());
auto& graph = model.MainGraph();

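The kv_num_heads comment above encodes the GQA constraint: query heads must outnumber KV heads and split evenly into per-KV-head groups. A small self-contained check, with the helper name being hypothetical:

```cpp
// Hypothetical compile-time check of the GQA configuration built above:
// each KV head serves an equal-sized group of query heads, so q_num_heads
// must be a strict, positive multiple of kv_num_heads.
constexpr bool IsValidGqaConfig(int q_num_heads, int kv_num_heads) {
  return kv_num_heads > 0 && q_num_heads > kv_num_heads &&
         q_num_heads % kv_num_heads == 0;
}
static_assert(IsValidGqaConfig(8, 1), "8 query heads grouped onto 1 KV head");
```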
1 change: 1 addition & 0 deletions onnxruntime/test/unittest_main/test_main.cc
@@ -71,6 +71,7 @@ extern "C" void ortenv_setup() {
log_level = static_cast<OrtLoggingLevel>(*log_level_override);
}

// Force verbose logging, overriding any log level set above.
log_level = ORT_LOGGING_LEVEL_VERBOSE;
ort_env.reset(new Ort::Env(&tpo, log_level, "Default"));

#if defined(TEST_MAIN_ENABLE_DYNAMIC_PLUGIN_EP_USAGE)