From 3df9761780771358a11dcac4bfe33e7b9f4371f2 Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Fri, 17 Oct 2025 10:14:13 -0700
Subject: [PATCH 01/20] Add Windows pipelines

---
 .github/workflows/windows_tensorrt_rtx.yml | 258 +++++++++++++++++++++
 1 file changed, 258 insertions(+)
 create mode 100644 .github/workflows/windows_tensorrt_rtx.yml

diff --git a/.github/workflows/windows_tensorrt_rtx.yml b/.github/workflows/windows_tensorrt_rtx.yml
new file mode 100644
index 0000000000000..a2cbc22586121
--- /dev/null
+++ b/.github/workflows/windows_tensorrt_rtx.yml
@@ -0,0 +1,258 @@
+name: Windows GPU TensorRT RTX CI Pipeline
+
+on:
+  push:
+    branches:
+      - main
+      - rel-*
+  pull_request:
+    branches:
+      - main
+      - rel-*
+  workflow_dispatch:
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event_name == 'pull_request' && github.ref || github.sha }}
+  cancel-in-progress: true
+
+#TODO: enable --build_nodejs
+jobs:
+  build:
+    name: Windows GPU TensorRT RTX CI Pipeline
+    runs-on: ["self-hosted", "1ES.Pool=onnxruntime-github-vs2022-latest"]
+    steps:
+      - uses: actions/checkout@v5
+        with:
+          fetch-depth: 0
+          submodules: 'none'
+
+      - uses: actions/setup-python@v6
+        with:
+          python-version: '3.12'
+          architecture: x64
+
+      - name: Locate vcvarsall and Setup Env
+        uses: ./.github/actions/locate-vcvarsall-and-setup-env
+        with:
+          architecture: x64
+
+      - name: Install python modules
+        run: python -m pip install -r .\tools\ci_build\github\windows\python\requirements.txt
+        working-directory: ${{ github.workspace }}
+        shell: cmd
+
+      - name: Download CUDA SDK v12.8
+        working-directory: ${{ runner.temp }}
+        run: |
+          azcopy.exe cp --recursive "https://lotusscus.blob.core.windows.net/models/cuda_sdk/v12.8" .
+          dir
+        shell: pwsh
+
+      - name: Download TensorRT-RTX-1.1.1.26.Windows.win10.cuda-12.9
+        run: 'azcopy.exe cp --recursive "https://lotusscus.blob.core.windows.net/models/local/TensorRT-RTX-1.1.1.26.Windows.win10.cuda-12.9" ${{ runner.temp }}'
+        shell: pwsh
+
+      - name: Add CUDA to PATH
+        shell: powershell
+        run: |
+          Write-Host "Adding CUDA to PATH"
+          Write-Host "CUDA Path: $env:RUNNER_TEMP\v12.8\bin"
+          Add-Content -Path $env:GITHUB_PATH -Value "$env:RUNNER_TEMP\v12.8\bin"
+          Add-Content -Path $env:GITHUB_PATH -Value "$env:RUNNER_TEMP\v12.8\extras\CUPTI\lib64"
+          Add-Content -Path $env:GITHUB_PATH -Value "$env:RUNNER_TEMP\TensorRT-RTX-1.1.1.26.Windows.win10.cuda-12.9\lib"
+
+      - uses: actions/setup-node@v5
+        with:
+          node-version: '20.x'
+
+      - uses: actions/setup-java@v5
+        with:
+          distribution: 'temurin'
+          java-version: '17'
+          architecture: x64
+
+      - uses: actions/cache@v4
+        id: onnx-node-tests-cache
+        with:
+          path: ${{ github.workspace }}/js/test/
+          key: onnxnodetests-${{ hashFiles('js/scripts/prepare-onnx-node-tests.ts') }}
+
+      - name: API Documentation Check and generate
+        run: |
+          set ORT_DOXY_SRC=${{ github.workspace }}
+          set ORT_DOXY_OUT=${{ runner.temp }}\build\RelWithDebInfo\RelWithDebInfo
+          mkdir %ORT_DOXY_SRC%
+          mkdir %ORT_DOXY_OUT%
+          "C:\Program Files\doxygen\bin\doxygen.exe" ${{ github.workspace }}\tools\ci_build\github\Doxyfile_csharp.cfg
+        working-directory: ${{ github.workspace }}
+        shell: cmd
+
+      - uses: actions/setup-dotnet@v5
+        env:
+          PROCESSOR_ARCHITECTURE: x64
+        with:
+          dotnet-version: '8.x'
+
+      - name: Use Nuget 6.x
+        uses: nuget/setup-nuget@v2
+        with:
+          nuget-version: '6.x'
+
+      - name: NuGet restore
+        run: nuget restore ${{ github.workspace }}\packages.config -ConfigFile ${{ github.workspace }}\NuGet.config -PackagesDirectory ${{ runner.temp }}\build\RelWithDebInfo
+        shell: cmd
+
+      - name: Set OnnxRuntimeBuildDirectory
+        shell: pwsh
+        run: |
+          $buildDir = Join-Path ${{ runner.temp }} "build"
+          echo "OnnxRuntimeBuildDirectory=$buildDir" >> $env:GITHUB_ENV
+
+      - name: Build and Clean Binaries
+        working-directory: ${{ runner.temp }}
+        run: |
+          npm install -g typescript
+          if ($lastExitCode -ne 0) {
+            exit $lastExitCode
+          }
+          # Execute the build process
+          python ${{ github.workspace }}\tools\ci_build\build.py --config RelWithDebInfo --parallel --use_binskim_compliant_compile_flags --build_dir build --skip_submodule_sync --build_shared_lib --build --update --cmake_generator "Visual Studio 17 2022" --build_wheel --enable_onnx_tests --use_nv_tensorrt_rtx --tensorrt_rtx_home="${{ runner.temp }}\TensorRT-RTX-1.1.1.26.Windows.win10.cuda-12.9" --cuda_home="${{ runner.temp }}\v12.8" --use_vcpkg --use_vcpkg_ms_internal_asset_cache --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=86
+          if ($lastExitCode -ne 0) {
+            exit $lastExitCode
+          }
+
+          # Clean up the output directory before uploading artifacts
+          $outputDir = "${{ runner.temp }}\build\RelWithDebInfo"
+          Write-Host "Cleaning up files from $outputDir..."
+
+          Remove-Item -Path "$outputDir\onnxruntime" -Recurse -Force -ErrorAction SilentlyContinue
+          Remove-Item -Path "$outputDir\pybind11" -Recurse -Force -ErrorAction SilentlyContinue
+          Remove-Item -Path "$outputDir\models" -Recurse -Force -ErrorAction SilentlyContinue
+          Remove-Item -Path "$outputDir\vcpkg_installed" -Recurse -Force -ErrorAction SilentlyContinue
+          Remove-Item -Path "$outputDir\_deps" -Recurse -Force -ErrorAction SilentlyContinue
+          Remove-Item -Path "$outputDir\CMakeCache.txt" -Force -ErrorAction SilentlyContinue
+          Remove-Item -Path "$outputDir\CMakeFiles" -Recurse -Force -ErrorAction SilentlyContinue
+          # Remove intermediate object files as in the original script
+          Remove-Item -Path $outputDir -Include "*.obj" -Recurse
+        shell: pwsh
+
+      - name: Upload build artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          name: build-artifacts
+          path: ${{ runner.temp }}\build
+    env:
+      OrtPackageId: Microsoft.ML.OnnxRuntime.Gpu
+      DOTNET_SKIP_FIRST_TIME_EXPERIENCE: true
+      setVcvars: true
+      ALLOW_RELEASED_ONNX_OPSET_ONLY: '0'
+      DocUpdateNeeded: false
+      ONNXRUNTIME_TEST_GPU_DEVICE_ID: '0'
+      AZCOPY_AUTO_LOGIN_TYPE: MSI
+      AZCOPY_MSI_CLIENT_ID: 63b63039-6328-442f-954b-5a64d124e5b4
+
+  test:
+    name: Windows GPU TensorRT RTX CI Pipeline Test Job
+    needs: build
+    timeout-minutes: 300
+    runs-on: ["self-hosted", "1ES.Pool=onnxruntime-github-Win2022-GPU-A10"]
+    steps:
+      - uses: actions/checkout@v5
+        with:
+          fetch-depth: 0
+          submodules: 'none'
+
+      - name: Download build artifacts
+        uses: actions/download-artifact@v5
+        with:
+          name: build-artifacts
+          path: ${{ runner.temp }}\build
+
+      - uses: actions/setup-python@v6
+        with:
+          python-version: '3.12'
+          architecture: x64
+
+      - uses: actions/setup-node@v5
+        with:
+          node-version: '20.x'
+
+      - uses: actions/setup-java@v5
+        with:
+          distribution: 'temurin'
+          java-version: '17'
+          architecture: x64
+
+      - name: Locate vcvarsall and Setup Env
+        uses: ./.github/actions/locate-vcvarsall-and-setup-env
+        with:
+          architecture: x64
+
+      - name: Install python modules
+        run: python -m pip install -r .\tools\ci_build\github\windows\python\requirements.txt
+        working-directory: ${{ github.workspace }}
+        shell: cmd
+
+      - name: Download CUDA SDK v12.8
+        working-directory: ${{ runner.temp }}
+        run: |
+          azcopy.exe cp --recursive "https://lotusscus.blob.core.windows.net/models/cuda_sdk/v12.8" .
+          dir
+        shell: pwsh
+
+      - name: Download TensorRT-RTX-1.1.1.26.Windows.win10.cuda-12.9
+        run: 'azcopy.exe cp --recursive "https://lotusscus.blob.core.windows.net/models/local/TensorRT-RTX-1.1.1.26.Windows.win10.cuda-12.9" ${{ runner.temp }}'
+        shell: pwsh
+
+      - name: Add CUDA to PATH
+        shell: powershell
+        run: |
+          Write-Host "Adding CUDA to PATH"
+          Write-Host "CUDA Path: $env:RUNNER_TEMP\v12.8\bin"
+          Add-Content -Path $env:GITHUB_PATH -Value "$env:RUNNER_TEMP\v12.8\bin"
+          Add-Content -Path $env:GITHUB_PATH -Value "$env:RUNNER_TEMP\v12.8\extras\CUPTI\lib64"
+          Add-Content -Path $env:GITHUB_PATH -Value "$env:RUNNER_TEMP\TensorRT-RTX-1.1.1.26.Windows.win10.cuda-12.9\lib"
+
+      - name: Set OnnxRuntimeBuildDirectory
+        shell: pwsh
+        run: |
+          $buildDir = Join-Path ${{ runner.temp }} "build"
+          echo "OnnxRuntimeBuildDirectory=$buildDir" >> $env:GITHUB_ENV
+
+      - name: Install ONNX Runtime Wheel
+        uses: ./.github/actions/install-onnxruntime-wheel
+        with:
+          whl-directory: ${{ runner.temp }}\build\RelWithDebInfo\RelWithDebInfo\dist
+
+      - name: Run Tests
+        working-directory: ${{ runner.temp }}
+        run: |
+          npm install -g typescript
+          if ($lastExitCode -ne 0) {
+            exit $lastExitCode
+          }
+
+          python.exe ${{ github.workspace }}\tools\python\update_ctest_path.py "${{ runner.temp }}\build\RelWithDebInfo\CTestTestfile.cmake" "${{ runner.temp }}\build\RelWithDebInfo"
+          if ($lastExitCode -ne 0) {
+            exit $lastExitCode
+          }
+
+          python ${{ github.workspace }}\tools\ci_build\build.py --config RelWithDebInfo --parallel --use_binskim_compliant_compile_flags --build_dir build --skip_submodule_sync --build_shared_lib --test --cmake_generator "Visual Studio 17 2022" --build_wheel --enable_onnx_tests --use_nv_tensorrt_rtx --tensorrt_rtx_home="${{ runner.temp }}\TensorRT-RTX-1.1.1.26.Windows.win10.cuda-12.9" --cuda_home="${{ runner.temp }}\v12.8" --use_vcpkg --use_vcpkg_ms_internal_asset_cache --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=86
+          if ($lastExitCode -ne 0) {
+            exit $lastExitCode
+          }
+        shell: pwsh
+
+      - name: Validate C# native delegates
+        run: python tools\ValidateNativeDelegateAttributes.py
+        working-directory: ${{ github.workspace }}\csharp
+        shell: cmd
+    env:
+      OrtPackageId: Microsoft.ML.OnnxRuntime.Gpu
+      DOTNET_SKIP_FIRST_TIME_EXPERIENCE: true
+      setVcvars: true
+      ALLOW_RELEASED_ONNX_OPSET_ONLY: '0'
+      DocUpdateNeeded: false
+      ONNXRUNTIME_TEST_GPU_DEVICE_ID: '0'
+      AZCOPY_AUTO_LOGIN_TYPE: MSI
+      AZCOPY_MSI_CLIENT_ID: 63b63039-6328-442f-954b-5a64d124e5b4

From a01659dd91a44f9a84764798d8158f1c658b842e Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Fri, 17 Oct 2025 13:43:38 -0700
Subject: [PATCH 02/20] set CMAKE_CUDA_COMPILER

---
 cmake/onnxruntime_providers_nv.cmake | 1 +
 1 file changed, 1 insertion(+)

diff --git a/cmake/onnxruntime_providers_nv.cmake b/cmake/onnxruntime_providers_nv.cmake
index e59463b6b91f1..26b895656237d 100644
--- a/cmake/onnxruntime_providers_nv.cmake
+++ b/cmake/onnxruntime_providers_nv.cmake
@@ -2,6 +2,7 @@
 # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # Licensed under the MIT License.
   find_package(CUDAToolkit REQUIRED 12.8)
+  set(CMAKE_CUDA_COMPILER "${CUDAToolkit_ROOT}/bin/nvcc.exe")
   enable_language(CUDA)
   if(onnxruntime_DISABLE_CONTRIB_OPS)
     message( FATAL_ERROR "To compile TensorRT execution provider contrib ops have to be enabled to dump an engine using com.microsoft:EPContext node." )

From c2246aba928a0b2750190d2fa3e94c5c0d4a6a2b Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Fri, 17 Oct 2025 14:20:20 -0700
Subject: [PATCH 03/20] Use CUDAToolkit_BIN_DIR to locate nvcc

---
 cmake/onnxruntime_providers_nv.cmake | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cmake/onnxruntime_providers_nv.cmake b/cmake/onnxruntime_providers_nv.cmake
index 26b895656237d..b0aa4b129685d 100644
--- a/cmake/onnxruntime_providers_nv.cmake
+++ b/cmake/onnxruntime_providers_nv.cmake
@@ -2,7 +2,7 @@
 # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # Licensed under the MIT License.
   find_package(CUDAToolkit REQUIRED 12.8)
-  set(CMAKE_CUDA_COMPILER "${CUDAToolkit_ROOT}/bin/nvcc.exe")
+  set(CMAKE_CUDA_COMPILER "${CUDAToolkit_BIN_DIR}/nvcc.exe")
   enable_language(CUDA)
   if(onnxruntime_DISABLE_CONTRIB_OPS)
     message( FATAL_ERROR "To compile TensorRT execution provider contrib ops have to be enabled to dump an engine using com.microsoft:EPContext node." )

From 904082842abb027c951dc30fa125e7311c2aea35 Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Fri, 17 Oct 2025 14:39:36 -0700
Subject: [PATCH 04/20] update

---
 cmake/onnxruntime_providers_nv.cmake | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/cmake/onnxruntime_providers_nv.cmake b/cmake/onnxruntime_providers_nv.cmake
index b0aa4b129685d..66cf6d33dfd1f 100644
--- a/cmake/onnxruntime_providers_nv.cmake
+++ b/cmake/onnxruntime_providers_nv.cmake
@@ -2,7 +2,8 @@
 # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # Licensed under the MIT License.
   find_package(CUDAToolkit REQUIRED 12.8)
-  set(CMAKE_CUDA_COMPILER "${CUDAToolkit_BIN_DIR}/nvcc.exe")
+  set(CMAKE_CUDA_COMPILER "${CUDAToolkit_BIN_DIR}/nvcc${CMAKE_EXECUTABLE_SUFFIX}" CACHE FILEPATH "CUDA compiler" FORCE)
+  message(STATUS "NVCC path: ${CMAKE_CUDA_COMPILER}")
   enable_language(CUDA)
   if(onnxruntime_DISABLE_CONTRIB_OPS)
     message( FATAL_ERROR "To compile TensorRT execution provider contrib ops have to be enabled to dump an engine using com.microsoft:EPContext node." )

From 16334416e3c2cf3d15f7338a5b302e51c06ad9d3 Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Fri, 17 Oct 2025 14:53:32 -0700
Subject: [PATCH 05/20] update

---
 cmake/onnxruntime_providers_nv.cmake | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/cmake/onnxruntime_providers_nv.cmake b/cmake/onnxruntime_providers_nv.cmake
index 66cf6d33dfd1f..697d8930ccba7 100644
--- a/cmake/onnxruntime_providers_nv.cmake
+++ b/cmake/onnxruntime_providers_nv.cmake
@@ -2,9 +2,9 @@
 # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # Licensed under the MIT License.
   find_package(CUDAToolkit REQUIRED 12.8)
-  set(CMAKE_CUDA_COMPILER "${CUDAToolkit_BIN_DIR}/nvcc${CMAKE_EXECUTABLE_SUFFIX}" CACHE FILEPATH "CUDA compiler" FORCE)
-  message(STATUS "NVCC path: ${CMAKE_CUDA_COMPILER}")
-  enable_language(CUDA)
+  #set(CMAKE_CUDA_COMPILER "${CUDAToolkit_BIN_DIR}/nvcc${CMAKE_EXECUTABLE_SUFFIX}" CACHE FILEPATH "CUDA compiler" FORCE)
+  #message(STATUS "NVCC path: ${CMAKE_CUDA_COMPILER}")
+  #enable_language(CUDA)
   if(onnxruntime_DISABLE_CONTRIB_OPS)
     message( FATAL_ERROR "To compile TensorRT execution provider contrib ops have to be enabled to dump an engine using com.microsoft:EPContext node." )
   endif()

From 30a38524e61983ba32baea49183dd05bdb440114 Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Fri, 17 Oct 2025 15:52:50 -0700
Subject: [PATCH 06/20] update

---
 .github/workflows/windows_tensorrt_rtx.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/windows_tensorrt_rtx.yml b/.github/workflows/windows_tensorrt_rtx.yml
index a2cbc22586121..da14280ae7c23 100644
--- a/.github/workflows/windows_tensorrt_rtx.yml
+++ b/.github/workflows/windows_tensorrt_rtx.yml
@@ -116,7 +116,7 @@ jobs:
             exit $lastExitCode
           }
           # Execute the build process
-          python ${{ github.workspace }}\tools\ci_build\build.py --config RelWithDebInfo --parallel --use_binskim_compliant_compile_flags --build_dir build --skip_submodule_sync --build_shared_lib --build --update --cmake_generator "Visual Studio 17 2022" --build_wheel --enable_onnx_tests --use_nv_tensorrt_rtx --tensorrt_rtx_home="${{ runner.temp }}\TensorRT-RTX-1.1.1.26.Windows.win10.cuda-12.9" --cuda_home="${{ runner.temp }}\v12.8" --use_vcpkg --use_vcpkg_ms_internal_asset_cache --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=86
+          python ${{ github.workspace }}\tools\ci_build\build.py --config RelWithDebInfo --parallel --use_binskim_compliant_compile_flags --build_dir build --skip_submodule_sync --build_shared_lib --build --update --cmake_generator "Visual Studio 17 2022" --build_wheel --enable_onnx_tests --use_cuda --use_nv_tensorrt_rtx --tensorrt_rtx_home="${{ runner.temp }}\TensorRT-RTX-1.1.1.26.Windows.win10.cuda-12.9" --cuda_home="${{ runner.temp }}\v12.8" --use_vcpkg --use_vcpkg_ms_internal_asset_cache --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=86
           if ($lastExitCode -ne 0) {
             exit $lastExitCode
           }

From a31d43194caed64fbc16103e945c4e4321f703da Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Fri, 17 Oct 2025 23:07:46 -0700
Subject: [PATCH 07/20] comment out enable_language(CUDA)

---
 cmake/onnxruntime_providers_nv.cmake | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/cmake/onnxruntime_providers_nv.cmake b/cmake/onnxruntime_providers_nv.cmake
index 697d8930ccba7..688c1513ee325 100644
--- a/cmake/onnxruntime_providers_nv.cmake
+++ b/cmake/onnxruntime_providers_nv.cmake
@@ -2,8 +2,7 @@
 # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # Licensed under the MIT License.
   find_package(CUDAToolkit REQUIRED 12.8)
-  #set(CMAKE_CUDA_COMPILER "${CUDAToolkit_BIN_DIR}/nvcc${CMAKE_EXECUTABLE_SUFFIX}" CACHE FILEPATH "CUDA compiler" FORCE)
-  #message(STATUS "NVCC path: ${CMAKE_CUDA_COMPILER}")
+  # There is no need to call this here when using the onnxruntime build script, because enable_language(CUDA) is already called in CMakeLists.txt when the CUDA EP is enabled.
   #enable_language(CUDA)
   if(onnxruntime_DISABLE_CONTRIB_OPS)
     message( FATAL_ERROR "To compile TensorRT execution provider contrib ops have to be enabled to dump an engine using com.microsoft:EPContext node." )

From 0749aa790c80f729dd0075fce7e0b2158e001ca5 Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Tue, 28 Oct 2025 13:45:42 -0700
Subject: [PATCH 08/20] enable verbose log to test

---
 .github/workflows/windows_tensorrt_rtx.yml                    | 1 +
 .../test/providers/nv_tensorrt_rtx/test_nv_trt_rtx_ep_util.cc | 1 +
 2 files changed, 2 insertions(+)

diff --git a/.github/workflows/windows_tensorrt_rtx.yml b/.github/workflows/windows_tensorrt_rtx.yml
index da14280ae7c23..bc26c3dbe1676 100644
--- a/.github/workflows/windows_tensorrt_rtx.yml
+++ b/.github/workflows/windows_tensorrt_rtx.yml
@@ -150,6 +150,7 @@ jobs:
       ONNXRUNTIME_TEST_GPU_DEVICE_ID: '0'
       AZCOPY_AUTO_LOGIN_TYPE: MSI
       AZCOPY_MSI_CLIENT_ID: 63b63039-6328-442f-954b-5a64d124e5b4
+      ORT_UNIT_TEST_MAIN_LOG_LEVEL: 0
 
   test:
     name: Windows GPU TensorRT RTX CI Pipeline Test Job

diff --git a/onnxruntime/test/providers/nv_tensorrt_rtx/test_nv_trt_rtx_ep_util.cc b/onnxruntime/test/providers/nv_tensorrt_rtx/test_nv_trt_rtx_ep_util.cc
index 3a91fc1ba09bb..e99fd6371f3ba 100644
--- a/onnxruntime/test/providers/nv_tensorrt_rtx/test_nv_trt_rtx_ep_util.cc
+++ b/onnxruntime/test/providers/nv_tensorrt_rtx/test_nv_trt_rtx_ep_util.cc
@@ -285,6 +285,7 @@ void CreateLargeLLMModel(const PathString& model_path, const PathString& externa
   auto dtype = ONNX_NAMESPACE::TensorProto_DataType_FLOAT16;
 
   // Set up model/graph
+  DefaultLoggingManager().SetDefaultLoggerSeverity(onnxruntime::logging::Severity::kVERBOSE);
   onnxruntime::Model model("LLM_With_GQA", false, DefaultLoggingManager().DefaultLogger());
   auto& graph = model.MainGraph();
 

From 730f76d299fc69394dfd3d5f4fa43fb16c88a8ca Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Tue, 28 Oct 2025 17:40:02 -0700
Subject: [PATCH 09/20] enable verbose log

---
 onnxruntime/test/unittest_main/test_main.cc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/onnxruntime/test/unittest_main/test_main.cc b/onnxruntime/test/unittest_main/test_main.cc
index 117a26d48efe9..64382cbcf4e78 100644
--- a/onnxruntime/test/unittest_main/test_main.cc
+++ b/onnxruntime/test/unittest_main/test_main.cc
@@ -71,6 +71,7 @@ extern "C" void ortenv_setup() {
     log_level = static_cast<OrtLoggingLevel>(*log_level_override);
   }
 
+  log_level = ORT_LOGGING_LEVEL_VERBOSE;
   ort_env.reset(new Ort::Env(&tpo, log_level, "Default"));
 
 #if defined(TEST_MAIN_ENABLE_DYNAMIC_PLUGIN_EP_USAGE)

From 2ad8459d2b9b884c4eff2bf0acc0fc4e912d9900 Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Mon, 3 Nov 2025 11:40:40 -0800
Subject: [PATCH 10/20] add log

---
 .github/workflows/windows_tensorrt_rtx.yml | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/.github/workflows/windows_tensorrt_rtx.yml b/.github/workflows/windows_tensorrt_rtx.yml
index bc26c3dbe1676..be62eae8c928b 100644
--- a/.github/workflows/windows_tensorrt_rtx.yml
+++ b/.github/workflows/windows_tensorrt_rtx.yml
@@ -108,6 +108,11 @@ jobs:
           $buildDir = Join-Path ${{ runner.temp }} "build"
           echo "OnnxRuntimeBuildDirectory=$buildDir" >> $env:GITHUB_ENV
 
+      - name: nvidia-smi
+        shell: powershell
+        run: |
+          nvidia-smi.exe
+
       - name: Build and Clean Binaries
         working-directory: ${{ runner.temp }}
         run: |

From 79c01c3e3ea5308156327df8a97ee0feb9829c45 Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Mon, 3 Nov 2025 11:57:00 -0800
Subject: [PATCH 11/20] update command to run nvidia-smi

---
 .github/workflows/windows_tensorrt_rtx.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/windows_tensorrt_rtx.yml b/.github/workflows/windows_tensorrt_rtx.yml
index be62eae8c928b..ae10fa42deedd 100644
--- a/.github/workflows/windows_tensorrt_rtx.yml
+++ b/.github/workflows/windows_tensorrt_rtx.yml
@@ -111,7 +111,7 @@ jobs:
       - name: nvidia-smi
         shell: powershell
         run: |
-          nvidia-smi.exe
+          "C:\Program Files\NVIDIA Corporation\NVSMI\nvidia-smi.exe"
 
       - name: Build and Clean Binaries
         working-directory: ${{ runner.temp }}

From d54d1c05d50cdd10c3b1aa05c4e224e16a5cc3bd Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Mon, 3 Nov 2025 12:03:48 -0800
Subject: [PATCH 12/20] update vm

---
 .github/workflows/windows_tensorrt_rtx.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/windows_tensorrt_rtx.yml b/.github/workflows/windows_tensorrt_rtx.yml
index ae10fa42deedd..bdd2d3ae6b1ec 100644
--- a/.github/workflows/windows_tensorrt_rtx.yml
+++ b/.github/workflows/windows_tensorrt_rtx.yml
@@ -19,7 +19,7 @@ concurrency:
 jobs:
   build:
     name: Windows GPU TensorRT RTX CI Pipeline
-    runs-on: ["self-hosted", "1ES.Pool=onnxruntime-github-vs2022-latest"]
+    runs-on: ["self-hosted", "1ES.Pool=onnxruntime-github-Win2022-GPU-A10"]
     steps:
       - uses: actions/checkout@v5
         with:
@@ -111,7 +111,7 @@ jobs:
       - name: nvidia-smi
         shell: powershell
         run: |
-          "C:\Program Files\NVIDIA Corporation\NVSMI\nvidia-smi.exe"
+          nvidia-smi.exe
 
       - name: Build and Clean Binaries
         working-directory: ${{ runner.temp }}

From 8f16f9e31ae01b6c6b93ca4c5760661470bc377c Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Mon, 3 Nov 2025 12:22:05 -0800
Subject: [PATCH 13/20] Add log for host RAM

---
 .github/workflows/windows_tensorrt_rtx.yml | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/windows_tensorrt_rtx.yml b/.github/workflows/windows_tensorrt_rtx.yml
index bdd2d3ae6b1ec..96660b5577eac 100644
--- a/.github/workflows/windows_tensorrt_rtx.yml
+++ b/.github/workflows/windows_tensorrt_rtx.yml
@@ -36,6 +36,18 @@ jobs:
         with:
           architecture: x64
 
+      - name: Show total physical memory
+        run: |
+          $ram = (Get-CimInstance Win32_ComputerSystem).TotalPhysicalMemory
+          $ramGB = [math]::Round($ram / 1GB, 2)
+          Write-Host "Total RAM: $ramGB GB"
+        shell: pwsh
+
+      - name: nvidia-smi
+        shell: powershell
+        run: |
+          nvidia-smi.exe
+
       - name: Install python modules
         run: python -m pip install -r .\tools\ci_build\github\windows\python\requirements.txt
         working-directory: ${{ github.workspace }}
@@ -108,11 +120,6 @@ jobs:
           $buildDir = Join-Path ${{ runner.temp }} "build"
           echo "OnnxRuntimeBuildDirectory=$buildDir" >> $env:GITHUB_ENV
 
-      - name: nvidia-smi
-        shell: powershell
-        run: |
-          nvidia-smi.exe
-
       - name: Build and Clean Binaries
         working-directory: ${{ runner.temp }}
         run: |

From 7fd6dc2879b1d3a338333db2240f61c27ca62767 Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Mon, 3 Nov 2025 13:36:54 -0800
Subject: [PATCH 14/20] reduce LLM size in the test

---
 .github/workflows/windows_tensorrt_rtx.yml | 26 +++++++++++++-------------
 .../test_nv_trt_rtx_ep_util.cc             |  2 +-
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/.github/workflows/windows_tensorrt_rtx.yml b/.github/workflows/windows_tensorrt_rtx.yml
index 96660b5577eac..d636e762ab89c 100644
--- a/.github/workflows/windows_tensorrt_rtx.yml
+++ b/.github/workflows/windows_tensorrt_rtx.yml
@@ -19,7 +19,7 @@ concurrency:
 jobs:
   build:
     name: Windows GPU TensorRT RTX CI Pipeline
-    runs-on: ["self-hosted", "1ES.Pool=onnxruntime-github-Win2022-GPU-A10"]
+    runs-on: ["self-hosted", "1ES.Pool=onnxruntime-github-vs2022-latest"]
     steps:
      - uses: actions/checkout@v5
        with:
@@ -36,18 +36,6 @@ jobs:
         with:
           architecture: x64
 
-      - name: Show total physical memory
-        run: |
-          $ram = (Get-CimInstance Win32_ComputerSystem).TotalPhysicalMemory
-          $ramGB = [math]::Round($ram / 1GB, 2)
-          Write-Host "Total RAM: $ramGB GB"
-        shell: pwsh
-
-      - name: nvidia-smi
-        shell: powershell
-        run: |
-          nvidia-smi.exe
-
       - name: Install python modules
         run: python -m pip install -r .\tools\ci_build\github\windows\python\requirements.txt
         working-directory: ${{ github.workspace }}
@@ -237,6 +225,18 @@ jobs:
         with:
           whl-directory: ${{ runner.temp }}\build\RelWithDebInfo\RelWithDebInfo\dist
 
+      - name: Show total physical memory
+        run: |
+          $ram = (Get-CimInstance Win32_ComputerSystem).TotalPhysicalMemory
+          $ramGB = [math]::Round($ram / 1GB, 2)
+          Write-Host "Total RAM: $ramGB GB"
+        shell: pwsh
+
+      - name: nvidia-smi
+        shell: powershell
+        run: |
+          nvidia-smi.exe
+
       - name: Run Tests
         working-directory: ${{ runner.temp }}
         run: |

diff --git a/onnxruntime/test/providers/nv_tensorrt_rtx/test_nv_trt_rtx_ep_util.cc b/onnxruntime/test/providers/nv_tensorrt_rtx/test_nv_trt_rtx_ep_util.cc
index e99fd6371f3ba..cefcf6b2e9ee6 100644
--- a/onnxruntime/test/providers/nv_tensorrt_rtx/test_nv_trt_rtx_ep_util.cc
+++ b/onnxruntime/test/providers/nv_tensorrt_rtx/test_nv_trt_rtx_ep_util.cc
@@ -276,7 +276,7 @@ onnxruntime::NodeArg& AddGroupQueryAttention(
 void CreateLargeLLMModel(const PathString& model_path, const PathString& external_data_path) {
   // Model parameters (example: 24 layers, 4096 hidden dim, 32 attention heads, 8 kv heads => GQA)
   int batch_size = 1;
-  int num_layers = 32;
+  int num_layers = 8;
   int hidden_dim = 2048;
   int q_num_heads = 8;
   int kv_num_heads = 1;  // GQA: q_num_heads > kv_num_heads, and divisible.

From 7b62db85c55bff9b4eb89783f702c7bf838ca01c Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Wed, 5 Nov 2025 08:58:59 -0800
Subject: [PATCH 15/20] make LLM 4 layers

---
 .../test/providers/nv_tensorrt_rtx/test_nv_trt_rtx_ep_util.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/onnxruntime/test/providers/nv_tensorrt_rtx/test_nv_trt_rtx_ep_util.cc b/onnxruntime/test/providers/nv_tensorrt_rtx/test_nv_trt_rtx_ep_util.cc
index cefcf6b2e9ee6..21a151cb2b1c2 100644
--- a/onnxruntime/test/providers/nv_tensorrt_rtx/test_nv_trt_rtx_ep_util.cc
+++ b/onnxruntime/test/providers/nv_tensorrt_rtx/test_nv_trt_rtx_ep_util.cc
@@ -276,7 +276,7 @@ onnxruntime::NodeArg& AddGroupQueryAttention(
 void CreateLargeLLMModel(const PathString& model_path, const PathString& external_data_path) {
   // Model parameters (example: 24 layers, 4096 hidden dim, 32 attention heads, 8 kv heads => GQA)
   int batch_size = 1;
-  int num_layers = 8;
+  int num_layers = 4;
   int hidden_dim = 2048;
   int q_num_heads = 8;
   int kv_num_heads = 1;  // GQA: q_num_heads > kv_num_heads, and divisible.
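Note: PATCH 14 through PATCH 18 progressively shrink the generated GQA test model (32 -> 8 -> 4 -> 1 layers, then smaller hidden dims) to curb memory use on the CI agents. A rough back-of-envelope estimate of the fp16 weight footprint shows why each reduction helps. The sketch below is illustrative only; it assumes each layer carries just the Q/K/V/O projection matrices, while the real CreateLargeLLMModel emits additional tensors, so treat the result as a lower bound.

#include <cstdio>

int main() {
  // Parameters mirroring CreateLargeLLMModel as of PATCH 15.
  const long long num_layers = 4, hidden_dim = 2048;
  const long long q_num_heads = 8, kv_num_heads = 1;
  const long long head_dim = hidden_dim / q_num_heads;
  // Q and O projections are hidden_dim x hidden_dim; K and V are
  // hidden_dim x (kv_num_heads * head_dim) under grouped-query attention.
  const long long per_layer =
      2 * hidden_dim * hidden_dim + 2 * hidden_dim * kv_num_heads * head_dim;
  const long long bytes = num_layers * per_layer * 2;  // fp16 = 2 bytes/element
  std::printf("~%.1f MiB of attention weights\n", bytes / (1024.0 * 1024.0));
  return 0;
}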
From 87ccb4f581bb11f08f927303a3906ad05cea92c1 Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Wed, 5 Nov 2025 16:19:52 -0800
Subject: [PATCH 16/20] Make LLM layer 1

---
 .../test/providers/nv_tensorrt_rtx/test_nv_trt_rtx_ep_util.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/onnxruntime/test/providers/nv_tensorrt_rtx/test_nv_trt_rtx_ep_util.cc b/onnxruntime/test/providers/nv_tensorrt_rtx/test_nv_trt_rtx_ep_util.cc
index 21a151cb2b1c2..7f7c67125366c 100644
--- a/onnxruntime/test/providers/nv_tensorrt_rtx/test_nv_trt_rtx_ep_util.cc
+++ b/onnxruntime/test/providers/nv_tensorrt_rtx/test_nv_trt_rtx_ep_util.cc
@@ -276,7 +276,7 @@ onnxruntime::NodeArg& AddGroupQueryAttention(
 void CreateLargeLLMModel(const PathString& model_path, const PathString& external_data_path) {
   // Model parameters (example: 24 layers, 4096 hidden dim, 32 attention heads, 8 kv heads => GQA)
   int batch_size = 1;
-  int num_layers = 4;
+  int num_layers = 1;
   int hidden_dim = 2048;
   int q_num_heads = 8;
   int kv_num_heads = 1;  // GQA: q_num_heads > kv_num_heads, and divisible.

From 1976d070596224d665e90a3899c93cd39147fb5f Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Mon, 10 Nov 2025 13:56:49 -0800
Subject: [PATCH 17/20] change hidden dim to 512

---
 .../test/providers/nv_tensorrt_rtx/test_nv_trt_rtx_ep_util.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/onnxruntime/test/providers/nv_tensorrt_rtx/test_nv_trt_rtx_ep_util.cc b/onnxruntime/test/providers/nv_tensorrt_rtx/test_nv_trt_rtx_ep_util.cc
index 7f7c67125366c..246e942880393 100644
--- a/onnxruntime/test/providers/nv_tensorrt_rtx/test_nv_trt_rtx_ep_util.cc
+++ b/onnxruntime/test/providers/nv_tensorrt_rtx/test_nv_trt_rtx_ep_util.cc
@@ -277,7 +277,7 @@ void CreateLargeLLMModel(const PathString& model_path, const PathString& externa
   // Model parameters (example: 24 layers, 4096 hidden dim, 32 attention heads, 8 kv heads => GQA)
   int batch_size = 1;
   int num_layers = 1;
-  int hidden_dim = 2048;
+  int hidden_dim = 512;
   int q_num_heads = 8;
   int kv_num_heads = 1;  // GQA: q_num_heads > kv_num_heads, and divisible.
   int seq_length = 128;  // Short, for demonstration.

From 8267ee5c8bc5ff8b9b37e0c57007e0b97782430f Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Mon, 10 Nov 2025 16:29:02 -0800
Subject: [PATCH 18/20] change hidden dim to 128

---
 .../test/providers/nv_tensorrt_rtx/test_nv_trt_rtx_ep_util.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/onnxruntime/test/providers/nv_tensorrt_rtx/test_nv_trt_rtx_ep_util.cc b/onnxruntime/test/providers/nv_tensorrt_rtx/test_nv_trt_rtx_ep_util.cc
index 246e942880393..e15d272437824 100644
--- a/onnxruntime/test/providers/nv_tensorrt_rtx/test_nv_trt_rtx_ep_util.cc
+++ b/onnxruntime/test/providers/nv_tensorrt_rtx/test_nv_trt_rtx_ep_util.cc
@@ -277,7 +277,7 @@ void CreateLargeLLMModel(const PathString& model_path, const PathString& externa
   // Model parameters (example: 24 layers, 4096 hidden dim, 32 attention heads, 8 kv heads => GQA)
   int batch_size = 1;
   int num_layers = 1;
-  int hidden_dim = 512;
+  int hidden_dim = 128;
   int q_num_heads = 8;
   int kv_num_heads = 1;  // GQA: q_num_heads > kv_num_heads, and divisible.
   int seq_length = 128;  // Short, for demonstration.
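Note: PATCH 19 below brackets createNetworkV2() and supportsModelV2() with manual before/after logs to localize where the provider stalls. A reusable RAII helper (a hypothetical sketch, not part of the ONNX Runtime codebase) would produce the same pair of messages plus a duration without repeating the boilerplate at each call site:

#include <chrono>
#include <cstdio>
#include <string>

// Logs on construction, and again (with elapsed milliseconds) on destruction.
struct ScopedTrace {
  std::string tag;
  std::chrono::steady_clock::time_point start = std::chrono::steady_clock::now();
  explicit ScopedTrace(std::string t) : tag(std::move(t)) {
    std::printf("[trace] before %s\n", tag.c_str());
  }
  ~ScopedTrace() {
    const auto ms = std::chrono::duration_cast<std::chrono::milliseconds>(
                        std::chrono::steady_clock::now() - start)
                        .count();
    std::printf("[trace] after %s (%lld ms)\n", tag.c_str(), static_cast<long long>(ms));
  }
};

// Usage at a suspect call site:
//   { ScopedTrace trace("supportsModelV2"); is_model_supported = trt_parser->supportsModelV2(...); }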
From cdb742415355d26a1ac50fbcfb7a7cbb9ce14657 Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Tue, 11 Nov 2025 09:35:24 -0800
Subject: [PATCH 19/20] add gpu monitor in pipelines

---
 .github/workflows/windows_tensorrt_rtx.yml   | 18 ++++++++++++++++++
 .../nv_tensorrt_rtx/nv_execution_provider.cc |  4 ++++
 2 files changed, 22 insertions(+)

diff --git a/.github/workflows/windows_tensorrt_rtx.yml b/.github/workflows/windows_tensorrt_rtx.yml
index d636e762ab89c..6e8807cf7f64a 100644
--- a/.github/workflows/windows_tensorrt_rtx.yml
+++ b/.github/workflows/windows_tensorrt_rtx.yml
@@ -108,6 +108,12 @@ jobs:
           $buildDir = Join-Path ${{ runner.temp }} "build"
           echo "OnnxRuntimeBuildDirectory=$buildDir" >> $env:GITHUB_ENV
 
+      - name: nvidia-smi
+        shell: powershell
+        run: |
+          nvidia-smi.exe
+          nvidia-smi.exe --query-gpu=memory.total,memory.used,memory.free --format=csv
+
       - name: Build and Clean Binaries
         working-directory: ${{ runner.temp }}
         run: |
@@ -235,7 +241,8 @@ jobs:
       - name: nvidia-smi
         shell: powershell
         run: |
           nvidia-smi.exe
+          nvidia-smi.exe --query-gpu=memory.total,memory.used,memory.free --format=csv
 
       - name: Run Tests
         working-directory: ${{ runner.temp }}
@@ -259,8 +266,19 @@ jobs:
           if ($lastExitCode -ne 0) {
             exit $lastExitCode
           }
         shell: pwsh
 
+      - name: Check GPU memory usage and fail if >90%
+        shell: powershell
+        run: |
+          $csv = nvidia-smi --query-gpu=memory.used,memory.total --format=csv,noheader
+          $parts = $csv -split ','
+          $used = [int]($parts[0] -replace ' MiB','')
+          $total = [int]($parts[1] -replace ' MiB','')
+          $percent = ($used / $total) * 100
+          Write-Host "GPU memory used: $used MiB / $total MiB ($percent %)"
+          if ($percent -gt 90) { throw "GPU memory usage exceeded threshold ($percent%)" }
+
       - name: Validate C# native delegates
         run: python tools\ValidateNativeDelegateAttributes.py
         working-directory: ${{ github.workspace }}\csharp

diff --git a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider.cc b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider.cc
index 62210d65848d1..30dc1b02aad78 100644
--- a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider.cc
+++ b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider.cc
@@ -1736,7 +1736,9 @@ SubGraphCollection_t NvExecutionProvider::GetSupportedList(SubGraphCollection_t
   TensorrtLogger& trt_logger = GetTensorrtLogger(detailed_build_log_);
   auto trt_builder = GetBuilder(trt_logger);
   auto network_flags = 1U << static_cast<uint32_t>(nvinfer1::NetworkDefinitionCreationFlag::kSTRONGLY_TYPED);
+  LOGS_DEFAULT(INFO) << "[NvTensorRTRTX EP] Before createNetworkV2()";
   auto trt_network = std::unique_ptr<nvinfer1::INetworkDefinition>(trt_builder->createNetworkV2(network_flags));
+  LOGS_DEFAULT(INFO) << "[NvTensorRTRTX EP] After createNetworkV2()";
 
   bool is_model_supported = false;
 
@@ -1755,8 +1757,10 @@ SubGraphCollection_t NvExecutionProvider::GetSupportedList(SubGraphCollection_t
       ORT_THROW("'nv_use_external_data_initializer' is only supported on TensorRT RTX 1.1.x.x and above.");
 #endif
     } else {
+      LOGS_DEFAULT(INFO) << "[NvTensorRTRTX EP] Before supportsModelV2()";
      is_model_supported = trt_parser->supportsModelV2(string_buf.data(), string_buf.size(), model_path_);
     }
+    LOGS_DEFAULT(INFO) << "[NvTensorRTRTX EP] After supportsModelV2()";
 
     // Note: Calling getNbSubgraphs or getSubgraphNodes before calling supportsModelV2 results in undefined behavior.
     auto num_subgraphs = trt_parser->getNbSubgraphs();

From e1201919bac72af128afa38056d5960ae30c9507 Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Tue, 11 Nov 2025 09:51:26 -0800
Subject: [PATCH 20/20] remove nvidia-smi step from the build job

---
 .github/workflows/windows_tensorrt_rtx.yml | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/.github/workflows/windows_tensorrt_rtx.yml b/.github/workflows/windows_tensorrt_rtx.yml
index 6e8807cf7f64a..81b6651bccbbf 100644
--- a/.github/workflows/windows_tensorrt_rtx.yml
+++ b/.github/workflows/windows_tensorrt_rtx.yml
@@ -108,12 +108,6 @@ jobs:
           $buildDir = Join-Path ${{ runner.temp }} "build"
           echo "OnnxRuntimeBuildDirectory=$buildDir" >> $env:GITHUB_ENV
 
-      - name: nvidia-smi
-        shell: powershell
-        run: |
-          nvidia-smi.exe
-          nvidia-smi.exe --query-gpu=memory.total,memory.used,memory.free --format=csv
-
       - name: Build and Clean Binaries
         working-directory: ${{ runner.temp }}
         run: |
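Note: as an alternative to shelling out to nvidia-smi, the 90% threshold added in PATCH 19 could also be checked in-process through the CUDA runtime API. The following is a minimal sketch for illustration, not part of this PR:

#include <cuda_runtime.h>
#include <cstdio>

int main() {
  size_t free_bytes = 0, total_bytes = 0;
  // cudaMemGetInfo reports free and total memory on the current device.
  if (cudaMemGetInfo(&free_bytes, &total_bytes) != cudaSuccess) {
    std::fprintf(stderr, "cudaMemGetInfo failed\n");
    return 1;
  }
  const double used_pct =
      100.0 * static_cast<double>(total_bytes - free_bytes) / static_cast<double>(total_bytes);
  std::printf("GPU memory used: %.1f%%\n", used_pct);
  return used_pct > 90.0 ? 2 : 0;  // non-zero exit mirrors the pipeline's throw
}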