From 3df9761780771358a11dcac4bfe33e7b9f4371f2 Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Fri, 17 Oct 2025 10:14:13 -0700
Subject: [PATCH 01/20] Add Windows pipelines

---
 .github/workflows/windows_tensorrt_rtx.yml | 258 +++++++++++++++++++++
 1 file changed, 258 insertions(+)
 create mode 100644 .github/workflows/windows_tensorrt_rtx.yml

diff --git a/.github/workflows/windows_tensorrt_rtx.yml b/.github/workflows/windows_tensorrt_rtx.yml
new file mode 100644
index 0000000000000..a2cbc22586121
--- /dev/null
+++ b/.github/workflows/windows_tensorrt_rtx.yml
@@ -0,0 +1,258 @@
+name: Windows GPU TensorRT RTX CI Pipeline
+
+on:
+  push:
+    branches:
+      - main
+      - rel-*
+  pull_request:
+    branches:
+      - main
+      - rel-*
+  workflow_dispatch:
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event_name == 'pull_request' && github.ref || github.sha }}
+  cancel-in-progress: true
+
+#TODO: enable --build_nodejs
+jobs:
+  build:
+    name: Windows GPU TensorRT RTX CI Pipeline
+    runs-on: ["self-hosted", "1ES.Pool=onnxruntime-github-vs2022-latest"]
+    steps:
+      - uses: actions/checkout@v5
+        with:
+          fetch-depth: 0
+          submodules: 'none'
+
+      - uses: actions/setup-python@v6
+        with:
+          python-version: '3.12'
+          architecture: x64
+
+      - name: Locate vcvarsall and Setup Env
+        uses: ./.github/actions/locate-vcvarsall-and-setup-env
+        with:
+          architecture: x64
+
+      - name: Install python modules
+        run: python -m pip install -r .\tools\ci_build\github\windows\python\requirements.txt
+        working-directory: ${{ github.workspace }}
+        shell: cmd
+
+      - name: Download CUDA SDK v12.8
+        working-directory: ${{ runner.temp }}
+        run: |
+          azcopy.exe cp --recursive "https://lotusscus.blob.core.windows.net/models/cuda_sdk/v12.8" .
+          dir
+        shell: pwsh
+
+      - name: Download TensorRT-RTX-1.1.1.26.Windows.win10.cuda-12.9
+        run: 'azcopy.exe cp --recursive "https://lotusscus.blob.core.windows.net/models/local/TensorRT-RTX-1.1.1.26.Windows.win10.cuda-12.9" ${{ runner.temp }}'
+        shell: pwsh
+
+      - name: Add CUDA to PATH
+        shell: powershell
+        run: |
+          Write-Host "Adding CUDA to PATH"
+          Write-Host "CUDA Path: $env:RUNNER_TEMP\v12.8\bin"
+          Add-Content -Path $env:GITHUB_PATH -Value "$env:RUNNER_TEMP\v12.8\bin"
+          Add-Content -Path $env:GITHUB_PATH -Value "$env:RUNNER_TEMP\v12.8\extras\CUPTI\lib64"
+          Add-Content -Path $env:GITHUB_PATH -Value "$env:RUNNER_TEMP\TensorRT-RTX-1.1.1.26.Windows.win10.cuda-12.9\lib"
+
+      - uses: actions/setup-node@v5
+        with:
+          node-version: '20.x'
+
+      - uses: actions/setup-java@v5
+        with:
+          distribution: 'temurin'
+          java-version: '17'
+          architecture: x64
+
+      - uses: actions/cache@v4
+        id: onnx-node-tests-cache
+        with:
+          path: ${{ github.workspace }}/js/test/
+          key: onnxnodetests-${{ hashFiles('js/scripts/prepare-onnx-node-tests.ts') }}
+
+      - name: API Documentation Check and generate
+        run: |
+          set ORT_DOXY_SRC=${{ github.workspace }}
+          set ORT_DOXY_OUT=${{ runner.temp }}\build\RelWithDebInfo\RelWithDebInfo
+          mkdir %ORT_DOXY_SRC%
+          mkdir %ORT_DOXY_OUT%
+          "C:\Program Files\doxygen\bin\doxygen.exe" ${{ github.workspace }}\tools\ci_build\github\Doxyfile_csharp.cfg
+        working-directory: ${{ github.workspace }}
+        shell: cmd
+
+      - uses: actions/setup-dotnet@v5
+        env:
+          PROCESSOR_ARCHITECTURE: x64
+        with:
+          dotnet-version: '8.x'
+
+      - name: Use Nuget 6.x
+        uses: nuget/setup-nuget@v2
+        with:
+          nuget-version: '6.x'
+
+      - name: NuGet restore
+        run: nuget restore ${{ github.workspace }}\packages.config -ConfigFile ${{ github.workspace }}\NuGet.config -PackagesDirectory ${{ runner.temp }}\build\RelWithDebInfo
+        shell: cmd
+
+      - name: Set OnnxRuntimeBuildDirectory
+        shell: pwsh
+        run: |
+          $buildDir = Join-Path ${{ runner.temp }} "build"
+          echo "OnnxRuntimeBuildDirectory=$buildDir" >> $env:GITHUB_ENV
+
+      - name: Build and Clean Binaries
+        working-directory: ${{ runner.temp }}
+        run: |
+          npm install -g typescript
+          if ($lastExitCode -ne 0) {
+            exit $lastExitCode
+          }
+          # Execute the build process
+          python ${{ github.workspace }}\tools\ci_build\build.py --config RelWithDebInfo --parallel --use_binskim_compliant_compile_flags --build_dir build --skip_submodule_sync --build_shared_lib --build --update --cmake_generator "Visual Studio 17 2022" --build_wheel --enable_onnx_tests --use_nv_tensorrt_rtx --tensorrt_rtx_home="${{ runner.temp }}\TensorRT-RTX-1.1.1.26.Windows.win10.cuda-12.9" --cuda_home="${{ runner.temp }}\v12.8" --use_vcpkg --use_vcpkg_ms_internal_asset_cache --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=86
+          if ($lastExitCode -ne 0) {
+            exit $lastExitCode
+          }
+
+          # Clean up the output directory before uploading artifacts
+          $outputDir = "${{ runner.temp }}\build\RelWithDebInfo"
+          Write-Host "Cleaning up files from $outputDir..."
+
+          Remove-Item -Path "$outputDir\onnxruntime" -Recurse -Force -ErrorAction SilentlyContinue
+          Remove-Item -Path "$outputDir\pybind11" -Recurse -Force -ErrorAction SilentlyContinue
+          Remove-Item -Path "$outputDir\models" -Recurse -Force -ErrorAction SilentlyContinue
+          Remove-Item -Path "$outputDir\vcpkg_installed" -Recurse -Force -ErrorAction SilentlyContinue
+          Remove-Item -Path "$outputDir\_deps" -Recurse -Force -ErrorAction SilentlyContinue
+          Remove-Item -Path "$outputDir\CMakeCache.txt" -Force -ErrorAction SilentlyContinue
+          Remove-Item -Path "$outputDir\CMakeFiles" -Recurse -Force -ErrorAction SilentlyContinue
+          # Remove intermediate object files as in the original script
+          Remove-Item -Path $outputDir -Include "*.obj" -Recurse
+        shell: pwsh
+
+      - name: Upload build artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          name: build-artifacts
+          path: ${{ runner.temp }}\build
+    env:
+      OrtPackageId: Microsoft.ML.OnnxRuntime.Gpu
+      DOTNET_SKIP_FIRST_TIME_EXPERIENCE: true
+      setVcvars: true
+      ALLOW_RELEASED_ONNX_OPSET_ONLY: '0'
+      DocUpdateNeeded: false
+      ONNXRUNTIME_TEST_GPU_DEVICE_ID: '0'
+      AZCOPY_AUTO_LOGIN_TYPE: MSI
+      AZCOPY_MSI_CLIENT_ID: 63b63039-6328-442f-954b-5a64d124e5b4
+
+  test:
+    name: Windows GPU TensorRT RTX CI Pipeline Test Job
+    needs: build
+    timeout-minutes: 300
+    runs-on: ["self-hosted", "1ES.Pool=onnxruntime-github-Win2022-GPU-A10"]
+    steps:
+      - uses: actions/checkout@v5
+        with:
+          fetch-depth: 0
+          submodules: 'none'
+
+      - name: Download build artifacts
+        uses: actions/download-artifact@v5
+        with:
+          name: build-artifacts
+          path: ${{ runner.temp }}\build
+
+      - uses: actions/setup-python@v6
+        with:
+          python-version: '3.12'
+          architecture: x64
+
+      - uses: actions/setup-node@v5
+        with:
+          node-version: '20.x'
+
+      - uses: actions/setup-java@v5
+        with:
+          distribution: 'temurin'
+          java-version: '17'
+          architecture: x64
+
+      - name: Locate vcvarsall and Setup Env
+        uses: ./.github/actions/locate-vcvarsall-and-setup-env
+        with:
+          architecture: x64
+
+      - name: Install python modules
+        run: python -m pip install -r .\tools\ci_build\github\windows\python\requirements.txt
+        working-directory: ${{ github.workspace }}
+        shell: cmd
+
+      - name: Download CUDA SDK v12.8
+        working-directory: ${{ runner.temp }}
+        run: |
+          azcopy.exe cp --recursive "https://lotusscus.blob.core.windows.net/models/cuda_sdk/v12.8" .
+          dir
+        shell: pwsh
+
+      - name: Download TensorRT-RTX-1.1.1.26.Windows.win10.cuda-12.9
+        run: 'azcopy.exe cp --recursive "https://lotusscus.blob.core.windows.net/models/local/TensorRT-RTX-1.1.1.26.Windows.win10.cuda-12.9" ${{ runner.temp }}'
+        shell: pwsh
+
+      - name: Add CUDA to PATH
+        shell: powershell
+        run: |
+          Write-Host "Adding CUDA to PATH"
+          Write-Host "CUDA Path: $env:RUNNER_TEMP\v12.8\bin"
+          Add-Content -Path $env:GITHUB_PATH -Value "$env:RUNNER_TEMP\v12.8\bin"
+          Add-Content -Path $env:GITHUB_PATH -Value "$env:RUNNER_TEMP\v12.8\extras\CUPTI\lib64"
+          Add-Content -Path $env:GITHUB_PATH -Value "$env:RUNNER_TEMP\TensorRT-RTX-1.1.1.26.Windows.win10.cuda-12.9\lib"
+
+      - name: Set OnnxRuntimeBuildDirectory
+        shell: pwsh
+        run: |
+          $buildDir = Join-Path ${{ runner.temp }} "build"
+          echo "OnnxRuntimeBuildDirectory=$buildDir" >> $env:GITHUB_ENV
+
+      - name: Install ONNX Runtime Wheel
+        uses: ./.github/actions/install-onnxruntime-wheel
+        with:
+          whl-directory: ${{ runner.temp }}\build\RelWithDebInfo\RelWithDebInfo\dist
+
+      - name: Run Tests
+        working-directory: ${{ runner.temp }}
+        run: |
+          npm install -g typescript
+          if ($lastExitCode -ne 0) {
+            exit $lastExitCode
+          }
+
+          python.exe ${{ github.workspace }}\tools\python\update_ctest_path.py "${{ runner.temp }}\build\RelWithDebInfo\CTestTestfile.cmake" "${{ runner.temp }}\build\RelWithDebInfo"
+          if ($lastExitCode -ne 0) {
+            exit $lastExitCode
+          }
+
+          python ${{ github.workspace }}\tools\ci_build\build.py --config RelWithDebInfo --parallel --use_binskim_compliant_compile_flags --build_dir build --skip_submodule_sync --build_shared_lib --test --cmake_generator "Visual Studio 17 2022" --build_wheel --enable_onnx_tests --use_nv_tensorrt_rtx --tensorrt_rtx_home="${{ runner.temp }}\TensorRT-RTX-1.1.1.26.Windows.win10.cuda-12.9" --cuda_home="${{ runner.temp }}\v12.8" --use_vcpkg --use_vcpkg_ms_internal_asset_cache --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=86
+          if ($lastExitCode -ne 0) {
+            exit $lastExitCode
+          }
+        shell: pwsh
+
+      - name: Validate C# native delegates
+        run: python tools\ValidateNativeDelegateAttributes.py
+        working-directory: ${{ github.workspace }}\csharp
+        shell: cmd
+    env:
+      OrtPackageId: Microsoft.ML.OnnxRuntime.Gpu
+      DOTNET_SKIP_FIRST_TIME_EXPERIENCE: true
+      setVcvars: true
+      ALLOW_RELEASED_ONNX_OPSET_ONLY: '0'
+      DocUpdateNeeded: false
+      ONNXRUNTIME_TEST_GPU_DEVICE_ID: '0'
+      AZCOPY_AUTO_LOGIN_TYPE: MSI
+      AZCOPY_MSI_CLIENT_ID: 63b63039-6328-442f-954b-5a64d124e5b4

From a01659dd91a44f9a84764798d8158f1c658b842e Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Fri, 17 Oct 2025 13:43:38 -0700
Subject: [PATCH 02/20] set CMAKE_CUDA_COMPILER

---
 cmake/onnxruntime_providers_nv.cmake | 1 +
 1 file changed, 1 insertion(+)

diff --git a/cmake/onnxruntime_providers_nv.cmake b/cmake/onnxruntime_providers_nv.cmake
index e59463b6b91f1..26b895656237d 100644
--- a/cmake/onnxruntime_providers_nv.cmake
+++ b/cmake/onnxruntime_providers_nv.cmake
@@ -2,6 +2,7 @@
 # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # Licensed under the MIT License.
   find_package(CUDAToolkit REQUIRED 12.8)
+  set(CMAKE_CUDA_COMPILER "${CUDAToolkit_ROOT}/bin/nvcc.exe")
   enable_language(CUDA)
   if(onnxruntime_DISABLE_CONTRIB_OPS)
     message( FATAL_ERROR "To compile TensorRT execution provider contrib ops have to be enabled to dump an engine using com.microsoft:EPContext node." )

From c2246aba928a0b2750190d2fa3e94c5c0d4a6a2b Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Fri, 17 Oct 2025 14:20:20 -0700
Subject: [PATCH 03/20] Use CUDAToolkit_BIN_DIR to locate nvcc

---
 cmake/onnxruntime_providers_nv.cmake | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cmake/onnxruntime_providers_nv.cmake b/cmake/onnxruntime_providers_nv.cmake
index 26b895656237d..b0aa4b129685d 100644
--- a/cmake/onnxruntime_providers_nv.cmake
+++ b/cmake/onnxruntime_providers_nv.cmake
@@ -2,7 +2,7 @@
 # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # Licensed under the MIT License.
   find_package(CUDAToolkit REQUIRED 12.8)
-  set(CMAKE_CUDA_COMPILER "${CUDAToolkit_ROOT}/bin/nvcc.exe")
+  set(CMAKE_CUDA_COMPILER "${CUDAToolkit_BIN_DIR}/nvcc.exe")
   enable_language(CUDA)
   if(onnxruntime_DISABLE_CONTRIB_OPS)
     message( FATAL_ERROR "To compile TensorRT execution provider contrib ops have to be enabled to dump an engine using com.microsoft:EPContext node." )

From 904082842abb027c951dc30fa125e7311c2aea35 Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Fri, 17 Oct 2025 14:39:36 -0700
Subject: [PATCH 04/20] update

---
 cmake/onnxruntime_providers_nv.cmake | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/cmake/onnxruntime_providers_nv.cmake b/cmake/onnxruntime_providers_nv.cmake
index b0aa4b129685d..66cf6d33dfd1f 100644
--- a/cmake/onnxruntime_providers_nv.cmake
+++ b/cmake/onnxruntime_providers_nv.cmake
@@ -2,7 +2,8 @@
 # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # Licensed under the MIT License.
   find_package(CUDAToolkit REQUIRED 12.8)
-  set(CMAKE_CUDA_COMPILER "${CUDAToolkit_BIN_DIR}/nvcc.exe")
+  set(CMAKE_CUDA_COMPILER "${CUDAToolkit_BIN_DIR}/nvcc${CMAKE_EXECUTABLE_SUFFIX}" CACHE FILEPATH "CUDA compiler" FORCE)
+  message(STATUS "NVCC path: ${CMAKE_CUDA_COMPILER}")
   enable_language(CUDA)
   if(onnxruntime_DISABLE_CONTRIB_OPS)
     message( FATAL_ERROR "To compile TensorRT execution provider contrib ops have to be enabled to dump an engine using com.microsoft:EPContext node." )

From 16334416e3c2cf3d15f7338a5b302e51c06ad9d3 Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Fri, 17 Oct 2025 14:53:32 -0700
Subject: [PATCH 05/20] update

---
 cmake/onnxruntime_providers_nv.cmake | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/cmake/onnxruntime_providers_nv.cmake b/cmake/onnxruntime_providers_nv.cmake
index 66cf6d33dfd1f..697d8930ccba7 100644
--- a/cmake/onnxruntime_providers_nv.cmake
+++ b/cmake/onnxruntime_providers_nv.cmake
@@ -2,9 +2,9 @@
 # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # Licensed under the MIT License.
   find_package(CUDAToolkit REQUIRED 12.8)
-  set(CMAKE_CUDA_COMPILER "${CUDAToolkit_BIN_DIR}/nvcc${CMAKE_EXECUTABLE_SUFFIX}" CACHE FILEPATH "CUDA compiler" FORCE)
-  message(STATUS "NVCC path: ${CMAKE_CUDA_COMPILER}")
-  enable_language(CUDA)
+  #set(CMAKE_CUDA_COMPILER "${CUDAToolkit_BIN_DIR}/nvcc${CMAKE_EXECUTABLE_SUFFIX}" CACHE FILEPATH "CUDA compiler" FORCE)
+  #message(STATUS "NVCC path: ${CMAKE_CUDA_COMPILER}")
+  #enable_language(CUDA)
   if(onnxruntime_DISABLE_CONTRIB_OPS)
     message( FATAL_ERROR "To compile TensorRT execution provider contrib ops have to be enabled to dump an engine using com.microsoft:EPContext node." )
   endif()

From 30a38524e61983ba32baea49183dd05bdb440114 Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Fri, 17 Oct 2025 15:52:50 -0700
Subject: [PATCH 06/20] update

---
 .github/workflows/windows_tensorrt_rtx.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/windows_tensorrt_rtx.yml b/.github/workflows/windows_tensorrt_rtx.yml
index a2cbc22586121..da14280ae7c23 100644
--- a/.github/workflows/windows_tensorrt_rtx.yml
+++ b/.github/workflows/windows_tensorrt_rtx.yml
@@ -116,7 +116,7 @@ jobs:
             exit $lastExitCode
           }
           # Execute the build process
-          python ${{ github.workspace }}\tools\ci_build\build.py --config RelWithDebInfo --parallel --use_binskim_compliant_compile_flags --build_dir build --skip_submodule_sync --build_shared_lib --build --update --cmake_generator "Visual Studio 17 2022" --build_wheel --enable_onnx_tests --use_nv_tensorrt_rtx --tensorrt_rtx_home="${{ runner.temp }}\TensorRT-RTX-1.1.1.26.Windows.win10.cuda-12.9" --cuda_home="${{ runner.temp }}\v12.8" --use_vcpkg --use_vcpkg_ms_internal_asset_cache --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=86
+          python ${{ github.workspace }}\tools\ci_build\build.py --config RelWithDebInfo --parallel --use_binskim_compliant_compile_flags --build_dir build --skip_submodule_sync --build_shared_lib --build --update --cmake_generator "Visual Studio 17 2022" --build_wheel --enable_onnx_tests --use_cuda --use_nv_tensorrt_rtx --tensorrt_rtx_home="${{ runner.temp }}\TensorRT-RTX-1.1.1.26.Windows.win10.cuda-12.9" --cuda_home="${{ runner.temp }}\v12.8" --use_vcpkg --use_vcpkg_ms_internal_asset_cache --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=86
           if ($lastExitCode -ne 0) {
             exit $lastExitCode
           }

From a31d43194caed64fbc16103e945c4e4321f703da Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Fri, 17 Oct 2025 23:07:46 -0700
Subject: [PATCH 07/20] comment out enable_language(CUDA)

---
 cmake/onnxruntime_providers_nv.cmake | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/cmake/onnxruntime_providers_nv.cmake b/cmake/onnxruntime_providers_nv.cmake
index 697d8930ccba7..688c1513ee325 100644
--- a/cmake/onnxruntime_providers_nv.cmake
+++ b/cmake/onnxruntime_providers_nv.cmake
@@ -2,8 +2,7 @@
 # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # Licensed under the MIT License.
   find_package(CUDAToolkit REQUIRED 12.8)
-  #set(CMAKE_CUDA_COMPILER "${CUDAToolkit_BIN_DIR}/nvcc${CMAKE_EXECUTABLE_SUFFIX}" CACHE FILEPATH "CUDA compiler" FORCE)
-  #message(STATUS "NVCC path: ${CMAKE_CUDA_COMPILER}")
+  # There is no need to call this here when using the onnxruntime build script, because enable_language(CUDA) is already called in CMakeLists.txt when the CUDA EP is enabled.
   #enable_language(CUDA)
   if(onnxruntime_DISABLE_CONTRIB_OPS)
     message( FATAL_ERROR "To compile TensorRT execution provider contrib ops have to be enabled to dump an engine using com.microsoft:EPContext node." )

From 0749aa790c80f729dd0075fce7e0b2158e001ca5 Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Tue, 28 Oct 2025 13:45:42 -0700
Subject: [PATCH 08/20] enable verbose log to test

---
 .github/workflows/windows_tensorrt_rtx.yml                    | 1 +
 .../test/providers/nv_tensorrt_rtx/test_nv_trt_rtx_ep_util.cc | 1 +
 2 files changed, 2 insertions(+)

diff --git a/.github/workflows/windows_tensorrt_rtx.yml b/.github/workflows/windows_tensorrt_rtx.yml
index da14280ae7c23..bc26c3dbe1676 100644
--- a/.github/workflows/windows_tensorrt_rtx.yml
+++ b/.github/workflows/windows_tensorrt_rtx.yml
@@ -150,6 +150,7 @@ jobs:
       ONNXRUNTIME_TEST_GPU_DEVICE_ID: '0'
       AZCOPY_AUTO_LOGIN_TYPE: MSI
       AZCOPY_MSI_CLIENT_ID: 63b63039-6328-442f-954b-5a64d124e5b4
+      ORT_UNIT_TEST_MAIN_LOG_LEVEL: 0
 
   test:
     name: Windows GPU TensorRT RTX CI Pipeline Test Job

diff --git a/onnxruntime/test/providers/nv_tensorrt_rtx/test_nv_trt_rtx_ep_util.cc b/onnxruntime/test/providers/nv_tensorrt_rtx/test_nv_trt_rtx_ep_util.cc
index 3a91fc1ba09bb..e99fd6371f3ba 100644
--- a/onnxruntime/test/providers/nv_tensorrt_rtx/test_nv_trt_rtx_ep_util.cc
+++ b/onnxruntime/test/providers/nv_tensorrt_rtx/test_nv_trt_rtx_ep_util.cc
@@ -285,6 +285,7 @@ void CreateLargeLLMModel(const PathString& model_path, const PathString& externa
   auto dtype = ONNX_NAMESPACE::TensorProto_DataType_FLOAT16;
 
   // Set up model/graph
+  DefaultLoggingManager().SetDefaultLoggerSeverity(onnxruntime::logging::Severity::kVERBOSE);
   onnxruntime::Model model("LLM_With_GQA", false, DefaultLoggingManager().DefaultLogger());
   auto& graph = model.MainGraph();
 

From 730f76d299fc69394dfd3d5f4fa43fb16c88a8ca Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Tue, 28 Oct 2025 17:40:02 -0700
Subject: [PATCH 09/20] enable verbose log

---
 onnxruntime/test/unittest_main/test_main.cc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/onnxruntime/test/unittest_main/test_main.cc b/onnxruntime/test/unittest_main/test_main.cc
index 117a26d48efe9..64382cbcf4e78 100644
--- a/onnxruntime/test/unittest_main/test_main.cc
+++ b/onnxruntime/test/unittest_main/test_main.cc
@@ -71,6 +71,7 @@ extern "C" void ortenv_setup() {
     log_level = static_cast<OrtLoggingLevel>(*log_level_override);
   }
 
+  log_level = ORT_LOGGING_LEVEL_VERBOSE;
   ort_env.reset(new Ort::Env(&tpo, log_level, "Default"));
 
 #if defined(TEST_MAIN_ENABLE_DYNAMIC_PLUGIN_EP_USAGE)

From 2ad8459d2b9b884c4eff2bf0acc0fc4e912d9900 Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Mon, 3 Nov 2025 11:40:40 -0800
Subject: [PATCH 10/20] add log

---
 .github/workflows/windows_tensorrt_rtx.yml | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/.github/workflows/windows_tensorrt_rtx.yml b/.github/workflows/windows_tensorrt_rtx.yml
index bc26c3dbe1676..be62eae8c928b 100644
--- a/.github/workflows/windows_tensorrt_rtx.yml
+++ b/.github/workflows/windows_tensorrt_rtx.yml
@@ -108,6 +108,11 @@ jobs:
           $buildDir = Join-Path ${{ runner.temp }} "build"
           echo "OnnxRuntimeBuildDirectory=$buildDir" >> $env:GITHUB_ENV
 
+      - name: nvidia-smi
+        shell: powershell
+        run: |
+          nvidia-smi.exe
+
       - name: Build and Clean Binaries
         working-directory: ${{ runner.temp }}
         run: |

From 79c01c3e3ea5308156327df8a97ee0feb9829c45 Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Mon, 3 Nov 2025 11:57:00 -0800
Subject: [PATCH 11/20] update command to run nvidia-smi

---
 .github/workflows/windows_tensorrt_rtx.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/windows_tensorrt_rtx.yml b/.github/workflows/windows_tensorrt_rtx.yml
index be62eae8c928b..ae10fa42deedd 100644
--- a/.github/workflows/windows_tensorrt_rtx.yml
+++ b/.github/workflows/windows_tensorrt_rtx.yml
@@ -111,7 +111,7 @@ jobs:
       - name: nvidia-smi
         shell: powershell
         run: |
-          nvidia-smi.exe
+          "C:\Program Files\NVIDIA Corporation\NVSMI\nvidia-smi.exe"
 
       - name: Build and Clean Binaries
         working-directory: ${{ runner.temp }}

From d54d1c05d50cdd10c3b1aa05c4e224e16a5cc3bd Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Mon, 3 Nov 2025 12:03:48 -0800
Subject: [PATCH 12/20] update vm

---
 .github/workflows/windows_tensorrt_rtx.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/windows_tensorrt_rtx.yml b/.github/workflows/windows_tensorrt_rtx.yml
index ae10fa42deedd..bdd2d3ae6b1ec 100644
--- a/.github/workflows/windows_tensorrt_rtx.yml
+++ b/.github/workflows/windows_tensorrt_rtx.yml
@@ -19,7 +19,7 @@ concurrency:
 jobs:
   build:
     name: Windows GPU TensorRT RTX CI Pipeline
-    runs-on: ["self-hosted", "1ES.Pool=onnxruntime-github-vs2022-latest"]
+    runs-on: ["self-hosted", "1ES.Pool=onnxruntime-github-Win2022-GPU-A10"]
     steps:
       - uses: actions/checkout@v5
         with:
@@ -111,7 +111,7 @@ jobs:
       - name: nvidia-smi
         shell: powershell
         run: |
-          "C:\Program Files\NVIDIA Corporation\NVSMI\nvidia-smi.exe"
+          nvidia-smi.exe
 
       - name: Build and Clean Binaries
         working-directory: ${{ runner.temp }}

From 8f16f9e31ae01b6c6b93ca4c5760661470bc377c Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Mon, 3 Nov 2025 12:22:05 -0800
Subject: [PATCH 13/20] Add log for host RAM

---
 .github/workflows/windows_tensorrt_rtx.yml | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/windows_tensorrt_rtx.yml b/.github/workflows/windows_tensorrt_rtx.yml
index bdd2d3ae6b1ec..96660b5577eac 100644
--- a/.github/workflows/windows_tensorrt_rtx.yml
+++ b/.github/workflows/windows_tensorrt_rtx.yml
@@ -36,6 +36,18 @@ jobs:
         with:
           architecture: x64
 
+      - name: Show total physical memory
+        run: |
+          $ram = (Get-CimInstance Win32_ComputerSystem).TotalPhysicalMemory
+          $ramGB = [math]::Round($ram / 1GB, 2)
+          Write-Host "Total RAM: $ramGB GB"
+        shell: pwsh
+
+      - name: nvidia-smi
+        shell: powershell
+        run: |
+          nvidia-smi.exe
+
       - name: Install python modules
         run: python -m pip install -r .\tools\ci_build\github\windows\python\requirements.txt
         working-directory: ${{ github.workspace }}
@@ -108,11 +120,6 @@ jobs:
           $buildDir = Join-Path ${{ runner.temp }} "build"
           echo "OnnxRuntimeBuildDirectory=$buildDir" >> $env:GITHUB_ENV
 
-      - name: nvidia-smi
-        shell: powershell
-        run: |
-          nvidia-smi.exe
-
       - name: Build and Clean Binaries
         working-directory: ${{ runner.temp }}
         run: |

From 7fd6dc2879b1d3a338333db2240f61c27ca62767 Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Mon, 3 Nov 2025 13:36:54 -0800
Subject: [PATCH 14/20] reduce LLM size in the test

---
 .github/workflows/windows_tensorrt_rtx.yml | 26 +++++++++++++-------------
 .../test_nv_trt_rtx_ep_util.cc             |  2 +-
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/.github/workflows/windows_tensorrt_rtx.yml b/.github/workflows/windows_tensorrt_rtx.yml
index 96660b5577eac..d636e762ab89c 100644
--- a/.github/workflows/windows_tensorrt_rtx.yml
+++ b/.github/workflows/windows_tensorrt_rtx.yml
@@ -19,7 +19,7 @@ concurrency:
 jobs:
   build:
     name: Windows GPU TensorRT RTX CI Pipeline
-    runs-on: ["self-hosted", "1ES.Pool=onnxruntime-github-Win2022-GPU-A10"]
+    runs-on: ["self-hosted", "1ES.Pool=onnxruntime-github-vs2022-latest"]
     steps:
      - uses: actions/checkout@v5
        with:
@@ -36,18 +36,6 @@ jobs:
         with:
           architecture: x64
 
-      - name: Show total physical memory
-        run: |
-          $ram = (Get-CimInstance Win32_ComputerSystem).TotalPhysicalMemory
-          $ramGB = [math]::Round($ram / 1GB, 2)
-          Write-Host "Total RAM: $ramGB GB"
-        shell: pwsh
-
-      - name: nvidia-smi
-        shell: powershell
-        run: |
-          nvidia-smi.exe
-
       - name: Install python modules
         run: python -m pip install -r .\tools\ci_build\github\windows\python\requirements.txt
         working-directory: ${{ github.workspace }}
@@ -237,6 +225,18 @@ jobs:
         with:
           whl-directory: ${{ runner.temp }}\build\RelWithDebInfo\RelWithDebInfo\dist
 
+      - name: Show total physical memory
+        run: |
+          $ram = (Get-CimInstance Win32_ComputerSystem).TotalPhysicalMemory
+          $ramGB = [math]::Round($ram / 1GB, 2)
+          Write-Host "Total RAM: $ramGB GB"
+        shell: pwsh
+
+      - name: nvidia-smi
+        shell: powershell
+        run: |
+          nvidia-smi.exe
+
       - name: Run Tests
         working-directory: ${{ runner.temp }}
         run: |

diff --git a/onnxruntime/test/providers/nv_tensorrt_rtx/test_nv_trt_rtx_ep_util.cc b/onnxruntime/test/providers/nv_tensorrt_rtx/test_nv_trt_rtx_ep_util.cc
index e99fd6371f3ba..cefcf6b2e9ee6 100644
--- a/onnxruntime/test/providers/nv_tensorrt_rtx/test_nv_trt_rtx_ep_util.cc
+++ b/onnxruntime/test/providers/nv_tensorrt_rtx/test_nv_trt_rtx_ep_util.cc
@@ -276,7 +276,7 @@ onnxruntime::NodeArg& AddGroupQueryAttention(
 void CreateLargeLLMModel(const PathString& model_path, const PathString& external_data_path) {
   // Model parameters (example: 24 layers, 4096 hidden dim, 32 attention heads, 8 kv heads => GQA)
   int batch_size = 1;
-  int num_layers = 32;
+  int num_layers = 8;
   int hidden_dim = 2048;
   int q_num_heads = 8;
   int kv_num_heads = 1;  // GQA: q_num_heads > kv_num_heads, and divisible.

From 7b62db85c55bff9b4eb89783f702c7bf838ca01c Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Wed, 5 Nov 2025 08:58:59 -0800
Subject: [PATCH 15/20] make LLM 4 layers

---
 .../test/providers/nv_tensorrt_rtx/test_nv_trt_rtx_ep_util.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/onnxruntime/test/providers/nv_tensorrt_rtx/test_nv_trt_rtx_ep_util.cc b/onnxruntime/test/providers/nv_tensorrt_rtx/test_nv_trt_rtx_ep_util.cc
index cefcf6b2e9ee6..21a151cb2b1c2 100644
--- a/onnxruntime/test/providers/nv_tensorrt_rtx/test_nv_trt_rtx_ep_util.cc
+++ b/onnxruntime/test/providers/nv_tensorrt_rtx/test_nv_trt_rtx_ep_util.cc
@@ -276,7 +276,7 @@ onnxruntime::NodeArg& AddGroupQueryAttention(
 void CreateLargeLLMModel(const PathString& model_path, const PathString& external_data_path) {
   // Model parameters (example: 24 layers, 4096 hidden dim, 32 attention heads, 8 kv heads => GQA)
   int batch_size = 1;
-  int num_layers = 8;
+  int num_layers = 4;
   int hidden_dim = 2048;
   int q_num_heads = 8;
   int kv_num_heads = 1;  // GQA: q_num_heads > kv_num_heads, and divisible.
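Note: PATCH 14 through PATCH 18 progressively shrink the generated GQA test model (32 -> 8 -> 4 -> 1 layers, then smaller hidden dims) to curb memory use on the CI agents. A rough back-of-envelope estimate of the fp16 weight footprint shows why each reduction helps. The sketch below is illustrative only; it assumes each layer carries just the Q/K/V/O projection matrices, while the real CreateLargeLLMModel emits additional tensors, so treat the result as a lower bound.

#include <cstdio>

int main() {
  // Parameters mirroring CreateLargeLLMModel as of PATCH 15.
  const long long num_layers = 4, hidden_dim = 2048;
  const long long q_num_heads = 8, kv_num_heads = 1;
  const long long head_dim = hidden_dim / q_num_heads;
  // Q and O projections are hidden_dim x hidden_dim; K and V are
  // hidden_dim x (kv_num_heads * head_dim) under grouped-query attention.
  const long long per_layer =
      2 * hidden_dim * hidden_dim + 2 * hidden_dim * kv_num_heads * head_dim;
  const long long bytes = num_layers * per_layer * 2;  // fp16 = 2 bytes/element
  std::printf("~%.1f MiB of attention weights\n", bytes / (1024.0 * 1024.0));
  return 0;
}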
From 87ccb4f581bb11f08f927303a3906ad05cea92c1 Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Wed, 5 Nov 2025 16:19:52 -0800
Subject: [PATCH 16/20] Make LLM layer 1

---
 .../test/providers/nv_tensorrt_rtx/test_nv_trt_rtx_ep_util.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/onnxruntime/test/providers/nv_tensorrt_rtx/test_nv_trt_rtx_ep_util.cc b/onnxruntime/test/providers/nv_tensorrt_rtx/test_nv_trt_rtx_ep_util.cc
index 21a151cb2b1c2..7f7c67125366c 100644
--- a/onnxruntime/test/providers/nv_tensorrt_rtx/test_nv_trt_rtx_ep_util.cc
+++ b/onnxruntime/test/providers/nv_tensorrt_rtx/test_nv_trt_rtx_ep_util.cc
@@ -276,7 +276,7 @@ onnxruntime::NodeArg& AddGroupQueryAttention(
 void CreateLargeLLMModel(const PathString& model_path, const PathString& external_data_path) {
   // Model parameters (example: 24 layers, 4096 hidden dim, 32 attention heads, 8 kv heads => GQA)
   int batch_size = 1;
-  int num_layers = 4;
+  int num_layers = 1;
   int hidden_dim = 2048;
   int q_num_heads = 8;
   int kv_num_heads = 1;  // GQA: q_num_heads > kv_num_heads, and divisible.

From 1976d070596224d665e90a3899c93cd39147fb5f Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Mon, 10 Nov 2025 13:56:49 -0800
Subject: [PATCH 17/20] change hidden dim to 512

---
 .../test/providers/nv_tensorrt_rtx/test_nv_trt_rtx_ep_util.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/onnxruntime/test/providers/nv_tensorrt_rtx/test_nv_trt_rtx_ep_util.cc b/onnxruntime/test/providers/nv_tensorrt_rtx/test_nv_trt_rtx_ep_util.cc
index 7f7c67125366c..246e942880393 100644
--- a/onnxruntime/test/providers/nv_tensorrt_rtx/test_nv_trt_rtx_ep_util.cc
+++ b/onnxruntime/test/providers/nv_tensorrt_rtx/test_nv_trt_rtx_ep_util.cc
@@ -277,7 +277,7 @@ void CreateLargeLLMModel(const PathString& model_path, const PathString& externa
   // Model parameters (example: 24 layers, 4096 hidden dim, 32 attention heads, 8 kv heads => GQA)
   int batch_size = 1;
   int num_layers = 1;
-  int hidden_dim = 2048;
+  int hidden_dim = 512;
   int q_num_heads = 8;
   int kv_num_heads = 1;  // GQA: q_num_heads > kv_num_heads, and divisible.
   int seq_length = 128;  // Short, for demonstration.

From 8267ee5c8bc5ff8b9b37e0c57007e0b97782430f Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Mon, 10 Nov 2025 16:29:02 -0800
Subject: [PATCH 18/20] change hidden dim to 128

---
 .../test/providers/nv_tensorrt_rtx/test_nv_trt_rtx_ep_util.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/onnxruntime/test/providers/nv_tensorrt_rtx/test_nv_trt_rtx_ep_util.cc b/onnxruntime/test/providers/nv_tensorrt_rtx/test_nv_trt_rtx_ep_util.cc
index 246e942880393..e15d272437824 100644
--- a/onnxruntime/test/providers/nv_tensorrt_rtx/test_nv_trt_rtx_ep_util.cc
+++ b/onnxruntime/test/providers/nv_tensorrt_rtx/test_nv_trt_rtx_ep_util.cc
@@ -277,7 +277,7 @@ void CreateLargeLLMModel(const PathString& model_path, const PathString& externa
   // Model parameters (example: 24 layers, 4096 hidden dim, 32 attention heads, 8 kv heads => GQA)
   int batch_size = 1;
   int num_layers = 1;
-  int hidden_dim = 512;
+  int hidden_dim = 128;
   int q_num_heads = 8;
   int kv_num_heads = 1;  // GQA: q_num_heads > kv_num_heads, and divisible.
   int seq_length = 128;  // Short, for demonstration.
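Note: PATCH 19 below brackets createNetworkV2() and supportsModelV2() with manual before/after logs to localize where the provider stalls. A reusable RAII helper (a hypothetical sketch, not part of the ONNX Runtime codebase) would produce the same pair of messages plus a duration without repeating the boilerplate at each call site:

#include <chrono>
#include <cstdio>
#include <string>

// Logs on construction, and again (with elapsed milliseconds) on destruction.
struct ScopedTrace {
  std::string tag;
  std::chrono::steady_clock::time_point start = std::chrono::steady_clock::now();
  explicit ScopedTrace(std::string t) : tag(std::move(t)) {
    std::printf("[trace] before %s\n", tag.c_str());
  }
  ~ScopedTrace() {
    const auto ms = std::chrono::duration_cast<std::chrono::milliseconds>(
                        std::chrono::steady_clock::now() - start)
                        .count();
    std::printf("[trace] after %s (%lld ms)\n", tag.c_str(), static_cast<long long>(ms));
  }
};

// Usage at a suspect call site:
//   { ScopedTrace trace("supportsModelV2"); is_model_supported = trt_parser->supportsModelV2(...); }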
From cdb742415355d26a1ac50fbcfb7a7cbb9ce14657 Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Tue, 11 Nov 2025 09:35:24 -0800
Subject: [PATCH 19/20] add gpu monitor in pipelines

---
 .github/workflows/windows_tensorrt_rtx.yml   | 18 ++++++++++++++++++
 .../nv_tensorrt_rtx/nv_execution_provider.cc |  4 ++++
 2 files changed, 22 insertions(+)

diff --git a/.github/workflows/windows_tensorrt_rtx.yml b/.github/workflows/windows_tensorrt_rtx.yml
index d636e762ab89c..6e8807cf7f64a 100644
--- a/.github/workflows/windows_tensorrt_rtx.yml
+++ b/.github/workflows/windows_tensorrt_rtx.yml
@@ -108,6 +108,12 @@ jobs:
           $buildDir = Join-Path ${{ runner.temp }} "build"
           echo "OnnxRuntimeBuildDirectory=$buildDir" >> $env:GITHUB_ENV
 
+      - name: nvidia-smi
+        shell: powershell
+        run: |
+          nvidia-smi.exe
+          nvidia-smi.exe --query-gpu=memory.total,memory.used,memory.free --format=csv
+
       - name: Build and Clean Binaries
         working-directory: ${{ runner.temp }}
         run: |
@@ -235,7 +241,8 @@ jobs:
       - name: nvidia-smi
         shell: powershell
         run: |
           nvidia-smi.exe
+          nvidia-smi.exe --query-gpu=memory.total,memory.used,memory.free --format=csv
 
       - name: Run Tests
         working-directory: ${{ runner.temp }}
@@ -259,8 +266,19 @@ jobs:
           if ($lastExitCode -ne 0) {
             exit $lastExitCode
           }
         shell: pwsh
 
+      - name: Check GPU memory usage and fail if >90%
+        shell: powershell
+        run: |
+          $csv = nvidia-smi --query-gpu=memory.used,memory.total --format=csv,noheader
+          $parts = $csv -split ','
+          $used = [int]($parts[0] -replace ' MiB','')
+          $total = [int]($parts[1] -replace ' MiB','')
+          $percent = ($used / $total) * 100
+          Write-Host "GPU memory used: $used MiB / $total MiB ($percent %)"
+          if ($percent -gt 90) { throw "GPU memory usage exceeded threshold ($percent%)" }
+
       - name: Validate C# native delegates
         run: python tools\ValidateNativeDelegateAttributes.py
         working-directory: ${{ github.workspace }}\csharp

diff --git a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider.cc b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider.cc
index 62210d65848d1..30dc1b02aad78 100644
--- a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider.cc
+++ b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider.cc
@@ -1736,7 +1736,9 @@ SubGraphCollection_t NvExecutionProvider::GetSupportedList(SubGraphCollection_t
   TensorrtLogger& trt_logger = GetTensorrtLogger(detailed_build_log_);
   auto trt_builder = GetBuilder(trt_logger);
   auto network_flags = 1U << static_cast<uint32_t>(nvinfer1::NetworkDefinitionCreationFlag::kSTRONGLY_TYPED);
+  LOGS_DEFAULT(INFO) << "[NvTensorRTRTX EP] Before createNetworkV2()";
   auto trt_network = std::unique_ptr<nvinfer1::INetworkDefinition>(trt_builder->createNetworkV2(network_flags));
+  LOGS_DEFAULT(INFO) << "[NvTensorRTRTX EP] After createNetworkV2()";
 
   bool is_model_supported = false;
 
@@ -1755,8 +1757,10 @@ SubGraphCollection_t NvExecutionProvider::GetSupportedList(SubGraphCollection_t
       ORT_THROW("'nv_use_external_data_initializer' is only supported on TensorRT RTX 1.1.x.x and above.");
 #endif
     } else {
+      LOGS_DEFAULT(INFO) << "[NvTensorRTRTX EP] Before supportsModelV2()";
      is_model_supported = trt_parser->supportsModelV2(string_buf.data(), string_buf.size(), model_path_);
     }
+    LOGS_DEFAULT(INFO) << "[NvTensorRTRTX EP] After supportsModelV2()";
 
     // Note: Calling getNbSubgraphs or getSubgraphNodes before calling supportsModelV2 results in undefined behavior.
     auto num_subgraphs = trt_parser->getNbSubgraphs();

From e1201919bac72af128afa38056d5960ae30c9507 Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Tue, 11 Nov 2025 09:51:26 -0800
Subject: [PATCH 20/20] remove nvidia-smi step from the build job

---
 .github/workflows/windows_tensorrt_rtx.yml | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/.github/workflows/windows_tensorrt_rtx.yml b/.github/workflows/windows_tensorrt_rtx.yml
index 6e8807cf7f64a..81b6651bccbbf 100644
--- a/.github/workflows/windows_tensorrt_rtx.yml
+++ b/.github/workflows/windows_tensorrt_rtx.yml
@@ -108,12 +108,6 @@ jobs:
           $buildDir = Join-Path ${{ runner.temp }} "build"
           echo "OnnxRuntimeBuildDirectory=$buildDir" >> $env:GITHUB_ENV
 
-      - name: nvidia-smi
-        shell: powershell
-        run: |
-          nvidia-smi.exe
-          nvidia-smi.exe --query-gpu=memory.total,memory.used,memory.free --format=csv
-
       - name: Build and Clean Binaries
         working-directory: ${{ runner.temp }}
         run: |
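Note: as an alternative to shelling out to nvidia-smi, the 90% threshold added in PATCH 19 could also be checked in-process through the CUDA runtime API. The following is a minimal sketch for illustration, not part of this PR:

#include <cuda_runtime.h>
#include <cstdio>

int main() {
  size_t free_bytes = 0, total_bytes = 0;
  // cudaMemGetInfo reports free and total memory on the current device.
  if (cudaMemGetInfo(&free_bytes, &total_bytes) != cudaSuccess) {
    std::fprintf(stderr, "cudaMemGetInfo failed\n");
    return 1;
  }
  const double used_pct =
      100.0 * static_cast<double>(total_bytes - free_bytes) / static_cast<double>(total_bytes);
  std::printf("GPU memory used: %.1f%%\n", used_pct);
  return used_pct > 90.0 ? 2 : 0;  // non-zero exit mirrors the pipeline's throw
}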