diff --git a/.github/ISSUE_TEMPLATE/03-mobile.yml b/.github/ISSUE_TEMPLATE/03-mobile.yml index 7c6de5aad523d..07bb40ff94a2e 100644 --- a/.github/ISSUE_TEMPLATE/03-mobile.yml +++ b/.github/ISSUE_TEMPLATE/03-mobile.yml @@ -131,6 +131,6 @@ body: id: ep-version attributes: label: Execution Provider Library Version - placeholder: ex. CUDA 11.6 or ROCm 5.1.1 + placeholder: ex. CUDA 12.2 or ROCm 5.1.1 validations: required: false diff --git a/.github/ISSUE_TEMPLATE/05-performance.yml b/.github/ISSUE_TEMPLATE/05-performance.yml index da0e6c7ada7a7..5d678033f6a42 100644 --- a/.github/ISSUE_TEMPLATE/05-performance.yml +++ b/.github/ISSUE_TEMPLATE/05-performance.yml @@ -127,7 +127,7 @@ body: id: ep-version attributes: label: Execution Provider Library Version - placeholder: ex. CUDA 11.6 or ROCm 5.1.1 + placeholder: ex. CUDA 12.2 or ROCm 5.1.1 validations: required: false - type: textarea diff --git a/.github/ISSUE_TEMPLATE/06-training.yml b/.github/ISSUE_TEMPLATE/06-training.yml index 790d64dac0051..fec2ab3a1b285 100644 --- a/.github/ISSUE_TEMPLATE/06-training.yml +++ b/.github/ISSUE_TEMPLATE/06-training.yml @@ -72,6 +72,6 @@ body: id: ep-version attributes: label: Execution Provider Library Version - placeholder: ex. CUDA 11.6 or ROCm 5.1.1 + placeholder: ex. CUDA 12.2 or ROCm 5.1.1 validations: required: false diff --git a/.github/ISSUE_TEMPLATE/08-general.yml b/.github/ISSUE_TEMPLATE/08-general.yml index 241be0044fe7d..53269c240429f 100644 --- a/.github/ISSUE_TEMPLATE/08-general.yml +++ b/.github/ISSUE_TEMPLATE/08-general.yml @@ -125,6 +125,6 @@ body: id: ep-version attributes: label: Execution Provider Library Version - placeholder: ex. CUDA 11.6 or ROCm 5.1.1 + placeholder: ex. 
CUDA 12.2 or ROCm 5.1.1 validations: required: false diff --git a/js/node/script/install.js b/js/node/script/install.js index 7fdaeb9586278..d406da3591eec 100644 --- a/js/node/script/install.js +++ b/js/node/script/install.js @@ -9,7 +9,6 @@ // The purpose of this script is to download the required binaries for the platform and architecture. // Currently, most of the binaries are already bundled in the package, except for the following: -// - Linux/x64/CUDA 11 // - Linux/x64/CUDA 12 // // The CUDA binaries are not bundled because they are too large to be allowed in the npm registry. Instead, they are diff --git a/onnxruntime/python/tools/tensorrt/perf/build/build_image.py b/onnxruntime/python/tools/tensorrt/perf/build/build_image.py index 3e913094628c3..a4fbc21b43c85 100644 --- a/onnxruntime/python/tools/tensorrt/perf/build/build_image.py +++ b/onnxruntime/python/tools/tensorrt/perf/build/build_image.py @@ -16,7 +16,6 @@ import sys TRT_DOCKER_FILES = { - "10.9_cuda11.8_cudnn8": "tools/ci_build/github/linux/docker/Dockerfile.ubuntu_cuda11_tensorrt10", "10.9_cuda12.8_cudnn9": "tools/ci_build/github/linux/docker/Dockerfile.ubuntu_cuda12_tensorrt10", "BIN": "tools/ci_build/github/linux/docker/Dockerfile.ubuntu_tensorrt_bin", } diff --git a/onnxruntime/python/tools/transformers/models/stable_diffusion/README.md b/onnxruntime/python/tools/transformers/models/stable_diffusion/README.md index dc83f4dc220f0..2506ffe8a3f50 100644 --- a/onnxruntime/python/tools/transformers/models/stable_diffusion/README.md +++ b/onnxruntime/python/tools/transformers/models/stable_diffusion/README.md @@ -143,29 +143,11 @@ conda activate py310 ### Setup Environment (CUDA) without docker -First, we need install CUDA 11.8 or 12.x, [cuDNN](https://docs.nvidia.com/deeplearning/cudnn/install-guide/index.html), and [TensorRT](https://docs.nvidia.com/deeplearning/tensorrt/install-guide/index.html) in the machine. 
+First, we need to install CUDA 12.x, [cuDNN](https://docs.nvidia.com/deeplearning/cudnn/install-guide/index.html), and [TensorRT](https://docs.nvidia.com/deeplearning/tensorrt/install-guide/index.html) in the machine. The verison of CuDNN can be found in https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html#requirements. The version of TensorRT can be found in https://onnxruntime.ai/docs/execution-providers/TensorRT-ExecutionProvider.html#requirements. -#### CUDA 11.8: - -In the Conda environment, install PyTorch 2.1 up to 2.3.1, and other required packages like the following: -``` -pip install torch>=2.1,<2.4 --index-url https://download.pytorch.org/whl/cu118 -pip install --upgrade polygraphy onnx-graphsurgeon --extra-index-url https://pypi.ngc.nvidia.com -pip install -r requirements/cuda11/requirements.txt -``` - -For Windows, install nvtx like the following: -``` -conda install -c conda-forge nvtx -``` - -We cannot directly `pip install tensorrt` for CUDA 11. Follow https://github.com/NVIDIA/TensorRT/issues/2773 to install TensorRT for CUDA 11 in Linux. - -For Windows, pip install the tensorrt wheel in the downloaded TensorRT zip file instead. Like `pip install tensorrt-8.6.1.6.windows10.x86_64.cuda-11.8\tensorrt-8.6.1.6\python\tensorrt-8.6.1-cp310-none-win_amd64.whl`. - #### CUDA 12.*: The official package of onnxruntime-gpu 1.19.x is built for CUDA 12.x. 
You can install it and other python packages like the following: ``` diff --git a/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml b/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml index 4a5ba81943b40..71c3edfaee8bd 100644 --- a/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml +++ b/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml @@ -76,12 +76,12 @@ variables: - name: ReleaseVersionSuffix value: '' - name: win_trt_version - value: 11.8 + value: 12.2 - name: win_trt_home - value: $(Agent.TempDirectory)\${{ variables.win_trt_folder_cuda11 }} + value: $(Agent.TempDirectory)\${{ variables.win_trt_folder_cuda12 }} - name: win_cuda_home - value: $(Agent.TempDirectory)\v11.8 + value: $(Agent.TempDirectory)\v12.2 stages: - template: stages/set_packaging_variables_stage.yml @@ -110,13 +110,13 @@ stages: - template: stages/java-cuda-packaging-stage.yml parameters: - CudaVersion: 11.8 + CudaVersion: 12.2 SpecificArtifact: ${{ parameters.SpecificArtifact }} BuildId: ${{ parameters.BuildId }} - template: stages/nuget-combine-cuda-stage.yml parameters: - CudaVersion: 11.8 + CudaVersion: 12.2 RunOnnxRuntimeTests: ${{ parameters.RunOnnxRuntimeTests }} UseIncreasedTimeoutForTests: ${{ parameters.UseIncreasedTimeoutForTests }} win_trt_home: ${{ variables.win_trt_home }} diff --git a/tools/ci_build/github/azure-pipelines/cuda-packaging-pipeline.yml b/tools/ci_build/github/azure-pipelines/cuda-packaging-pipeline.yml index 6eb1e858a812b..83d41f23d9de2 100644 --- a/tools/ci_build/github/azure-pipelines/cuda-packaging-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/cuda-packaging-pipeline.yml @@ -50,7 +50,6 @@ parameters: type: string default: '12.2' values: - - 11.8 - 12.2 variables: diff --git a/tools/ci_build/github/azure-pipelines/linux-gpu-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/linux-gpu-ci-pipeline.yml index 665b7435eed66..492bada97acc9 100644 
--- a/tools/ci_build/github/azure-pipelines/linux-gpu-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/linux-gpu-ci-pipeline.yml @@ -33,7 +33,6 @@ parameters: type: string default: '12.2' values: - - 11.8 - 12.2 - name: SpecificArtifact diff --git a/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-ci-pipeline.yml index dd5288ab3a436..c6eabed7c5b4a 100644 --- a/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-ci-pipeline.yml @@ -33,7 +33,6 @@ parameters: type: string default: '12.2' values: - - 11.8 - 12.2 variables: diff --git a/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-cuda-minimal-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-cuda-minimal-ci-pipeline.yml index ad9d9bfc4b5e5..0ec05909b846f 100644 --- a/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-cuda-minimal-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-cuda-minimal-ci-pipeline.yml @@ -33,7 +33,6 @@ parameters: type: string default: '12.2' values: - - 11.8 - 12.2 variables: diff --git a/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-daily-perf-pipeline.yml b/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-daily-perf-pipeline.yml index 2dc597fcb2351..ac9153e1b4887 100644 --- a/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-daily-perf-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-daily-perf-pipeline.yml @@ -10,7 +10,6 @@ parameters: type: string default: 10.9_cuda12.8_cudnn9 values: - - 10.9_cuda11.8_cudnn8 - 10.9_cuda12.8_cudnn9 - BIN diff --git a/tools/ci_build/github/azure-pipelines/post-merge-jobs.yml b/tools/ci_build/github/azure-pipelines/post-merge-jobs.yml index 78c07c28d6f4e..2cecedb401724 100644 --- a/tools/ci_build/github/azure-pipelines/post-merge-jobs.yml +++ 
b/tools/ci_build/github/azure-pipelines/post-merge-jobs.yml @@ -4,7 +4,6 @@ parameters: type: string default: '12.2' values: - - 11.8 - 12.2 variables: diff --git a/tools/ci_build/github/azure-pipelines/py-cuda-alt-package-test-pipeline.yml b/tools/ci_build/github/azure-pipelines/py-cuda-alt-package-test-pipeline.yml deleted file mode 100644 index 960b59f93bee0..0000000000000 --- a/tools/ci_build/github/azure-pipelines/py-cuda-alt-package-test-pipeline.yml +++ /dev/null @@ -1,57 +0,0 @@ -resources: - pipelines: - - pipeline: build - source: 'Python CUDA ALT Packaging Pipeline' - trigger: true - branch: main # branch to pick the artifact, Used only for manual triggered pipeline runs for testing the pipeline itself - -stages: - # ****The following Stage depend on all previous tags. *** - # GPU resources are very limited, - # To utilize gpu resource more efficiently, run GPU job only after all cpus jobs succeed - - stage: Linux_Test_CUDA_Alt_x86_64_stage - dependsOn: - jobs: - - template: templates/py-packaging-linux-test-cuda.yml - parameters: - arch: 'x86_64' - machine_pool: 'Onnxruntime-Linux-GPU' - python_wheel_suffix: '_gpu' - timeout: 480 - docker_base_image: onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda11_x64_almalinux8_gcc11:20250124.1 - cuda_version: '11.8' - - - stage: Republish_Wheels - dependsOn: - jobs: - - job: Python_Publishing_GPU - pool: 'onnxruntime-Ubuntu2204-AMD-CPU' - steps: - - checkout: none - - download: build - displayName: 'Download Pipeline Artifact - onnxruntime_gpu' - artifact: 'onnxruntime_gpu' - patterns: '*.whl' - - download: build - displayName: 'Download Pipeline Artifact - Win GPU 3.10' - artifact: 'win_gpu_wheel_3.10' - patterns: '*.whl' - - download: build - displayName: 'Download Pipeline Artifact - Win GPU 3.11' - artifact: 'win_gpu_wheel_3.11' - patterns: '*.whl' - - download: build - displayName: 'Download Pipeline Artifact - Win GPU 3.12' - artifact: 'win_gpu_wheel_3.12' - patterns: '*.whl' - - 
download: build - displayName: 'Download Pipeline Artifact - Win GPU 3.13' - artifact: 'win_gpu_wheel_3.13' - patterns: '*.whl' - - - script: find $(Pipeline.Workspace) -name \*win_amd64.whl -exec mv {} $(Pipeline.Workspace)/build/onnxruntime_gpu \; - displayName: 'Merge files together' - - - publish: $(Pipeline.Workspace)/build/onnxruntime_gpu - artifact: whl - displayName: Republish artifacts \ No newline at end of file diff --git a/tools/ci_build/github/azure-pipelines/py-cuda-alt-packaging-pipeline.yml b/tools/ci_build/github/azure-pipelines/py-cuda-alt-packaging-pipeline.yml deleted file mode 100644 index b4870db90a755..0000000000000 --- a/tools/ci_build/github/azure-pipelines/py-cuda-alt-packaging-pipeline.yml +++ /dev/null @@ -1,60 +0,0 @@ -trigger: none -resources: - repositories: - - repository: 1esPipelines - type: git - name: 1ESPipelineTemplates/1ESPipelineTemplates - ref: refs/tags/release -parameters: - - name: enable_linux_cuda - type: boolean - default: true - - - name: enable_windows_cuda - type: boolean - default: true - - - name: cmake_build_type - type: string - default: 'Release' - values: - - Debug - - Release - - RelWithDebInfo - - MinSizeRel -extends: - # The pipeline extends the 1ES PT which will inject different SDL and compliance tasks. - # For non-production pipelines, use "Unofficial" as defined below. - # For productions pipelines, use "Official". 
- template: v1/1ES.Official.PipelineTemplate.yml@1esPipelines - parameters: - sdl: - componentgovernance: - ignoreDirectories: '$(Build.Repository.LocalPath)/cmake/external/emsdk/upstream/emscripten/tests,$(Build.Repository.LocalPath)/cmake/external/onnx/third_party/benchmark,$(Build.Repository.LocalPath)/cmake/external/onnx/third_party/pybind11,$(Build.Repository.LocalPath)/cmake/external/onnx/third_party/pybind11/tests,$(Build.Repository.LocalPath)/cmake/external/onnxruntime-extensions,$(Build.Repository.LocalPath)/js/react_native/e2e/node_modules,$(Build.Repository.LocalPath)/js/node_modules,$(Build.Repository.LocalPath)/onnxruntime-inference-examples,$(Build.SourcesDirectory)/cmake/external/emsdk/upstream/emscripten/tests,$(Build.SourcesDirectory)/cmake/external/onnx/third_party/benchmark,$(Build.SourcesDirectory)/cmake/external/onnx/third_party/pybind11,$(Build.SourcesDirectory)/cmake/external/onnx/third_party/pybind11/tests,$(Build.SourcesDirectory)/cmake/external/onnxruntime-extensions,$(Build.SourcesDirectory)/js/react_native/e2e/node_modules,$(Build.SourcesDirectory)/js/node_modules,$(Build.SourcesDirectory)/onnxruntime-inference-examples,$(Build.BinariesDirectory)' - alertWarningLevel: High - failOnAlert: false - verbosity: Normal - timeout: 3600 - tsa: - enabled: true - codeSignValidation: - enabled: true - break: true - policheck: - enabled: true - exclusionsFile: '$(Build.SourcesDirectory)\tools\ci_build\policheck_exclusions.xml' - codeql: - compiled: - enabled: false - justificationForDisabling: 'CodeQL is taking nearly 4 hours resulting in timeouts in our production pipelines' - pool: - name: 'onnxruntime-Win-CPU-2022' # Name of your hosted pool - os: windows # OS of the image. This value cannot be a variable. 
Allowed values: windows, linux, macOS - - stages: - - template: stages/py-gpu-packaging-stage.yml - parameters: - enable_linux_cuda: ${{ parameters.enable_linux_cuda }} - enable_windows_cuda: ${{ parameters.enable_windows_cuda }} - cmake_build_type: ${{ parameters.cmake_build_type }} - cuda_version: '11.8' diff --git a/tools/ci_build/github/azure-pipelines/stages/jobs/py-linux-cuda-package-test-job.yml b/tools/ci_build/github/azure-pipelines/stages/jobs/py-linux-cuda-package-test-job.yml index 85366ffc28b3a..49d65b10b393c 100644 --- a/tools/ci_build/github/azure-pipelines/stages/jobs/py-linux-cuda-package-test-job.yml +++ b/tools/ci_build/github/azure-pipelines/stages/jobs/py-linux-cuda-package-test-job.yml @@ -2,9 +2,8 @@ parameters: - name: CudaVersion displayName: 'CUDA version' type: string - default: '11.8' + default: '12.2' values: - - 11.8 - 12.2 - name: machine_pool type: string diff --git a/tools/ci_build/github/azure-pipelines/stages/nuget-linux-cuda-packaging-stage.yml b/tools/ci_build/github/azure-pipelines/stages/nuget-linux-cuda-packaging-stage.yml index 06576ac05569e..b1d0625c3b11b 100644 --- a/tools/ci_build/github/azure-pipelines/stages/nuget-linux-cuda-packaging-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/nuget-linux-cuda-packaging-stage.yml @@ -1,7 +1,7 @@ parameters: - name: CudaVersion type: string - default: '11.8' + default: '12.2' - name: buildJava type: boolean - name: buildNodejs diff --git a/tools/ci_build/github/azure-pipelines/stages/py-gpu-packaging-stage.yml b/tools/ci_build/github/azure-pipelines/stages/py-gpu-packaging-stage.yml index 362c2a3d74083..eea9b672eef3d 100644 --- a/tools/ci_build/github/azure-pipelines/stages/py-gpu-packaging-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/py-gpu-packaging-stage.yml @@ -36,7 +36,6 @@ parameters: displayName: 'CUDA version. Windows Only.' 
default: '12.2' values: - - 11.8 - 12.2 - name: PythonVersions diff --git a/tools/ci_build/github/azure-pipelines/stages/py-linux-gpu-stage.yml b/tools/ci_build/github/azure-pipelines/stages/py-linux-gpu-stage.yml index 60b2e04e82136..17eb10fae7eb2 100644 --- a/tools/ci_build/github/azure-pipelines/stages/py-linux-gpu-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/py-linux-gpu-stage.yml @@ -22,9 +22,8 @@ parameters: - name: cuda_version type: string - default: '11.8' + default: '12.2' values: - - 11.8 - 12.2 stages: @@ -67,8 +66,8 @@ stages: - template: ../templates/get-docker-image-steps.yml parameters: - Dockerfile: tools/ci_build/github/linux/docker/inference/${{ parameters.arch }}/python/cuda/Dockerfile - Context: tools/ci_build/github/linux/docker/inference/${{ parameters.arch }}/python/cuda + Dockerfile: tools/ci_build/github/linux/docker/inference/${{ parameters.arch }}/default/cuda12/Dockerfile + Context: tools/ci_build/github/linux/docker/inference/${{ parameters.arch }}/default/cuda12 DockerBuildArgs: "--build-arg BASEIMAGE=${{ parameters.docker_base_image }} --build-arg TRT_VERSION=${{ variables.trt_version }} --build-arg BUILD_UID=$( id -u )" Repository: onnxruntimecuda${{ replace(parameters.cuda_version, '.', '') }}xtrt86build${{ parameters.arch }} diff --git a/tools/ci_build/github/azure-pipelines/stages/py-win-gpu-stage.yml b/tools/ci_build/github/azure-pipelines/stages/py-win-gpu-stage.yml index 32004366fb947..fe2b85976d38b 100644 --- a/tools/ci_build/github/azure-pipelines/stages/py-win-gpu-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/py-win-gpu-stage.yml @@ -19,9 +19,8 @@ parameters: default: '' - name: CudaVersion type: string - default: '11.8' + default: '12.2' values: - - 11.8 - 12.2 - name: cmake_build_type diff --git a/tools/ci_build/github/azure-pipelines/templates/jobs/set-winenv.yml b/tools/ci_build/github/azure-pipelines/templates/jobs/set-winenv.yml index 142b76ee43b99..96436883fb8b8 100644 --- 
a/tools/ci_build/github/azure-pipelines/templates/jobs/set-winenv.yml +++ b/tools/ci_build/github/azure-pipelines/templates/jobs/set-winenv.yml @@ -10,12 +10,12 @@ parameters: - name: PrimaryCUDAVersion type: string default: '12.2' - - name: SecondaryCUDAVersion - type: string - default: '11.8' - - name: win_trt_folder_cuda11 - type: string - default: 'TensorRT-10.9.0.34.Windows10.x86_64.cuda-11.8' +# - name: SecondaryCUDAVersion +# type: string +# default: '11.8' +# - name: win_trt_folder_cuda11 +# type: string +# default: 'TensorRT-10.9.0.34.Windows10.x86_64.cuda-11.8' - name: win_trt_folder_cuda12 type: string default: 'TensorRT-10.9.0.34.Windows10.x86_64.cuda-12.8' @@ -25,16 +25,16 @@ steps: - powershell: | azcopy.exe cp --recursive "https://lotusscus.blob.core.windows.net/models/cuda_sdk/v${{ parameters.PrimaryCUDAVersion }}" $(Agent.TempDirectory) displayName: 'Download Primary CUDA SDK v${{ parameters.PrimaryCUDAVersion }}' - - powershell: | - azcopy.exe cp --recursive "https://lotusscus.blob.core.windows.net/models/cuda_sdk/v${{ parameters.SecondaryCUDAVersion }}" $(Agent.TempDirectory) - displayName: 'Download Secondary CUDA SDK v${{ parameters.SecondaryCUDAVersion }}' +# - powershell: | +# azcopy.exe cp --recursive "https://lotusscus.blob.core.windows.net/models/cuda_sdk/v${{ parameters.SecondaryCUDAVersion }}" $(Agent.TempDirectory) +# displayName: 'Download Secondary CUDA SDK v${{ parameters.SecondaryCUDAVersion }}' - ${{ if eq(parameters.DownloadTRT, 'true') }}: - powershell: | azcopy.exe cp --recursive "https://lotusscus.blob.core.windows.net/models/local/${{ parameters.win_trt_folder_cuda12 }}" $(Agent.TempDirectory) displayName: 'Download ${{ parameters.win_trt_folder_cuda12 }}' - - powershell: | - azcopy.exe cp --recursive "https://lotusscus.blob.core.windows.net/models/local/${{ parameters.win_trt_folder_cuda11 }}" $(Agent.TempDirectory) - displayName: 'Download ${{ parameters.win_trt_folder_cuda11 }}' +# - powershell: | +# azcopy.exe cp 
--recursive "https://lotusscus.blob.core.windows.net/models/local/${{ parameters.win_trt_folder_cuda11 }}" $(Agent.TempDirectory) +# displayName: 'Download ${{ parameters.win_trt_folder_cuda11 }}' - task: BatchScript@1 displayName: 'setup env' diff --git a/tools/ci_build/github/azure-pipelines/templates/py-packaging-linux-test-cuda.yml b/tools/ci_build/github/azure-pipelines/templates/py-packaging-linux-test-cuda.yml index e7c702042b441..96ff9943dc178 100644 --- a/tools/ci_build/github/azure-pipelines/templates/py-packaging-linux-test-cuda.yml +++ b/tools/ci_build/github/azure-pipelines/templates/py-packaging-linux-test-cuda.yml @@ -18,9 +18,8 @@ parameters: - name: cuda_version type: string - default: '11.8' + default: '12.2' values: - - 11.8 - 12.2 # TODO: Ideally it should fetch information from the build that triggers it @@ -91,8 +90,8 @@ jobs: - template: get-docker-image-steps.yml parameters: - Dockerfile: tools/ci_build/github/linux/docker/inference/${{ parameters.arch }}/python/cuda/Dockerfile - Context: tools/ci_build/github/linux/docker/inference/${{ parameters.arch }}/python/cuda + Dockerfile: tools/ci_build/github/linux/docker/inference/${{ parameters.arch }}/default/cuda12/Dockerfile + Context: tools/ci_build/github/linux/docker/inference/${{ parameters.arch }}/default/cuda12 DockerBuildArgs: "--build-arg BASEIMAGE=${{ parameters.docker_base_image }} --build-arg TRT_VERSION=${{ variables.trt_version }} --build-arg BUILD_UID=$( id -u )" Repository: onnxruntimecuda${{ replace(parameters.cuda_version, '.', '') }}xtrt86build${{ parameters.arch }} diff --git a/tools/ci_build/github/azure-pipelines/win-gpu-tensorrt-cuda-minimal-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/win-gpu-tensorrt-cuda-minimal-ci-pipeline.yml index c68ba01485db2..c12bb3552920c 100644 --- a/tools/ci_build/github/azure-pipelines/win-gpu-tensorrt-cuda-minimal-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/win-gpu-tensorrt-cuda-minimal-ci-pipeline.yml @@ -33,7 
+33,6 @@ parameters: type: string default: '12.2' values: - - 11.8 - 12.2 variables: diff --git a/tools/ci_build/github/linux/docker/Dockerfile.package_ubi8_cuda11_8_tensorrt8_6 b/tools/ci_build/github/linux/docker/Dockerfile.package_ubi8_cuda11_8_tensorrt8_6 deleted file mode 100644 index 9de88d1664b82..0000000000000 --- a/tools/ci_build/github/linux/docker/Dockerfile.package_ubi8_cuda11_8_tensorrt8_6 +++ /dev/null @@ -1,46 +0,0 @@ -# -------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. -# -------------------------------------------------------------- -# Dockerfile to Test ONNX Runtime on UBI8 with CUDA 11.8 and TensorRT 8.6 - -# Build base image with required system packages -ARG BASEIMAGE=nvidia/cuda:11.8.0-cudnn8-devel-ubi8 -ARG TRT_VERSION=8.6.1.6-1.cuda11.8 -FROM $BASEIMAGE AS base -ARG TRT_VERSION -ENV PATH=/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/src/tensorrt/bin:${PATH} - -RUN dnf install -y bash wget &&\ - dnf clean dbcache - -# Install python3 -RUN dnf install -y \ - python3.10 \ - python310-pip \ - python310-wheel &&\ - cd /usr/local/bin &&\ - ln -s /usr/bin/python3 python3.10 &&\ - ln -s /usr/bin/pip3 pip3.10; - -RUN pip3 install --upgrade pip -RUN pip3 install setuptools>=68.2.2 - -# Install TensorRT -RUN dnf install -y libnvinfer8 libnvonnxparsers8 libnvparsers8 libnvinfer-plugin8 libnvinfer-lean8 libnvinfer-vc-plugin8 libnvinfer-dispatch8 -RUN dnf downgrade -y libnvinfer8-${TRT_VERSION} libnvinfer8-${TRT_VERSION} libnvonnxparsers8-${TRT_VERSION} libnvparsers8-${TRT_VERSION} libnvinfer-plugin8-${TRT_VERSION} libnvinfer-lean8-${TRT_VERSION} libnvinfer-vc-plugin8-${TRT_VERSION} libnvinfer-dispatch8-${TRT_VERSION} &&\ - dnf install -y dnf-plugin-versionlock &&\ - dnf versionlock libnvinfer8 libnvonnxparsers8 libnvparsers8 libnvinfer-plugin8 libnvinfer-lean8 libnvinfer-vc-plugin8 libnvinfer-dispatch8 -RUN dnf clean dbcache - - -ADD scripts 
/tmp/scripts -RUN cd /tmp/scripts && /tmp/scripts/install_dotnet.sh && /tmp/scripts/install_java.sh && rm -rf /tmp/scripts - -# Build final image from base. -FROM base as final -ARG BUILD_USER=onnxruntimedev -ARG BUILD_UID=1000 -RUN adduser --uid $BUILD_UID $BUILD_USER -WORKDIR /home/$BUILD_USER -USER $BUILD_USER diff --git a/tools/ci_build/github/linux/docker/Dockerfile.package_ubuntu_2004_gpu b/tools/ci_build/github/linux/docker/Dockerfile.package_ubuntu_2004_gpu index 9b392fa0e3a68..8a84b9b940306 100644 --- a/tools/ci_build/github/linux/docker/Dockerfile.package_ubuntu_2004_gpu +++ b/tools/ci_build/github/linux/docker/Dockerfile.package_ubuntu_2004_gpu @@ -5,8 +5,8 @@ # Dockerfile to run ONNXRuntime with TensorRT integration # Build base image with required system packages -ARG BASEIMAGE=nvidia/cuda:11.8.0-cudnn8-devel-ubuntu20.04 -ARG TRT_VERSION=10.9.0.34-1+cuda11.8 +ARG BASEIMAGE=nvidia/cuda:12.2.2-cudnn8-devel-ubuntu20.04 +ARG TRT_VERSION=10.9.0.34-1+cuda12.8 ARG LD_LIBRARY_PATH_ARG=/usr/local/lib64:/usr/local/cuda/lib64 FROM $BASEIMAGE AS base ARG TRT_VERSION diff --git a/tools/ci_build/github/linux/docker/Dockerfile.ubuntu_cuda11_tensorrt10 b/tools/ci_build/github/linux/docker/Dockerfile.ubuntu_cuda11_tensorrt10 deleted file mode 100644 index f68f488a9d8b8..0000000000000 --- a/tools/ci_build/github/linux/docker/Dockerfile.ubuntu_cuda11_tensorrt10 +++ /dev/null @@ -1,109 +0,0 @@ -# -------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. 
-# -------------------------------------------------------------- -# Dockerfile to run ONNXRuntime with TensorRT integration - -# Build base image with required system packages -FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04 AS base - -# The local directory into which to build and install CMAKE -ARG ONNXRUNTIME_LOCAL_CODE_DIR=/code - -ENV PATH=/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/src/tensorrt/bin:${ONNXRUNTIME_LOCAL_CODE_DIR}/cmake-3.31.5-linux-x86_64/bin:/opt/miniconda/bin:${PATH} -ENV DEBIAN_FRONTEND=noninteractive - -RUN apt-get update &&\ - apt-get install -y sudo git bash unattended-upgrades wget -RUN unattended-upgrade - -# Install python3 -RUN apt-get install -y --no-install-recommends \ - python3 \ - python3-pip \ - python3-dev \ - python3-wheel &&\ - cd /usr/local/bin &&\ - ln -s /usr/bin/python3 python &&\ - ln -s /usr/bin/pip3 pip; - -RUN pip install --upgrade pip -RUN pip install psutil setuptools>=68.2.2 - -# Install TensorRT -RUN TRT_VERSION="10.9.0.34-1+cuda11.8" &&\ - apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/7fa2af80.pub &&\ - apt-get update &&\ - apt-get install -y \ - libnvinfer-dev=${TRT_VERSION} \ - libnvinfer-dispatch-dev=${TRT_VERSION} \ - libnvinfer-dispatch10=${TRT_VERSION} \ - libnvinfer-headers-dev=${TRT_VERSION} \ - libnvinfer-headers-plugin-dev=${TRT_VERSION} \ - libnvinfer-lean-dev=${TRT_VERSION} \ - libnvinfer-lean10=${TRT_VERSION} \ - libnvinfer-plugin-dev=${TRT_VERSION} \ - libnvinfer-plugin10=${TRT_VERSION} \ - libnvinfer-vc-plugin-dev=${TRT_VERSION} \ - libnvinfer-vc-plugin10=${TRT_VERSION} \ - libnvinfer10=${TRT_VERSION} \ - libnvonnxparsers-dev=${TRT_VERSION} \ - libnvonnxparsers10=${TRT_VERSION} \ - tensorrt-dev=${TRT_VERSION} \ - libnvinfer-bin=${TRT_VERSION} - -# Compile trtexec if not installed -RUN if [ ! -d /usr/src/tensorrt/bin ] || [ ! 
-f /usr/src/tensorrt/bin/trtexec ]; then \ - cd /usr/src/tensorrt/samples/trtexec && make; \ - fi - -# Install Valgrind -RUN apt-get install -y valgrind - -# Build final image from base. Builds ORT. -FROM base AS final -ARG BUILD_USER=onnxruntimedev -ARG BUILD_UID=1000 -RUN adduser --gecos 'onnxruntime Build User' --disabled-password $BUILD_USER --uid $BUILD_UID -USER $BUILD_USER - -# ONNX Runtime arguments - -# URL to the github repo from which to clone ORT. -ARG ONNXRUNTIME_REPO=https://github.com/Microsoft/onnxruntime - -# The local directory into which to clone ORT. -ARG ONNXRUNTIME_LOCAL_CODE_DIR=/code - -# The git branch of ORT to checkout and build. -ARG ONNXRUNTIME_BRANCH=main - -# Optional. The specific commit to pull and build from. If not set, the latest commit is used. -ARG ONNXRUNTIME_COMMIT_ID - -# The supported CUDA architecture -ARG CMAKE_CUDA_ARCHITECTURES=75 - -WORKDIR ${ONNXRUNTIME_LOCAL_CODE_DIR} - -# Clone ORT repository with branch -RUN git clone --single-branch --branch ${ONNXRUNTIME_BRANCH} --recursive ${ONNXRUNTIME_REPO} onnxruntime &&\ - /bin/sh onnxruntime/dockerfiles/scripts/install_common_deps.sh - -WORKDIR ${ONNXRUNTIME_LOCAL_CODE_DIR}/onnxruntime - -# Reset to a specific commit if specified by build args. 
-RUN if [ -z "$ONNXRUNTIME_COMMIT_ID" ] ; then echo "Building branch ${ONNXRUNTIME_BRANCH}" ;\ - else echo "Building branch ${ONNXRUNTIME_BRANCH} @ commit ${ONNXRUNTIME_COMMIT_ID}" &&\ - git reset --hard ${ONNXRUNTIME_COMMIT_ID} && git submodule update --recursive ; fi - -# Build ORT -ENV CUDA_MODULE_LOADING="LAZY" -ARG PARSER_CONFIG="" -RUN /bin/sh build.sh ${PARSER_CONFIG} --parallel --build_shared_lib --cuda_home /usr/local/cuda --cudnn_home /usr/lib/x86_64-linux-gnu/ --use_tensorrt --tensorrt_home /usr/lib/x86_64-linux-gnu/ --config Release --build_wheel --skip_tests --skip_submodule_sync --cmake_extra_defines '"CMAKE_CUDA_ARCHITECTURES='${CMAKE_CUDA_ARCHITECTURES}'"' - -# Switch to root to continue following steps of CI -USER root - -# Intall ORT wheel -RUN pip install ${ONNXRUNTIME_LOCAL_CODE_DIR}/onnxruntime/build/Linux/Release/dist/*.whl diff --git a/tools/ci_build/github/linux/docker/inference/x86_64/default/cuda11/Dockerfile b/tools/ci_build/github/linux/docker/inference/x86_64/default/cuda11/Dockerfile deleted file mode 100644 index 24287fd34d3ea..0000000000000 --- a/tools/ci_build/github/linux/docker/inference/x86_64/default/cuda11/Dockerfile +++ /dev/null @@ -1,45 +0,0 @@ -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. 
- -# This file is used by Zip-Nuget Packaging NoContribOps Pipeline,Zip-Nuget-Java Packaging Pipeline -FROM onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda11_x64_almalinux8_gcc11_dotnet:20250124.1 - -ARG TRT_VERSION -#Install TensorRT only if TRT_VERSION is not empty -RUN if [ -n "$TRT_VERSION" ]; then \ - echo "TRT_VERSION is $TRT_VERSION" && \ - dnf -y install \ - libnvinfer10-${TRT_VERSION} \ - libnvinfer-headers-devel-${TRT_VERSION} \ - libnvinfer-devel-${TRT_VERSION} \ - libnvinfer-lean10-${TRT_VERSION} \ - libnvonnxparsers10-${TRT_VERSION} \ - libnvonnxparsers-devel-${TRT_VERSION} \ - libnvinfer-dispatch10-${TRT_VERSION} \ - libnvinfer-plugin10-${TRT_VERSION} \ - libnvinfer-vc-plugin10-${TRT_VERSION} \ - libnvinfer-bin-${TRT_VERSION} \ - libnvinfer-plugin10-${TRT_VERSION} \ - libnvinfer-plugin-devel-${TRT_VERSION} \ - libnvinfer-vc-plugin-devel-${TRT_VERSION} \ - libnvinfer-lean-devel-${TRT_VERSION} \ - libnvinfer-dispatch-devel-${TRT_VERSION} \ - libnvinfer-headers-plugin-devel-${TRT_VERSION} && \ - dnf clean dbcache ; \ -else \ - echo "TRT_VERSION is none skipping Tensor RT Installation" ; \ -fi - -ENV PATH=/usr/lib/jvm/msopenjdk-17/bin:$PATH -ENV LANG=en_US.UTF-8 -ENV LC_ALL=en_US.UTF-8 -ENV JAVA_HOME=/usr/lib/jvm/msopenjdk-17 -ENV CUDAHOSTCXX=/opt/rh/gcc-toolset-11/root/usr/bin/g++ -ADD scripts /tmp/scripts -RUN cd /tmp/scripts && /tmp/scripts/install_deps.sh && rm -rf /tmp/scripts - -ARG BUILD_UID=1001 -ARG BUILD_USER=onnxruntimedev -RUN adduser --uid $BUILD_UID $BUILD_USER -WORKDIR /home/$BUILD_USER -USER $BUILD_USER diff --git a/tools/ci_build/github/linux/docker/inference/x86_64/default/cuda11/scripts/install_deps.sh b/tools/ci_build/github/linux/docker/inference/x86_64/default/cuda11/scripts/install_deps.sh deleted file mode 100755 index e98429946f4b3..0000000000000 --- a/tools/ci_build/github/linux/docker/inference/x86_64/default/cuda11/scripts/install_deps.sh +++ /dev/null @@ -1,56 +0,0 @@ -#!/bin/bash -set -e -x - -# 
Download a file from internet -function GetFile { - local uri=$1 - local path=$2 - local force=${3:-false} - local download_retries=${4:-5} - local retry_wait_time_seconds=${5:-30} - - if [[ -f $path ]]; then - if [[ $force = false ]]; then - echo "File '$path' already exists. Skipping download" - return 0 - else - rm -rf $path - fi - fi - - if [[ -f $uri ]]; then - echo "'$uri' is a file path, copying file to '$path'" - cp $uri $path - return $? - fi - - echo "Downloading $uri" - # Use aria2c if available, otherwise use curl - if command -v aria2c > /dev/null; then - aria2c -q -d $(dirname $path) -o $(basename $path) "$uri" - else - curl "$uri" -sSL --retry $download_retries --retry-delay $retry_wait_time_seconds --create-dirs -o "$path" --fail - fi - - return $? -} -mkdir -p /tmp/src - -cd /tmp/src - - -echo "Installing Node.js" -CPU_ARCH=`uname -m` -if [[ "$CPU_ARCH" = "x86_64" ]]; then - NODEJS_ARCH=x64 -elif [[ "$CPU_ARCH" = "aarch64" ]]; then - NODEJS_ARCH=arm64 -else - NODEJS_ARCH=$CPU_ARCH -fi -# The EOL for nodejs v18.17.1 LTS is April 2025 -GetFile https://nodejs.org/dist/v18.17.1/node-v18.17.1-linux-${NODEJS_ARCH}.tar.gz /tmp/src/node-v18.17.1-linux-${NODEJS_ARCH}.tar.gz -tar --strip 1 -xf /tmp/src/node-v18.17.1-linux-${NODEJS_ARCH}.tar.gz -C /usr - -cd / -rm -rf /tmp/src diff --git a/tools/ci_build/github/linux/docker/inference/x86_64/python/cuda/Dockerfile b/tools/ci_build/github/linux/docker/inference/x86_64/python/cuda/Dockerfile deleted file mode 100644 index d1df74e2a4506..0000000000000 --- a/tools/ci_build/github/linux/docker/inference/x86_64/python/cuda/Dockerfile +++ /dev/null @@ -1,45 +0,0 @@ -# The default ARGs are for cuda 11.8 with cudnn8, TensorRT is optional -# Please overwrite BASEIMAGE, TRT_VERSION and other arguments with -# --docker-build-args ' --build-arg BASEIMAGE=other_base_image --build-arg TRT_VERSION=other_trt_version etc...' 
-# for other cuda version and TRT version -ARG BASEIMAGE=nvidia/cuda:11.8.0-cudnn8-devel-ubi8 - -FROM $BASEIMAGE -ARG TRT_VERSION=10.9.0.34-1.cuda11.8 - -#Install TensorRT only if TRT_VERSION is not empty -RUN if [ -n "${TRT_VERSION}" ]; then \ - echo "TRT_VERSION is $TRT_VERSION" && \ - dnf -y install \ - libnvinfer10-${TRT_VERSION} \ - libnvinfer-headers-devel-${TRT_VERSION} \ - libnvinfer-devel-${TRT_VERSION} \ - libnvinfer-lean10-${TRT_VERSION} \ - libnvonnxparsers10-${TRT_VERSION} \ - libnvonnxparsers-devel-${TRT_VERSION} \ - libnvinfer-dispatch10-${TRT_VERSION} \ - libnvinfer-plugin10-${TRT_VERSION} \ - libnvinfer-vc-plugin10-${TRT_VERSION} \ - libnvinfer-bin-${TRT_VERSION} \ - libnvinfer-plugin10-${TRT_VERSION} \ - libnvinfer-plugin-devel-${TRT_VERSION} \ - libnvinfer-vc-plugin-devel-${TRT_VERSION} \ - libnvinfer-lean-devel-${TRT_VERSION} \ - libnvinfer-dispatch-devel-${TRT_VERSION} \ - libnvinfer-headers-plugin-devel-${TRT_VERSION} && \ - dnf clean dbcache ; \ -else \ - echo "TRT_VERSION is x${TRT_VERSION} skipping Tensor RT Installation" ; \ -fi - -ENV PATH=/usr/local/cuda/bin:$PATH -ENV CUDA_MODULE_LOADING="LAZY" - -ADD scripts /tmp/scripts -RUN cd /tmp/scripts && /tmp/scripts/install_centos.sh && rm -rf /tmp/scripts - -ARG BUILD_UID=1001 -ARG BUILD_USER=onnxruntimedev -RUN adduser --uid $BUILD_UID $BUILD_USER -WORKDIR /home/$BUILD_USER -USER $BUILD_USER diff --git a/tools/ci_build/github/linux/docker/inference/x86_64/python/cuda/scripts/install_centos.sh b/tools/ci_build/github/linux/docker/inference/x86_64/python/cuda/scripts/install_centos.sh deleted file mode 100755 index d0b58ed28b8c9..0000000000000 --- a/tools/ci_build/github/linux/docker/inference/x86_64/python/cuda/scripts/install_centos.sh +++ /dev/null @@ -1,18 +0,0 @@ -#!/bin/bash -set -e - -os_major_version=$(tr -dc '0-9.' < /etc/redhat-release |cut -d \. 
-f1) - -echo "installing for os major version : $os_major_version" -dnf install -y glibc-langpack-\* -yum install -y which redhat-lsb-core expat-devel tar unzip zlib-devel make bzip2 bzip2-devel perl-IPC-Cmd openssl-devel wget - -echo "installing rapidjson for AzureEP" -wget https://github.com/Tencent/rapidjson/archive/refs/tags/v1.1.0.tar.gz -tar zxvf v1.1.0.tar.gz -cd rapidjson-1.1.0 -mkdir build -cd build -cmake .. -cmake --install . -cd ../..