diff --git a/.github/ISSUE_TEMPLATE/03-mobile.yml b/.github/ISSUE_TEMPLATE/03-mobile.yml index 7c6de5aad523d..07bb40ff94a2e 100644 --- a/.github/ISSUE_TEMPLATE/03-mobile.yml +++ b/.github/ISSUE_TEMPLATE/03-mobile.yml @@ -131,6 +131,6 @@ body: id: ep-version attributes: label: Execution Provider Library Version - placeholder: ex. CUDA 11.6 or ROCm 5.1.1 + placeholder: ex. CUDA 12.2 or ROCm 5.1.1 validations: required: false diff --git a/.github/ISSUE_TEMPLATE/05-performance.yml b/.github/ISSUE_TEMPLATE/05-performance.yml index da0e6c7ada7a7..5d678033f6a42 100644 --- a/.github/ISSUE_TEMPLATE/05-performance.yml +++ b/.github/ISSUE_TEMPLATE/05-performance.yml @@ -127,7 +127,7 @@ body: id: ep-version attributes: label: Execution Provider Library Version - placeholder: ex. CUDA 11.6 or ROCm 5.1.1 + placeholder: ex. CUDA 12.2 or ROCm 5.1.1 validations: required: false - type: textarea diff --git a/.github/ISSUE_TEMPLATE/06-training.yml b/.github/ISSUE_TEMPLATE/06-training.yml index 790d64dac0051..fec2ab3a1b285 100644 --- a/.github/ISSUE_TEMPLATE/06-training.yml +++ b/.github/ISSUE_TEMPLATE/06-training.yml @@ -72,6 +72,6 @@ body: id: ep-version attributes: label: Execution Provider Library Version - placeholder: ex. CUDA 11.6 or ROCm 5.1.1 + placeholder: ex. CUDA 12.2 or ROCm 5.1.1 validations: required: false diff --git a/.github/ISSUE_TEMPLATE/08-general.yml b/.github/ISSUE_TEMPLATE/08-general.yml index 241be0044fe7d..53269c240429f 100644 --- a/.github/ISSUE_TEMPLATE/08-general.yml +++ b/.github/ISSUE_TEMPLATE/08-general.yml @@ -125,6 +125,6 @@ body: id: ep-version attributes: label: Execution Provider Library Version - placeholder: ex. CUDA 11.6 or ROCm 5.1.1 + placeholder: ex. 
CUDA 12.2 or ROCm 5.1.1 validations: required: false diff --git a/js/node/script/install.js b/js/node/script/install.js index 7fdaeb9586278..d406da3591eec 100644 --- a/js/node/script/install.js +++ b/js/node/script/install.js @@ -9,7 +9,6 @@ // The purpose of this script is to download the required binaries for the platform and architecture. // Currently, most of the binaries are already bundled in the package, except for the following: -// - Linux/x64/CUDA 11 // - Linux/x64/CUDA 12 // // The CUDA binaries are not bundled because they are too large to be allowed in the npm registry. Instead, they are diff --git a/onnxruntime/python/tools/tensorrt/perf/build/build_image.py b/onnxruntime/python/tools/tensorrt/perf/build/build_image.py index 3e913094628c3..a4fbc21b43c85 100644 --- a/onnxruntime/python/tools/tensorrt/perf/build/build_image.py +++ b/onnxruntime/python/tools/tensorrt/perf/build/build_image.py @@ -16,7 +16,6 @@ import sys TRT_DOCKER_FILES = { - "10.9_cuda11.8_cudnn8": "tools/ci_build/github/linux/docker/Dockerfile.ubuntu_cuda11_tensorrt10", "10.9_cuda12.8_cudnn9": "tools/ci_build/github/linux/docker/Dockerfile.ubuntu_cuda12_tensorrt10", "BIN": "tools/ci_build/github/linux/docker/Dockerfile.ubuntu_tensorrt_bin", } diff --git a/onnxruntime/python/tools/transformers/models/stable_diffusion/README.md b/onnxruntime/python/tools/transformers/models/stable_diffusion/README.md index dc83f4dc220f0..2506ffe8a3f50 100644 --- a/onnxruntime/python/tools/transformers/models/stable_diffusion/README.md +++ b/onnxruntime/python/tools/transformers/models/stable_diffusion/README.md @@ -143,29 +143,11 @@ conda activate py310 ### Setup Environment (CUDA) without docker -First, we need install CUDA 11.8 or 12.x, [cuDNN](https://docs.nvidia.com/deeplearning/cudnn/install-guide/index.html), and [TensorRT](https://docs.nvidia.com/deeplearning/tensorrt/install-guide/index.html) in the machine. 
+First, we need to install CUDA 12.x, [cuDNN](https://docs.nvidia.com/deeplearning/cudnn/install-guide/index.html), and [TensorRT](https://docs.nvidia.com/deeplearning/tensorrt/install-guide/index.html) in the machine. The verison of CuDNN can be found in https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html#requirements. The version of TensorRT can be found in https://onnxruntime.ai/docs/execution-providers/TensorRT-ExecutionProvider.html#requirements. -#### CUDA 11.8: - -In the Conda environment, install PyTorch 2.1 up to 2.3.1, and other required packages like the following: -``` -pip install torch>=2.1,<2.4 --index-url https://download.pytorch.org/whl/cu118 -pip install --upgrade polygraphy onnx-graphsurgeon --extra-index-url https://pypi.ngc.nvidia.com -pip install -r requirements/cuda11/requirements.txt -``` - -For Windows, install nvtx like the following: -``` -conda install -c conda-forge nvtx -``` - -We cannot directly `pip install tensorrt` for CUDA 11. Follow https://github.com/NVIDIA/TensorRT/issues/2773 to install TensorRT for CUDA 11 in Linux. - -For Windows, pip install the tensorrt wheel in the downloaded TensorRT zip file instead. Like `pip install tensorrt-8.6.1.6.windows10.x86_64.cuda-11.8\tensorrt-8.6.1.6\python\tensorrt-8.6.1-cp310-none-win_amd64.whl`. - #### CUDA 12.*: The official package of onnxruntime-gpu 1.19.x is built for CUDA 12.x. 
You can install it and other python packages like the following: ``` diff --git a/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml b/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml index 4a5ba81943b40..71c3edfaee8bd 100644 --- a/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml +++ b/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml @@ -76,12 +76,12 @@ variables: - name: ReleaseVersionSuffix value: '' - name: win_trt_version - value: 11.8 + value: 12.2 - name: win_trt_home - value: $(Agent.TempDirectory)\${{ variables.win_trt_folder_cuda11 }} + value: $(Agent.TempDirectory)\${{ variables.win_trt_folder_cuda12 }} - name: win_cuda_home - value: $(Agent.TempDirectory)\v11.8 + value: $(Agent.TempDirectory)\v12.2 stages: - template: stages/set_packaging_variables_stage.yml @@ -110,13 +110,13 @@ stages: - template: stages/java-cuda-packaging-stage.yml parameters: - CudaVersion: 11.8 + CudaVersion: 12.2 SpecificArtifact: ${{ parameters.SpecificArtifact }} BuildId: ${{ parameters.BuildId }} - template: stages/nuget-combine-cuda-stage.yml parameters: - CudaVersion: 11.8 + CudaVersion: 12.2 RunOnnxRuntimeTests: ${{ parameters.RunOnnxRuntimeTests }} UseIncreasedTimeoutForTests: ${{ parameters.UseIncreasedTimeoutForTests }} win_trt_home: ${{ variables.win_trt_home }} diff --git a/tools/ci_build/github/azure-pipelines/cuda-packaging-pipeline.yml b/tools/ci_build/github/azure-pipelines/cuda-packaging-pipeline.yml index 6eb1e858a812b..83d41f23d9de2 100644 --- a/tools/ci_build/github/azure-pipelines/cuda-packaging-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/cuda-packaging-pipeline.yml @@ -50,7 +50,6 @@ parameters: type: string default: '12.2' values: - - 11.8 - 12.2 variables: diff --git a/tools/ci_build/github/azure-pipelines/linux-gpu-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/linux-gpu-ci-pipeline.yml index 665b7435eed66..492bada97acc9 100644 
--- a/tools/ci_build/github/azure-pipelines/linux-gpu-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/linux-gpu-ci-pipeline.yml @@ -33,7 +33,6 @@ parameters: type: string default: '12.2' values: - - 11.8 - 12.2 - name: SpecificArtifact diff --git a/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-ci-pipeline.yml index dd5288ab3a436..c6eabed7c5b4a 100644 --- a/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-ci-pipeline.yml @@ -33,7 +33,6 @@ parameters: type: string default: '12.2' values: - - 11.8 - 12.2 variables: diff --git a/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-cuda-minimal-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-cuda-minimal-ci-pipeline.yml index ad9d9bfc4b5e5..0ec05909b846f 100644 --- a/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-cuda-minimal-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-cuda-minimal-ci-pipeline.yml @@ -33,7 +33,6 @@ parameters: type: string default: '12.2' values: - - 11.8 - 12.2 variables: diff --git a/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-daily-perf-pipeline.yml b/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-daily-perf-pipeline.yml index 2dc597fcb2351..ac9153e1b4887 100644 --- a/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-daily-perf-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-daily-perf-pipeline.yml @@ -10,7 +10,6 @@ parameters: type: string default: 10.9_cuda12.8_cudnn9 values: - - 10.9_cuda11.8_cudnn8 - 10.9_cuda12.8_cudnn9 - BIN diff --git a/tools/ci_build/github/azure-pipelines/post-merge-jobs.yml b/tools/ci_build/github/azure-pipelines/post-merge-jobs.yml index 78c07c28d6f4e..2cecedb401724 100644 --- a/tools/ci_build/github/azure-pipelines/post-merge-jobs.yml +++ 
b/tools/ci_build/github/azure-pipelines/post-merge-jobs.yml @@ -4,7 +4,6 @@ parameters: type: string default: '12.2' values: - - 11.8 - 12.2 variables: diff --git a/tools/ci_build/github/azure-pipelines/py-cuda-alt-package-test-pipeline.yml b/tools/ci_build/github/azure-pipelines/py-cuda-alt-package-test-pipeline.yml deleted file mode 100644 index 960b59f93bee0..0000000000000 --- a/tools/ci_build/github/azure-pipelines/py-cuda-alt-package-test-pipeline.yml +++ /dev/null @@ -1,57 +0,0 @@ -resources: - pipelines: - - pipeline: build - source: 'Python CUDA ALT Packaging Pipeline' - trigger: true - branch: main # branch to pick the artifact, Used only for manual triggered pipeline runs for testing the pipeline itself - -stages: - # ****The following Stage depend on all previous tags. *** - # GPU resources are very limited, - # To utilize gpu resource more efficiently, run GPU job only after all cpus jobs succeed - - stage: Linux_Test_CUDA_Alt_x86_64_stage - dependsOn: - jobs: - - template: templates/py-packaging-linux-test-cuda.yml - parameters: - arch: 'x86_64' - machine_pool: 'Onnxruntime-Linux-GPU' - python_wheel_suffix: '_gpu' - timeout: 480 - docker_base_image: onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda11_x64_almalinux8_gcc11:20250124.1 - cuda_version: '11.8' - - - stage: Republish_Wheels - dependsOn: - jobs: - - job: Python_Publishing_GPU - pool: 'onnxruntime-Ubuntu2204-AMD-CPU' - steps: - - checkout: none - - download: build - displayName: 'Download Pipeline Artifact - onnxruntime_gpu' - artifact: 'onnxruntime_gpu' - patterns: '*.whl' - - download: build - displayName: 'Download Pipeline Artifact - Win GPU 3.10' - artifact: 'win_gpu_wheel_3.10' - patterns: '*.whl' - - download: build - displayName: 'Download Pipeline Artifact - Win GPU 3.11' - artifact: 'win_gpu_wheel_3.11' - patterns: '*.whl' - - download: build - displayName: 'Download Pipeline Artifact - Win GPU 3.12' - artifact: 'win_gpu_wheel_3.12' - patterns: '*.whl' - - 
download: build - displayName: 'Download Pipeline Artifact - Win GPU 3.13' - artifact: 'win_gpu_wheel_3.13' - patterns: '*.whl' - - - script: find $(Pipeline.Workspace) -name \*win_amd64.whl -exec mv {} $(Pipeline.Workspace)/build/onnxruntime_gpu \; - displayName: 'Merge files together' - - - publish: $(Pipeline.Workspace)/build/onnxruntime_gpu - artifact: whl - displayName: Republish artifacts \ No newline at end of file diff --git a/tools/ci_build/github/azure-pipelines/py-cuda-alt-packaging-pipeline.yml b/tools/ci_build/github/azure-pipelines/py-cuda-alt-packaging-pipeline.yml deleted file mode 100644 index b4870db90a755..0000000000000 --- a/tools/ci_build/github/azure-pipelines/py-cuda-alt-packaging-pipeline.yml +++ /dev/null @@ -1,60 +0,0 @@ -trigger: none -resources: - repositories: - - repository: 1esPipelines - type: git - name: 1ESPipelineTemplates/1ESPipelineTemplates - ref: refs/tags/release -parameters: - - name: enable_linux_cuda - type: boolean - default: true - - - name: enable_windows_cuda - type: boolean - default: true - - - name: cmake_build_type - type: string - default: 'Release' - values: - - Debug - - Release - - RelWithDebInfo - - MinSizeRel -extends: - # The pipeline extends the 1ES PT which will inject different SDL and compliance tasks. - # For non-production pipelines, use "Unofficial" as defined below. - # For productions pipelines, use "Official". 
- template: v1/1ES.Official.PipelineTemplate.yml@1esPipelines - parameters: - sdl: - componentgovernance: - ignoreDirectories: '$(Build.Repository.LocalPath)/cmake/external/emsdk/upstream/emscripten/tests,$(Build.Repository.LocalPath)/cmake/external/onnx/third_party/benchmark,$(Build.Repository.LocalPath)/cmake/external/onnx/third_party/pybind11,$(Build.Repository.LocalPath)/cmake/external/onnx/third_party/pybind11/tests,$(Build.Repository.LocalPath)/cmake/external/onnxruntime-extensions,$(Build.Repository.LocalPath)/js/react_native/e2e/node_modules,$(Build.Repository.LocalPath)/js/node_modules,$(Build.Repository.LocalPath)/onnxruntime-inference-examples,$(Build.SourcesDirectory)/cmake/external/emsdk/upstream/emscripten/tests,$(Build.SourcesDirectory)/cmake/external/onnx/third_party/benchmark,$(Build.SourcesDirectory)/cmake/external/onnx/third_party/pybind11,$(Build.SourcesDirectory)/cmake/external/onnx/third_party/pybind11/tests,$(Build.SourcesDirectory)/cmake/external/onnxruntime-extensions,$(Build.SourcesDirectory)/js/react_native/e2e/node_modules,$(Build.SourcesDirectory)/js/node_modules,$(Build.SourcesDirectory)/onnxruntime-inference-examples,$(Build.BinariesDirectory)' - alertWarningLevel: High - failOnAlert: false - verbosity: Normal - timeout: 3600 - tsa: - enabled: true - codeSignValidation: - enabled: true - break: true - policheck: - enabled: true - exclusionsFile: '$(Build.SourcesDirectory)\tools\ci_build\policheck_exclusions.xml' - codeql: - compiled: - enabled: false - justificationForDisabling: 'CodeQL is taking nearly 4 hours resulting in timeouts in our production pipelines' - pool: - name: 'onnxruntime-Win-CPU-2022' # Name of your hosted pool - os: windows # OS of the image. This value cannot be a variable. 
Allowed values: windows, linux, macOS - - stages: - - template: stages/py-gpu-packaging-stage.yml - parameters: - enable_linux_cuda: ${{ parameters.enable_linux_cuda }} - enable_windows_cuda: ${{ parameters.enable_windows_cuda }} - cmake_build_type: ${{ parameters.cmake_build_type }} - cuda_version: '11.8' diff --git a/tools/ci_build/github/azure-pipelines/stages/jobs/py-linux-cuda-package-test-job.yml b/tools/ci_build/github/azure-pipelines/stages/jobs/py-linux-cuda-package-test-job.yml index 85366ffc28b3a..49d65b10b393c 100644 --- a/tools/ci_build/github/azure-pipelines/stages/jobs/py-linux-cuda-package-test-job.yml +++ b/tools/ci_build/github/azure-pipelines/stages/jobs/py-linux-cuda-package-test-job.yml @@ -2,9 +2,8 @@ parameters: - name: CudaVersion displayName: 'CUDA version' type: string - default: '11.8' + default: '12.2' values: - - 11.8 - 12.2 - name: machine_pool type: string diff --git a/tools/ci_build/github/azure-pipelines/stages/nuget-linux-cuda-packaging-stage.yml b/tools/ci_build/github/azure-pipelines/stages/nuget-linux-cuda-packaging-stage.yml index 06576ac05569e..b1d0625c3b11b 100644 --- a/tools/ci_build/github/azure-pipelines/stages/nuget-linux-cuda-packaging-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/nuget-linux-cuda-packaging-stage.yml @@ -1,7 +1,7 @@ parameters: - name: CudaVersion type: string - default: '11.8' + default: '12.2' - name: buildJava type: boolean - name: buildNodejs diff --git a/tools/ci_build/github/azure-pipelines/stages/py-gpu-packaging-stage.yml b/tools/ci_build/github/azure-pipelines/stages/py-gpu-packaging-stage.yml index 362c2a3d74083..eea9b672eef3d 100644 --- a/tools/ci_build/github/azure-pipelines/stages/py-gpu-packaging-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/py-gpu-packaging-stage.yml @@ -36,7 +36,6 @@ parameters: displayName: 'CUDA version. Windows Only.' 
default: '12.2' values: - - 11.8 - 12.2 - name: PythonVersions diff --git a/tools/ci_build/github/azure-pipelines/stages/py-linux-gpu-stage.yml b/tools/ci_build/github/azure-pipelines/stages/py-linux-gpu-stage.yml index 60b2e04e82136..17eb10fae7eb2 100644 --- a/tools/ci_build/github/azure-pipelines/stages/py-linux-gpu-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/py-linux-gpu-stage.yml @@ -22,9 +22,8 @@ parameters: - name: cuda_version type: string - default: '11.8' + default: '12.2' values: - - 11.8 - 12.2 stages: @@ -67,8 +66,8 @@ stages: - template: ../templates/get-docker-image-steps.yml parameters: - Dockerfile: tools/ci_build/github/linux/docker/inference/${{ parameters.arch }}/python/cuda/Dockerfile - Context: tools/ci_build/github/linux/docker/inference/${{ parameters.arch }}/python/cuda + Dockerfile: tools/ci_build/github/linux/docker/inference/${{ parameters.arch }}/default/cuda12/Dockerfile + Context: tools/ci_build/github/linux/docker/inference/${{ parameters.arch }}/default/cuda12 DockerBuildArgs: "--build-arg BASEIMAGE=${{ parameters.docker_base_image }} --build-arg TRT_VERSION=${{ variables.trt_version }} --build-arg BUILD_UID=$( id -u )" Repository: onnxruntimecuda${{ replace(parameters.cuda_version, '.', '') }}xtrt86build${{ parameters.arch }} diff --git a/tools/ci_build/github/azure-pipelines/stages/py-win-gpu-stage.yml b/tools/ci_build/github/azure-pipelines/stages/py-win-gpu-stage.yml index 32004366fb947..fe2b85976d38b 100644 --- a/tools/ci_build/github/azure-pipelines/stages/py-win-gpu-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/py-win-gpu-stage.yml @@ -19,9 +19,8 @@ parameters: default: '' - name: CudaVersion type: string - default: '11.8' + default: '12.2' values: - - 11.8 - 12.2 - name: cmake_build_type diff --git a/tools/ci_build/github/azure-pipelines/templates/jobs/set-winenv.yml b/tools/ci_build/github/azure-pipelines/templates/jobs/set-winenv.yml index 142b76ee43b99..96436883fb8b8 100644 --- 
a/tools/ci_build/github/azure-pipelines/templates/jobs/set-winenv.yml +++ b/tools/ci_build/github/azure-pipelines/templates/jobs/set-winenv.yml @@ -10,12 +10,12 @@ parameters: - name: PrimaryCUDAVersion type: string default: '12.2' - - name: SecondaryCUDAVersion - type: string - default: '11.8' - - name: win_trt_folder_cuda11 - type: string - default: 'TensorRT-10.9.0.34.Windows10.x86_64.cuda-11.8' +# - name: SecondaryCUDAVersion +# type: string +# default: '11.8' +# - name: win_trt_folder_cuda11 +# type: string +# default: 'TensorRT-10.9.0.34.Windows10.x86_64.cuda-11.8' - name: win_trt_folder_cuda12 type: string default: 'TensorRT-10.9.0.34.Windows10.x86_64.cuda-12.8' @@ -25,16 +25,16 @@ steps: - powershell: | azcopy.exe cp --recursive "https://lotusscus.blob.core.windows.net/models/cuda_sdk/v${{ parameters.PrimaryCUDAVersion }}" $(Agent.TempDirectory) displayName: 'Download Primary CUDA SDK v${{ parameters.PrimaryCUDAVersion }}' - - powershell: | - azcopy.exe cp --recursive "https://lotusscus.blob.core.windows.net/models/cuda_sdk/v${{ parameters.SecondaryCUDAVersion }}" $(Agent.TempDirectory) - displayName: 'Download Secondary CUDA SDK v${{ parameters.SecondaryCUDAVersion }}' +# - powershell: | +# azcopy.exe cp --recursive "https://lotusscus.blob.core.windows.net/models/cuda_sdk/v${{ parameters.SecondaryCUDAVersion }}" $(Agent.TempDirectory) +# displayName: 'Download Secondary CUDA SDK v${{ parameters.SecondaryCUDAVersion }}' - ${{ if eq(parameters.DownloadTRT, 'true') }}: - powershell: | azcopy.exe cp --recursive "https://lotusscus.blob.core.windows.net/models/local/${{ parameters.win_trt_folder_cuda12 }}" $(Agent.TempDirectory) displayName: 'Download ${{ parameters.win_trt_folder_cuda12 }}' - - powershell: | - azcopy.exe cp --recursive "https://lotusscus.blob.core.windows.net/models/local/${{ parameters.win_trt_folder_cuda11 }}" $(Agent.TempDirectory) - displayName: 'Download ${{ parameters.win_trt_folder_cuda11 }}' +# - powershell: | +# azcopy.exe cp 
--recursive "https://lotusscus.blob.core.windows.net/models/local/${{ parameters.win_trt_folder_cuda11 }}" $(Agent.TempDirectory) +# displayName: 'Download ${{ parameters.win_trt_folder_cuda11 }}' - task: BatchScript@1 displayName: 'setup env' diff --git a/tools/ci_build/github/azure-pipelines/templates/py-packaging-linux-test-cuda.yml b/tools/ci_build/github/azure-pipelines/templates/py-packaging-linux-test-cuda.yml index e7c702042b441..96ff9943dc178 100644 --- a/tools/ci_build/github/azure-pipelines/templates/py-packaging-linux-test-cuda.yml +++ b/tools/ci_build/github/azure-pipelines/templates/py-packaging-linux-test-cuda.yml @@ -18,9 +18,8 @@ parameters: - name: cuda_version type: string - default: '11.8' + default: '12.2' values: - - 11.8 - 12.2 # TODO: Ideally it should fetch information from the build that triggers it @@ -91,8 +90,8 @@ jobs: - template: get-docker-image-steps.yml parameters: - Dockerfile: tools/ci_build/github/linux/docker/inference/${{ parameters.arch }}/python/cuda/Dockerfile - Context: tools/ci_build/github/linux/docker/inference/${{ parameters.arch }}/python/cuda + Dockerfile: tools/ci_build/github/linux/docker/inference/${{ parameters.arch }}/default/cuda12/Dockerfile + Context: tools/ci_build/github/linux/docker/inference/${{ parameters.arch }}/default/cuda12 DockerBuildArgs: "--build-arg BASEIMAGE=${{ parameters.docker_base_image }} --build-arg TRT_VERSION=${{ variables.trt_version }} --build-arg BUILD_UID=$( id -u )" Repository: onnxruntimecuda${{ replace(parameters.cuda_version, '.', '') }}xtrt86build${{ parameters.arch }} diff --git a/tools/ci_build/github/azure-pipelines/win-gpu-tensorrt-cuda-minimal-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/win-gpu-tensorrt-cuda-minimal-ci-pipeline.yml index c68ba01485db2..c12bb3552920c 100644 --- a/tools/ci_build/github/azure-pipelines/win-gpu-tensorrt-cuda-minimal-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/win-gpu-tensorrt-cuda-minimal-ci-pipeline.yml @@ -33,7 
+33,6 @@ parameters: type: string default: '12.2' values: - - 11.8 - 12.2 variables: diff --git a/tools/ci_build/github/linux/docker/Dockerfile.package_ubi8_cuda11_8_tensorrt8_6 b/tools/ci_build/github/linux/docker/Dockerfile.package_ubi8_cuda11_8_tensorrt8_6 deleted file mode 100644 index 9de88d1664b82..0000000000000 --- a/tools/ci_build/github/linux/docker/Dockerfile.package_ubi8_cuda11_8_tensorrt8_6 +++ /dev/null @@ -1,46 +0,0 @@ -# -------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. -# -------------------------------------------------------------- -# Dockerfile to Test ONNX Runtime on UBI8 with CUDA 11.8 and TensorRT 8.6 - -# Build base image with required system packages -ARG BASEIMAGE=nvidia/cuda:11.8.0-cudnn8-devel-ubi8 -ARG TRT_VERSION=8.6.1.6-1.cuda11.8 -FROM $BASEIMAGE AS base -ARG TRT_VERSION -ENV PATH=/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/src/tensorrt/bin:${PATH} - -RUN dnf install -y bash wget &&\ - dnf clean dbcache - -# Install python3 -RUN dnf install -y \ - python3.10 \ - python310-pip \ - python310-wheel &&\ - cd /usr/local/bin &&\ - ln -s /usr/bin/python3 python3.10 &&\ - ln -s /usr/bin/pip3 pip3.10; - -RUN pip3 install --upgrade pip -RUN pip3 install setuptools>=68.2.2 - -# Install TensorRT -RUN dnf install -y libnvinfer8 libnvonnxparsers8 libnvparsers8 libnvinfer-plugin8 libnvinfer-lean8 libnvinfer-vc-plugin8 libnvinfer-dispatch8 -RUN dnf downgrade -y libnvinfer8-${TRT_VERSION} libnvinfer8-${TRT_VERSION} libnvonnxparsers8-${TRT_VERSION} libnvparsers8-${TRT_VERSION} libnvinfer-plugin8-${TRT_VERSION} libnvinfer-lean8-${TRT_VERSION} libnvinfer-vc-plugin8-${TRT_VERSION} libnvinfer-dispatch8-${TRT_VERSION} &&\ - dnf install -y dnf-plugin-versionlock &&\ - dnf versionlock libnvinfer8 libnvonnxparsers8 libnvparsers8 libnvinfer-plugin8 libnvinfer-lean8 libnvinfer-vc-plugin8 libnvinfer-dispatch8 -RUN dnf clean dbcache - - -ADD scripts 
/tmp/scripts -RUN cd /tmp/scripts && /tmp/scripts/install_dotnet.sh && /tmp/scripts/install_java.sh && rm -rf /tmp/scripts - -# Build final image from base. -FROM base as final -ARG BUILD_USER=onnxruntimedev -ARG BUILD_UID=1000 -RUN adduser --uid $BUILD_UID $BUILD_USER -WORKDIR /home/$BUILD_USER -USER $BUILD_USER diff --git a/tools/ci_build/github/linux/docker/Dockerfile.package_ubuntu_2004_gpu b/tools/ci_build/github/linux/docker/Dockerfile.package_ubuntu_2004_gpu index 9b392fa0e3a68..8a84b9b940306 100644 --- a/tools/ci_build/github/linux/docker/Dockerfile.package_ubuntu_2004_gpu +++ b/tools/ci_build/github/linux/docker/Dockerfile.package_ubuntu_2004_gpu @@ -5,8 +5,8 @@ # Dockerfile to run ONNXRuntime with TensorRT integration # Build base image with required system packages -ARG BASEIMAGE=nvidia/cuda:11.8.0-cudnn8-devel-ubuntu20.04 -ARG TRT_VERSION=10.9.0.34-1+cuda11.8 +ARG BASEIMAGE=nvidia/cuda:12.2.2-cudnn8-devel-ubuntu20.04 +ARG TRT_VERSION=10.9.0.34-1+cuda12.8 ARG LD_LIBRARY_PATH_ARG=/usr/local/lib64:/usr/local/cuda/lib64 FROM $BASEIMAGE AS base ARG TRT_VERSION diff --git a/tools/ci_build/github/linux/docker/Dockerfile.ubuntu_cuda11_tensorrt10 b/tools/ci_build/github/linux/docker/Dockerfile.ubuntu_cuda11_tensorrt10 deleted file mode 100644 index f68f488a9d8b8..0000000000000 --- a/tools/ci_build/github/linux/docker/Dockerfile.ubuntu_cuda11_tensorrt10 +++ /dev/null @@ -1,109 +0,0 @@ -# -------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. 
-# -------------------------------------------------------------- -# Dockerfile to run ONNXRuntime with TensorRT integration - -# Build base image with required system packages -FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04 AS base - -# The local directory into which to build and install CMAKE -ARG ONNXRUNTIME_LOCAL_CODE_DIR=/code - -ENV PATH=/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/src/tensorrt/bin:${ONNXRUNTIME_LOCAL_CODE_DIR}/cmake-3.31.5-linux-x86_64/bin:/opt/miniconda/bin:${PATH} -ENV DEBIAN_FRONTEND=noninteractive - -RUN apt-get update &&\ - apt-get install -y sudo git bash unattended-upgrades wget -RUN unattended-upgrade - -# Install python3 -RUN apt-get install -y --no-install-recommends \ - python3 \ - python3-pip \ - python3-dev \ - python3-wheel &&\ - cd /usr/local/bin &&\ - ln -s /usr/bin/python3 python &&\ - ln -s /usr/bin/pip3 pip; - -RUN pip install --upgrade pip -RUN pip install psutil setuptools>=68.2.2 - -# Install TensorRT -RUN TRT_VERSION="10.9.0.34-1+cuda11.8" &&\ - apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/7fa2af80.pub &&\ - apt-get update &&\ - apt-get install -y \ - libnvinfer-dev=${TRT_VERSION} \ - libnvinfer-dispatch-dev=${TRT_VERSION} \ - libnvinfer-dispatch10=${TRT_VERSION} \ - libnvinfer-headers-dev=${TRT_VERSION} \ - libnvinfer-headers-plugin-dev=${TRT_VERSION} \ - libnvinfer-lean-dev=${TRT_VERSION} \ - libnvinfer-lean10=${TRT_VERSION} \ - libnvinfer-plugin-dev=${TRT_VERSION} \ - libnvinfer-plugin10=${TRT_VERSION} \ - libnvinfer-vc-plugin-dev=${TRT_VERSION} \ - libnvinfer-vc-plugin10=${TRT_VERSION} \ - libnvinfer10=${TRT_VERSION} \ - libnvonnxparsers-dev=${TRT_VERSION} \ - libnvonnxparsers10=${TRT_VERSION} \ - tensorrt-dev=${TRT_VERSION} \ - libnvinfer-bin=${TRT_VERSION} - -# Compile trtexec if not installed -RUN if [ ! -d /usr/src/tensorrt/bin ] || [ ! 
-f /usr/src/tensorrt/bin/trtexec ]; then \ - cd /usr/src/tensorrt/samples/trtexec && make; \ - fi - -# Install Valgrind -RUN apt-get install -y valgrind - -# Build final image from base. Builds ORT. -FROM base AS final -ARG BUILD_USER=onnxruntimedev -ARG BUILD_UID=1000 -RUN adduser --gecos 'onnxruntime Build User' --disabled-password $BUILD_USER --uid $BUILD_UID -USER $BUILD_USER - -# ONNX Runtime arguments - -# URL to the github repo from which to clone ORT. -ARG ONNXRUNTIME_REPO=https://github.com/Microsoft/onnxruntime - -# The local directory into which to clone ORT. -ARG ONNXRUNTIME_LOCAL_CODE_DIR=/code - -# The git branch of ORT to checkout and build. -ARG ONNXRUNTIME_BRANCH=main - -# Optional. The specific commit to pull and build from. If not set, the latest commit is used. -ARG ONNXRUNTIME_COMMIT_ID - -# The supported CUDA architecture -ARG CMAKE_CUDA_ARCHITECTURES=75 - -WORKDIR ${ONNXRUNTIME_LOCAL_CODE_DIR} - -# Clone ORT repository with branch -RUN git clone --single-branch --branch ${ONNXRUNTIME_BRANCH} --recursive ${ONNXRUNTIME_REPO} onnxruntime &&\ - /bin/sh onnxruntime/dockerfiles/scripts/install_common_deps.sh - -WORKDIR ${ONNXRUNTIME_LOCAL_CODE_DIR}/onnxruntime - -# Reset to a specific commit if specified by build args. 
-RUN if [ -z "$ONNXRUNTIME_COMMIT_ID" ] ; then echo "Building branch ${ONNXRUNTIME_BRANCH}" ;\ - else echo "Building branch ${ONNXRUNTIME_BRANCH} @ commit ${ONNXRUNTIME_COMMIT_ID}" &&\ - git reset --hard ${ONNXRUNTIME_COMMIT_ID} && git submodule update --recursive ; fi - -# Build ORT -ENV CUDA_MODULE_LOADING="LAZY" -ARG PARSER_CONFIG="" -RUN /bin/sh build.sh ${PARSER_CONFIG} --parallel --build_shared_lib --cuda_home /usr/local/cuda --cudnn_home /usr/lib/x86_64-linux-gnu/ --use_tensorrt --tensorrt_home /usr/lib/x86_64-linux-gnu/ --config Release --build_wheel --skip_tests --skip_submodule_sync --cmake_extra_defines '"CMAKE_CUDA_ARCHITECTURES='${CMAKE_CUDA_ARCHITECTURES}'"' - -# Switch to root to continue following steps of CI -USER root - -# Intall ORT wheel -RUN pip install ${ONNXRUNTIME_LOCAL_CODE_DIR}/onnxruntime/build/Linux/Release/dist/*.whl diff --git a/tools/ci_build/github/linux/docker/inference/x86_64/default/cuda11/Dockerfile b/tools/ci_build/github/linux/docker/inference/x86_64/default/cuda11/Dockerfile deleted file mode 100644 index 24287fd34d3ea..0000000000000 --- a/tools/ci_build/github/linux/docker/inference/x86_64/default/cuda11/Dockerfile +++ /dev/null @@ -1,45 +0,0 @@ -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. 
- -# This file is used by Zip-Nuget Packaging NoContribOps Pipeline,Zip-Nuget-Java Packaging Pipeline -FROM onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda11_x64_almalinux8_gcc11_dotnet:20250124.1 - -ARG TRT_VERSION -#Install TensorRT only if TRT_VERSION is not empty -RUN if [ -n "$TRT_VERSION" ]; then \ - echo "TRT_VERSION is $TRT_VERSION" && \ - dnf -y install \ - libnvinfer10-${TRT_VERSION} \ - libnvinfer-headers-devel-${TRT_VERSION} \ - libnvinfer-devel-${TRT_VERSION} \ - libnvinfer-lean10-${TRT_VERSION} \ - libnvonnxparsers10-${TRT_VERSION} \ - libnvonnxparsers-devel-${TRT_VERSION} \ - libnvinfer-dispatch10-${TRT_VERSION} \ - libnvinfer-plugin10-${TRT_VERSION} \ - libnvinfer-vc-plugin10-${TRT_VERSION} \ - libnvinfer-bin-${TRT_VERSION} \ - libnvinfer-plugin10-${TRT_VERSION} \ - libnvinfer-plugin-devel-${TRT_VERSION} \ - libnvinfer-vc-plugin-devel-${TRT_VERSION} \ - libnvinfer-lean-devel-${TRT_VERSION} \ - libnvinfer-dispatch-devel-${TRT_VERSION} \ - libnvinfer-headers-plugin-devel-${TRT_VERSION} && \ - dnf clean dbcache ; \ -else \ - echo "TRT_VERSION is none skipping Tensor RT Installation" ; \ -fi - -ENV PATH=/usr/lib/jvm/msopenjdk-17/bin:$PATH -ENV LANG=en_US.UTF-8 -ENV LC_ALL=en_US.UTF-8 -ENV JAVA_HOME=/usr/lib/jvm/msopenjdk-17 -ENV CUDAHOSTCXX=/opt/rh/gcc-toolset-11/root/usr/bin/g++ -ADD scripts /tmp/scripts -RUN cd /tmp/scripts && /tmp/scripts/install_deps.sh && rm -rf /tmp/scripts - -ARG BUILD_UID=1001 -ARG BUILD_USER=onnxruntimedev -RUN adduser --uid $BUILD_UID $BUILD_USER -WORKDIR /home/$BUILD_USER -USER $BUILD_USER diff --git a/tools/ci_build/github/linux/docker/inference/x86_64/default/cuda11/scripts/install_deps.sh b/tools/ci_build/github/linux/docker/inference/x86_64/default/cuda11/scripts/install_deps.sh deleted file mode 100755 index e98429946f4b3..0000000000000 --- a/tools/ci_build/github/linux/docker/inference/x86_64/default/cuda11/scripts/install_deps.sh +++ /dev/null @@ -1,56 +0,0 @@ -#!/bin/bash -set -e -x - -# 
Download a file from internet -function GetFile { - local uri=$1 - local path=$2 - local force=${3:-false} - local download_retries=${4:-5} - local retry_wait_time_seconds=${5:-30} - - if [[ -f $path ]]; then - if [[ $force = false ]]; then - echo "File '$path' already exists. Skipping download" - return 0 - else - rm -rf $path - fi - fi - - if [[ -f $uri ]]; then - echo "'$uri' is a file path, copying file to '$path'" - cp $uri $path - return $? - fi - - echo "Downloading $uri" - # Use aria2c if available, otherwise use curl - if command -v aria2c > /dev/null; then - aria2c -q -d $(dirname $path) -o $(basename $path) "$uri" - else - curl "$uri" -sSL --retry $download_retries --retry-delay $retry_wait_time_seconds --create-dirs -o "$path" --fail - fi - - return $? -} -mkdir -p /tmp/src - -cd /tmp/src - - -echo "Installing Node.js" -CPU_ARCH=`uname -m` -if [[ "$CPU_ARCH" = "x86_64" ]]; then - NODEJS_ARCH=x64 -elif [[ "$CPU_ARCH" = "aarch64" ]]; then - NODEJS_ARCH=arm64 -else - NODEJS_ARCH=$CPU_ARCH -fi -# The EOL for nodejs v18.17.1 LTS is April 2025 -GetFile https://nodejs.org/dist/v18.17.1/node-v18.17.1-linux-${NODEJS_ARCH}.tar.gz /tmp/src/node-v18.17.1-linux-${NODEJS_ARCH}.tar.gz -tar --strip 1 -xf /tmp/src/node-v18.17.1-linux-${NODEJS_ARCH}.tar.gz -C /usr - -cd / -rm -rf /tmp/src diff --git a/tools/ci_build/github/linux/docker/inference/x86_64/python/cuda/Dockerfile b/tools/ci_build/github/linux/docker/inference/x86_64/python/cuda/Dockerfile deleted file mode 100644 index d1df74e2a4506..0000000000000 --- a/tools/ci_build/github/linux/docker/inference/x86_64/python/cuda/Dockerfile +++ /dev/null @@ -1,45 +0,0 @@ -# The default ARGs are for cuda 11.8 with cudnn8, TensorRT is optional -# Please overwrite BASEIMAGE, TRT_VERSION and other arguments with -# --docker-build-args ' --build-arg BASEIMAGE=other_base_image --build-arg TRT_VERSION=other_trt_version etc...' 
-# for other cuda version and TRT version -ARG BASEIMAGE=nvidia/cuda:11.8.0-cudnn8-devel-ubi8 - -FROM $BASEIMAGE -ARG TRT_VERSION=10.9.0.34-1.cuda11.8 - -#Install TensorRT only if TRT_VERSION is not empty -RUN if [ -n "${TRT_VERSION}" ]; then \ - echo "TRT_VERSION is $TRT_VERSION" && \ - dnf -y install \ - libnvinfer10-${TRT_VERSION} \ - libnvinfer-headers-devel-${TRT_VERSION} \ - libnvinfer-devel-${TRT_VERSION} \ - libnvinfer-lean10-${TRT_VERSION} \ - libnvonnxparsers10-${TRT_VERSION} \ - libnvonnxparsers-devel-${TRT_VERSION} \ - libnvinfer-dispatch10-${TRT_VERSION} \ - libnvinfer-plugin10-${TRT_VERSION} \ - libnvinfer-vc-plugin10-${TRT_VERSION} \ - libnvinfer-bin-${TRT_VERSION} \ - libnvinfer-plugin10-${TRT_VERSION} \ - libnvinfer-plugin-devel-${TRT_VERSION} \ - libnvinfer-vc-plugin-devel-${TRT_VERSION} \ - libnvinfer-lean-devel-${TRT_VERSION} \ - libnvinfer-dispatch-devel-${TRT_VERSION} \ - libnvinfer-headers-plugin-devel-${TRT_VERSION} && \ - dnf clean dbcache ; \ -else \ - echo "TRT_VERSION is x${TRT_VERSION} skipping Tensor RT Installation" ; \ -fi - -ENV PATH=/usr/local/cuda/bin:$PATH -ENV CUDA_MODULE_LOADING="LAZY" - -ADD scripts /tmp/scripts -RUN cd /tmp/scripts && /tmp/scripts/install_centos.sh && rm -rf /tmp/scripts - -ARG BUILD_UID=1001 -ARG BUILD_USER=onnxruntimedev -RUN adduser --uid $BUILD_UID $BUILD_USER -WORKDIR /home/$BUILD_USER -USER $BUILD_USER diff --git a/tools/ci_build/github/linux/docker/inference/x86_64/python/cuda/scripts/install_centos.sh b/tools/ci_build/github/linux/docker/inference/x86_64/python/cuda/scripts/install_centos.sh deleted file mode 100755 index d0b58ed28b8c9..0000000000000 --- a/tools/ci_build/github/linux/docker/inference/x86_64/python/cuda/scripts/install_centos.sh +++ /dev/null @@ -1,18 +0,0 @@ -#!/bin/bash -set -e - -os_major_version=$(tr -dc '0-9.' < /etc/redhat-release |cut -d \. 
-f1) - -echo "installing for os major version : $os_major_version" -dnf install -y glibc-langpack-\* -yum install -y which redhat-lsb-core expat-devel tar unzip zlib-devel make bzip2 bzip2-devel perl-IPC-Cmd openssl-devel wget - -echo "installing rapidjson for AzureEP" -wget https://github.com/Tencent/rapidjson/archive/refs/tags/v1.1.0.tar.gz -tar zxvf v1.1.0.tar.gz -cd rapidjson-1.1.0 -mkdir build -cd build -cmake .. -cmake --install . -cd ../..