diff --git a/docs/source/installation/linux.md b/docs/source/installation/linux.md
index 9bccba451c7..ab471e8c1db 100644
--- a/docs/source/installation/linux.md
+++ b/docs/source/installation/linux.md
@@ -9,14 +9,22 @@
 Before the pre-built Python wheel can be installed via `pip`, a few
 prerequisites must be put into place:
 
+Install CUDA Toolkit following the [CUDA Installation Guide for Linux](https://docs.nvidia.com/cuda/cuda-installation-guide-linux/) and
+make sure the `CUDA_HOME` environment variable is properly set.
+
 ```bash
-# Optional step: Only required for Blackwell and Grace Hopper
+# Optional step: Only required for NVIDIA Blackwell GPUs and SBSA platform
 pip3 install torch==2.7.1 torchvision torchaudio --index-url https://download.pytorch.org/whl/cu128
 
+# Optional step: Workaround for deep_gemm installation failure on SBSA platform
+# The actual deep_gemm package and version should be obtained from the requirements.txt file.
+pip3 install 'deep_gemm @ git+https://github.com/zongfeijing/DeepGEMM.git@a9d538ef4dff0326fe521c6ca0bfde115703b56a' \
+    --extra-index-url https://download.pytorch.org/whl/cu128
+
 sudo apt-get -y install libopenmpi-dev
 ```
 
-PyTorch CUDA 12.8 package is required for supporting NVIDIA Blackwell and Grace Hopper GPUs. On prior GPUs, this extra installation is not required.
+PyTorch CUDA 12.8 package is required for supporting NVIDIA Blackwell GPUs and the SBSA platform. On prior GPUs or the Linux x86_64 platform, this extra installation is not required.
 
 ```{tip}
 Instead of manually installing the preqrequisites as described
@@ -55,16 +63,3 @@ There are some known limitations when you pip install pre-built TensorRT-LLM wheels
 when OMPI was not configured --with-slurm and we weren't able to
 discover a SLURM installation in the usual places.
 ```
-
-2. CUDA Toolkit
-
-   `pip install tensorrt-llm` won't install CUDA toolkit in your system, and the CUDA Toolkit is not required if want to just deploy a TensorRT-LLM engine.
-   TensorRT-LLM uses the [ModelOpt](https://nvidia.github.io/TensorRT-Model-Optimizer/) to quantize a model, while the ModelOpt requires CUDA toolkit to jit compile certain kernels which is not included in the pytorch to do quantization effectively.
-   Please install CUDA toolkit when you see the following message when running ModelOpt quantization.
-
-   ```
-   /usr/local/lib/python3.10/dist-packages/modelopt/torch/utils/cpp_extension.py:65:
-   UserWarning: CUDA_HOME environment variable is not set. Please set it to your CUDA install root.
-   Unable to load extension modelopt_cuda_ext and falling back to CPU version.
-   ```
-   The installation of CUDA toolkit can be found in [CUDA Toolkit Documentation](https://docs.nvidia.com/cuda/).
diff --git a/jenkins/L0_Test.groovy b/jenkins/L0_Test.groovy
index 4e795daa1a9..4c68e4796c8 100644
--- a/jenkins/L0_Test.groovy
+++ b/jenkins/L0_Test.groovy
@@ -2091,19 +2091,31 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
                     trtllm_utils.llmExecStepWithRetry(pipeline, script: "pip3 uninstall -y tensorrt")
                     if (values[5] != DLFW_IMAGE) {
                         def ubuntu_version = key.contains("UB2404") ? "ubuntu2404" : "ubuntu2204"
-                        def platform = values[2] == X86_64_TRIPLE ? "x86_64" : "sbsa"
+                        def platform = cpu_arch == X86_64_TRIPLE ? "x86_64" : "sbsa"
                         trtllm_utils.llmExecStepWithRetry(pipeline, script: "wget https://developer.download.nvidia.com/compute/cuda/repos/${ubuntu_version}/${platform}/cuda-keyring_1.1-1_all.deb")
                         trtllm_utils.llmExecStepWithRetry(pipeline, script: "dpkg -i cuda-keyring_1.1-1_all.deb")
                         trtllm_utils.llmExecStepWithRetry(pipeline, script: "apt-get update")
                         trtllm_utils.llmExecStepWithRetry(pipeline, script: "apt-get -y install cuda-toolkit-12-9")
                     }
 
-                    // Extra PyTorch CUDA 12.8 install
+                    // Extra PyTorch CUDA 12.8 install for SBSA platform and Blackwell GPUs bare-metal environments
                     if (values[6]) {
                         echo "###### Extra PyTorch CUDA 12.8 install Start ######"
                         trtllm_utils.llmExecStepWithRetry(pipeline, script: "pip3 install torch==2.7.1 torchvision torchaudio --index-url https://download.pytorch.org/whl/cu128")
                     }
 
+                    // Workaround for https://nvbugs/5433581 where deep_gemm installation fails on SBSA platform
+                    if (cpu_arch == AARCH64_TRIPLE) {
+                        echo "###### Workaround for https://nvbugs/5433581 Start ######"
+                        def deepGemmLine = readFile("${LLM_ROOT}/requirements.txt").readLines().find { it.trim().startsWith('deep_gemm') }
+                        if (deepGemmLine) {
+                            trtllm_utils.llmExecStepWithRetry(pipeline, script: "pip3 install '${deepGemmLine.trim()}' --extra-index-url https://download.pytorch.org/whl/cu128")
+                        }
+                        else {
+                            echo "deep_gemm package not found in requirements.txt"
+                        }
+                    }
+
                     def libEnv = []
                     if (env.alternativeTRT) {
                         stage("Replace TensorRT") {