From e7cdc3a34d81784b81550723bd069680a3319d12 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 6 Jan 2026 10:05:28 -0600 Subject: [PATCH 1/2] ci-wheel: ensure libnccl is always installed --- ci-wheel.Dockerfile | 153 ++++++++++++++++++++++++++------------------ 1 file changed, 89 insertions(+), 64 deletions(-) diff --git a/ci-wheel.Dockerfile b/ci-wheel.Dockerfile index bb99b56..9e345bd 100644 --- a/ci-wheel.Dockerfile +++ b/ci-wheel.Dockerfile @@ -81,39 +81,52 @@ RUN < /dev/null 2>&1; then + echo "libnccl-dev not found, manually installing it" + LIBRARIES_TO_INSTALL+=(libnccl-dev) + else + echo "linccl-dev already installed" + fi + + apt-get install -y --no-install-recommends \ + "${LIBRARIES_TO_INSTALL[@]}" + update-ca-certificates add-apt-repository ppa:git-core/ppa add-apt-repository ppa:ubuntu-toolchain-r/test @@ -128,39 +141,51 @@ case "${LINUX_VER}" in dnf update -y dnf install -y epel-release dnf update -y - dnf install -y \ - autoconf \ - automake \ - bzip2 \ - bzip2-devel \ - ca-certificates \ - cmake \ - curl \ - dnf-plugins-core \ - gcc \ - git \ - jq \ - libcudnn8-devel \ - libcurl-devel \ - libffi-devel \ - libtool \ - ncurses-devel \ - numactl \ - numactl-devel \ - openslide-devel \ - openssh-clients \ - patch \ - protobuf-compiler \ - readline-devel \ - sqlite \ - sqlite-devel \ - unzip \ - wget \ - which \ - xz \ - xz-devel \ - zip \ + LIBRARIES_TO_INSTALL=( + autoconf + automake + bzip2 + bzip2-devel + ca-certificates + cmake + curl + dnf-plugins-core + gcc + git + jq + libcudnn8-devel + libcurl-devel + libffi-devel + libtool + ncurses-devel + numactl + numactl-devel + openslide-devel + openssh-clients + patch + protobuf-compiler + readline-devel + sqlite + sqlite-devel + unzip + wget + which + xz + xz-devel + zip zlib-devel + ) + + # only re-install NCCL if there wasn't one already installed in the image + if ! dnf info libnccl-devel > /dev/null 2>&1; then + echo "libnccl-devel not found, manually installing it" + LIBRARIES_TO_INSTALL+=(libnccl-devel) + else + echo "linccl-devel already installed" + fi + + dnf install -y \ + "${LIBRARIES_TO_INSTALL[@]}" update-ca-trust extract dnf config-manager --set-enabled powertools dnf install -y blas-devel lapack-devel From 7475257f61a81a7986db1b434a9021f13305c2d4 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 6 Jan 2026 11:32:02 -0600 Subject: [PATCH 2/2] fix check for installation --- ci-wheel.Dockerfile | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/ci-wheel.Dockerfile b/ci-wheel.Dockerfile index 9e345bd..bb2734d 100644 --- a/ci-wheel.Dockerfile +++ b/ci-wheel.Dockerfile @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 ARG CUDA_VER=notset @@ -117,11 +117,11 @@ case "${LINUX_VER}" in ) # only re-install NCCL if there wasn't one already installed in the image - if ! apt show libnccl-dev > /dev/null 2>&1; then + if ! apt list --installed | grep -E 'libnccl\-dev' 2>&1 >/dev/null; then echo "libnccl-dev not found, manually installing it" LIBRARIES_TO_INSTALL+=(libnccl-dev) else - echo "linccl-dev already installed" + echo "libnccl-dev already installed" fi apt-get install -y --no-install-recommends \ @@ -177,11 +177,11 @@ case "${LINUX_VER}" in ) # only re-install NCCL if there wasn't one already installed in the image - if ! dnf info libnccl-devel > /dev/null 2>&1; then + if ! rpm --query --all | grep -E 'libnccl\-devel' > /dev/null 2>&1; then echo "libnccl-devel not found, manually installing it" LIBRARIES_TO_INSTALL+=(libnccl-devel) else - echo "linccl-devel already installed" + echo "libnccl-devel already installed" fi dnf install -y \