From e17c5c13f2b6f8abc8450dac30ad3cd6f942012b Mon Sep 17 00:00:00 2001 From: FabioLuporini Date: Thu, 13 Apr 2023 10:46:43 +0000 Subject: [PATCH 1/6] install: Overhaul Dockerfile.amd --- docker/Dockerfile.amd | 169 ++++++++++++++++++++++++++++-------------- 1 file changed, 114 insertions(+), 55 deletions(-) diff --git a/docker/Dockerfile.amd b/docker/Dockerfile.amd index c057e1acaa..aa69a6f188 100644 --- a/docker/Dockerfile.amd +++ b/docker/Dockerfile.amd @@ -2,72 +2,129 @@ # This Dockerfile contains AMD compilers ############################################################## +ARG IMAGE="rocm/dev-ubuntu-22.04:5.4.2-complete" ARG arch="aomp" -ARG ROCM_VERSION=5.4.2 -FROM rocm/dev-ubuntu-22.04:${ROCM_VERSION}-complete as sdk-base +FROM ${IMAGE} as sdk-base + +ARG UCX_BRANCH="v1.13.1" +ARG OMPI_BRANCH="v4.1.4" + +# Update and Install basic Linux development tools +RUN rm /etc/apt/sources.list.d/* \ + && apt-get update \ + && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ + ca-certificates \ + git \ + ssh \ + make \ + vim \ + nano \ + libtinfo* \ + initramfs-tools \ + libelf-dev \ + numactl \ + wget \ + tmux \ + build-essential \ + autoconf \ + automake \ + libtool \ + pkg-config \ + libnuma* \ + gfortran \ + flex \ + hwloc \ + && apt-get clean \ + && ln -s /opt/rocm/llvm/bin/offload-arch /opt/rocm/bin/offload-arch + +# Requires cmake > 3.22 +RUN mkdir -p /opt/cmake \ + && wget --no-check-certificate --quiet -O - https://cmake.org/files/v3.22/cmake-3.22.2-linux-x86_64.tar.gz | tar --strip-components=1 -xz -C /opt/cmake + +ENV ROCM_HOME=/opt/rocm \ + UCX_HOME=/opt/ucx \ + OMPI_HOME=/opt/ompi + +# Adding rocm/cmake to the Environment +ENV PATH=$ROCM_HOME/bin:$ROCM_HOME/profiler/bin:$ROCM_HOME/opencl/bin:/opt/cmake/bin:$PATH \ + LD_LIBRARY_PATH=$ROCM_HOME/lib:$ROCM_HOME/lib64:$ROCM_HOME/llvm/lib:$LD_LIBRARY_PATH \ + LIBRARY_PATH=$ROCM_HOME/lib:$ROCM_HOME/lib64:$LIBRARY_PATH \ + C_INCLUDE_PATH=$ROCM_HOME/include:$C_INCLUDE_PATH \ + CPLUS_INCLUDE_PATH=$ROCM_HOME/include:$CPLUS_INCLUDE_PATH \ + CPATH=$ROCM_HOME/include:$CPATH \ + INCLUDE=$ROCM_HOME/include:$INCLUDE + +WORKDIR /tmp + +# Install UCX +RUN cd /tmp/ \ + && git clone https://github.com/openucx/ucx.git -b ${UCX_BRANCH} \ + && cd ucx \ + && ./autogen.sh \ + && mkdir build \ + && cd build \ + && ../contrib/configure-release --prefix=$UCX_HOME \ + --with-rocm=$ROCM_HOME \ + --without-knem \ + --without-xpmem \ + --without-cuda \ + --enable-optimizations \ + --disable-logging \ + --disable-debug \ + --disable-examples \ + && make -j $(nproc) \ + && make install + +# Install OpenMPI +RUN cd /tmp \ + && git clone --recursive https://github.com/open-mpi/ompi.git -b ${OMPI_BRANCH} \ + && cd ompi \ + && ./autogen.pl \ + && mkdir build \ + && cd build \ + && ../configure --prefix=$OMPI_HOME --with-ucx=$UCX_HOME \ + --enable-mca-no-build=btl-uct \ + --without-verbs \ + --with-pmix \ + --enable-mpi \ + --enable-mpi-fortran=yes \ + --disable-debug \ + && make -j $(nproc) \ + && make install + +# Adding OpenMPI and UCX to Environment +ENV PATH=$OMPI_HOME/bin:$UCX_HOME/bin:$PATH \ + LD_LIBRARY_PATH=$OMPI_HOME/lib:$UCX_HOME/lib:$LD_LIBRARY_PATH \ + LIBRARY_PATH=$OMPI_HOME/lib:$UCX_HOME/lib:$LIBRARY_PATH \ + C_INCLUDE_PATH=$OMPI_HOME/include:$UCX_HOME/include:$C_INCLUDE_PATH \ + CPLUS_INCLUDE_PATH=$OMPI_HOME/include:$UCX_HOME/include:$CPLUS_INCLUDE_PATH \ + CPATH=$OMPI_HOME/include:$UCX_HOME/include:$CPATH \ + INCLUDE=$OMPI_HOME/include:$UCX_HOME/include:$INCLUDE \ + PKG_CONFIG_PATH=$OMPI_HOME/lib/pkgconfig:$UCX_HOME/lib/pkgconfig:$PKG_CONFIG_PATH + +# Adding environment variable for Running as ROOT and set MCA pml to UCX +ENV OMPI_ALLOW_RUN_AS_ROOT=1 \ + OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1 \ + OMPI_MCA_pml=ucx -ENV DEBIAN_FRONTEND noninteractive -ARG ROCM_VERSION -ENV rocm=/opt/rocm-${ROCM_VERSION} -ENV AOMP=/opt/rocm-${ROCM_VERSION}/llvm -ENV ROCM_HOME /opt/rocm -ENV HIP_HOME $ROCM_HOME/hip +############################################################## +# Additional Devito requirements +############################################################## -# Some utils needed +# Additional utils and python3 RUN apt-get update && \ - apt-get install -y wget git autoconf dh-autoreconf flex \ - python3-venv python3-dev \ - vim libnuma-dev tmux numactl - -# Install tmpi -RUN curl https://raw.githubusercontent.com/Azrael3000/tmpi/master/tmpi -o /usr/local/bin/tmpi - -# ROCm-aware MPI. Taken from: -# https://github.com/ROCmSoftwarePlatform/rocHPCG/blob/develop/install.sh -# UCX. Clang does not support some of the flags such as '-dynamic-list-data' so build UCX with gcc -RUN mkdir -p /deps && mkdir -p /opt/ucx && cd /deps && \ - git clone --branch v1.13.0 https://github.com/openucx/ucx.git ucx && \ - cd ucx && ./autogen.sh && \ - mkdir build && cd build && \ - ../contrib/configure-release --prefix=/opt/ucx/ --with-rocm=${with_rocm} \ - --without-cuda --without-java \ - --disable-params-check --disable-logging --disable-assertions --disable-debug \ - --enable-optimizations && \ - make -j $(( $(lscpu | awk '/^Socket\(s\)/{ print $2 }') * $(lscpu | awk '/^Core\(s\) per socket/{ print $4 }') )) && \ - make install && \ - rm -rf /deps/ucx - -# OpenMPI -# Build OpenMPI with `amdclang` so that `mpicc` links to the correct compiler -RUN mkdir -p /opt/openmpi && cd /deps && \ - git clone --recursive --branch v4.1.4 https://github.com/open-mpi/ompi.git openmpi && \ - cd openmpi && ./autogen.pl && \ - mkdir build && cd build && \ - ../configure CC=$AOMP/bin/amdclang CXX=$AOMP/bin/amdclang++ FC=$AOMP/bin/amdflang \ - --prefix=/opt/openmpi/ --with-ucx=/opt/ucx \ - --enable-mca-no-build=btl-uct --enable-mpi1-compatibility && \ - make -j $(( $(lscpu | awk '/^Socket\(s\)/{ print $2 }') * $(lscpu | awk '/^Core\(s\) per socket/{ print $4 }') )) && \ - make install && \ - rm -rf /deps/openmpi - -# Set OpenMPI path -ENV PATH=${PATH}:/opt/openmpi/bin:$AOMP/bin -ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/opt/openmpi/lib:$AOMP/lib -ENV OMPI_CC=$AOMP/bin/amdclang -ENV OMPI_CXX=$AOMP/bin/amdclang++ -ENV OMPI_F90=$AOMP/bin/flan - -# Set mpiexec flags for gpu aware mpi -RUN echo "--mca btl '^openib' -x UCX_TLS=sm,self,rocm_copy,rocm_ipc --mca pml ucx" > /opt/openmpi/etc/mpiexec.conf + apt-get install -y dh-autoreconf python3-venv python3-dev python3-pip # Build mpi4py against amdclang -RUN python3 -m venv /venv && \ +RUN which mpicc && \ + python3 -m venv /venv && \ /venv/bin/pip install --no-cache-dir --upgrade pip && \ - CC=$AOMP/bin/amdclang CXX=$AOMP/bin/amdclang++ /venv/bin/pip install --no-cache-dir mpi4py && \ + /venv/bin/pip install --no-cache-dir mpi4py && \ rm -rf ~/.cache/pip + ######################################################################## # AOMP for GPUs (OpenMP offloading) ######################################################################## @@ -79,6 +136,7 @@ ENV DEVITO_ARCH="aomp" ENV DEVITO_PLATFORM="amdgpuX" ENV DEVITO_LANGUAGE="openmp" + ######################################################################## # HIPCC for GPUs (HIP) ######################################################################## @@ -90,6 +148,7 @@ ENV DEVITO_ARCH="hip" ENV DEVITO_PLATFORM="amdgpuX" ENV DEVITO_LANGUAGE="hip" + ######################################################################## # Final image ######################################################################## From 8f3e8f99de66593e592e02fc2a2cb94b2cc75c18 Mon Sep 17 00:00:00 2001 From: FabioLuporini Date: Thu, 13 Apr 2023 15:44:04 +0000 Subject: [PATCH 2/6] arch: Update AOMPCompiler --- devito/arch/compiler.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/devito/arch/compiler.py b/devito/arch/compiler.py index 26c412c2b7..36c15388e8 100644 --- a/devito/arch/compiler.py +++ b/devito/arch/compiler.py @@ -487,24 +487,24 @@ def __init__(self, *args, **kwargs): if not configuration['safe-math']: self.cflags.append('-ffast-math') + language = kwargs.pop('language', configuration['language']) platform = kwargs.pop('platform', configuration['platform']) - if platform in [NVIDIAX, AMDGPUX]: + if platform is NVIDIAX: self.cflags.remove('-std=c99') + elif platform is AMDGPUX: + self.cflags.remove('-std=c99') + # Add flags for OpenMP offloading + if language in ['C', 'openmp']: + self.ldflags += ['-target', 'x86_64-pc-linux-gnu'] + self.ldflags += ['-fopenmp'] + self.ldflags += ['--offload-arch=%s' % platform.march] elif platform in [POWER8, POWER9]: # It doesn't make much sense to use AOMP on Power, but it should work self.cflags.append('-mcpu=native') else: self.cflags.append('-march=native') - # Generic amd flags - self.ldflags.extend(['-fopenmp', '-target', 'x86_64-pc-linux-gnu']) - # amdclang gpu flags, used to be part of aompcc - if platform is AMDGPUX: - self.ldflags.extend(['-fopenmp-targets=amdgcn-amd-amdhsa', - '-Xopenmp-target=amdgcn-amd-amdhsa']) - self.ldflags.append('-march=%s' % platform.march) - def __lookup_cmds__(self): self.CC = 'amdclang' self.CXX = 'amdclang++' From e116bef22679da219a80e3f1f781e93884ac5052 Mon Sep 17 00:00:00 2001 From: mloubout Date: Mon, 17 Apr 2023 09:12:10 -0400 Subject: [PATCH 3/6] arch: add few missing configs to new amd docker and arch detection --- devito/arch/archinfo.py | 6 +++++- docker/Dockerfile.amd | 31 ++++++++++++++++++++++++------- 2 files changed, 29 insertions(+), 8 deletions(-) diff --git a/devito/arch/archinfo.py b/devito/arch/archinfo.py index 57212dc1c6..bcff8eb365 100644 --- a/devito/arch/archinfo.py +++ b/devito/arch/archinfo.py @@ -701,8 +701,12 @@ def march(cls): # mygpu will only print values accepted by cuda clang in # the clang argument --cuda-gpu-arch. try: - p1 = Popen(['mygpu', '-d', 'gfx900'], stdout=PIPE, stderr=PIPE) + p1 = Popen(['offload-arch'], stdout=PIPE, stderr=PIPE) except OSError: + try: + p1 = Popen(['mygpu', '-d', fallback], stdout=PIPE, stderr=PIPE) + except OSError: + pass return fallback output, _ = p1.communicate() diff --git a/docker/Dockerfile.amd b/docker/Dockerfile.amd index aa69a6f188..858cf5fe92 100644 --- a/docker/Dockerfile.amd +++ b/docker/Dockerfile.amd @@ -2,10 +2,10 @@ # This Dockerfile contains AMD compilers ############################################################## -ARG IMAGE="rocm/dev-ubuntu-22.04:5.4.2-complete" +ARG ROCM_VERSION=5.4.2 ARG arch="aomp" -FROM ${IMAGE} as sdk-base +FROM rocm/dev-ubuntu-22.04:${ROCM_VERSION}-complete as sdk-base ARG UCX_BRANCH="v1.13.1" ARG OMPI_BRANCH="v4.1.4" @@ -35,8 +35,10 @@ RUN rm /etc/apt/sources.list.d/* \ gfortran \ flex \ hwloc \ - && apt-get clean \ - && ln -s /opt/rocm/llvm/bin/offload-arch /opt/rocm/bin/offload-arch + && apt-get clean + +# Until rocm base has it fixed +RUN ln -s /opt/rocm/llvm/bin/offload-arch /opt/rocm/bin/offload-arch # Requires cmake > 3.22 RUN mkdir -p /opt/cmake \ @@ -46,6 +48,9 @@ ENV ROCM_HOME=/opt/rocm \ UCX_HOME=/opt/ucx \ OMPI_HOME=/opt/ompi +# Install tmpi +RUN curl https://raw.githubusercontent.com/Azrael3000/tmpi/master/tmpi -o /usr/local/bin/tmpi + # Adding rocm/cmake to the Environment ENV PATH=$ROCM_HOME/bin:$ROCM_HOME/profiler/bin:$ROCM_HOME/opencl/bin:/opt/cmake/bin:$PATH \ LD_LIBRARY_PATH=$ROCM_HOME/lib:$ROCM_HOME/lib64:$ROCM_HOME/llvm/lib:$LD_LIBRARY_PATH \ @@ -73,8 +78,8 @@ RUN cd /tmp/ \ --disable-logging \ --disable-debug \ --disable-examples \ - && make -j $(nproc) \ - && make install + && make -j $(( $(lscpu | awk '/^Socket\(s\)/{ print $2 }') * $(lscpu | awk '/^Core\(s\) per socket/{ print $4 }') )) \ + && make install \ # Install OpenMPI RUN cd /tmp \ @@ -90,9 +95,13 @@ RUN cd /tmp \ --enable-mpi \ --enable-mpi-fortran=yes \ --disable-debug \ - && make -j $(nproc) \ + && make -j $(( $(lscpu | awk '/^Socket\(s\)/{ print $2 }') * $(lscpu | awk '/^Core\(s\) per socket/{ print $4 }') )) \ && make install +# Cleanup +RUN rm -rf /tmp/ompi && rm -rf /tmp/ucx + + # Adding OpenMPI and UCX to Environment ENV PATH=$OMPI_HOME/bin:$UCX_HOME/bin:$PATH \ LD_LIBRARY_PATH=$OMPI_HOME/lib:$UCX_HOME/lib:$LD_LIBRARY_PATH \ @@ -131,6 +140,10 @@ RUN which mpicc && \ # This will only trigger if arch is aomp since the final stage depends on it FROM sdk-base as aomp +# MPI env +ENV OMPI_CC="amdclang" +ENV OMPI_CXX="amdclang++" + # Devito env ENV DEVITO_ARCH="aomp" ENV DEVITO_PLATFORM="amdgpuX" @@ -143,6 +156,10 @@ ENV DEVITO_LANGUAGE="openmp" # This will only trigger if arch is hip since the final stage depends on it FROM sdk-base as hip +# MPI env +ENV OMPI_CC="hipcc" +ENV OMPI_CXX="hipcc" + # Devito env ENV DEVITO_ARCH="hip" ENV DEVITO_PLATFORM="amdgpuX" From 209f4c63a669167ba5e32e4617ec96074bd9af3b Mon Sep 17 00:00:00 2001 From: mloubout Date: Tue, 18 Apr 2023 08:46:42 -0400 Subject: [PATCH 4/6] docker: switch to predefined nproc --- docker/Dockerfile.amd | 12 +++++------- docker/Dockerfile.nvidia | 2 +- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/docker/Dockerfile.amd b/docker/Dockerfile.amd index 858cf5fe92..1a82dde980 100644 --- a/docker/Dockerfile.amd +++ b/docker/Dockerfile.amd @@ -1,5 +1,6 @@ ############################################################## # This Dockerfile contains AMD compilers +# Based on https://github.com/amd/InfinityHub-CI/tree/main/base-gpu-mpi-rocm-docker ############################################################## ARG ROCM_VERSION=5.4.2 @@ -78,8 +79,8 @@ RUN cd /tmp/ \ --disable-logging \ --disable-debug \ --disable-examples \ - && make -j $(( $(lscpu | awk '/^Socket\(s\)/{ print $2 }') * $(lscpu | awk '/^Core\(s\) per socket/{ print $4 }') )) \ - && make install \ + && make -j ${nproc} \ + && make install # Install OpenMPI RUN cd /tmp \ @@ -95,7 +96,7 @@ RUN cd /tmp \ --enable-mpi \ --enable-mpi-fortran=yes \ --disable-debug \ - && make -j $(( $(lscpu | awk '/^Socket\(s\)/{ print $2 }') * $(lscpu | awk '/^Core\(s\) per socket/{ print $4 }') )) \ + && make -j ${nproc} \ && make install # Cleanup @@ -117,7 +118,6 @@ ENV OMPI_ALLOW_RUN_AS_ROOT=1 \ OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1 \ OMPI_MCA_pml=ucx - ############################################################## # Additional Devito requirements ############################################################## @@ -127,13 +127,11 @@ RUN apt-get update && \ apt-get install -y dh-autoreconf python3-venv python3-dev python3-pip # Build mpi4py against amdclang -RUN which mpicc && \ - python3 -m venv /venv && \ +RUN python3 -m venv /venv && \ /venv/bin/pip install --no-cache-dir --upgrade pip && \ /venv/bin/pip install --no-cache-dir mpi4py && \ rm -rf ~/.cache/pip - ######################################################################## # AOMP for GPUs (OpenMP offloading) ######################################################################## diff --git a/docker/Dockerfile.nvidia b/docker/Dockerfile.nvidia index 7cad2bad75..6b3857e277 100644 --- a/docker/Dockerfile.nvidia +++ b/docker/Dockerfile.nvidia @@ -185,7 +185,7 @@ RUN cd /llvm-project/build && \ -DCLANG_OPENMP_NVPTX_DEFAULT_ARCH=sm_86 \ -DLIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES=all \ -DCMAKE_INSTALL_PREFIX=/llvm && \ - make -j $(( $(lscpu | awk '/^Socket\(s\)/{ print $2 }') * $(lscpu | awk '/^Core\(s\) per socket/{ print $4 }') )) && \ + make -j ${nproc} && \ make install # Set path From 5e8bab992d502b87114f608f8b097761b8b42d1b Mon Sep 17 00:00:00 2001 From: mloubout Date: Wed, 19 Apr 2023 09:05:20 -0400 Subject: [PATCH 5/6] docker: add ucx configs to amd docker --- docker/Dockerfile.amd | 17 ++++++++++++++--- docker/Dockerfile.nvidia | 2 ++ 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/docker/Dockerfile.amd b/docker/Dockerfile.amd index 1a82dde980..142f9c7213 100644 --- a/docker/Dockerfile.amd +++ b/docker/Dockerfile.amd @@ -113,10 +113,21 @@ ENV PATH=$OMPI_HOME/bin:$UCX_HOME/bin:$PATH \ INCLUDE=$OMPI_HOME/include:$UCX_HOME/include:$INCLUDE \ PKG_CONFIG_PATH=$OMPI_HOME/lib/pkgconfig:$UCX_HOME/lib/pkgconfig:$PKG_CONFIG_PATH + # Adding environment variable for Running as ROOT and set MCA pml to UCX -ENV OMPI_ALLOW_RUN_AS_ROOT=1 \ - OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1 \ - OMPI_MCA_pml=ucx +ENV OMPI_ALLOW_RUN_AS_ROOT=1 +ENV OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1 +ENV OMPI_MCA_rmaps_base_oversubscribe=1 +ENV OMPI_MCA_btl_base_warn_component_unused=0 +ENV OMPI_MCA_hwloc_base_binding_policy="" +ENV OMPI_MCA_pml=ucx +# Add additional UCX environment variable +ENV UCX_MEMTYPE_CACHE=no +ENV UCX_NET_DEVICES=all +ENV UCX_SHM_DEVICES=all +ENV UCX_ACC_DEVICES=all +ENV UCX_RNDV_THRESH=0 +ENV UCX_RNDV_SCHEME=get_zcopy ############################################################## # Additional Devito requirements diff --git a/docker/Dockerfile.nvidia b/docker/Dockerfile.nvidia index 6b3857e277..87c83aee75 100644 --- a/docker/Dockerfile.nvidia +++ b/docker/Dockerfile.nvidia @@ -62,6 +62,8 @@ ENV UCX_MEMTYPE_CACHE=no ENV UCX_NET_DEVICES=all ENV UCX_SHM_DEVICES=all ENV UCX_ACC_DEVICES=all +ENV UCX_RNDV_THRESH=0 +ENV UCX_RNDV_SCHEME=get_zcopy ENV NCCL_UCX_RNDV_THRESH=0 ENV NCCL_UCX_RNDV_SCHEME=get_zcopy ENV NCCL_PLUGIN_P2P=ucx From c37fafb89b7151d5f612385e12ddfff3fb18e552 Mon Sep 17 00:00:00 2001 From: FabioLuporini Date: Thu, 20 Apr 2023 16:24:16 +0100 Subject: [PATCH 6/6] install: Drop envs causing device-mem leaks with ROCM+UCX --- docker/Dockerfile.amd | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/docker/Dockerfile.amd b/docker/Dockerfile.amd index 142f9c7213..4b392a09c1 100644 --- a/docker/Dockerfile.amd +++ b/docker/Dockerfile.amd @@ -114,20 +114,9 @@ ENV PATH=$OMPI_HOME/bin:$UCX_HOME/bin:$PATH \ PKG_CONFIG_PATH=$OMPI_HOME/lib/pkgconfig:$UCX_HOME/lib/pkgconfig:$PKG_CONFIG_PATH -# Adding environment variable for Running as ROOT and set MCA pml to UCX +# Adding environment variable for Running as ROOT ENV OMPI_ALLOW_RUN_AS_ROOT=1 ENV OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1 -ENV OMPI_MCA_rmaps_base_oversubscribe=1 -ENV OMPI_MCA_btl_base_warn_component_unused=0 -ENV OMPI_MCA_hwloc_base_binding_policy="" -ENV OMPI_MCA_pml=ucx -# Add additional UCX environment variable -ENV UCX_MEMTYPE_CACHE=no -ENV UCX_NET_DEVICES=all -ENV UCX_SHM_DEVICES=all -ENV UCX_ACC_DEVICES=all -ENV UCX_RNDV_THRESH=0 -ENV UCX_RNDV_SCHEME=get_zcopy ############################################################## # Additional Devito requirements