Skip to content

Commit

Permalink
Merge pull request #2104 from devitocodes/hip-mpi
Browse files Browse the repository at this point in the history
install: Overhaul Dockerfile.amd for MPI support
  • Loading branch information
mloubout authored Apr 21, 2023
2 parents 4afa2e1 + c37fafb commit 0c9987a
Show file tree
Hide file tree
Showing 4 changed files with 143 additions and 63 deletions.
6 changes: 5 additions & 1 deletion devito/arch/archinfo.py
Original file line number Diff line number Diff line change
Expand Up @@ -701,8 +701,12 @@ def march(cls):
# mygpu will only print values accepted by cuda clang in
# the clang argument --cuda-gpu-arch.
try:
p1 = Popen(['mygpu', '-d', 'gfx900'], stdout=PIPE, stderr=PIPE)
p1 = Popen(['offload-arch'], stdout=PIPE, stderr=PIPE)
except OSError:
try:
p1 = Popen(['mygpu', '-d', fallback], stdout=PIPE, stderr=PIPE)
except OSError:
pass
return fallback

output, _ = p1.communicate()
Expand Down
18 changes: 9 additions & 9 deletions devito/arch/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -487,24 +487,24 @@ def __init__(self, *args, **kwargs):
if not configuration['safe-math']:
self.cflags.append('-ffast-math')

language = kwargs.pop('language', configuration['language'])
platform = kwargs.pop('platform', configuration['platform'])

if platform in [NVIDIAX, AMDGPUX]:
if platform is NVIDIAX:
self.cflags.remove('-std=c99')
elif platform is AMDGPUX:
self.cflags.remove('-std=c99')
# Add flags for OpenMP offloading
if language in ['C', 'openmp']:
self.ldflags += ['-target', 'x86_64-pc-linux-gnu']
self.ldflags += ['-fopenmp']
self.ldflags += ['--offload-arch=%s' % platform.march]
elif platform in [POWER8, POWER9]:
# It doesn't make much sense to use AOMP on Power, but it should work
self.cflags.append('-mcpu=native')
else:
self.cflags.append('-march=native')

# Generic amd flags
self.ldflags.extend(['-fopenmp', '-target', 'x86_64-pc-linux-gnu'])
# amdclang gpu flags, used to be part of aompcc
if platform is AMDGPUX:
self.ldflags.extend(['-fopenmp-targets=amdgcn-amd-amdhsa',
'-Xopenmp-target=amdgcn-amd-amdhsa'])
self.ldflags.append('-march=%s' % platform.march)

def __lookup_cmds__(self):
self.CC = 'amdclang'
self.CXX = 'amdclang++'
Expand Down
178 changes: 126 additions & 52 deletions docker/Dockerfile.amd
Original file line number Diff line number Diff line change
@@ -1,71 +1,135 @@
##############################################################
# This Dockerfile contains AMD compilers
# Based on https://github.com/amd/InfinityHub-CI/tree/main/base-gpu-mpi-rocm-docker
##############################################################

ARG ROCM_VERSION=5.4.2
ARG arch="aomp"

ARG ROCM_VERSION=5.4.2
FROM rocm/dev-ubuntu-22.04:${ROCM_VERSION}-complete as sdk-base

ENV DEBIAN_FRONTEND noninteractive

ARG ROCM_VERSION
ENV rocm=/opt/rocm-${ROCM_VERSION}
ENV AOMP=/opt/rocm-${ROCM_VERSION}/llvm
ENV ROCM_HOME /opt/rocm
ENV HIP_HOME $ROCM_HOME/hip

# Some utils needed
RUN apt-get update && \
apt-get install -y wget git autoconf dh-autoreconf flex \
python3-venv python3-dev \
vim libnuma-dev tmux numactl
ARG UCX_BRANCH="v1.13.1"
ARG OMPI_BRANCH="v4.1.4"

# Update and Install basic Linux development tools
RUN rm /etc/apt/sources.list.d/* \
&& apt-get update \
&& DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
ca-certificates \
git \
ssh \
make \
vim \
nano \
libtinfo* \
initramfs-tools \
libelf-dev \
numactl \
wget \
tmux \
build-essential \
autoconf \
automake \
libtool \
pkg-config \
libnuma* \
gfortran \
flex \
hwloc \
&& apt-get clean

# Workaround until the ROCm base image provides offload-arch in /opt/rocm/bin: symlink it from the LLVM bin directory
RUN ln -s /opt/rocm/llvm/bin/offload-arch /opt/rocm/bin/offload-arch

# Requires cmake > 3.22
# Download the prebuilt CMake 3.22.2 Linux tarball and unpack it directly into
# /opt/cmake (top-level directory stripped). TLS certificate verification is
# left enabled: ca-certificates is installed earlier in this stage, and this
# tarball is executed as part of later builds, so it must not be fetched
# over an unverified connection.
RUN mkdir -p /opt/cmake \
    && wget --quiet -O - https://cmake.org/files/v3.22/cmake-3.22.2-linux-x86_64.tar.gz | tar --strip-components=1 -xz -C /opt/cmake

ENV ROCM_HOME=/opt/rocm \
UCX_HOME=/opt/ucx \
OMPI_HOME=/opt/ompi

# Install tmpi
# Fetch the script into /usr/local/bin and mark it executable: curl writes the
# output file without the execute bit, so without chmod it cannot be run by name.
# -f makes the build fail on an HTTP error instead of saving the error page as
# the script; -sS silences progress but keeps error messages; -L follows redirects.
RUN curl -fsSL https://raw.githubusercontent.com/Azrael3000/tmpi/master/tmpi -o /usr/local/bin/tmpi \
    && chmod +x /usr/local/bin/tmpi

# ROCm-aware MPI. Taken from:
# https://github.com/ROCmSoftwarePlatform/rocHPCG/blob/develop/install.sh
# UCX. Clang does not support some of the flags such as '-dynamic-list-data' so build UCX with gcc
RUN mkdir -p /deps && mkdir -p /opt/ucx && cd /deps && \
git clone --branch v1.13.0 https://github.com/openucx/ucx.git ucx && \
cd ucx && ./autogen.sh && \
mkdir build && cd build && \
../contrib/configure-release --prefix=/opt/ucx/ --with-rocm=${with_rocm} \
--without-cuda --without-java \
--disable-params-check --disable-logging --disable-assertions --disable-debug \
--enable-optimizations && \
make -j $(( $(lscpu | awk '/^Socket\(s\)/{ print $2 }') * $(lscpu | awk '/^Core\(s\) per socket/{ print $4 }') )) && \
make install && \
rm -rf /deps/ucx

# OpenMPI
# Build OpenMPI with `amdclang` so that `mpicc` links to the correct compiler
RUN mkdir -p /opt/openmpi && cd /deps && \
git clone --recursive --branch v4.1.4 https://github.com/open-mpi/ompi.git openmpi && \
cd openmpi && ./autogen.pl && \
mkdir build && cd build && \
../configure CC=$AOMP/bin/amdclang CXX=$AOMP/bin/amdclang++ FC=$AOMP/bin/amdflang \
--prefix=/opt/openmpi/ --with-ucx=/opt/ucx \
--enable-mca-no-build=btl-uct --enable-mpi1-compatibility && \
make -j $(( $(lscpu | awk '/^Socket\(s\)/{ print $2 }') * $(lscpu | awk '/^Core\(s\) per socket/{ print $4 }') )) && \
make install && \
rm -rf /deps/openmpi

# Set OpenMPI path
ENV PATH=${PATH}:/opt/openmpi/bin:$AOMP/bin
ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/opt/openmpi/lib:$AOMP/lib
ENV OMPI_CC=$AOMP/bin/amdclang
ENV OMPI_CXX=$AOMP/bin/amdclang++
ENV OMPI_F90=$AOMP/bin/flan

# Set mpiexec flags for gpu aware mpi
RUN echo "--mca btl '^openib' -x UCX_TLS=sm,self,rocm_copy,rocm_ipc --mca pml ucx" > /opt/openmpi/etc/mpiexec.conf
# Adding rocm/cmake to the Environment
ENV PATH=$ROCM_HOME/bin:$ROCM_HOME/profiler/bin:$ROCM_HOME/opencl/bin:/opt/cmake/bin:$PATH \
LD_LIBRARY_PATH=$ROCM_HOME/lib:$ROCM_HOME/lib64:$ROCM_HOME/llvm/lib:$LD_LIBRARY_PATH \
LIBRARY_PATH=$ROCM_HOME/lib:$ROCM_HOME/lib64:$LIBRARY_PATH \
C_INCLUDE_PATH=$ROCM_HOME/include:$C_INCLUDE_PATH \
CPLUS_INCLUDE_PATH=$ROCM_HOME/include:$CPLUS_INCLUDE_PATH \
CPATH=$ROCM_HOME/include:$CPATH \
INCLUDE=$ROCM_HOME/include:$INCLUDE

WORKDIR /tmp

# Install UCX
# Clone the ${UCX_BRANCH} branch/tag, configure a release build with ROCm
# support (CUDA, knem and xpmem disabled) and install it under $UCX_HOME.
# The job count uses the `nproc` command via $(nproc): `${nproc}` would expand
# an undefined shell variable to nothing, leaving a bare `make -j`, which
# places no limit on the number of parallel jobs.
RUN cd /tmp/ \
    && git clone https://github.com/openucx/ucx.git -b ${UCX_BRANCH} \
    && cd ucx \
    && ./autogen.sh \
    && mkdir build \
    && cd build \
    && ../contrib/configure-release --prefix=$UCX_HOME \
        --with-rocm=$ROCM_HOME \
        --without-knem \
        --without-xpmem \
        --without-cuda \
        --enable-optimizations \
        --disable-logging \
        --disable-debug \
        --disable-examples \
    && make -j $(nproc) \
    && make install

# Install OpenMPI
# Clone the ${OMPI_BRANCH} branch/tag (with submodules), configure it against
# the UCX installation in $UCX_HOME and install it under $OMPI_HOME.
# The job count uses the `nproc` command via $(nproc): `${nproc}` would expand
# an undefined shell variable to nothing, leaving a bare `make -j`, which
# places no limit on the number of parallel jobs.
RUN cd /tmp \
    && git clone --recursive https://github.com/open-mpi/ompi.git -b ${OMPI_BRANCH} \
    && cd ompi \
    && ./autogen.pl \
    && mkdir build \
    && cd build \
    && ../configure --prefix=$OMPI_HOME --with-ucx=$UCX_HOME \
        --enable-mca-no-build=btl-uct \
        --without-verbs \
        --with-pmix \
        --enable-mpi \
        --enable-mpi-fortran=yes \
        --disable-debug \
    && make -j $(nproc) \
    && make install

# Cleanup
RUN rm -rf /tmp/ompi && rm -rf /tmp/ucx


# Adding OpenMPI and UCX to Environment
ENV PATH=$OMPI_HOME/bin:$UCX_HOME/bin:$PATH \
LD_LIBRARY_PATH=$OMPI_HOME/lib:$UCX_HOME/lib:$LD_LIBRARY_PATH \
LIBRARY_PATH=$OMPI_HOME/lib:$UCX_HOME/lib:$LIBRARY_PATH \
C_INCLUDE_PATH=$OMPI_HOME/include:$UCX_HOME/include:$C_INCLUDE_PATH \
CPLUS_INCLUDE_PATH=$OMPI_HOME/include:$UCX_HOME/include:$CPLUS_INCLUDE_PATH \
CPATH=$OMPI_HOME/include:$UCX_HOME/include:$CPATH \
INCLUDE=$OMPI_HOME/include:$UCX_HOME/include:$INCLUDE \
PKG_CONFIG_PATH=$OMPI_HOME/lib/pkgconfig:$UCX_HOME/lib/pkgconfig:$PKG_CONFIG_PATH


# Environment variables that allow OpenMPI to run as root
ENV OMPI_ALLOW_RUN_AS_ROOT=1
ENV OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1

##############################################################
# Additional Devito requirements
##############################################################

# Additional utils and python3
RUN apt-get update && \
apt-get install -y dh-autoreconf python3-venv python3-dev python3-pip

# Build mpi4py against amdclang
RUN python3 -m venv /venv && \
/venv/bin/pip install --no-cache-dir --upgrade pip && \
CC=$AOMP/bin/amdclang CXX=$AOMP/bin/amdclang++ /venv/bin/pip install --no-cache-dir mpi4py && \
/venv/bin/pip install --no-cache-dir mpi4py && \
rm -rf ~/.cache/pip

########################################################################
Expand All @@ -74,22 +138,32 @@ RUN python3 -m venv /venv && \
# This will only trigger if arch is aomp since the final stage depends on it
FROM sdk-base as aomp

# MPI env
ENV OMPI_CC="amdclang"
ENV OMPI_CXX="amdclang++"

# Devito env
ENV DEVITO_ARCH="aomp"
ENV DEVITO_PLATFORM="amdgpuX"
ENV DEVITO_LANGUAGE="openmp"


########################################################################
# HIPCC for GPUs (HIP)
########################################################################
# This will only trigger if arch is hip since the final stage depends on it
FROM sdk-base as hip

# MPI env
ENV OMPI_CC="hipcc"
ENV OMPI_CXX="hipcc"

# Devito env
ENV DEVITO_ARCH="hip"
ENV DEVITO_PLATFORM="amdgpuX"
ENV DEVITO_LANGUAGE="hip"


########################################################################
# Final image
########################################################################
Expand Down
4 changes: 3 additions & 1 deletion docker/Dockerfile.nvidia
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,8 @@ ENV UCX_MEMTYPE_CACHE=no
ENV UCX_NET_DEVICES=all
ENV UCX_SHM_DEVICES=all
ENV UCX_ACC_DEVICES=all
ENV UCX_RNDV_THRESH=0
ENV UCX_RNDV_SCHEME=get_zcopy
ENV NCCL_UCX_RNDV_THRESH=0
ENV NCCL_UCX_RNDV_SCHEME=get_zcopy
ENV NCCL_PLUGIN_P2P=ucx
Expand Down Expand Up @@ -185,7 +187,7 @@ RUN cd /llvm-project/build && \
-DCLANG_OPENMP_NVPTX_DEFAULT_ARCH=sm_86 \
-DLIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES=all \
-DCMAKE_INSTALL_PREFIX=/llvm && \
make -j $(( $(lscpu | awk '/^Socket\(s\)/{ print $2 }') * $(lscpu | awk '/^Core\(s\) per socket/{ print $4 }') )) && \
make -j ${nproc} && \
make install

# Set path
Expand Down

0 comments on commit 0c9987a

Please sign in to comment.