Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

install: Overhaul Dockerfile.amd for MPI support #2104

Merged
merged 6 commits into from
Apr 21, 2023
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion devito/arch/archinfo.py
Original file line number Diff line number Diff line change
Expand Up @@ -701,8 +701,12 @@ def march(cls):
# mygpu will only print values accepted by cuda clang in
# the clang argument --cuda-gpu-arch.
try:
p1 = Popen(['mygpu', '-d', 'gfx900'], stdout=PIPE, stderr=PIPE)
p1 = Popen(['offload-arch'], stdout=PIPE, stderr=PIPE)
except OSError:
try:
p1 = Popen(['mygpu', '-d', fallback], stdout=PIPE, stderr=PIPE)
except OSError:
pass
return fallback

output, _ = p1.communicate()
Expand Down
18 changes: 9 additions & 9 deletions devito/arch/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -487,24 +487,24 @@ def __init__(self, *args, **kwargs):
if not configuration['safe-math']:
self.cflags.append('-ffast-math')

language = kwargs.pop('language', configuration['language'])
platform = kwargs.pop('platform', configuration['platform'])

if platform in [NVIDIAX, AMDGPUX]:
if platform is NVIDIAX:
self.cflags.remove('-std=c99')
elif platform is AMDGPUX:
self.cflags.remove('-std=c99')
# Add flags for OpenMP offloading
if language in ['C', 'openmp']:
self.ldflags += ['-target', 'x86_64-pc-linux-gnu']
self.ldflags += ['-fopenmp']
self.ldflags += ['--offload-arch=%s' % platform.march]
elif platform in [POWER8, POWER9]:
# It doesn't make much sense to use AOMP on Power, but it should work
self.cflags.append('-mcpu=native')
else:
self.cflags.append('-march=native')

# Generic amd flags
self.ldflags.extend(['-fopenmp', '-target', 'x86_64-pc-linux-gnu'])
FabioLuporini marked this conversation as resolved.
Show resolved Hide resolved
# amdclang gpu flags, used to be part of aompcc
if platform is AMDGPUX:
self.ldflags.extend(['-fopenmp-targets=amdgcn-amd-amdhsa',
'-Xopenmp-target=amdgcn-amd-amdhsa'])
self.ldflags.append('-march=%s' % platform.march)

def __lookup_cmds__(self):
self.CC = 'amdclang'
self.CXX = 'amdclang++'
Expand Down
189 changes: 137 additions & 52 deletions docker/Dockerfile.amd
Original file line number Diff line number Diff line change
@@ -1,71 +1,146 @@
##############################################################
# This Dockerfile contains AMD compilers
# Based on https://github.com/amd/InfinityHub-CI/tree/main/base-gpu-mpi-rocm-docker
##############################################################
FabioLuporini marked this conversation as resolved.
Show resolved Hide resolved

ARG ROCM_VERSION=5.4.2
ARG arch="aomp"

ARG ROCM_VERSION=5.4.2
FabioLuporini marked this conversation as resolved.
Show resolved Hide resolved
FROM rocm/dev-ubuntu-22.04:${ROCM_VERSION}-complete as sdk-base

ENV DEBIAN_FRONTEND noninteractive

ARG ROCM_VERSION
ENV rocm=/opt/rocm-${ROCM_VERSION}
ENV AOMP=/opt/rocm-${ROCM_VERSION}/llvm
ENV ROCM_HOME /opt/rocm
ENV HIP_HOME $ROCM_HOME/hip

# Some utils needed
RUN apt-get update && \
apt-get install -y wget git autoconf dh-autoreconf flex \
python3-venv python3-dev \
vim libnuma-dev tmux numactl
ARG UCX_BRANCH="v1.13.1"
ARG OMPI_BRANCH="v4.1.4"

# Update and Install basic Linux development tools
RUN rm /etc/apt/sources.list.d/* \
&& apt-get update \
&& DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
ca-certificates \
git \
ssh \
make \
vim \
nano \
libtinfo* \
initramfs-tools \
libelf-dev \
numactl \
wget \
tmux \
build-essential \
autoconf \
automake \
libtool \
pkg-config \
libnuma* \
gfortran \
flex \
hwloc \
&& apt-get clean
FabioLuporini marked this conversation as resolved.
Show resolved Hide resolved

# Workaround: expose offload-arch in /opt/rocm/bin until the ROCm base image has it fixed
RUN ln -s /opt/rocm/llvm/bin/offload-arch /opt/rocm/bin/offload-arch
FabioLuporini marked this conversation as resolved.
Show resolved Hide resolved

# Requires cmake > 3.22
RUN mkdir -p /opt/cmake \
&& wget --no-check-certificate --quiet -O - https://cmake.org/files/v3.22/cmake-3.22.2-linux-x86_64.tar.gz | tar --strip-components=1 -xz -C /opt/cmake

ENV ROCM_HOME=/opt/rocm \
UCX_HOME=/opt/ucx \
OMPI_HOME=/opt/ompi

# Install tmpi
RUN curl https://raw.githubusercontent.com/Azrael3000/tmpi/master/tmpi -o /usr/local/bin/tmpi

# ROCm-aware MPI. Taken from:
# https://github.com/ROCmSoftwarePlatform/rocHPCG/blob/develop/install.sh
# UCX. Clang does not support some of the flags such as '-dynamic-list-data' so build UCX with gcc
RUN mkdir -p /deps && mkdir -p /opt/ucx && cd /deps && \
git clone --branch v1.13.0 https://github.com/openucx/ucx.git ucx && \
cd ucx && ./autogen.sh && \
mkdir build && cd build && \
../contrib/configure-release --prefix=/opt/ucx/ --with-rocm=${with_rocm} \
--without-cuda --without-java \
--disable-params-check --disable-logging --disable-assertions --disable-debug \
--enable-optimizations && \
make -j $(( $(lscpu | awk '/^Socket\(s\)/{ print $2 }') * $(lscpu | awk '/^Core\(s\) per socket/{ print $4 }') )) && \
make install && \
rm -rf /deps/ucx

# OpenMPI
# Build OpenMPI with `amdclang` so that `mpicc` links to the correct compiler
RUN mkdir -p /opt/openmpi && cd /deps && \
git clone --recursive --branch v4.1.4 https://github.com/open-mpi/ompi.git openmpi && \
cd openmpi && ./autogen.pl && \
mkdir build && cd build && \
../configure CC=$AOMP/bin/amdclang CXX=$AOMP/bin/amdclang++ FC=$AOMP/bin/amdflang \
--prefix=/opt/openmpi/ --with-ucx=/opt/ucx \
--enable-mca-no-build=btl-uct --enable-mpi1-compatibility && \
make -j $(( $(lscpu | awk '/^Socket\(s\)/{ print $2 }') * $(lscpu | awk '/^Core\(s\) per socket/{ print $4 }') )) && \
make install && \
rm -rf /deps/openmpi

# Set OpenMPI path
ENV PATH=${PATH}:/opt/openmpi/bin:$AOMP/bin
ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/opt/openmpi/lib:$AOMP/lib
ENV OMPI_CC=$AOMP/bin/amdclang
FabioLuporini marked this conversation as resolved.
Show resolved Hide resolved
FabioLuporini marked this conversation as resolved.
Show resolved Hide resolved
ENV OMPI_CXX=$AOMP/bin/amdclang++
ENV OMPI_F90=$AOMP/bin/flan

# Set mpiexec flags for gpu aware mpi
RUN echo "--mca btl '^openib' -x UCX_TLS=sm,self,rocm_copy,rocm_ipc --mca pml ucx" > /opt/openmpi/etc/mpiexec.conf
# Adding rocm/cmake to the Environment
ENV PATH=$ROCM_HOME/bin:$ROCM_HOME/profiler/bin:$ROCM_HOME/opencl/bin:/opt/cmake/bin:$PATH \
LD_LIBRARY_PATH=$ROCM_HOME/lib:$ROCM_HOME/lib64:$ROCM_HOME/llvm/lib:$LD_LIBRARY_PATH \
LIBRARY_PATH=$ROCM_HOME/lib:$ROCM_HOME/lib64:$LIBRARY_PATH \
C_INCLUDE_PATH=$ROCM_HOME/include:$C_INCLUDE_PATH \
CPLUS_INCLUDE_PATH=$ROCM_HOME/include:$CPLUS_INCLUDE_PATH \
CPATH=$ROCM_HOME/include:$CPATH \
INCLUDE=$ROCM_HOME/include:$INCLUDE

WORKDIR /tmp

# Install UCX
RUN cd /tmp/ \
&& git clone https://github.com/openucx/ucx.git -b ${UCX_BRANCH} \
&& cd ucx \
&& ./autogen.sh \
&& mkdir build \
&& cd build \
&& ../contrib/configure-release --prefix=$UCX_HOME \
--with-rocm=$ROCM_HOME \
--without-knem \
--without-xpmem \
--without-cuda \
--enable-optimizations \
--disable-logging \
--disable-debug \
--disable-examples \
&& make -j ${nproc} \
&& make install

# Install OpenMPI
RUN cd /tmp \
&& git clone --recursive https://github.com/open-mpi/ompi.git -b ${OMPI_BRANCH} \
&& cd ompi \
&& ./autogen.pl \
&& mkdir build \
&& cd build \
&& ../configure --prefix=$OMPI_HOME --with-ucx=$UCX_HOME \
FabioLuporini marked this conversation as resolved.
Show resolved Hide resolved
--enable-mca-no-build=btl-uct \
--without-verbs \
--with-pmix \
--enable-mpi \
--enable-mpi-fortran=yes \
--disable-debug \
&& make -j ${nproc} \
&& make install

# Cleanup
RUN rm -rf /tmp/ompi && rm -rf /tmp/ucx


# Adding OpenMPI and UCX to Environment
ENV PATH=$OMPI_HOME/bin:$UCX_HOME/bin:$PATH \
LD_LIBRARY_PATH=$OMPI_HOME/lib:$UCX_HOME/lib:$LD_LIBRARY_PATH \
LIBRARY_PATH=$OMPI_HOME/lib:$UCX_HOME/lib:$LIBRARY_PATH \
C_INCLUDE_PATH=$OMPI_HOME/include:$UCX_HOME/include:$C_INCLUDE_PATH \
CPLUS_INCLUDE_PATH=$OMPI_HOME/include:$UCX_HOME/include:$CPLUS_INCLUDE_PATH \
CPATH=$OMPI_HOME/include:$UCX_HOME/include:$CPATH \
INCLUDE=$OMPI_HOME/include:$UCX_HOME/include:$INCLUDE \
PKG_CONFIG_PATH=$OMPI_HOME/lib/pkgconfig:$UCX_HOME/lib/pkgconfig:$PKG_CONFIG_PATH


# Add environment variables for running as root and set the MCA pml to UCX
ENV OMPI_ALLOW_RUN_AS_ROOT=1
ENV OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1
ENV OMPI_MCA_rmaps_base_oversubscribe=1
ENV OMPI_MCA_btl_base_warn_component_unused=0
ENV OMPI_MCA_hwloc_base_binding_policy=""
ENV OMPI_MCA_pml=ucx
# Additional UCX environment variables
ENV UCX_MEMTYPE_CACHE=no
ENV UCX_NET_DEVICES=all
ENV UCX_SHM_DEVICES=all
ENV UCX_ACC_DEVICES=all
ENV UCX_RNDV_THRESH=0
ENV UCX_RNDV_SCHEME=get_zcopy

##############################################################
# Additional Devito requirements
##############################################################

# Additional utils and python3
RUN apt-get update && \
apt-get install -y dh-autoreconf python3-venv python3-dev python3-pip

# Build mpi4py against amdclang
RUN python3 -m venv /venv && \
/venv/bin/pip install --no-cache-dir --upgrade pip && \
CC=$AOMP/bin/amdclang CXX=$AOMP/bin/amdclang++ /venv/bin/pip install --no-cache-dir mpi4py && \
/venv/bin/pip install --no-cache-dir mpi4py && \
rm -rf ~/.cache/pip

########################################################################
Expand All @@ -74,22 +149,32 @@ RUN python3 -m venv /venv && \
# This will only trigger if arch is aomp since the final stage depends on it
FROM sdk-base as aomp

# MPI env
ENV OMPI_CC="amdclang"
ENV OMPI_CXX="amdclang++"

# Devito env
ENV DEVITO_ARCH="aomp"
ENV DEVITO_PLATFORM="amdgpuX"
ENV DEVITO_LANGUAGE="openmp"


########################################################################
# HIPCC for GPUs (HIP)
########################################################################
# This will only trigger if arch is hip since the final stage depends on it
FROM sdk-base as hip

# MPI env
ENV OMPI_CC="hipcc"
FabioLuporini marked this conversation as resolved.
Show resolved Hide resolved
ENV OMPI_CXX="hipcc"

# Devito env
ENV DEVITO_ARCH="hip"
ENV DEVITO_PLATFORM="amdgpuX"
ENV DEVITO_LANGUAGE="hip"


########################################################################
# Final image
########################################################################
Expand Down
4 changes: 3 additions & 1 deletion docker/Dockerfile.nvidia
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,8 @@ ENV UCX_MEMTYPE_CACHE=no
ENV UCX_NET_DEVICES=all
ENV UCX_SHM_DEVICES=all
ENV UCX_ACC_DEVICES=all
ENV UCX_RNDV_THRESH=0
ENV UCX_RNDV_SCHEME=get_zcopy
ENV NCCL_UCX_RNDV_THRESH=0
ENV NCCL_UCX_RNDV_SCHEME=get_zcopy
ENV NCCL_PLUGIN_P2P=ucx
Expand Down Expand Up @@ -185,7 +187,7 @@ RUN cd /llvm-project/build && \
-DCLANG_OPENMP_NVPTX_DEFAULT_ARCH=sm_86 \
-DLIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES=all \
-DCMAKE_INSTALL_PREFIX=/llvm && \
make -j $(( $(lscpu | awk '/^Socket\(s\)/{ print $2 }') * $(lscpu | awk '/^Core\(s\) per socket/{ print $4 }') )) && \
make -j ${nproc} && \
FabioLuporini marked this conversation as resolved.
Show resolved Hide resolved
make install

# Set path
Expand Down