Skip to content

Commit

Permalink
Merge 3706ef1 into f1525c6
Browse files Browse the repository at this point in the history
  • Loading branch information
kenhester authored Jun 23, 2021
2 parents f1525c6 + 3706ef1 commit 0482ee5
Show file tree
Hide file tree
Showing 3 changed files with 210 additions and 14 deletions.
54 changes: 41 additions & 13 deletions docker/Dockerfile.nvidia
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,19 @@
# BUILD:
# docker build --network=host --file docker/Dockerfile.nvidia --tag devito:nvidia .
#
# EXPERIMENTAL:
# LEGACY:
# (1) Option MPI 3.0:
# docker build --network=host --build-arg MPI_VER=3 --file docker/Dockerfile.nvidia --tag devito:nvidia .
#
# (2) Option MPI 4.0:
# Enabling and using MPI 4.0.5 works on R450 drivers, but is showing compatibility
# issues during testing on older R418 drivers.
#
# docker build --network=host --build-arg MPI_VER=4 --file docker/Dockerfile.nvidia --tag devito:nvidia .
#
# RUN:
# docker run --gpus all --rm -it -p 8888:8888 -p 8787:8787 -p 8786:8786 devito:nvidia
# docker run --gpus all --rm -it -p 8888:8888 -p 8787:8787 -p 8786:8786 --device=/dev/infiniband/uverbs0 --device=/dev/infiniband/rdma_cm devito:nvidia
##############################################################
FROM python:3.6

Expand All @@ -31,23 +36,29 @@ RUN apt-get update -y && \
nodejs \
liblapack-dev \
libblas-dev \
libibverbs-dev \
texlive-latex-extra texlive-fonts-recommended dvipng cm-super && \
libibverbs-dev libmlx4-1 libmlx5-1 ibutils \
texlive-latex-extra texlive-fonts-recommended dvipng cm-super \
python-dev python3-dev python3-venv && \
wget -q -P /app/ \
https://developer.download.nvidia.com/hpc-sdk/21.3/nvhpc-21-3_21.3_amd64.deb \
https://developer.download.nvidia.com/hpc-sdk/21.3/nvhpc-21-3-cuda-multi_21.3_amd64.deb \
https://developer.download.nvidia.com/hpc-sdk/21.3/nvhpc-2021_21.3_amd64.deb && \
https://developer.download.nvidia.com/hpc-sdk/21.5/nvhpc-21-5_21.5_amd64.deb \
https://developer.download.nvidia.com/hpc-sdk/21.5/nvhpc-21-5-cuda-multi_21.5_amd64.deb \
https://developer.download.nvidia.com/hpc-sdk/21.5/nvhpc-2021_21.5_amd64.deb && \
wget -q -P /app/nvcomp_exts/ \
https://developer.download.nvidia.com/compute/nvcomp/2.0/local_installers/nvcomp_exts_x86_64_ubuntu18.04-2.0.tar.gz &&\
apt-get install -y -q \
/app/nvhpc-21-3_21.3_amd64.deb \
/app/nvhpc-21-3-cuda-multi_21.3_amd64.deb \
/app/nvhpc-2021_21.3_amd64.deb && \
/app/nvhpc-21-5_21.5_amd64.deb \
/app/nvhpc-21-5-cuda-multi_21.5_amd64.deb \
/app/nvhpc-2021_21.5_amd64.deb && \
tar -xvf /app/nvcomp_exts/nvcomp_exts_x86_64_ubuntu18.04-2.0.tar.gz -C /app/nvcomp_exts && \
apt-get update -y && \
rm -rf /app/nvhpc* && \
rm -rf /app/nvcomp_exts/nvcomp* && \
rm -rf /var/lib/apt/lists/*

ARG HPCSDK_HOME=/opt/nvidia/hpc_sdk/Linux_x86_64/2021
ARG HPCSDK_CUPTI=/opt/nvidia/hpc_sdk/Linux_x86_64/2021/cuda/11.3/extras/CUPTI
ARG MPI_VER=3
ARG HPCSDK_CUPTI=/opt/nvidia/hpc_sdk/Linux_x86_64/2021/cuda/11.2/extras/CUPTI
#MPI_VER options 3,4,HPCX
ARG MPI_VER=HPCX

# nvidia-container-runtime
ENV NVIDIA_VISIBLE_DEVICES all
Expand All @@ -60,12 +71,16 @@ RUN echo "$HPCSDK_HOME/cuda/lib" >> /etc/ld.so.conf.d/nvidia.conf && \
echo "$HPCSDK_HOME/comm_libs/mpi/lib" >> /etc/ld.so.conf.d/nvidia.conf && \
echo "$HPCSDK_CUPTI/lib64" >> /etc/ld.so.conf.d/nvidia.conf && \
echo "$HPCSDK_HOME/math_libs/lib64" >> /etc/ld.so.conf.d/nvidia.conf

# Compression
ENV NVCOMP_EXTS_ROOT /app/nvcomp_exts/ubuntu18.04/11.2
ENV bitcomp_DIR $NVCOMP_EXTS_ROOT/lib/

# Compiler, CUDA, and Library paths
ENV CUDA_HOME $HPCSDK_HOME/cuda
ENV CUDA_ROOT $HPCSDK_HOME/cuda/bin
ENV PATH $HPCSDK_HOME/compilers/bin:$HPCSDK_HOME/cuda/bin:$HPCSDK_HOME/comm_libs/mpi/bin:${PATH}
ENV LD_LIBRARY_PATH $HPCSDK_HOME/cuda/lib:$HPCSDK_HOME/cuda/lib64:$HPCSDK_HOME/compilers/lib:$HPCSDK_HOME/math_libs/lib64:$HPCSDK_HOME/comm_libs/mpi/lib:$HPCSDK_CUPTI/lib64:${LD_LIBRARY_PATH}
ENV LD_LIBRARY_PATH $HPCSDK_HOME/cuda/lib:$HPCSDK_HOME/cuda/lib64:$HPCSDK_HOME/compilers/lib:$HPCSDK_HOME/math_libs/lib64:$HPCSDK_HOME/comm_libs/mpi/lib:$HPCSDK_CUPTI/lib64:bitcomp_DIR:${LD_LIBRARY_PATH}

ADD ./requirements.txt /app/requirements.txt
ADD ./requirements-optional.txt /app/requirements-optional.txt
Expand All @@ -83,8 +98,16 @@ RUN python3 -m venv /venv && \
# MPI ROOT USER DEFAULTS
ENV OMPI_ALLOW_RUN_AS_ROOT=1
ENV OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1
ENV OMPI_MCA_rmaps_base_oversubscribe=1
ENV UCX_MEMTYPE_CACHE=no
ENV UCX_TLS=self,shm,cuda_copy
ENV UCX_NET_DEVICES=all
ENV UCX_SHM_DEVICES=all
ENV UCX_ACC_DEVICES=all
ENV NCCL_UCX_RNDV_THRESH=0
ENV NCCL_UCX_RNDV_SCHEME=get_zcopy
ENV NCCL_PLUGIN_P2P=ucx
ENV UCX_TLS=rc_x,sm,shm,cuda_copy,gdr_copy,cuda_ipc
ENV MELLANOX_MOUNT_DRIVER=1

ENV CPATH $HPCSDK_HOME/comm_libs/mpi/include:${CPATH}
ENV CFLAGS=-noswitcherror
Expand All @@ -94,6 +117,11 @@ RUN if [ "x$MPI_VER" = "x4" ]; then \
ln -sf $HPCSDK_HOME/comm_libs/openmpi4/openmpi-4.0.5 \
$HPCSDK_HOME/comm_libs/mpi ; \
fi; \
if [ "x$MPI_VER" = "HPCX" ]; then \
rm -f $HPCSDK_HOME/comm_libs/mpi && \
ln -sf $HPCSDK_HOME/comm_libs/hpcx/hpcx-2.7.4/ompi \
$HPCSDK_HOME/comm_libs/mpi ; \
fi; \
/venv/bin/pip install --no-cache-dir -r /app/requirements-mpi.txt && \
rm -rf ~/.cache/pip
ENV CFLAGS=
Expand Down
168 changes: 168 additions & 0 deletions docker/Singularity.nvidia.def
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
Bootstrap: docker
From: python:3.6

%help
##############################################################
# This Dockerfile contains the additional NVIDIA compilers,
# libraries, and plugins to enable OpenACC and NVIDIA GPU
# acceleration of Devito codes.
#
# BUILD:
# singularity build --fakeroot devito.nvidia.sif docker/Singularity.nvidia.def
#
# RUN:
# singularity run --nv --writable-tmpfs devito.nvidia.sif
##############################################################

%files
./requirements.txt /app/requirements.txt
./requirements-optional.txt /app/requirements-optional.txt
./requirements-nvidia.txt /app/requirements-nvidia.txt
./requirements-mpi.txt /app/requirements-mpi.txt
./devito /app/devito
./tests /app/tests
./scripts /app/scripts
./examples /app/examples
./benchmarks /app/benchmarks
setup.cfg /app/
docker/run-jupyterlab.sh /jupyter
docker/run-tests.sh /tests
docker/run-print-defaults.sh /print-defaults
docker/entrypoint.sh /entrypoint.sh
docker/nvdashboard.json /app/nvdashboard.json

%environment
export NVIDIA_VISIBLE_DEVICES=all
export NVIDIA_DRIVER_CAPABILITIES=compute,utility

export HPCSDK_HOME=/opt/nvidia/hpc_sdk/Linux_x86_64/2021
export HPCSDK_CUPTI=/opt/nvidia/hpc_sdk/Linux_x86_64/2021/cuda/11.2/extras/CUPTI
export NVCOMP_EXTS_ROOT=/app/nvcomp_exts/ubuntu18.04/11.2
export bitcomp_DIR=$NVCOMP_EXTS_ROOT/lib/

export CUDA_HOME=$HPCSDK_HOME/cuda
export CUDA_ROOT=$HPCSDK_HOME/cuda/bin
export PATH=$HPCSDK_HOME/compilers/bin:$HPCSDK_HOME/cuda/bin:$HPCSDK_HOME/comm_libs/mpi/bin:${PATH}
export LD_LIBRARY_PATH=$HPCSDK_HOME/cuda/lib:$HPCSDK_HOME/cuda/lib64:$HPCSDK_HOME/compilers/lib:$HPCSDK_HOME/math_libs/lib64:$HPCSDK_HOME/comm_libs/mpi/lib:$HPCSDK_CUPTI/lib64:bitcomp_DIR:${LD_LIBRARY_PATH}

# entrypoint.sh in docker, but need to add in build for singularity
export PATH=/venv/bin:${PATH}
export PYTHONPATH=/app:${PYTHONPATH}

export OMPI_ALLOW_RUN_AS_ROOT=1
export OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1
export OMPI_MCA_rmaps_base_oversubscribe=1
export UCX_MEMTYPE_CACHE=no
export UCX_MEMTYPE_CACHE=no
export UCX_NET_DEVICES=all
export UCX_SHM_DEVICES=all
export UCX_ACC_DEVICES=all
export NCCL_UCX_RNDV_THRESH=0
export NCCL_UCX_RNDV_SCHEME=get_zcopy
export NCCL_PLUGIN_P2P=ucx
export UCX_TLS=rc_x,sm,shm,cuda_copy,gdr_copy,cuda_ipc
export MELLANOX_MOUNT_DRIVER=1

## Environment Variables for OpenACC Builds
# Reference: https://github.com/devitocodes/devito/wiki/FAQ#can-i-manually-modify-the-c-code-generated-by-devito-and
export DEVITO_ARCH="nvc"
export DEVITO_LANGUAGE="openacc"
export DEVITO_PLATFORM=nvidiaX

# Options: [unset, 1] For PGI openacc; Should only be set after a first execution of the benchmark
# export DEVITO_JIT_BACKDOOR=1

# Enable logging, Options: [unset, PERF, DEBUG]
export DEVITO_LOGGING=DEBUG
#export PGI_ACC_TIME=1

# Set the home directory to our app user's home.
export HOME=/app
export APP_HOME=/app

%post -c /bin/bash

export DEBIAN_FRONTEND=noninteractive

# nodesource: nvdashboard requires nodejs>=10
apt-get update -y && \
apt-get install -y -q \
apt-utils \
vim \
curl \
mpich libmpich-dev && \
curl -sL https://deb.nodesource.com/setup_12.x | bash - && \
apt-get install -y -q \
nodejs \
liblapack-dev \
libblas-dev \
libibverbs-dev libmlx4-1 libmlx5-1 ibutils \
texlive-latex-extra texlive-fonts-recommended dvipng cm-super \
python-dev python3-dev python3-venv && \
wget -q -P /app/ \
https://developer.download.nvidia.com/hpc-sdk/21.5/nvhpc-21-5_21.5_amd64.deb \
https://developer.download.nvidia.com/hpc-sdk/21.5/nvhpc-21-5-cuda-multi_21.5_amd64.deb \
https://developer.download.nvidia.com/hpc-sdk/21.5/nvhpc-2021_21.5_amd64.deb && \
wget -q -P /app/nvcomp_exts/ \
https://developer.download.nvidia.com/compute/nvcomp/2.0/local_installers/nvcomp_exts_x86_64_ubuntu18.04-2.0.tar.gz &&\
apt-get install -y -q \
/app/nvhpc-21-5_21.5_amd64.deb \
/app/nvhpc-21-5-cuda-multi_21.5_amd64.deb \
/app/nvhpc-2021_21.5_amd64.deb && \
tar -xvf /app/nvcomp_exts/nvcomp_exts_x86_64_ubuntu18.04-2.0.tar.gz -C /app/nvcomp_exts && \
apt-get update -y && \
rm -rf /app/nvhpc* && \
rm -rf /app/nvcomp_exts/nvcomp* && \
rm -rf /var/lib/apt/lists/*

export HPCSDK_HOME=/opt/nvidia/hpc_sdk/Linux_x86_64/2021
export HPCSDK_CUPTI=/opt/nvidia/hpc_sdk/Linux_x86_64/2021/cuda/11.2/extras/CUPTI

# Compiler, CUDA, and Library paths
export CUDA_HOME=$HPCSDK_HOME/cuda
export CUDA_ROOT=$HPCSDK_HOME/cuda/bin
export PATH=$HPCSDK_HOME/compilers/bin:$HPCSDK_HOME/cuda/bin:$HPCSDK_HOME/comm_libs/mpi/bin:${PATH}
export LD_LIBRARY_PATH=$HPCSDK_HOME/cuda/lib:$HPCSDK_HOME/cuda/lib64:$HPCSDK_HOME/compilers/lib:$HPCSDK_HOME/math_libs/lib64:$HPCSDK_HOME/comm_libs/mpi/lib:$HPCSDK_CUPTI/lib64:${LD_LIBRARY_PATH}

python3 -m venv /venv && \
/venv/bin/pip install --no-cache-dir --upgrade pip && \
/venv/bin/pip install --no-cache-dir wheel && \
/venv/bin/pip install --no-cache-dir -r /app/requirements.txt && \
/venv/bin/pip install --no-cache-dir -r /app/requirements-optional.txt && \
/venv/bin/pip install --no-cache-dir -r /app/requirements-nvidia.txt && \
rm -rf ~/.cache/pip

# MPI ROOT USER DEFAULTS
export CPATH=$HPCSDK_HOME/comm_libs/mpi/include:${CPATH}
export CFLAGS=-noswitcherror

#MPI 3
# Do Nothing
#MPI 4
#rm -f $HPCSDK_HOME/comm_libs/mpi && \
#ln -sf $HPCSDK_HOME/comm_libs/openmpi4/openmpi-4.0.5 $HPCSDK_HOME/comm_libs/mpi ;
#HPCX
rm -f $HPCSDK_HOME/comm_libs/mpi && \
ln -sf $HPCSDK_HOME/comm_libs/hpcx/hpcx-2.7.4/ompi $HPCSDK_HOME/comm_libs/mpi ;

/venv/bin/pip install --no-cache-dir -r /app/requirements-mpi.txt && \
rm -rf ~/.cache/pip
export CFLAGS=

chmod -R 755 /app
chmod 777 /app
chmod 777 /print-defaults /jupyter /tests /entrypoint.sh && \
/venv/bin/jupyter labextension install jupyterlab-nvdashboard && \
/venv/bin/jupyter labextension install dask-labextension && \
/venv/bin/jupyter serverextension enable dask_labextension && \
/venv/bin/jupyter lab workspaces import /app/nvdashboard.json

%runscript
echo /jupyter "$@"
cd /app
exec /jupyter "$@"

%startscript
cd /app

%test
2 changes: 1 addition & 1 deletion docker/entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,6 @@
find /app -type f -name '*.pyc' -delete

export PATH=/venv/bin:$PATH
export PYTHONPATH=$PYTHONPATH:/app
export PYTHONPATH=/app:$PYTHONPATH

exec "$@"

0 comments on commit 0482ee5

Please sign in to comment.