Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

docker: add some tweaks to nvidia docker #2171

Merged
merged 3 commits into from
Aug 1, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 1 addition & 42 deletions .github/workflows/docker-bases.yml
Original file line number Diff line number Diff line change
Expand Up @@ -162,47 +162,6 @@ jobs:
build-args: 'arch=nvc-host'
tags: 'devitocodes/bases:cpu-nvc'

#######################################################
################### Nvidia clang ######################
#######################################################
deploy-nvidia-clang-base:
name: "nvidia-clang-base"
runs-on: ["self-hosted", "nvidiagpu"]
env:
DOCKER_BUILDKIT: "1"

steps:
- name: Checkout devito
uses: actions/checkout@v3

- name: Check event name
run: echo ${{ github.event_name }}

- name: Set up QEMU
uses: docker/setup-qemu-action@v2

- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v2

- name: Login to DockerHub
uses: docker/login-action@v2
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}

- name: cleanup
run: docker system prune -a -f

- name: Nvidia clang image
uses: docker/build-push-action@v3
with:
context: .
file: './docker/Dockerfile.nvidia'
push: true
target: 'clang'
build-args: 'arch=clang'
tags: 'devitocodes/bases:nvidia-clang'

#######################################################
##################### AMD #############################
#######################################################
Expand Down Expand Up @@ -252,4 +211,4 @@ jobs:
target: 'hip'
build-args: |
arch=hip
tags: devitocodes/bases:amd-hip
tags: devitocodes/bases:amd-hip
6 changes: 0 additions & 6 deletions .github/workflows/docker-devito.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,6 @@ jobs:
test: 'tests/test_gpu_openacc.py tests/test_gpu_common.py'
runner: ["self-hosted", "nvidiagpu"]

- base: 'bases:nvidia-clang'
tag: 'nvidia-clang'
flag: '--gpus all'
test: 'tests/test_gpu_openmp.py tests/test_gpu_common.py'
runner: ["self-hosted", "nvidiagpu"]

# Runtime gpu flags from https://hub.docker.com/r/rocm/tensorflow/
- base: 'bases:amd'
tag: 'amd'
Expand Down
8 changes: 0 additions & 8 deletions .github/workflows/pytest-gpu.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,20 +46,12 @@ jobs:

matrix:
name: [
pytest-gpu-omp-nvidia,
pytest-gpu-acc-nvidia,
pytest-gpu-omp-amd
]
test_examples: ["examples/seismic/tti/tti_example.py examples/seismic/acoustic/acoustic_example.py examples/seismic/viscoacoustic/viscoacoustic_example.py examples/seismic/viscoelastic/viscoelastic_example.py examples/seismic/elastic/elastic_example.py"]

include:
- name: pytest-gpu-omp-nvidia
test_files: "tests/test_adjoint.py tests/test_gpu_common.py tests/test_gpu_openmp.py"
base: "devitocodes/bases:nvidia-clang"
tags: ["self-hosted", "nvidiagpu"]
test_drive_cmd: "nvidia-smi"
flags: '--gpus all --rm --name testrun-clang-nvidia'

- name: pytest-gpu-acc-nvidia
test_files: "tests/test_adjoint.py tests/test_gpu_common.py tests/test_gpu_openacc.py"
base: "devitocodes/bases:nvidia-nvc"
Expand Down
2 changes: 1 addition & 1 deletion devito/finite_differences/derivative.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ class Derivative(sympy.Derivative, Differentiable):
__rkwargs__ = ('side', 'deriv_order', 'fd_order', 'transpose', '_ppsubs', 'x0')

def __new__(cls, expr, *dims, **kwargs):
if type(expr) == sympy.Derivative:
if type(expr) is sympy.Derivative:
raise ValueError("Cannot nest sympy.Derivative with devito.Derivative")
if not isinstance(expr, Differentiable):
raise ValueError("`expr` must be a Differentiable object")
Expand Down
2 changes: 1 addition & 1 deletion devito/ir/iet/nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ class List(Node):

def __init__(self, header=None, body=None, footer=None):
body = as_tuple(body)
if len(body) == 1 and all(type(i) == List for i in [self, body[0]]):
if len(body) == 1 and all(type(i) is List for i in [self, body[0]]):
# De-nest Lists
#
# Note: to avoid disgusting metaclass voodoo (due to
Expand Down
2 changes: 1 addition & 1 deletion devito/ir/support/space.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def __init__(self, dim, stamp=S0):
self.stamp = stamp

def __eq__(self, o):
return (type(self) == type(o) and
return (type(self) is type(o) and
self.dim is o.dim and
self.stamp == o.stamp)

Expand Down
2 changes: 1 addition & 1 deletion devito/passes/clusters/aliases.py
Original file line number Diff line number Diff line change
Expand Up @@ -932,7 +932,7 @@ def pick_best(variants, schedule_strategy, eval_variants_delta):
Return the variant with the best trade-off between operation count
reduction and working set increase. Heuristics may be applied.
"""
if type(schedule_strategy) == int:
if type(schedule_strategy) is int:
try:
return variants[schedule_strategy]
except IndexError:
Expand Down
2 changes: 1 addition & 1 deletion devito/symbolics/inspection.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def compare_ops(e1, e2):
>>> compare_ops(u[x] + u[x+1], u[x] + u[y+10])
True
"""
if type(e1) == type(e2) and len(e1.args) == len(e2.args):
if type(e1) is type(e2) and len(e1.args) == len(e2.args):
if e1.is_Atom:
return True if e1 == e2 else False
elif e1.is_Indexed and e2.is_Indexed:
Expand Down
2 changes: 1 addition & 1 deletion devito/types/dimension.py
Original file line number Diff line number Diff line change
Expand Up @@ -1582,7 +1582,7 @@ def _separate_dims(cls, d0, d1, ofs_items):


def dimensions(names):
assert type(names) == str
assert type(names) is str
return tuple(Dimension(i) for i in names.split())


Expand Down
5 changes: 0 additions & 5 deletions docker/Dockerfile.cpu
Original file line number Diff line number Diff line change
Expand Up @@ -72,11 +72,6 @@ ENV PATH $I_MPI_ROOT/libfabric/bin:$I_MPI_ROOT/bin:$I_ICC_ROOT/linux/bin/intel64
ENV LD_LIBRARY_PATH $I_MPI_ROOT/libfabric/lib:$I_MPI_ROOT/lib/release:$I_MPI_ROOT/lib:$I_ICC_ROOT/linux/lib:$I_ICC_ROOT/linux/lib/x64:$I_ICC_ROOT/linux/compiler/lib/intel64_lin:${LD_LIBRARY_PATH}
ENV FI_PROVIDER_PATH $I_MPI_ROOT/libfabric/lib/prov:/usr/lib64/libfabric:${LD_LIBRARY_PATH}

# Install intelpython
RUN wget https://registrationcenter-download.intel.com/akdlm/IRC_NAS/03aae3a8-623a-47cf-9655-5dd8fcf86430/l_pythoni39_oneapi_p_2023.2.0.49422.sh && \
chmod +x l_pythoni39_oneapi_p_2023.2.0.49422.sh && ./l_pythoni39_oneapi_p_2023.2.0.49422.sh -r yes -a -s --eula accept
ENV PATH /opt/intel/oneapi/intelpython/python3.9/bin:${PATH}

##############################################################
# ICC image
# This is a legacy setup that is not built anymore but kept for reference
Expand Down
13 changes: 13 additions & 0 deletions docker/Dockerfile.devito
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,16 @@ RUN python3 -m venv /venv && \
/venv/bin/pip install --no-cache-dir -e /app/devito[extras,mpi,tests] && \
rm -rf ~/.cache/pip

# Useful utilities
# Nvtop
RUN apt update && apt install -y git cmake libncurses5-dev libncursesw5-dev libdrm-dev libsystemd-dev cmake && \
git clone https://github.com/Syllo/nvtop.git /app/nvtop && \
mkdir -p /app/nvtop/build && cd /app/nvtop/build && \
cmake .. -DNVIDIA_SUPPORT=ON -DAMDGPU_SUPPORT=ON -DINTEL_SUPPORT=ON && \
make && \
make install && \
ln -fs /app/nvtop/build/src/nvtop /venv/bin/nvtop

# Safety cleanup
RUN apt-get clean && apt-get autoclean && apt-get autoremove && \
rm -rf /var/lib/apt/lists/*
Expand Down Expand Up @@ -63,6 +73,9 @@ RUN chmod +x /print-defaults /jupyter /tests /docker-entrypoint.sh
# Venv
COPY --from=builder --chown=app:app /venv /venv

# opt
COPY --from=builder --chown=app:app /opt /opt

# Install codecov
WORKDIR /app/devito
RUN curl -Os https://uploader.codecov.io/latest/linux/codecov && chmod +x codecov && chown app:app codecov
Expand Down
56 changes: 8 additions & 48 deletions docker/Dockerfile.nvidia
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,16 @@ ARG arch="nvc"
########################################################################
# Build base image with apt setup and common env
########################################################################
FROM ${pyversion}-slim-bullseye as sdk-base
FROM ubuntu:22.04 as sdk-base

ENV DEBIAN_FRONTEND noninteractive

RUN apt-get update -y && apt-get install -y -q gpg apt-utils curl wget vim libnuma-dev tmux numactl
RUN apt-get update -y && \
apt-get install -y -q gpg apt-utils curl wget vim libnuma-dev tmux numactl

#Install python
RUN apt-get update && \
apt-get install -y dh-autoreconf python3-venv python3-dev python3-pip

# nodesource: nvdashboard requires nodejs>=10
RUN curl https://developer.download.nvidia.com/hpc-sdk/ubuntu/DEB-GPG-KEY-NVIDIA-HPC-SDK | gpg --yes --dearmor -o /usr/share/keyrings/nvidia-hpcsdk-archive-keyring.gpg
Expand Down Expand Up @@ -81,7 +86,7 @@ RUN export NVARCH=$(ls -1 /opt/nvidia/hpc_sdk/Linux_x86_64/ | grep '\.' | head -
ln -sf /opt/nvidia/hpc_sdk/Linux_x86_64/comm_libs/${CUDA_V}/nccl /opt/nvhpc/comm_libs/nccl

# Starting with nvhpc 23.5 and cuda 12.1, hpcx and openmpi live inside the cuda version folder; only the bin directory is in the comm_libs path
RUN export CUDA_V=$(ls /opt/nvhpc/${NVARCH}/cuda/ | grep '\.') && \
RUN export CUDA_V=$(nvcc --version | sed -n 's/^.*release \([0-9]\+\.[0-9]\+\).*$/\1/p') && \
ls /opt/nvhpc/comm_libs/${CUDA_V}/hpcx/ &&\
if [ -d /opt/nvhpc/comm_libs/${CUDA_V}/hpcx ]; then \
rm -rf /opt/nvhpc/comm_libs/hpcx && rm -rf /opt/nvhpc/comm_libs/openmpi4 && \
Expand Down Expand Up @@ -167,48 +172,3 @@ FROM nvc as nvc-host
ENV DEVITO_ARCH="nvc"
ENV DEVITO_PLATFORM="cpu64"
ENV DEVITO_LANGUAGE="openmp"

########################################################################
# Build latest stable clang. This is following the wiki:
# https://en.wikibooks.org/wiki/LLVM_Compiler/Installation.
########################################################################
# This will only trigger if arch is clang since the final stage depends on it
FROM sdk-base as clang

## Install clang requirements
RUN apt-get -y update && apt-get install -y -q libelf-dev libffi-dev cmake git gcc-multilib g++-multilib

########################################################################
# Build clang 14 with gcc
########################################################################
RUN git -c advice.detachedHead=false clone --depth 1 https://github.com/devitocodes/llvm-project.git

RUN mkdir -p /llvm-project/build
RUN cd /llvm-project/build && \
cmake ../llvm/ -DCMAKE_BUILD_TYPE=Release \
-DLLVM_TARGETS_TO_BUILD="X86;NVPTX" \
-DLLVM_ENABLE_PROJECTS="clang" \
-DLLVM_ENABLE_RUNTIMES="openmp" \
-DCLANG_OPENMP_NVPTX_DEFAULT_ARCH=sm_86 \
-DLIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES=all \
-DCMAKE_INSTALL_PREFIX=/llvm && \
make -j ${nproc} && \
make install

# Set path
ENV PATH /llvm/bin:${PATH}
ENV LD_LIBRARY_PATH /llvm/lib:${LD_LIBRARY_PATH}
ENV CPATH /llvm/include:${CPATH}

RUN rm -rf llvm-project
RUN apt-get clean && apt-get autoclean && apt-get autoremove && \
rm -rf /var/lib/apt/lists/*

# Recompile mpi4py with clang
ENV OMPI_CC="clang"
RUN /venv/bin/pip install --no-cache-dir mpi4py && rm -rf ~/.cache/pip

# Devito env vars
ENV DEVITO_ARCH="clang"
ENV DEVITO_PLATFORM="nvidiaX"
ENV DEVITO_LANGUAGE="openmp"
12 changes: 2 additions & 10 deletions docker/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,9 @@ In addition, the following legacy tags are available:

### [Devito] on GPU

Second, we provide three images to run [Devito] on GPUs, tagged `devito:nvidia-nvc-*`, `devito:nvidia-clang-*`, and `devito:amd-*`.
Second, we provide two images to run [Devito] on GPUs, tagged `devito:nvidia-nvc-*` and `devito:amd-*`.

- `devito:nvidia-nvc-*` is intended to be used on NVidia GPUs. It comes with the configuration to use the `nvc` compiler for `openacc` offloading. This image also comes with CUDA-aware MPI for multi-GPU deployment.
- `devito:nvidia-clang-*` is intended to be used on NVidia GPUs. It comes with the configuration to use the `clang` compiler for `openmp` offloading. This image also comes with CUDA-aware MPI for multi-GPU deployment.
- `devito:amd-*` is intended to be used on AMD GPUs. It comes with the configuration to use the `aoompcc` compiler for `openmp` offloading. This image also comes with ROCm-aware MPI for multi-GPU deployment. This image can also be used on AMD CPUs since the ROCm compilers are preinstalled.

#### NVidia
Expand Down Expand Up @@ -93,19 +92,12 @@ To build the GPU image with `openacc` offloading and the `nvc` compiler, run:
docker build --build-arg base=devitocodes/bases:nvidia-nvc --network=host --file docker/Dockerfile.devito --tag devito .
```

or if you wish to use the `clang` compiler with `openmp` offloading:

```bash
docker build --build-arg base=devitocodes/bases:nvidia-clang --network=host --file docker/Dockerfile --tag devito .
```

and finally, for AMD architectures:
or if you wish to use the `amdclang` compiler with `openmp` offloading for AMD architectures:

```bash
docker build --build-arg base=devitocodes/bases:amd --network=host --file docker/Dockerfile.devito --tag devito .
```


## Debugging a base image

To build the base image yourself locally, you need to run the standard build command using the provided Dockerfile.
Expand Down
2 changes: 1 addition & 1 deletion requirements-nvidia.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
cupy-cuda110
cupy-cuda12x
dask-cuda
jupyterlab>=3
jupyterlab-nvdashboard
Expand Down
8 changes: 4 additions & 4 deletions tests/test_builtins.py
Original file line number Diff line number Diff line change
Expand Up @@ -490,7 +490,7 @@ def test_issue_1863(self):
assert v0 == v1
assert v0 == v2
assert v0 == v3
assert type(v0) == np.int16
assert type(v1) == np.int32
assert type(v2) == np.float32
assert type(v3) == np.float64
assert type(v0) is np.int16
assert type(v1) is np.int32
assert type(v2) is np.float32
assert type(v3) is np.float64
4 changes: 2 additions & 2 deletions tests/test_derivatives.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ def test_derivatives_space(self, derivative, dim, order):

s_expr = u.diff(dim).as_finite_difference(indices).evalf(_PRECISION)
assert(simplify(expr - s_expr) == 0) # Symbolic equality
assert type(expr) == EvalDerivative
assert type(expr) is EvalDerivative
expr1 = s_expr.func(*expr.args)
assert(expr1 == s_expr) # Exact equality

Expand All @@ -217,7 +217,7 @@ def test_second_derivatives_space(self, derivative, dim, order):
indices = [(dim + i * dim.spacing) for i in range(-width, width + 1)]
s_expr = u.diff(dim, dim).as_finite_difference(indices).evalf(_PRECISION)
assert(simplify(expr - s_expr) == 0) # Symbolic equality
assert type(expr) == EvalDerivative
assert type(expr) is EvalDerivative
expr1 = s_expr.func(*expr.args)
assert(expr1 == s_expr) # Exact equality

Expand Down
4 changes: 2 additions & 2 deletions tests/test_dimension.py
Original file line number Diff line number Diff line change
Expand Up @@ -443,8 +443,8 @@ def test_subdimmiddle_parallel(self, opt):
thickness_left=thickness, thickness_right=thickness)

# a 5 point stencil that can be computed in parallel
centre = Eq(u[t+1, xi, yi], u[t, xi, yi] + u[t, xi-1, yi]
+ u[t, xi+1, yi] + u[t, xi, yi-1] + u[t, xi, yi+1])
centre = Eq(u[t+1, xi, yi], u[t, xi, yi] + u[t, xi-1, yi] +
u[t, xi+1, yi] + u[t, xi, yi-1] + u[t, xi, yi+1])

u.data[0, 10, 10] = 1.0

Expand Down