From b85e4468f47cc8bc687e62f30a06b2e6757ecdb0 Mon Sep 17 00:00:00 2001 From: Leonard Lausen Date: Tue, 21 Apr 2020 17:34:56 -0700 Subject: [PATCH] Unify centos7 Dockerfiles and remove install scripts (#18115) --- cd/mxnet_lib/mxnet_lib_pipeline.groovy | 4 +- cd/python/docker/Jenkins_pipeline.groovy | 4 +- cd/python/pypi/Jenkins_pipeline.groovy | 6 +- ci/Jenkinsfile_utils.groovy | 6 +- ci/build.py | 109 ++++++++------ ci/docker/Dockerfile.build.centos7 | 140 ++++++++++++++++++ ci/docker/Dockerfile.build.centos7_cpu | 41 ----- ci/docker/Dockerfile.build.centos7_gpu | 43 ------ ci/docker/Dockerfile.publish.centos7_cpu | 41 ----- .../Dockerfile.publish.centos7_gpu_cu100 | 43 ------ .../Dockerfile.publish.centos7_gpu_cu101 | 43 ------ .../Dockerfile.publish.centos7_gpu_cu102 | 43 ------ ci/docker/Dockerfile.publish.centos7_gpu_cu92 | 43 ------ ci/docker/docker-compose.yml | 84 +++++++++++ ci/docker/install/centos7_adduser.sh | 42 ------ ci/docker/install/centos7_ccache.sh | 43 ------ ci/docker/install/centos7_core.sh | 62 -------- ci/docker/install/centos7_cudnn.sh | 59 -------- ci/docker/install/centos7_nccl.sh | 35 ----- ci/docker/install/centos7_python.sh | 32 ---- ci/docker/install/centos7_scala.sh | 42 ------ ci/docker/runtime_functions.sh | 36 +++-- ci/docker_cache.py | 11 +- ci/jenkins/Jenkins_steps.groovy | 14 +- ci/publish/Jenkinsfile | 2 +- config/distribution/linux_cu92.cmake | 2 +- 26 files changed, 342 insertions(+), 688 deletions(-) create mode 100644 ci/docker/Dockerfile.build.centos7 delete mode 100644 ci/docker/Dockerfile.build.centos7_cpu delete mode 100644 ci/docker/Dockerfile.build.centos7_gpu delete mode 100644 ci/docker/Dockerfile.publish.centos7_cpu delete mode 100644 ci/docker/Dockerfile.publish.centos7_gpu_cu100 delete mode 100644 ci/docker/Dockerfile.publish.centos7_gpu_cu101 delete mode 100644 ci/docker/Dockerfile.publish.centos7_gpu_cu102 delete mode 100644 ci/docker/Dockerfile.publish.centos7_gpu_cu92 create mode 100644 
ci/docker/docker-compose.yml delete mode 100755 ci/docker/install/centos7_adduser.sh delete mode 100755 ci/docker/install/centos7_ccache.sh delete mode 100755 ci/docker/install/centos7_core.sh delete mode 100755 ci/docker/install/centos7_cudnn.sh delete mode 100755 ci/docker/install/centos7_nccl.sh delete mode 100755 ci/docker/install/centos7_python.sh delete mode 100755 ci/docker/install/centos7_scala.sh diff --git a/cd/mxnet_lib/mxnet_lib_pipeline.groovy b/cd/mxnet_lib/mxnet_lib_pipeline.groovy index 1eab29d21229..c35db6d3f25f 100644 --- a/cd/mxnet_lib/mxnet_lib_pipeline.groovy +++ b/cd/mxnet_lib/mxnet_lib_pipeline.groovy @@ -75,9 +75,9 @@ def get_stash(mxnet_variant) { // The environment corresponds to the docker files in the 'docker' directory def get_environment(mxnet_variant) { if (mxnet_variant.startsWith("cu")) { - return "publish.centos7_gpu_${mxnet_variant}" + return "centos7_gpu_${mxnet_variant}" } - return "publish.centos7_cpu" + return "centos7_cpu" } // Returns the variant appropriate jenkins node test in which diff --git a/cd/python/docker/Jenkins_pipeline.groovy b/cd/python/docker/Jenkins_pipeline.groovy index 693acc540874..5d7a580860fa 100644 --- a/cd/python/docker/Jenkins_pipeline.groovy +++ b/cd/python/docker/Jenkins_pipeline.groovy @@ -32,9 +32,9 @@ def get_pipeline(mxnet_variant) { // The environment corresponds to the docker files in the 'docker' directory def get_environment(mxnet_variant) { if (mxnet_variant.startsWith("cu")) { - return "publish.centos7_gpu_${mxnet_variant}" + return "centos7_gpu_${mxnet_variant}" } - return "publish.centos7_cpu" + return "centos7_cpu" } diff --git a/cd/python/pypi/Jenkins_pipeline.groovy b/cd/python/pypi/Jenkins_pipeline.groovy index dfd864fa1a3b..0d813deca33d 100644 --- a/cd/python/pypi/Jenkins_pipeline.groovy +++ b/cd/python/pypi/Jenkins_pipeline.groovy @@ -36,9 +36,9 @@ def get_pipeline(mxnet_variant) { def get_environment(mxnet_variant) { if (mxnet_variant.startsWith('cu')) { - return 
"publish.centos7_gpu_${mxnet_variant}" + return "centos7_gpu_${mxnet_variant}" } - return "publish.centos7_cpu" + return "centos7_cpu" } def build(mxnet_variant) { @@ -58,7 +58,7 @@ def test(mxnet_variant) { // test wheel file def environment = get_environment(mxnet_variant) def nvidia_docker = mxnet_variant.startsWith('cu') - ci_utils.docker_run(environment, "cd_integration_test_pypi python3 ${nvidia_docker}", nvidia_docker) + ci_utils.docker_run(environment, "cd_integration_test_pypi ${nvidia_docker}", nvidia_docker) } } diff --git a/ci/Jenkinsfile_utils.groovy b/ci/Jenkinsfile_utils.groovy index bf6c2d740b14..4954963ff9c6 100644 --- a/ci/Jenkinsfile_utils.groovy +++ b/ci/Jenkinsfile_utils.groovy @@ -159,9 +159,11 @@ def collect_test_results_windows(original_file_name, new_file_name) { } -def docker_run(platform, function_name, use_nvidia, shared_mem = '500m', env_vars = "") { - def command = "ci/build.py %ENV_VARS% --docker-registry ${env.DOCKER_CACHE_REGISTRY} %USE_NVIDIA% --platform %PLATFORM% --docker-build-retries 3 --shm-size %SHARED_MEM% /work/runtime_functions.sh %FUNCTION_NAME%" +def docker_run(platform, function_name, use_nvidia = false, shared_mem = '500m', env_vars = "", + build_args = "") { + def command = "ci/build.py %ENV_VARS% %BUILD_ARGS% --docker-registry ${env.DOCKER_CACHE_REGISTRY} %USE_NVIDIA% --platform %PLATFORM% --docker-build-retries 3 --shm-size %SHARED_MEM% /work/runtime_functions.sh %FUNCTION_NAME%" command = command.replaceAll('%ENV_VARS%', env_vars.length() > 0 ? "-e ${env_vars}" : '') + command = command.replaceAll('%BUILD_ARGS%', env_vars.length() > 0 ? "${build_args}" : '') command = command.replaceAll('%USE_NVIDIA%', use_nvidia ? 
'--nvidiadocker' : '') command = command.replaceAll('%PLATFORM%', platform) command = command.replaceAll('%FUNCTION_NAME%', function_name) diff --git a/ci/build.py b/ci/build.py index c6a96d5626f0..179910bf9058 100755 --- a/ci/build.py +++ b/ci/build.py @@ -29,6 +29,7 @@ import glob import pprint import re +import os import shutil import signal import subprocess @@ -36,9 +37,15 @@ from subprocess import check_call, check_output from typing import * +import yaml + from safe_docker_run import SafeDockerClient from util import * +# NOTE: Temporary whitelist used until all Dockerfiles are refactored for docker compose +DOCKER_COMPOSE_WHITELIST = ('centos7_cpu', 'centos7_gpu_cu92', 'centos7_gpu_cu100', + 'centos7_gpu_cu101', 'centos7_gpu_cu102') + def get_dockerfiles_path(): return "docker" @@ -55,6 +62,11 @@ def get_platforms(path: str = get_dockerfiles_path()) -> List[str]: def get_docker_tag(platform: str, registry: str) -> str: """:return: docker tag to be used for the container""" + if platform in DOCKER_COMPOSE_WHITELIST: + with open("docker/docker-compose.yml", "r") as f: + compose_config = yaml.load(f.read(), yaml.SafeLoader) + return compose_config["services"][platform]["image"] + platform = platform if any(x in platform for x in ['build.', 'publish.']) else 'build.{}'.format(platform) if not registry: registry = "mxnet_local" @@ -66,50 +78,57 @@ def get_dockerfile(platform: str, path=get_dockerfiles_path()) -> str: return os.path.join(path, "Dockerfile.{0}".format(platform)) -def get_docker_binary(use_nvidia_docker: bool) -> str: - return "nvidia-docker" if use_nvidia_docker else "docker" - - -def build_docker(platform: str, docker_binary: str, registry: str, num_retries: int, no_cache: bool, +def build_docker(platform: str, registry: str, num_retries: int, no_cache: bool, cache_intermediate: bool = False) -> str: """ Build a container for the given platform :param platform: Platform - :param docker_binary: docker binary to use (docker/nvidia-docker) :param 
registry: Dockerhub registry name :param num_retries: Number of retries to build the docker image :param no_cache: pass no-cache to docker to rebuild the images :return: Id of the top level image """ tag = get_docker_tag(platform=platform, registry=registry) - logging.info("Building docker container tagged '%s' with %s", tag, docker_binary) - # - # We add a user with the same group as the executing non-root user so files created in the - # container match permissions of the local user. Same for the group. - # - # These variables are used in the docker files to create user and group with these ids. - # see: docker/install/ubuntu_adduser.sh - # - # cache-from is needed so we use the cached images tagged from the remote via - # docker pull see: docker_cache.load_docker_cache - # - # This also prevents using local layers for caching: https://github.com/moby/moby/issues/33002 - # So to use local caching, we should omit the cache-from by using --no-dockerhub-cache argument to this - # script. - # - # This doesn't work with multi head docker files. - # - cmd = [docker_binary, "build", - "-f", get_dockerfile(platform), - "--build-arg", "USER_ID={}".format(os.getuid()), - "--build-arg", "GROUP_ID={}".format(os.getgid())] - if no_cache: - cmd.append("--no-cache") - if cache_intermediate: - cmd.append("--rm=false") - elif registry: - cmd.extend(["--cache-from", tag]) - cmd.extend(["-t", tag, get_dockerfiles_path()]) + + # Case 1: docker-compose + if platform in DOCKER_COMPOSE_WHITELIST: + logging.info('Building docker container tagged \'%s\' based on ci/docker/docker-compose.yml', tag) + # We add a user with the same group as the executing non-root user so files created in the + # container match permissions of the local user. Same for the group. 
+ cmd = ['docker-compose', '-f', 'docker/docker-compose.yml', 'build', + "--build-arg", "USER_ID={}".format(os.getuid()), + "--build-arg", "GROUP_ID={}".format(os.getgid())] + if cache_intermediate: + cmd.append('--no-rm') + cmd.append(platform) + else: # Case 2: Deprecated way, will be removed + # We add a user with the same group as the executing non-root user so files created in the + # container match permissions of the local user. Same for the group. + # + # These variables are used in the docker files to create user and group with these ids. + # see: docker/install/ubuntu_adduser.sh + # + # cache-from is needed so we use the cached images tagged from the remote via + # docker pull see: docker_cache.load_docker_cache + # + # This also prevents using local layers for caching: https://github.com/moby/moby/issues/33002 + # So to use local caching, we should omit the cache-from by using --no-dockerhub-cache argument to this + # script. + # + # This doesn't work with multi head docker files. + logging.info("Building docker container tagged '%s'", tag) + cmd = ["docker", "build", + "-f", get_dockerfile(platform), + "--build-arg", "USER_ID={}".format(os.getuid()), + "--build-arg", "GROUP_ID={}".format(os.getgid())] + if no_cache: + cmd.append("--no-cache") + if cache_intermediate: + cmd.append("--rm=false") + elif registry: + cmd.extend(["--cache-from", tag]) + cmd.extend(["-t", tag, get_dockerfiles_path()]) + @retry(subprocess.CalledProcessError, tries=num_retries) def run_cmd(): @@ -117,21 +136,22 @@ def run_cmd(): check_call(cmd) run_cmd() + # Get image id by reading the tag. It's guaranteed (except race condition) that the tag exists. 
Otherwise, the # check_call would have failed - image_id = _get_local_image_id(docker_binary=docker_binary, docker_tag=tag) + image_id = _get_local_image_id(docker_tag=tag) if not image_id: raise FileNotFoundError('Unable to find docker image id matching with {}'.format(tag)) return image_id -def _get_local_image_id(docker_binary, docker_tag): +def _get_local_image_id(docker_tag): """ Get the image id of the local docker layer with the passed tag :param docker_tag: docker tag :return: Image id as string or None if tag does not exist """ - cmd = [docker_binary, "images", "-q", docker_tag] + cmd = ["docker", "images", "-q", docker_tag] image_id_b = check_output(cmd) image_id = image_id_b.decode('utf-8').strip() if not image_id: @@ -196,7 +216,7 @@ def container_run(docker_client: SafeDockerClient, # Equivalent command docker_cmd_list = [ - get_docker_binary(nvidia_runtime), + "nvidia-docker" if nvidia_runtime else "docker", 'run', "--cap-add", "SYS_PTRACE", # Required by ASAN @@ -352,7 +372,6 @@ def main() -> int: args = parser.parse_args() command = list(chain(*args.command)) - docker_binary = get_docker_binary(args.nvidiadocker) docker_client = SafeDockerClient() environment = dict([(e.split('=')[:2] if '=' in e else (e, os.environ[e])) @@ -363,12 +382,12 @@ def main() -> int: elif args.platform: platform = args.platform tag = get_docker_tag(platform=platform, registry=args.docker_registry) - if args.docker_registry: + if args.docker_registry and platform not in DOCKER_COMPOSE_WHITELIST: + # Caching logic for Dockerfiles not yet refactored with compose load_docker_cache(tag=tag, docker_registry=args.docker_registry) if not args.run_only: - build_docker(platform=platform, docker_binary=docker_binary, registry=args.docker_registry, - num_retries=args.docker_build_retries, no_cache=args.no_cache, - cache_intermediate=args.cache_intermediate) + build_docker(platform=platform, registry=args.docker_registry, num_retries=args.docker_build_retries, + 
no_cache=args.no_cache, cache_intermediate=args.cache_intermediate) else: logging.info("Skipping docker build step.") @@ -410,8 +429,8 @@ def main() -> int: for platform in platforms: tag = get_docker_tag(platform=platform, registry=args.docker_registry) load_docker_cache(tag=tag, docker_registry=args.docker_registry) - build_docker(platform, docker_binary=docker_binary, registry=args.docker_registry, - num_retries=args.docker_build_retries, no_cache=args.no_cache) + build_docker(platform, registry=args.docker_registry, num_retries=args.docker_build_retries, + no_cache=args.no_cache) if args.build_only: continue shutil.rmtree(buildir(), ignore_errors=True) diff --git a/ci/docker/Dockerfile.build.centos7 b/ci/docker/Dockerfile.build.centos7 new file mode 100644 index 000000000000..ce74d9e896b6 --- /dev/null +++ b/ci/docker/Dockerfile.build.centos7 @@ -0,0 +1,140 @@ +# -*- mode: dockerfile -*- +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# Dockerfile declaring CentOS 7 related images. +# Via the CentOS 7 Dockerfiles, we ensure MXNet continues to run fine on older systems. +# +# See docker-compose.yml for supported BASE_IMAGE ARGs and targets. 
+ +#################################################################################################### +# The Dockerfile uses a dynamic BASE_IMAGE (for example centos:7, nvidia/cuda:10.2-devel-centos7 etc) +# On top of BASE_IMAGE we install all dependencies shared by all MXNet build environments into a +# "base" target. At the end of this file, we specialize "base" for specific use cases. +# The target built by docker can be selected via "--target" option or docker-compose.yml +#################################################################################################### +ARG BASE_IMAGE +FROM $BASE_IMAGE AS base + +WORKDIR /work/deps + +RUN yum -y check-update || true && \ + yum -y install epel-release centos-release-scl && \ + yum install -y \ + # Utilities + wget \ + unzip \ + patchelf \ + pandoc \ + # Development tools + git \ + make \ + ninja-build \ + automake \ + autoconf \ + libtool \ + protobuf-compiler \ + protobuf-devel \ + # CentOS Software Collections https://www.softwarecollections.org + devtoolset-7 \ + rh-python35 \ + rh-maven35 \ + # Libraries + # Provide cblas header files + atlas-devel \ + openblas-devel \ + lapack-devel \ + opencv-devel \ + openssl-devel \ + zeromq-devel \ + # Build-dependencies for ccache 3.7.9 + gperf \ + libb2-devel \ + libzstd-devel && \ + yum clean all && \ + # Centos 7 only provides ninja-build + ln -s /usr/bin/ninja-build /usr/bin/ninja + +# Make GCC7, Python 3.5 and Maven 3.3 Software Collections available by default +# during build and runtime of this container +SHELL [ "/usr/bin/scl", "enable", "devtoolset-7", "rh-python35", "rh-maven35" ] + +# Install minimum required cmake version +RUN cd /usr/local/src && \ + wget -nv https://cmake.org/files/v3.13/cmake-3.13.5-Linux-x86_64.sh && \ + sh cmake-3.13.5-Linux-x86_64.sh --prefix=/usr/local --skip-license && \ + rm cmake-3.13.5-Linux-x86_64.sh + +# ccache 3.7.9 has fixes for caching nvcc outputs +RUN cd /usr/local/src && \ + git clone --recursive 
https://github.com/ccache/ccache.git && \ + cd ccache && \ + git checkout v3.7.9 && \ + ./autogen.sh && \ + ./configure --disable-man && \ + make -j$(nproc) && \ + make install && \ + cd /usr/local/src && \ + rm -rf ccache + +# Python dependencies +RUN pip3 install --no-cache-dir --upgrade pip && \ + pip3 install --no-cache-dir nose pylint cython numpy nose-timer requests h5py scipy==1.2.3 wheel + + +ARG USER_ID=0 +# Add user in order to make sure the assumed user the container is running under +# actually exists inside the container to avoid problems like missing home dir +RUN if [[ "$USER_ID" -gt 0 ]]; then \ + # --no-log-init required due to https://github.com/moby/moby/issues/5419 + useradd -m --no-log-init --uid $USER_ID --system jenkins_slave; \ + usermod -aG wheel jenkins_slave; \ + # By default, docker creates all WORK_DIRs with root owner + mkdir /work/mxnet; \ + mkdir /work/build; \ + chown -R jenkins_slave /work/; \ + fi + +ENV PYTHONPATH=./python/ +WORKDIR /work/mxnet + +COPY runtime_functions.sh /work/ + +#################################################################################################### +# Specialize base image to install more gpu specific dependencies. 
+# The target built by docker can be selected via "--target" option or docker-compose.yml +#################################################################################################### +FROM base as gpu +# Different Cuda versions require different NCCL versions +# https://wiki.bash-hackers.org/syntax/pe#search_and_replace +RUN export SHORT_CUDA_VERSION=${CUDA_VERSION%.*} && \ + if [[ ${SHORT_CUDA_VERSION} == 9.2 ]]; then \ + export NCCL_VERSION=2.4.8; \ + elif [[ ${SHORT_CUDA_VERSION} == 10.* ]]; then \ + export NCCL_VERSION=2.6.4; \ + else \ + echo "ERROR: Cuda ${SHORT_CUDA_VERSION} not yet supported in Dockerfile.build.centos7"; \ + exit 1; \ + fi && \ + curl -fsSL https://developer.download.nvidia.com/compute/machine-learning/repos/rhel7/x86_64/nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm -O && \ + rpm -i nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm && \ + yum -y check-update || true && \ + yum -y install \ + libnccl-${NCCL_VERSION}-1+cuda${SHORT_CUDA_VERSION} \ + libnccl-devel-${NCCL_VERSION}-1+cuda${SHORT_CUDA_VERSION} \ + libnccl-static-${NCCL_VERSION}-1+cuda${SHORT_CUDA_VERSION} && \ + yum clean all diff --git a/ci/docker/Dockerfile.build.centos7_cpu b/ci/docker/Dockerfile.build.centos7_cpu deleted file mode 100644 index 0cfa5a9f6e47..000000000000 --- a/ci/docker/Dockerfile.build.centos7_cpu +++ /dev/null @@ -1,41 +0,0 @@ -# -*- mode: dockerfile -*- -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -# Dockerfile to build and run MXNet on CentOS 7 for CPU - -FROM centos:7 - -WORKDIR /work/deps - -COPY install/centos7_core.sh /work/ -RUN /work/centos7_core.sh -COPY install/centos7_ccache.sh /work/ -RUN /work/centos7_ccache.sh -COPY install/centos7_python.sh /work/ -RUN /work/centos7_python.sh -COPY install/centos7_scala.sh /work/ -RUN /work/centos7_scala.sh - -ARG USER_ID=0 -COPY install/centos7_adduser.sh /work/ -RUN /work/centos7_adduser.sh - -ENV PYTHONPATH=./python/ -WORKDIR /work/mxnet - -COPY runtime_functions.sh /work/ diff --git a/ci/docker/Dockerfile.build.centos7_gpu b/ci/docker/Dockerfile.build.centos7_gpu deleted file mode 100644 index 7e49e88b3a52..000000000000 --- a/ci/docker/Dockerfile.build.centos7_gpu +++ /dev/null @@ -1,43 +0,0 @@ -# -*- mode: dockerfile -*- -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. 
See the License for the -# specific language governing permissions and limitations -# under the License. -# -# Dockerfile to build and run MXNet on CentOS 7 for GPU - -FROM nvidia/cuda:10.1-devel-centos7 - -WORKDIR /work/deps - -COPY install/centos7_core.sh /work/ -RUN /work/centos7_core.sh -COPY install/centos7_ccache.sh /work/ -RUN /work/centos7_ccache.sh -COPY install/centos7_python.sh /work/ -RUN /work/centos7_python.sh - -ENV CUDNN_VERSION=7.6.0.64 -COPY install/centos7_cudnn.sh /work/ -RUN /work/centos7_cudnn.sh - -ARG USER_ID=0 -COPY install/centos7_adduser.sh /work/ -RUN /work/centos7_adduser.sh - -ENV PYTHONPATH=./python/ -WORKDIR /work/mxnet - -COPY runtime_functions.sh /work/ diff --git a/ci/docker/Dockerfile.publish.centos7_cpu b/ci/docker/Dockerfile.publish.centos7_cpu deleted file mode 100644 index 3a87f8e45fff..000000000000 --- a/ci/docker/Dockerfile.publish.centos7_cpu +++ /dev/null @@ -1,41 +0,0 @@ -# -*- mode: dockerfile -*- -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-# -# Dockerfile to build and run MXNet on CentOS 7 for CPU - -FROM centos:7 - -WORKDIR /work/deps - -COPY install/centos7_core.sh /work/ -RUN /work/centos7_core.sh -COPY install/centos7_ccache.sh /work/ -RUN /work/centos7_ccache.sh -COPY install/centos7_python.sh /work/ -RUN /work/centos7_python.sh -COPY install/centos7_scala.sh /work/ -RUN /work/centos7_scala.sh - -ARG USER_ID=0 -COPY install/centos7_adduser.sh /work/ -RUN /work/centos7_adduser.sh - -ENV PYTHONPATH=./python/ -WORKDIR /work/mxnet - -COPY runtime_functions.sh /work/ diff --git a/ci/docker/Dockerfile.publish.centos7_gpu_cu100 b/ci/docker/Dockerfile.publish.centos7_gpu_cu100 deleted file mode 100644 index ce5aa3c8d468..000000000000 --- a/ci/docker/Dockerfile.publish.centos7_gpu_cu100 +++ /dev/null @@ -1,43 +0,0 @@ -# -*- mode: dockerfile -*- -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -FROM nvidia/cuda:10.0-cudnn7-devel-centos7 - -WORKDIR /work/deps - -COPY install/centos7_core.sh /work/ -RUN /work/centos7_core.sh -COPY install/centos7_ccache.sh /work/ -RUN /work/centos7_ccache.sh -COPY install/centos7_python.sh /work/ -RUN /work/centos7_python.sh -COPY install/centos7_scala.sh /work/ -RUN /work/centos7_scala.sh -ENV SHORT_CUDA_VERSION=10.0 -ENV SHORT_NCCL_VERSION=2.6.4 -COPY install/centos7_nccl.sh /work/ -RUN /work/centos7_nccl.sh - -ARG USER_ID=0 -COPY install/centos7_adduser.sh /work/ -RUN /work/centos7_adduser.sh - -ENV PYTHONPATH=./python/ -WORKDIR /work/mxnet - -COPY runtime_functions.sh /work/ diff --git a/ci/docker/Dockerfile.publish.centos7_gpu_cu101 b/ci/docker/Dockerfile.publish.centos7_gpu_cu101 deleted file mode 100644 index 2b207748db95..000000000000 --- a/ci/docker/Dockerfile.publish.centos7_gpu_cu101 +++ /dev/null @@ -1,43 +0,0 @@ -# -*- mode: dockerfile -*- -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -FROM nvidia/cuda:10.1-cudnn7-devel-centos7 - -WORKDIR /work/deps - -COPY install/centos7_core.sh /work/ -RUN /work/centos7_core.sh -COPY install/centos7_ccache.sh /work/ -RUN /work/centos7_ccache.sh -COPY install/centos7_python.sh /work/ -RUN /work/centos7_python.sh -COPY install/centos7_scala.sh /work/ -RUN /work/centos7_scala.sh -ENV SHORT_CUDA_VERSION=10.1 -ENV SHORT_NCCL_VERSION=2.6.4 -COPY install/centos7_nccl.sh /work/ -RUN /work/centos7_nccl.sh - -ARG USER_ID=0 -COPY install/centos7_adduser.sh /work/ -RUN /work/centos7_adduser.sh - -ENV PYTHONPATH=./python/ -WORKDIR /work/mxnet - -COPY runtime_functions.sh /work/ diff --git a/ci/docker/Dockerfile.publish.centos7_gpu_cu102 b/ci/docker/Dockerfile.publish.centos7_gpu_cu102 deleted file mode 100644 index 564ea01b887f..000000000000 --- a/ci/docker/Dockerfile.publish.centos7_gpu_cu102 +++ /dev/null @@ -1,43 +0,0 @@ -# -*- mode: dockerfile -*- -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -FROM nvidia/cuda:10.2-cudnn7-devel-centos7 - -WORKDIR /work/deps - -COPY install/centos7_core.sh /work/ -RUN /work/centos7_core.sh -COPY install/centos7_ccache.sh /work/ -RUN /work/centos7_ccache.sh -COPY install/centos7_python.sh /work/ -RUN /work/centos7_python.sh -COPY install/centos7_scala.sh /work/ -RUN /work/centos7_scala.sh -ENV SHORT_CUDA_VERSION=10.2 -ENV SHORT_NCCL_VERSION=2.6.4 -COPY install/centos7_nccl.sh /work/ -RUN /work/centos7_nccl.sh - -ARG USER_ID=0 -COPY install/centos7_adduser.sh /work/ -RUN /work/centos7_adduser.sh - -ENV PYTHONPATH=./python/ -WORKDIR /work/mxnet - -COPY runtime_functions.sh /work/ diff --git a/ci/docker/Dockerfile.publish.centos7_gpu_cu92 b/ci/docker/Dockerfile.publish.centos7_gpu_cu92 deleted file mode 100644 index 397954d63597..000000000000 --- a/ci/docker/Dockerfile.publish.centos7_gpu_cu92 +++ /dev/null @@ -1,43 +0,0 @@ -# -*- mode: dockerfile -*- -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -FROM nvidia/cuda:9.2-cudnn7-devel-centos7 - -WORKDIR /work/deps - -COPY install/centos7_core.sh /work/ -RUN /work/centos7_core.sh -COPY install/centos7_ccache.sh /work/ -RUN /work/centos7_ccache.sh -COPY install/centos7_python.sh /work/ -RUN /work/centos7_python.sh -COPY install/centos7_scala.sh /work/ -RUN /work/centos7_scala.sh -ENV SHORT_CUDA_VERSION=9.2 -ENV SHORT_NCCL_VERSION=2.4.8 -COPY install/centos7_nccl.sh /work/ -RUN /work/centos7_nccl.sh - -ARG USER_ID=0 -COPY install/centos7_adduser.sh /work/ -RUN /work/centos7_adduser.sh - -ENV PYTHONPATH=./python/ -WORKDIR /work/mxnet - -COPY runtime_functions.sh /work/ diff --git a/ci/docker/docker-compose.yml b/ci/docker/docker-compose.yml new file mode 100644 index 000000000000..4e6eb5492797 --- /dev/null +++ b/ci/docker/docker-compose.yml @@ -0,0 +1,84 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# We use the cache_from feature introduced in file form version 3.4 (released 2017-11-01) +version: "3.4" + +# For simplicity, only the centos7_cpu is commented. But the comments apply to +# all other services as well. 
+services: + centos7_cpu: + # The resulting image will be named build.centos7_cpu:latest and will be + # pushed to mxnetci under this name + image: build.centos7_cpu:latest + build: + context: . + dockerfile: Dockerfile.build.centos7 + # Use "base" target declared in Dockerfile.build.centos7 as "build.centos7_cpu:latest" + target: base + args: + # BASE_IMAGE is used to dynamically specify the FROM image in Dockerfile.build.centos7 + BASE_IMAGE: centos:7 + cache_from: + # Due to https://github.com/moby/moby/issues/32612, we have to specify + # the local image tag in addition to the dockerhub tag. + - build.centos7_cpu:latest + - mxnetci/build.centos7_cpu:latest + centos7_gpu_cu92: + image: build.centos7_gpu_cu92:latest + build: + context: . + dockerfile: Dockerfile.build.centos7 + target: gpu + args: + BASE_IMAGE: nvidia/cuda:9.2-cudnn7-devel-centos7 + cache_from: + - build.centos7_gpu_cu92:latest + - mxnetci/build.centos7_gpu_cu92:latest + centos7_gpu_cu100: + image: build.centos7_gpu_cu100:latest + build: + context: . + dockerfile: Dockerfile.build.centos7 + target: gpu + args: + BASE_IMAGE: nvidia/cuda:10.0-cudnn7-devel-centos7 + cache_from: + - build.centos7_gpu_cu100:latest + - mxnetci/build.centos7_gpu_cu100:latest + centos7_gpu_cu101: + image: build.centos7_gpu_cu101:latest + build: + context: . + dockerfile: Dockerfile.build.centos7 + target: gpu + args: + BASE_IMAGE: nvidia/cuda:10.1-cudnn7-devel-centos7 + cache_from: + - build.centos7_gpu_cu101:latest + - mxnetci/build.centos7_gpu_cu101:latest + centos7_gpu_cu102: + image: build.centos7_gpu_cu102:latest + build: + context: . 
+ dockerfile: Dockerfile.build.centos7 + target: gpu + args: + BASE_IMAGE: nvidia/cuda:10.2-cudnn7-devel-centos7 + cache_from: + - build.centos7_gpu_cu102:latest + - mxnetci/build.centos7_gpu_cu102:latest diff --git a/ci/docker/install/centos7_adduser.sh b/ci/docker/install/centos7_adduser.sh deleted file mode 100755 index f9d2402c9554..000000000000 --- a/ci/docker/install/centos7_adduser.sh +++ /dev/null @@ -1,42 +0,0 @@ -#!/usr/bin/env bash - -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# Add user in order to make sure the assumed user the container is running under -# actually exists inside the container to avoid problems like missing home dir - - -set -ex - -# $USER_ID is coming from build.py:build_docker passed as --build-arg -if [[ "$USER_ID" -gt 0 ]] -then - # -no-log-init required due to https://github.com/moby/moby/issues/5419 - useradd -m --no-log-init --uid $USER_ID --system jenkins_slave - usermod -aG wheel jenkins_slave - - # By default, docker creates all WORK_DIRs with root owner - mkdir /work/mxnet - mkdir /work/build - chown -R jenkins_slave /work/ - - # Later on, we have to override the links because underlying build systems ignore our compiler settings. 
Thus, - # we have to give the process the proper permission to these files. This is hacky, but unfortunately - # there's no better way to do this without patching all our submodules. - chown -R jenkins_slave /usr/local/bin -fi diff --git a/ci/docker/install/centos7_ccache.sh b/ci/docker/install/centos7_ccache.sh deleted file mode 100755 index 955287b228e8..000000000000 --- a/ci/docker/install/centos7_ccache.sh +++ /dev/null @@ -1,43 +0,0 @@ -#!/usr/bin/env bash - -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# Script to build ccache for centos7 based images - -set -ex - -pushd . - -yum -y install autoconf libb2-devel libzstd-devel gperf - -mkdir -p /work/deps -cd /work/deps - -git clone --recursive https://github.com/ccache/ccache.git -cd ccache -git checkout v3.7.9 - -./autogen.sh -./configure --disable-man -make -j$(nproc) -make install - -cd /work/deps -rm -rf /work/deps/ccache - -popd diff --git a/ci/docker/install/centos7_core.sh b/ci/docker/install/centos7_core.sh deleted file mode 100755 index 119d172ff70f..000000000000 --- a/ci/docker/install/centos7_core.sh +++ /dev/null @@ -1,62 +0,0 @@ -#!/usr/bin/env bash - -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# build and install are separated so changes to build don't invalidate -# the whole docker cache for the image - -set -ex - -# Multipackage installation does not fail in yum -yum -y install epel-release -yum -y install git -yum -y install wget -yum -y install atlas-devel # Provide clbas headerfiles -yum -y install openblas-devel -yum -y install lapack-devel -yum -y install opencv-devel -yum -y install protobuf-compiler -yum -y install protobuf-devel -yum -y install zeromq-devel -yum -y install openssl-devel -yum -y install gcc-c++-4.8.* -yum -y install gcc-gfortran -yum -y install make -yum -y install wget -yum -y install unzip -yum -y install ninja-build -yum -y install automake -yum -y install autoconf -yum -y install libtool -yum -y install patchelf -yum -y install pandoc - -# gcc7 -yum -y install centos-release-scl -yum -y install devtoolset-7 - -# Centos 7 only provides ninja-build -ln -s /usr/bin/ninja-build /usr/bin/ninja - -# CMake 3.13.2+ is required -mkdir /opt/cmake && cd /opt/cmake -wget -nv https://cmake.org/files/v3.13/cmake-3.13.5-Linux-x86_64.sh -sh cmake-3.13.5-Linux-x86_64.sh --prefix=/opt/cmake --skip-license -ln -s /opt/cmake/bin/cmake /usr/local/bin/cmake -rm cmake-3.13.5-Linux-x86_64.sh -cmake --version diff --git a/ci/docker/install/centos7_cudnn.sh 
b/ci/docker/install/centos7_cudnn.sh deleted file mode 100755 index 69f285017447..000000000000 --- a/ci/docker/install/centos7_cudnn.sh +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env bash - -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# build and install are separated so changes to build don't invalidate -# the whole docker cache for the image - -set -ex - -if [ -z ${CUDA_VERSION} ]; then - echo "Error: CUDA_VERSION environment variable undefined" - exit 1 -fi - -if [ -z ${CUDNN_VERSION} ]; then - echo "Error: CUDNN_VERSION environment variable undefined" - exit 1 -fi - -SHORT_CUDA_VERSION="" -SHORT_CUDNN_VERSION="" - -if [[ ${CUDA_VERSION} =~ ([0-9]+\.[0-9]+)\.* ]]; then - SHORT_CUDA_VERSION=${BASH_REMATCH[1]} -else - echo "Error: CUDA_VERSION (${CUDA_VERSION}) did not match expected format [0-9]+.[0-9]+.*" -fi - -if [[ ${CUDNN_VERSION} =~ ([0-9]+\.[0-9]+\.[0-9]+)\.* ]]; then - SHORT_CUDNN_VERSION=${BASH_REMATCH[1]} -else - echo "Error: CUDNN_VERSION (${CUDNN_VERSION}) did not match expected format [0-9]+.[0-9]+.[0-9]+.*" -fi - -# Multipackage installation does not fail in yum -CUDNN_PKG="cudnn-${SHORT_CUDA_VERSION}-linux-x64-v${CUDNN_VERSION}.tgz" 
-CUDNN_PKG_URL="http://developer.download.nvidia.com/compute/redist/cudnn/v${SHORT_CUDNN_VERSION}/${CUDNN_PKG}" -CUDNN_DOWNLOAD_SUM=`curl -fsSL "${CUDNN_PKG_URL}.sha256"` - -curl -fsSL ${CUDNN_PKG_URL} -O -echo "${CUDNN_DOWNLOAD_SUM}" | sha256sum -c - -tar --no-same-owner -xzf ${CUDNN_PKG} -C /usr/local -rm ${CUDNN_PKG} -ldconfig diff --git a/ci/docker/install/centos7_nccl.sh b/ci/docker/install/centos7_nccl.sh deleted file mode 100755 index 7a14f104b328..000000000000 --- a/ci/docker/install/centos7_nccl.sh +++ /dev/null @@ -1,35 +0,0 @@ -#!/usr/bin/env bash - -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -set -ex - - -if [ -z ${SHORT_CUDA_VERSION} ]; then - echo "Error: SHORT_CUDA_VERSION environment variable undefined" - exit 1 -fi -if [ -z ${SHORT_NCCL_VERSION} ]; then - echo "Error: SHORT_NCCL_VERSION environment variable undefined" - exit 1 -fi - -curl -fsSL https://developer.download.nvidia.com/compute/machine-learning/repos/rhel7/x86_64/nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm -O -rpm -i nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm -yum check-update || true # exit code 100 in case of available updates -yum install -y libnccl-${SHORT_NCCL_VERSION}-1+cuda${SHORT_CUDA_VERSION} libnccl-devel-${SHORT_NCCL_VERSION}-1+cuda${SHORT_CUDA_VERSION} libnccl-static-${SHORT_NCCL_VERSION}-1+cuda${SHORT_CUDA_VERSION} diff --git a/ci/docker/install/centos7_python.sh b/ci/docker/install/centos7_python.sh deleted file mode 100755 index 06c53bea48c1..000000000000 --- a/ci/docker/install/centos7_python.sh +++ /dev/null @@ -1,32 +0,0 @@ -#!/usr/bin/env bash - -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -# build and install are separated so changes to build don't invalidate -# the whole docker cache for the image - -set -ex - - # Python 2.7 is installed by default, install 3.6 on top -yum -y install https://centos7.iuscommunity.org/ius-release.rpm -yum -y install python36u - -# Install PIP -curl "https://bootstrap.pypa.io/get-pip.py" -o "get-pip.py" -python3.6 get-pip.py -pip3 install nose pylint numpy nose-timer requests h5py scipy==1.2.3 diff --git a/ci/docker/install/centos7_scala.sh b/ci/docker/install/centos7_scala.sh deleted file mode 100755 index df0d7a152bb1..000000000000 --- a/ci/docker/install/centos7_scala.sh +++ /dev/null @@ -1,42 +0,0 @@ -#!/usr/bin/env bash - -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -# build and install are separated so changes to build don't invalidate -# the whole docker cache for the image - -set -ex - -yum install -y java-1.8.0-openjdk-devel -export JAVA_HOME=/usr/lib/jvm/jre-1.8.0-openjdk -export PATH=$JAVA_HOME/bin:$PATH - -# Build from source with Maven -curl -o apache-maven-3.3.9-bin.tar.gz -L http://www.eu.apache.org/dist/maven/maven-3/3.3.9/binaries/apache-maven-3.3.9-bin.tar.gz \ - || curl -o apache-maven-3.3.9-bin.tar.gz -L https://search.maven.org/remotecontent?filepath=org/apache/maven/apache-maven/3.3.9/apache-maven-3.3.9-bin.tar.gz - -tar xzf apache-maven-3.3.9-bin.tar.gz -mkdir /usr/local/maven -mv apache-maven-3.3.9/ /usr/local/maven/ -alternatives --install /usr/bin/mvn mvn /usr/local/maven/apache-maven-3.3.9/bin/mvn 1 - -echo "export JAVA_HOME=/usr/lib/jvm/jre-1.8.0-openjdk" >> /etc/profile.d/maven.sh -echo "export M3_HOME=/usr/local/src/apache-maven" >> /etc/profile.d/maven.sh -echo "export PATH=$M3_HOME/bin:$JAVA_HOME/bin:$PATH" >> /etc/profile.d/maven.sh -chmod +x /etc/profile.d/maven.sh -source /etc/profile.d/maven.sh diff --git a/ci/docker/runtime_functions.sh b/ci/docker/runtime_functions.sh index 2b73f26135a1..b5ca88840384 100755 --- a/ci/docker/runtime_functions.sh +++ b/ci/docker/runtime_functions.sh @@ -983,6 +983,7 @@ sanity_check() { # $2 -> python_cmd: The python command to use to execute the tests, python or python3 cd_unittest_ubuntu() { set -ex + source /opt/rh/rh-python35/enable export PYTHONPATH=./python/ export MXNET_MKLDNN_DEBUG=0 # Ignored if not present export MXNET_STORAGE_FALLBACK_LOG_VERBOSE=0 @@ -1094,6 +1095,9 @@ unittest_ubuntu_tensorrt_gpu() { # need to separte it from unittest_ubuntu_python3_gpu() unittest_ubuntu_python3_quantization_gpu() { set -ex + if [ -f /etc/redhat-release ]; then + source /opt/rh/rh-python35/enable + fi export PYTHONPATH=./python/ export MXNET_MKLDNN_DEBUG=0 # Ignored if not present export MXNET_STORAGE_FALLBACK_LOG_VERBOSE=0 @@ -1107,6 +1111,7 @@ 
unittest_ubuntu_python3_quantization_gpu() { unittest_centos7_cpu_scala() { set -ex source /opt/rh/devtoolset-7/enable + source /opt/rh/rh-maven35/enable cd /work/mxnet scala_prepare cd scala-package @@ -1240,19 +1245,19 @@ unittest_ubuntu_cpu_julia10() { unittest_centos7_cpu() { set -ex - source /opt/rh/devtoolset-7/enable + source /opt/rh/rh-python35/enable cd /work/mxnet - python3.6 -m "nose" $NOSE_COVERAGE_ARGUMENTS $NOSE_TIMER_ARGUMENTS --with-xunit --xunit-file nosetests_unittest.xml --verbose tests/python/unittest - python3.6 -m "nose" $NOSE_COVERAGE_ARGUMENTS $NOSE_TIMER_ARGUMENTS --with-xunit --xunit-file nosetests_train.xml --verbose tests/python/train + python -m "nose" $NOSE_COVERAGE_ARGUMENTS $NOSE_TIMER_ARGUMENTS --with-xunit --xunit-file nosetests_unittest.xml --verbose tests/python/unittest + python -m "nose" $NOSE_COVERAGE_ARGUMENTS $NOSE_TIMER_ARGUMENTS --with-xunit --xunit-file nosetests_train.xml --verbose tests/python/train } unittest_centos7_gpu() { set -ex - source /opt/rh/devtoolset-7/enable + source /opt/rh/rh-python35/enable cd /work/mxnet export CUDNN_VERSION=${CUDNN_VERSION:-7.0.3} export DMLC_LOG_STACK_TRACE_DEPTH=10 - python3.6 -m "nose" $NOSE_COVERAGE_ARGUMENTS $NOSE_TIMER_ARGUMENTS --with-xunit --xunit-file nosetests_gpu.xml --verbose tests/python/gpu + python3 -m "nose" $NOSE_COVERAGE_ARGUMENTS $NOSE_TIMER_ARGUMENTS --with-xunit --xunit-file nosetests_gpu.xml --verbose tests/python/gpu } integrationtest_ubuntu_cpu_onnx() { @@ -1942,6 +1947,7 @@ build_static_libmxnet() { set -ex pushd . source /opt/rh/devtoolset-7/enable + source /opt/rh/rh-python35/enable export USE_SYSTEM_CUDA=1 local mxnet_variant=${1:?"This function requires a python command as the first argument"} source tools/staticbuild/build.sh ${mxnet_variant} @@ -1953,6 +1959,7 @@ cd_package_pypi() { set -ex pushd . 
source /opt/rh/devtoolset-7/enable + source /opt/rh/rh-python35/enable local mxnet_variant=${1:?"This function requires a python command as the first argument"} ./cd/python/pypi/pypi_package.sh ${mxnet_variant} popd @@ -1961,8 +1968,9 @@ cd_package_pypi() { # Sanity checks wheel file cd_integration_test_pypi() { set -ex - local python_cmd=${1:?"This function requires a python command as the first argument"} - local gpu_enabled=${2:-"false"} + source /opt/rh/rh-python35/enable + + local gpu_enabled=${1:-"false"} local test_conv_params='' local mnist_params='' @@ -2008,6 +2016,7 @@ build_static_scala_cpu() { export MAVEN_PUBLISH_OS_TYPE=linux-x86_64-cpu export mxnet_variant=cpu source /opt/rh/devtoolset-7/enable + source /opt/rh/rh-maven35/enable ./ci/publish/scala/build.sh popd } @@ -2017,16 +2026,18 @@ build_static_python_cpu() { pushd . export mxnet_variant=cpu source /opt/rh/devtoolset-7/enable + source /opt/rh/rh-python35/enable ./ci/publish/python/build.sh popd } -build_static_python_cu101() { +build_static_python_cu92() { set -ex pushd . - export mxnet_variant=cu101 + export mxnet_variant=cu92 export USE_SYSTEM_CUDA=1 source /opt/rh/devtoolset-7/enable + source /opt/rh/rh-python35/enable ./ci/publish/python/build.sh popd } @@ -2037,17 +2048,19 @@ build_static_python_cpu_cmake() { export mxnet_variant=cpu export CMAKE_STATICBUILD=1 source /opt/rh/devtoolset-7/enable + source /opt/rh/rh-python35/enable ./ci/publish/python/build.sh popd } -build_static_python_cu101_cmake() { +build_static_python_cu92_cmake() { set -ex pushd . - export mxnet_variant=cu101 + export mxnet_variant=cu92 export CMAKE_STATICBUILD=1 export USE_SYSTEM_CUDA=1 source /opt/rh/devtoolset-7/enable + source /opt/rh/rh-python35/enable ./ci/publish/python/build.sh popd } @@ -2057,6 +2070,7 @@ publish_scala_build() { pushd . 
scala_prepare source /opt/rh/devtoolset-7/enable + source /opt/rh/rh-maven35/enable export USE_SYSTEM_CUDA=1 ./ci/publish/scala/build.sh popd diff --git a/ci/docker_cache.py b/ci/docker_cache.py index 254d6237d6e2..ec73e93fdb7c 100755 --- a/ci/docker_cache.py +++ b/ci/docker_cache.py @@ -74,8 +74,15 @@ def _build_save_container(platform, registry, load_cache) -> Optional[str]: :param load_cache: Load cache before building :return: Platform if failed, None otherwise """ - docker_tag = build_util.get_docker_tag(platform=platform, registry=registry) + # Case 1: docker-compose + if platform in build_util.DOCKER_COMPOSE_WHITELIST: + build_util.build_docker(platform=platform, registry=registry, num_retries=10, no_cache=False) + push_cmd = ['docker-compose', 'push', platform] + subprocess.check_call(push_cmd) + return None + # Case 2: Deprecated way, will be removed + docker_tag = build_util.get_docker_tag(platform=platform, registry=registry) # Preload cache if load_cache: load_docker_cache(registry=registry, docker_tag=docker_tag) @@ -84,7 +91,7 @@ def _build_save_container(platform, registry, load_cache) -> Optional[str]: logging.debug('Building %s as %s', platform, docker_tag) try: # Increase the number of retries for building the cache. 
- image_id = build_util.build_docker(docker_binary='docker', platform=platform, registry=registry, num_retries=10, no_cache=False) + image_id = build_util.build_docker(platform=platform, registry=registry, num_retries=10, no_cache=False) logging.info('Built %s as %s', docker_tag, image_id) # Push cache to registry diff --git a/ci/jenkins/Jenkins_steps.groovy b/ci/jenkins/Jenkins_steps.groovy index 450036ce811b..bfa517364e35 100644 --- a/ci/jenkins/Jenkins_steps.groovy +++ b/ci/jenkins/Jenkins_steps.groovy @@ -411,7 +411,7 @@ def compile_centos7_gpu() { ws('workspace/build-centos7-gpu') { timeout(time: max_time, unit: 'MINUTES') { utils.init_git() - utils.docker_run('centos7_gpu', 'build_centos7_gpu', false) + utils.docker_run('centos7_gpu_cu92', 'build_centos7_gpu', false) utils.pack_lib('centos7_gpu', mx_lib) } } @@ -739,7 +739,7 @@ def test_static_scala_cpu() { ws('workspace/ut-publish-scala-cpu') { timeout(time: max_time, unit: 'MINUTES') { utils.init_git() - utils.docker_run("publish.centos7_cpu", 'build_static_scala_cpu', false) + utils.docker_run('centos7_cpu', 'build_static_scala_cpu', false) } } } @@ -752,7 +752,7 @@ def test_static_python_cpu() { ws('workspace/ut-publish-python-cpu') { timeout(time: max_time, unit: 'MINUTES') { utils.init_git() - utils.docker_run("publish.centos7_cpu", 'build_static_python_cpu', false) + utils.docker_run('centos7_cpu', 'build_static_python_cpu', false) } } } @@ -765,7 +765,7 @@ def test_static_python_cpu_cmake() { ws('workspace/ut-publish-python-cpu') { timeout(time: max_time, unit: 'MINUTES') { utils.init_git() - utils.docker_run("publish.centos7_cpu", 'build_static_python_cpu_cmake', false) + utils.docker_run('centos7_cpu', 'build_static_python_cpu_cmake', false) } } } @@ -778,7 +778,7 @@ def test_static_python_gpu() { ws('workspace/ut-publish-python-gpu') { timeout(time: max_time, unit: 'MINUTES') { utils.init_git() - utils.docker_run("publish.centos7_gpu_cu101", 'build_static_python_cu101', true) + 
utils.docker_run('centos7_gpu_cu92', 'build_static_python_cu92') } } } @@ -791,7 +791,7 @@ def test_static_python_gpu_cmake() { ws('workspace/ut-publish-python-gpu') { timeout(time: max_time, unit: 'MINUTES') { utils.init_git() - utils.docker_run("publish.centos7_gpu_cu101", 'build_static_python_cu101_cmake', true) + utils.docker_run('centos7_gpu_cu92', 'build_static_python_cu92_cmake') } } } @@ -1316,7 +1316,7 @@ def test_centos7_python3_gpu() { timeout(time: max_time, unit: 'MINUTES') { try { utils.unpack_and_init('centos7_gpu', mx_lib) - utils.docker_run('centos7_gpu', 'unittest_centos7_gpu', true) + utils.docker_run('centos7_gpu_cu92', 'unittest_centos7_gpu', true) utils.publish_test_coverage() } finally { utils.collect_test_results_unix('nosetests_gpu.xml', 'nosetests_python3_centos7_gpu.xml') diff --git a/ci/publish/Jenkinsfile b/ci/publish/Jenkinsfile index 366758d85665..b522bb008c60 100644 --- a/ci/publish/Jenkinsfile +++ b/ci/publish/Jenkinsfile @@ -57,7 +57,7 @@ for (x in labels) { toBuild["Scala Build ${label}"] = wrapStep(nodeMap['cpu'], "build-scala-${label}") { withEnv(["MAVEN_PUBLISH_OS_TYPE=${scalaOSMap[label]}", "mxnet_variant=${scalaVariantMap[label]}"]) { utils.init_git() - utils.docker_run("publish.centos7_cpu", 'publish_scala_build', false, '500m', 'MAVEN_PUBLISH_OS_TYPE mxnet_variant') + utils.docker_run("centos7_cpu", 'publish_scala_build', false, '500m', 'MAVEN_PUBLISH_OS_TYPE mxnet_variant') utils.pack_lib("scala_${label}", mx_scala_pub, false) } } diff --git a/config/distribution/linux_cu92.cmake b/config/distribution/linux_cu92.cmake index 74f31c8ae031..8499421f91ec 100644 --- a/config/distribution/linux_cu92.cmake +++ b/config/distribution/linux_cu92.cmake @@ -33,4 +33,4 @@ set(USE_F16C OFF CACHE BOOL "Build with x86 F16C instruction support") set(USE_LIBJPEG_TURBO ON CACHE BOOL "Build with libjpeg-turbo") set(CUDACXX "/usr/local/cuda-9.2/bin/nvcc" CACHE STRING "Cuda compiler") -set(MXNET_CUDA_ARCH "3.0;5.0;6.0;7.0;7.2" CACHE STRING 
"Cuda architectures") +set(MXNET_CUDA_ARCH "3.0;5.0;6.0;7.0" CACHE STRING "Cuda architectures")