From 67bbf5a7b35a6e6e57061b81fee9899a9ddde87b Mon Sep 17 00:00:00 2001 From: Jake Lee Date: Thu, 18 Jul 2019 17:29:12 -0700 Subject: [PATCH] [Dependency Update] Bump up the CI Nvidia docker to CUDA 10.1 (#14986) * bump up the nvidia docker to CUDA 10.1 * change the func name * change the groovy file * clean up code & create cu101 docker file * add cuda 10.1 * update the missed groovy file * change description * move up the cudnn installation * update label & func for nightly build * update to cuda 10.1 * upgrade to build cu101mkl * do not use nvidia docker in build static test * fix cuda_patch_typo * use nvidia docker * fix the right config * use nvidia docker and install latest cuDNN * fix the typo * use default cudnn --- ci/docker/Dockerfile.build.centos7_gpu | 2 +- ci/docker/Dockerfile.build.ubuntu_base_gpu | 2 +- ci/docker/Dockerfile.build.ubuntu_build_cuda | 2 +- ci/docker/Dockerfile.build.ubuntu_gpu_cu101 | 82 +++++++++++++++++++ ci/docker/Dockerfile.build.ubuntu_nightly_gpu | 2 +- ci/docker/Dockerfile.publish.ubuntu1404_gpu | 2 +- ci/docker/install/ubuntu_cudnn.sh | 4 + ci/docker/install/ubuntu_nvidia.sh | 2 +- ci/docker/runtime_functions.sh | 8 +- ci/jenkins/Jenkins_steps.groovy | 44 +++++----- tests/nightly/JenkinsfileForBinaries | 4 +- tools/setup_gpu_build_tools.sh | 2 +- 12 files changed, 121 insertions(+), 35 deletions(-) create mode 100644 ci/docker/Dockerfile.build.ubuntu_gpu_cu101 diff --git a/ci/docker/Dockerfile.build.centos7_gpu b/ci/docker/Dockerfile.build.centos7_gpu index 1a927c4d5832..7e49e88b3a52 100644 --- a/ci/docker/Dockerfile.build.centos7_gpu +++ b/ci/docker/Dockerfile.build.centos7_gpu @@ -18,7 +18,7 @@ # # Dockerfile to build and run MXNet on CentOS 7 for GPU -FROM nvidia/cuda:10.0-devel-centos7 +FROM nvidia/cuda:10.1-devel-centos7 WORKDIR /work/deps diff --git a/ci/docker/Dockerfile.build.ubuntu_base_gpu b/ci/docker/Dockerfile.build.ubuntu_base_gpu index 40e1da657203..94e6437e578b 100644 --- a/ci/docker/Dockerfile.build.ubuntu_base_gpu +++
b/ci/docker/Dockerfile.build.ubuntu_base_gpu @@ -19,7 +19,7 @@ # Dockerfile to run the MXNet Installation Tests on Ubuntu 16.04 # This should run in an empty docker with ubuntu and cuda. -FROM nvidia/cuda:10.0-devel-ubuntu16.04 +FROM nvidia/cuda:10.1-devel-ubuntu16.04 WORKDIR /work/deps diff --git a/ci/docker/Dockerfile.build.ubuntu_build_cuda b/ci/docker/Dockerfile.build.ubuntu_build_cuda index f568fbc386eb..47f1d1f9ca58 100644 --- a/ci/docker/Dockerfile.build.ubuntu_build_cuda +++ b/ci/docker/Dockerfile.build.ubuntu_build_cuda @@ -21,7 +21,7 @@ # package generation, requiring the actual CUDA library to be # present -FROM nvidia/cuda:10.0-devel-ubuntu16.04 +FROM nvidia/cuda:10.1-devel-ubuntu16.04 WORKDIR /work/deps diff --git a/ci/docker/Dockerfile.build.ubuntu_gpu_cu101 b/ci/docker/Dockerfile.build.ubuntu_gpu_cu101 new file mode 100644 index 000000000000..32f0a0a8d862 --- /dev/null +++ b/ci/docker/Dockerfile.build.ubuntu_gpu_cu101 @@ -0,0 +1,82 @@ +# -*- mode: dockerfile -*- +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# +# Dockerfile to run MXNet on Ubuntu 16.04 for GPU + +FROM nvidia/cuda:10.1-devel-ubuntu16.04 + +WORKDIR /work/deps + +COPY install/ubuntu_core.sh /work/ +RUN /work/ubuntu_core.sh + +COPY install/deb_ubuntu_ccache.sh /work/ +RUN /work/deb_ubuntu_ccache.sh + +COPY install/ubuntu_python.sh /work/ +RUN /work/ubuntu_python.sh + +COPY install/ubuntu_scala.sh /work/ +COPY install/sbt.gpg /work/ +RUN /work/ubuntu_scala.sh + +COPY install/ubuntu_r.sh /work/ +COPY install/r.gpg /work/ +RUN /work/ubuntu_r.sh + +COPY install/ubuntu_perl.sh /work/ +RUN /work/ubuntu_perl.sh + +COPY install/ubuntu_clang.sh /work/ +RUN /work/ubuntu_clang.sh + +COPY install/ubuntu_mklml.sh /work/ +RUN /work/ubuntu_mklml.sh + +COPY install/ubuntu_tvm.sh /work/ +RUN /work/ubuntu_tvm.sh + +COPY install/ubuntu_llvm.sh /work/ +RUN /work/ubuntu_llvm.sh + +COPY install/ubuntu_caffe.sh /work/ +RUN /work/ubuntu_caffe.sh + +COPY install/ubuntu_onnx.sh /work/ +RUN /work/ubuntu_onnx.sh + +COPY install/ubuntu_docs.sh /work/ +COPY install/docs_requirements /work/ +RUN /work/ubuntu_docs.sh + +COPY install/ubuntu_tutorials.sh /work/ +RUN /work/ubuntu_tutorials.sh + +ENV CUDNN_VERSION=7.5.1.10 +COPY install/ubuntu_cudnn.sh /work/ +RUN /work/ubuntu_cudnn.sh + +# Always last +ARG USER_ID=0 +ARG GROUP_ID=0 +COPY install/ubuntu_adduser.sh /work/ +RUN /work/ubuntu_adduser.sh + +COPY runtime_functions.sh /work/ + +WORKDIR /work/mxnet diff --git a/ci/docker/Dockerfile.build.ubuntu_nightly_gpu b/ci/docker/Dockerfile.build.ubuntu_nightly_gpu index a667f7b7a94f..275a5a54fc66 100644 --- a/ci/docker/Dockerfile.build.ubuntu_nightly_gpu +++ b/ci/docker/Dockerfile.build.ubuntu_nightly_gpu @@ -18,7 +18,7 @@ # # Dockerfile to run MXNet on Ubuntu 16.04 for CPU -FROM nvidia/cuda:10.0-devel-ubuntu16.04 +FROM nvidia/cuda:10.1-devel-ubuntu16.04 WORKDIR /work/deps diff --git a/ci/docker/Dockerfile.publish.ubuntu1404_gpu b/ci/docker/Dockerfile.publish.ubuntu1404_gpu index 9855986a2891..4d9fa819a39e 100644 --- 
a/ci/docker/Dockerfile.publish.ubuntu1404_gpu +++ b/ci/docker/Dockerfile.publish.ubuntu1404_gpu @@ -18,7 +18,7 @@ # # Dockerfile to run MXNet on Ubuntu 14.04 for GPU -FROM ubuntu:14.04 +FROM nvidia/cuda:10.1-cudnn7-devel-ubuntu14.04 WORKDIR /work/deps diff --git a/ci/docker/install/ubuntu_cudnn.sh b/ci/docker/install/ubuntu_cudnn.sh index 3d260046b5e7..b773fea0f82f 100755 --- a/ci/docker/install/ubuntu_cudnn.sh +++ b/ci/docker/install/ubuntu_cudnn.sh @@ -32,6 +32,10 @@ fi apt-get update || true case ${CUDA_VERSION} in + 10\.1*) + export libcudnn7_version="${CUDNN_VERSION}-1+cuda10.1" + export libcudnn7_dev_version="${CUDNN_VERSION}-1+cuda10.1" + ;; 10\.0*) export libcudnn7_version="${CUDNN_VERSION}-1+cuda10.0" export libcudnn7_dev_version="${CUDNN_VERSION}-1+cuda10.0" diff --git a/ci/docker/install/ubuntu_nvidia.sh b/ci/docker/install/ubuntu_nvidia.sh index 36eb21b8a03e..41f68a21858b 100755 --- a/ci/docker/install/ubuntu_nvidia.sh +++ b/ci/docker/install/ubuntu_nvidia.sh @@ -22,4 +22,4 @@ set -ex # Retrieve ppa:graphics-drivers and install nvidia-drivers. 
# Note: DEBIAN_FRONTEND required to skip the interactive setup steps apt update -DEBIAN_FRONTEND=noninteractive apt install -y --no-install-recommends cuda-10-0 +DEBIAN_FRONTEND=noninteractive apt install -y --no-install-recommends cuda-10-1 diff --git a/ci/docker/runtime_functions.sh b/ci/docker/runtime_functions.sh index d2b0d6b0e274..2518f4c7c64f 100755 --- a/ci/docker/runtime_functions.sh +++ b/ci/docker/runtime_functions.sh @@ -579,7 +579,7 @@ build_ubuntu_cpu_mkldnn_mkl() { } build_ubuntu_gpu() { - build_ubuntu_gpu_cuda100_cudnn7 + build_ubuntu_gpu_cuda101_cudnn7 } build_ubuntu_gpu_tensorrt() { @@ -679,7 +679,7 @@ build_ubuntu_gpu_mkldnn_nocudnn() { -j$(nproc) } -build_ubuntu_gpu_cuda100_cudnn7() { +build_ubuntu_gpu_cuda101_cudnn7() { set -ex build_ccache_wrappers make \ @@ -1460,10 +1460,10 @@ build_static_python_mkl() { popd } -build_static_python_cu100mkl() { +build_static_python_cu101mkl() { set -ex pushd . - export mxnet_variant=cu100mkl + export mxnet_variant=cu101mkl ./ci/publish/python/build.sh popd } diff --git a/ci/jenkins/Jenkins_steps.groovy b/ci/jenkins/Jenkins_steps.groovy index 40700ad22bb1..c27a61383e46 100644 --- a/ci/jenkins/Jenkins_steps.groovy +++ b/ci/jenkins/Jenkins_steps.groovy @@ -153,7 +153,7 @@ def compile_unix_int64_gpu() { ws('workspace/build-gpu-int64') { timeout(time: max_time, unit: 'MINUTES') { utils.init_git() - utils.docker_run('ubuntu_gpu_cu100', 'build_ubuntu_gpu_large_tensor', false) + utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_large_tensor', false) utils.pack_lib('ubuntu_gpu_int64', mx_cmake_lib, true) } } @@ -232,12 +232,12 @@ def compile_unix_mkldnn_nocudnn_gpu() { } def compile_unix_full_gpu() { - return ['GPU: CUDA10.0+cuDNN7': { + return ['GPU: CUDA10.1+cuDNN7': { node(NODE_LINUX_CPU) { ws('workspace/build-gpu') { timeout(time: max_time, unit: 'MINUTES') { utils.init_git() - utils.docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_cuda100_cudnn7', false) + utils.docker_run('ubuntu_build_cuda', 
'build_ubuntu_gpu_cuda101_cudnn7', false) utils.pack_lib('gpu', mx_lib_cpp_examples, true) } } @@ -251,7 +251,7 @@ def compile_unix_cmake_mkldnn_gpu() { ws('workspace/build-cmake-mkldnn-gpu') { timeout(time: max_time, unit: 'MINUTES') { utils.init_git() - utils.docker_run('ubuntu_gpu_cu100', 'build_ubuntu_gpu_cmake_mkldnn', false) + utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_cmake_mkldnn', false) utils.pack_lib('cmake_mkldnn_gpu', mx_cmake_mkldnn_lib, true) } } @@ -265,7 +265,7 @@ def compile_unix_cmake_gpu() { ws('workspace/build-cmake-gpu') { timeout(time: max_time, unit: 'MINUTES') { utils.init_git() - utils.docker_run('ubuntu_gpu_cu100', 'build_ubuntu_gpu_cmake', false) + utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_cmake', false) utils.pack_lib('cmake_gpu', mx_cmake_lib_cython, true) } } @@ -631,7 +631,7 @@ def test_static_python_gpu() { ws('workspace/ut-publish-python-gpu') { timeout(time: max_time, unit: 'MINUTES') { utils.init_git() - utils.docker_run("publish.ubuntu1404_gpu", 'build_static_python_cu100mkl', true) + utils.docker_run("publish.ubuntu1404_gpu", 'build_static_python_cu101mkl', true) } } } @@ -662,7 +662,7 @@ def test_unix_python2_gpu() { ws('workspace/ut-python2-gpu') { try { utils.unpack_and_init('gpu', mx_lib, true) - python2_gpu_ut('ubuntu_gpu_cu100') + python2_gpu_ut('ubuntu_gpu_cu101') utils.publish_test_coverage() } finally { utils.collect_test_results_unix('nosetests_gpu.xml', 'nosetests_python2_gpu.xml') @@ -679,7 +679,7 @@ def test_unix_python2_quantize_gpu() { timeout(time: max_time, unit: 'MINUTES') { try { utils.unpack_and_init('gpu', mx_lib, true) - utils.docker_run('ubuntu_gpu_cu100', 'unittest_ubuntu_python2_quantization_gpu', true) + utils.docker_run('ubuntu_gpu_cu101', 'unittest_ubuntu_python2_quantization_gpu', true) utils.publish_test_coverage() } finally { utils.collect_test_results_unix('nosetests_quantization_gpu.xml', 'nosetests_python2_quantize_gpu.xml') @@ -696,7 +696,7 @@ def 
test_unix_python2_mkldnn_gpu() { ws('workspace/ut-python2-mkldnn-gpu') { try { utils.unpack_and_init('mkldnn_gpu', mx_mkldnn_lib, true) - python2_gpu_ut('ubuntu_gpu_cu100') + python2_gpu_ut('ubuntu_gpu_cu101') utils.publish_test_coverage() } finally { utils.collect_test_results_unix('nosetests_gpu.xml', 'nosetests_python2_mkldnn_gpu.xml') @@ -746,7 +746,7 @@ def test_unix_python3_gpu() { ws('workspace/ut-python3-gpu') { try { utils.unpack_and_init('gpu', mx_lib_cython, true) - python3_gpu_ut_cython('ubuntu_gpu_cu100') + python3_gpu_ut_cython('ubuntu_gpu_cu101') utils.publish_test_coverage() } finally { utils.collect_test_results_unix('nosetests_gpu.xml', 'nosetests_python3_gpu.xml') @@ -763,7 +763,7 @@ def test_unix_python3_quantize_gpu() { timeout(time: max_time, unit: 'MINUTES') { try { utils.unpack_and_init('gpu', mx_lib, true) - utils.docker_run('ubuntu_gpu_cu100', 'unittest_ubuntu_python3_quantization_gpu', true) + utils.docker_run('ubuntu_gpu_cu101', 'unittest_ubuntu_python3_quantization_gpu', true) utils.publish_test_coverage() } finally { utils.collect_test_results_unix('nosetests_quantization_gpu.xml', 'nosetests_python3_quantize_gpu.xml') @@ -848,7 +848,7 @@ def test_unix_python3_mkldnn_gpu() { ws('workspace/ut-python3-mkldnn-gpu') { try { utils.unpack_and_init('mkldnn_gpu', mx_mkldnn_lib, true) - python3_gpu_ut('ubuntu_gpu_cu100') + python3_gpu_ut('ubuntu_gpu_cu101') utils.publish_test_coverage() } finally { utils.collect_test_results_unix('nosetests_gpu.xml', 'nosetests_python3_mkldnn_gpu.xml') @@ -864,7 +864,7 @@ def test_unix_python3_mkldnn_nocudnn_gpu() { ws('workspace/ut-python3-mkldnn-gpu-nocudnn') { try { utils.unpack_and_init('mkldnn_gpu_nocudnn', mx_mkldnn_lib, true) - python3_gpu_ut_nocudnn('ubuntu_gpu_cu100') + python3_gpu_ut_nocudnn('ubuntu_gpu_cu101') utils.publish_test_coverage() } finally { utils.collect_test_results_unix('nosetests_gpu.xml', 'nosetests_python3_mkldnn_gpu_nocudnn.xml') @@ -898,7 +898,7 @@ def 
test_unix_python3_integration_gpu() { ws('workspace/it-python-gpu') { timeout(time: max_time, unit: 'MINUTES') { utils.unpack_and_init('gpu', mx_lib, true) - utils.docker_run('ubuntu_gpu_cu100', 'integrationtest_ubuntu_gpu_python', true) + utils.docker_run('ubuntu_gpu_cu101', 'integrationtest_ubuntu_gpu_python', true) utils.publish_test_coverage() } } @@ -913,7 +913,7 @@ def test_unix_caffe_gpu() { timeout(time: max_time, unit: 'MINUTES') { utils.init_git() utils.unpack_lib('gpu', mx_lib) - utils.docker_run('ubuntu_gpu_cu100', 'integrationtest_ubuntu_gpu_caffe', true) + utils.docker_run('ubuntu_gpu_cu101', 'integrationtest_ubuntu_gpu_caffe', true) utils.publish_test_coverage() } } @@ -927,7 +927,7 @@ def test_unix_cpp_package_gpu() { ws('workspace/it-cpp-package') { timeout(time: max_time, unit: 'MINUTES') { utils.unpack_and_init('gpu', mx_lib_cpp_examples, true) - utils.docker_run('ubuntu_gpu_cu100', 'integrationtest_ubuntu_gpu_cpp_package', true) + utils.docker_run('ubuntu_gpu_cu101', 'integrationtest_ubuntu_gpu_cpp_package', true) utils.publish_test_coverage() } } @@ -969,7 +969,7 @@ def test_unix_scala_gpu() { ws('workspace/ut-scala-gpu') { timeout(time: max_time, unit: 'MINUTES') { utils.unpack_and_init('gpu', mx_lib, true) - utils.docker_run('ubuntu_gpu_cu100', 'integrationtest_ubuntu_gpu_scala', true) + utils.docker_run('ubuntu_gpu_cu101', 'integrationtest_ubuntu_gpu_scala', true) utils.publish_test_coverage() } } @@ -1052,7 +1052,7 @@ def test_unix_cpp_gpu() { ws('workspace/ut-cpp-gpu') { timeout(time: max_time, unit: 'MINUTES') { utils.unpack_and_init('cmake_gpu', mx_cmake_lib, true) - utils.docker_run('ubuntu_gpu_cu100', 'unittest_cpp', true) + utils.docker_run('ubuntu_gpu_cu101', 'unittest_cpp', true) utils.publish_test_coverage() } } @@ -1066,7 +1066,7 @@ def test_unix_cpp_mkldnn_gpu() { ws('workspace/ut-cpp-mkldnn-gpu') { timeout(time: max_time, unit: 'MINUTES') { utils.unpack_and_init('cmake_mkldnn_gpu', mx_cmake_mkldnn_lib, true) - 
utils.docker_run('ubuntu_gpu_cu100', 'unittest_cpp', true) + utils.docker_run('ubuntu_gpu_cu101', 'unittest_cpp', true) utils.publish_test_coverage() } } @@ -1094,7 +1094,7 @@ def test_unix_perl_gpu() { ws('workspace/ut-perl-gpu') { timeout(time: max_time, unit: 'MINUTES') { utils.unpack_and_init('gpu', mx_lib, true) - utils.docker_run('ubuntu_gpu_cu100', 'unittest_ubuntu_cpugpu_perl', true) + utils.docker_run('ubuntu_gpu_cu101', 'unittest_ubuntu_cpugpu_perl', true) utils.publish_test_coverage() } } @@ -1108,7 +1108,7 @@ def test_unix_r_gpu() { ws('workspace/ut-r-gpu') { timeout(time: max_time, unit: 'MINUTES') { utils.unpack_and_init('gpu', mx_lib, true) - utils.docker_run('ubuntu_gpu_cu100', 'unittest_ubuntu_gpu_R', true) + utils.docker_run('ubuntu_gpu_cu101', 'unittest_ubuntu_gpu_R', true) utils.publish_test_coverage() } } @@ -1176,7 +1176,7 @@ def test_unix_distributed_kvstore_gpu() { ws('workspace/it-dist-kvstore') { timeout(time: max_time, unit: 'MINUTES') { utils.unpack_and_init('gpu', mx_lib, true) - utils.docker_run('ubuntu_gpu_cu100', 'integrationtest_ubuntu_gpu_dist_kvstore', true) + utils.docker_run('ubuntu_gpu_cu101', 'integrationtest_ubuntu_gpu_dist_kvstore', true) utils.publish_test_coverage() } } diff --git a/tests/nightly/JenkinsfileForBinaries b/tests/nightly/JenkinsfileForBinaries index d5f1ebdd6fef..725c76969e23 100755 --- a/tests/nightly/JenkinsfileForBinaries +++ b/tests/nightly/JenkinsfileForBinaries @@ -31,11 +31,11 @@ utils.assign_node_labels(utility: 'utility', linux_cpu: 'mxnetlinux-cpu', linux_ utils.main_wrapper( core_logic: { stage('Build') { - parallel 'GPU: CUDA9.1+cuDNN7': { + parallel 'GPU: CUDA10.1+cuDNN7': { node(NODE_LINUX_CPU) { ws('workspace/build-gpu') { utils.init_git() - utils.docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_cuda100_cudnn7', false) + utils.docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_cuda101_cudnn7', false) utils.pack_lib('gpu', mx_lib) } } diff --git a/tools/setup_gpu_build_tools.sh 
b/tools/setup_gpu_build_tools.sh index 724af3c90ec1..b25be6b697e1 100755 --- a/tools/setup_gpu_build_tools.sh +++ b/tools/setup_gpu_build_tools.sh @@ -29,7 +29,7 @@ DEPS_PATH=$2 >&2 echo "Setting CUDA versions for $VARIANT" if [[ $VARIANT == cu101* ]]; then CUDA_VERSION='10.1.105-1' - CUDA_PATCH_VERSION='10.1.105-1' + CUDA_PATCH_VERSION='10.1.0.105-1' LIBCUDA_VERSION='418.39-0ubuntu1' LIBCUDNN_VERSION='7.6.0.64-1+cuda10.1' LIBNCCL_VERSION='2.4.7-1+cuda10.1'