[v1.7.x] update jetson dockerfile to support CUDA 10.0 (#18339)

* update dockerfile for jetson * add toolchain files * update build_jetson function * update ubuntu_julia.sh * update FindCUDAToolkit.cmake * Update centos7_python.sh * revert changes on ubuntu_julia.sh * disable TVM for gpu build * Disable TVM_OP on GPU builds Co-authored-by: Wei Chu <[email protected]> Co-authored-by: Leonard Lausen <[email protected]>
apache · May 26, 2020 · 1eefe66 · 1eefe66
1 parent c4d9270
commit 1eefe66
Show file tree

Hide file tree

Showing 8 changed files with 267 additions and 219 deletions.
diff --git a/ci/docker/Dockerfile.build.jetson b/ci/docker/Dockerfile.build.jetson
@@ -20,68 +20,58 @@
 # This script assumes /work/mxnet exists and contains the mxnet code you wish to compile and
 # that /work/build exists and is the target for your output.
 
-FROM nvidia/cuda:9.0-cudnn7-devel as cudabuilder
+FROM nvidia/cuda:10.0-cudnn7-devel-ubuntu18.04
 
-FROM dockcross/linux-arm64
+ENV ARCH=aarch64 \
+    HOSTCC=gcc \
+    TARGET=ARMV8
 
-ENV ARCH aarch64
-ENV HOSTCC gcc
-ENV TARGET ARMV8
+WORKDIR /usr/local
 
-# gh issue #11567 https://github.com/apache/incubator-mxnet/issues/11567
-#RUN sed -i '\#deb http://cdn-fastly.deb.debian.org/debian-security jessie/updates main#d' /etc/apt/sources.list
-#RUN sed -i 's/cdn-fastly.//' /etc/apt/sources.list
+RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
+    build-essential \
+    ninja-build \
+    git \
+    curl \
+    zip \
+    unzip \
+    python3 \
+    python3-pip \
+    awscli \
+    crossbuild-essential-arm64 \
+ && rm -rf /var/lib/apt/lists/*
 
+# cmake on Ubuntu 18.04 is too old
+RUN python3 -m pip install cmake
 
-WORKDIR /work/deps
-
-COPY install/ubuntu_arm.sh /work/
-RUN /work/ubuntu_arm.sh
-
-COPY install/arm_openblas.sh /work/
-RUN /work/arm_openblas.sh
-
-ENV OpenBLAS_HOME=${CROSS_ROOT}
-ENV OpenBLAS_DIR=${CROSS_ROOT}
-
+# ccache on Ubuntu 18.04 is too old to support Cuda correctly
 COPY install/deb_ubuntu_ccache.sh /work/
 RUN /work/deb_ubuntu_ccache.sh
 
-# Setup CUDA build env (including configuring and copying nvcc)
-COPY --from=cudabuilder /usr/local/cuda /usr/local/cuda
-ENV TARGET_ARCH aarch64
-ENV TARGET_OS linux
+COPY toolchains/aarch64-linux-gnu-toolchain.cmake /usr
+ENV CMAKE_TOOLCHAIN_FILE=/usr/aarch64-linux-gnu-toolchain.cmake
+
+RUN git clone --recursive -b v0.3.9 https://github.com/xianyi/OpenBLAS.git && \
+    cd /usr/local/OpenBLAS && \
+    make NOFORTRAN=1 CC=aarch64-linux-gnu-gcc && \
+    make PREFIX=/usr/aarch64-linux-gnu install && \
+    cd /usr/local && \
+    rm -rf OpenBLAS
 
-# Install ARM depedencies based on Jetpack 3.3
-RUN JETPACK_DOWNLOAD_PREFIX=https://developer.download.nvidia.com/devzone/devcenter/mobile/jetpack_l4t/3.3/lw.xd42/JetPackL4T_33_b39 && \
-    CUDA_REPO_PREFIX=/var/cuda-repo-9-0-local && \
-    ARM_CUDA_INSTALLER_PACKAGE=cuda-repo-l4t-9-0-local_9.0.252-1_arm64.deb && \
-    ARM_CUDNN_INSTALLER_PACKAGE=libcudnn7_7.1.5.14-1+cuda9.0_arm64.deb && \
-    ARM_CUDNN_DEV_INSTALLER_PACKAGE=libcudnn7-dev_7.1.5.14-1+cuda9.0_arm64.deb && \
-    ARM_LICENSE_INSTALLER=cuda-license-9-0_9.0.252-1_arm64.deb && \
-    ARM_CUBLAS_INSTALLER=cuda-cublas-9-0_9.0.252-1_arm64.deb && \
-    ARM_NVINFER_INSTALLER_PACKAGE=libnvinfer4_4.1.3-1+cuda9.0_arm64.deb && \
-    ARM_NVINFER_DEV_INSTALLER_PACKAGE=libnvinfer-dev_4.1.3-1+cuda9.0_arm64.deb && \
-    dpkg --add-architecture arm64 && \
-    wget -nv $JETPACK_DOWNLOAD_PREFIX/$ARM_CUDA_INSTALLER_PACKAGE && \
-    wget -nv $JETPACK_DOWNLOAD_PREFIX/$ARM_CUDNN_INSTALLER_PACKAGE && \
-    wget -nv $JETPACK_DOWNLOAD_PREFIX/$ARM_CUDNN_DEV_INSTALLER_PACKAGE && \
-    wget -nv $JETPACK_DOWNLOAD_PREFIX/$ARM_NVINFER_INSTALLER_PACKAGE && \
-    wget -nv $JETPACK_DOWNLOAD_PREFIX/$ARM_NVINFER_DEV_INSTALLER_PACKAGE && \
-    dpkg -i --force-architecture  $ARM_CUDA_INSTALLER_PACKAGE && \
-    apt-key add $CUDA_REPO_PREFIX/7fa2af80.pub && \
-    dpkg -i --force-architecture  $ARM_CUDNN_INSTALLER_PACKAGE && \
-    dpkg -i --force-architecture  $ARM_CUDNN_DEV_INSTALLER_PACKAGE && \
-    dpkg -i --force-architecture  $CUDA_REPO_PREFIX/$ARM_LICENSE_INSTALLER && \
-    dpkg -i --force-architecture  $CUDA_REPO_PREFIX/$ARM_CUBLAS_INSTALLER && \
-    dpkg -i --force-architecture  $ARM_NVINFER_INSTALLER_PACKAGE && \
-    dpkg -i --force-architecture  $ARM_NVINFER_DEV_INSTALLER_PACKAGE && \
-    apt update -y || true && apt install -y cuda-libraries-dev-9-0 libcudnn7-dev libnvinfer-dev
-RUN ln -s /usr/include/aarch64-linux-gnu/cudnn_v7.h /usr/include/aarch64-linux-gnu/cudnn.h
-ENV PATH $PATH:/usr/local/cuda/bin
-ENV NVCCFLAGS "-m64"
-ENV CUDA_ARCH "-gencode arch=compute_53,code=sm_53 -gencode arch=compute_62,code=sm_62"
-ENV NVCC /usr/local/cuda/bin/nvcc
+# Install aarch64 cross depedencies based on Jetpack 4.3
+# Manually downloaded using SDK Manager tool and placed in a private S3 bucket.
+# We're not allowed to redistribute these files and there is no public version.
+RUN aws s3 cp s3://mxnet-ci-prod-private-slave-data/nvidia/sdkm_downloads/cuda-repo-ubuntu1804-10-0-local-10.0.326-410.108_1.0-1_amd64.deb . && \
+    dpkg -i cuda-repo-ubuntu1804-10-0-local-10.0.326-410.108_1.0-1_amd64.deb && \
+    rm cuda-repo-ubuntu1804-10-0-local-10.0.326-410.108_1.0-1_amd64.deb && \
+    apt-key add /var/cuda-repo-10-0-local-10.0.326-410.108/7fa2af80.pub && \
+    aws s3 cp s3://mxnet-ci-prod-private-slave-data/nvidia/sdkm_downloads/cuda-repo-cross-aarch64-10-0-local-10.0.326_1.0-1_all.deb . && \
+    dpkg -i cuda-repo-cross-aarch64-10-0-local-10.0.326_1.0-1_all.deb && \
+    rm cuda-repo-cross-aarch64-10-0-local-10.0.326_1.0-1_all.deb && \
+    apt-get update && \
+    apt-get install -y -f && \
+    apt-get install -y cuda-cross-aarch64 cuda-cross-aarch64-10-0 && \
+    rm -rf /var/lib/apt/lists/*
 
 ARG USER_ID=0
 ARG GROUP_ID=0

diff --git a/ci/docker/install/centos7_python.sh b/ci/docker/install/centos7_python.sh
@@ -23,7 +23,7 @@
 set -ex
 
  # Python 2.7 is installed by default, install 3.6 on top
-yum -y install https://centos7.iuscommunity.org/ius-release.rpm
+yum -y install https://repo.ius.io/ius-release-el7.rpm
 yum -y install python36u
 
 # Install PIP

diff --git a/ci/docker/runtime_functions.sh b/ci/docker/runtime_functions.sh
@@ -248,15 +248,22 @@ build_dynamic_libmxnet() {
 
 build_jetson() {
     set -ex
-    pushd .
-
-    #build_ccache_wrappers
-
-    cp make/crosscompile.jetson.mk ./config.mk
-    make -j$(nproc)
-
-    build_wheel /work/mxnet/python /work/mxnet/lib
-    popd
+    cd /work/build
+    cmake \
+        -DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE} \
+        -DUSE_CUDA=ON \
+        -DMXNET_CUDA_ARCH="5.2" \
+        -DENABLE_CUDA_RTC=OFF \
+        -DSUPPORT_F16C=OFF \
+        -DUSE_OPENCV=OFF \
+        -DUSE_OPENMP=ON \
+        -DUSE_LAPACK=OFF \
+        -DUSE_SIGNAL_HANDLER=ON \
+        -DCMAKE_BUILD_TYPE=Release \
+        -DUSE_MKL_IF_AVAILABLE=OFF \
+        -G Ninja /work/mxnet
+    ninja
+    build_wheel
 }
 
 #
@@ -745,7 +752,7 @@ build_ubuntu_gpu_mkldnn() {
         USE_CUDA=1                                \
         USE_CUDA_PATH=/usr/local/cuda             \
         USE_CUDNN=1                               \
-        USE_TVM_OP=1                              \
+        USE_TVM_OP=0                              \
         CUDA_ARCH="$CI_CUDA_COMPUTE_CAPABILITIES" \
         USE_SIGNAL_HANDLER=1                      \
         -j$(nproc)
@@ -762,7 +769,7 @@ build_ubuntu_gpu_mkldnn_nocudnn() {
         USE_CUDA=1                                \
         USE_CUDA_PATH=/usr/local/cuda             \
         USE_CUDNN=0                               \
-        USE_TVM_OP=1                              \
+        USE_TVM_OP=0                              \
         CUDA_ARCH="$CI_CUDA_COMPUTE_CAPABILITIES" \
         USE_SIGNAL_HANDLER=1                      \
         -j$(nproc)
@@ -778,7 +785,7 @@ build_ubuntu_gpu_cuda101_cudnn7() {
         USE_CUDA=1                                \
         USE_CUDA_PATH=/usr/local/cuda             \
         USE_CUDNN=1                               \
-        USE_TVM_OP=1                              \
+        USE_TVM_OP=0                              \
         USE_CPP_PACKAGE=1                         \
         USE_DIST_KVSTORE=1                        \
         CUDA_ARCH="$CI_CUDA_COMPUTE_CAPABILITIES" \
@@ -807,26 +814,6 @@ build_ubuntu_gpu_cuda101_cudnn7_mkldnn_cpp_test() {
     make cython PYTHON=python3
 }
 
-build_ubuntu_gpu_cuda101_cudnn7_no_tvm_op() {
-    set -ex
-    build_ccache_wrappers
-    make \
-        DEV=1                                     \
-        USE_BLAS=openblas                         \
-        USE_MKLDNN=0                              \
-        USE_CUDA=1                                \
-        USE_CUDA_PATH=/usr/local/cuda             \
-        USE_CUDNN=1                               \
-        USE_TVM_OP=0                              \
-        USE_CPP_PACKAGE=1                         \
-        USE_DIST_KVSTORE=1                        \
-        CUDA_ARCH="$CI_CUDA_COMPUTE_CAPABILITIES" \
-        USE_SIGNAL_HANDLER=1                      \
-        -j$(nproc)
-
-    make cython PYTHON=python3
-}
-
 build_ubuntu_amalgamation() {
     set -ex
     # Amalgamation can not be run with -j nproc
@@ -854,7 +841,7 @@ build_ubuntu_gpu_cmake_mkldnn() {
         -DUSE_SIGNAL_HANDLER=ON                 \
         -DUSE_CUDA=1                            \
         -DUSE_CUDNN=1                           \
-        -DUSE_TVM_OP=1                          \
+        -DUSE_TVM_OP=0                          \
         -DPython3_EXECUTABLE=/usr/bin/python3   \
         -DUSE_MKLML_MKL=1                       \
         -DCMAKE_BUILD_TYPE=Release              \
@@ -873,7 +860,7 @@ build_ubuntu_gpu_cmake() {
         -DUSE_SIGNAL_HANDLER=ON                 \
         -DUSE_CUDA=ON                           \
         -DUSE_CUDNN=ON                          \
-        -DUSE_TVM_OP=ON                         \
+        -DUSE_TVM_OP=OFF                        \
         -DPython3_EXECUTABLE=/usr/bin/python3   \
         -DUSE_MKL_IF_AVAILABLE=OFF              \
         -DUSE_MKLML_MKL=OFF                     \
@@ -896,7 +883,7 @@ build_ubuntu_gpu_cmake_no_rtc() {
         -DUSE_SIGNAL_HANDLER=ON                 \
         -DUSE_CUDA=ON                           \
         -DUSE_CUDNN=ON                          \
-        -DUSE_TVM_OP=ON                         \
+        -DUSE_TVM_OP=OFF                        \
         -DPython3_EXECUTABLE=/usr/bin/python3   \
         -DUSE_MKL_IF_AVAILABLE=OFF              \
         -DUSE_MKLML_MKL=OFF                     \
@@ -912,29 +899,6 @@ build_ubuntu_gpu_cmake_no_rtc() {
     ninja
 }
 
-build_ubuntu_gpu_cmake_no_tvm_op() {
-    set -ex
-    cd /work/build
-    build_ccache_wrappers
-    cmake \
-        -DUSE_SIGNAL_HANDLER=ON                 \
-        -DUSE_CUDA=ON                           \
-        -DUSE_CUDNN=ON                          \
-        -DUSE_TVM_OP=OFF                        \
-        -DPython3_EXECUTABLE=/usr/bin/python3   \
-        -DUSE_MKL_IF_AVAILABLE=OFF              \
-        -DUSE_MKLML_MKL=OFF                     \
-        -DUSE_MKLDNN=OFF                        \
-        -DUSE_DIST_KVSTORE=ON                   \
-        -DCMAKE_BUILD_TYPE=Release              \
-        -DMXNET_CUDA_ARCH="$CI_CMAKE_CUDA_ARCH" \
-        -DBUILD_CYTHON_MODULES=1                \
-        -G Ninja                                \
-        /work/mxnet
-
-    ninja
-}
-
 build_ubuntu_cpu_large_tensor() {
     set -ex
     cd /work/build
@@ -960,7 +924,7 @@ build_ubuntu_gpu_large_tensor() {
         -DUSE_SIGNAL_HANDLER=ON                 \
         -DUSE_CUDA=ON                           \
         -DUSE_CUDNN=ON                          \
-        -DUSE_TVM_OP=ON                         \
+        -DUSE_TVM_OP=OFF                        \
         -DPython3_EXECUTABLE=/usr/bin/python3   \
         -DUSE_MKL_IF_AVAILABLE=OFF              \
         -DUSE_MKLML_MKL=OFF                     \

diff --git a/ci/docker/toolchains/aarch64-linux-gnu-toolchain.cmake b/ci/docker/toolchains/aarch64-linux-gnu-toolchain.cmake
@@ -0,0 +1,28 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set(CMAKE_SYSTEM_NAME Linux)
+set(CMAKE_SYSTEM_PROCESSOR "aarch64")
+set(CMAKE_C_COMPILER aarch64-linux-gnu-gcc)
+set(CMAKE_CXX_COMPILER aarch64-linux-gnu-g++)
+set(CMAKE_CUDA_HOST_COMPILER aarch64-linux-gnu-gcc)
+set(CMAKE_FIND_ROOT_PATH "/usr/aarch64-linux-gnu")
+
+set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
+set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
+set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
+set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)
diff --git a/ci/docker/toolchains/arm-linux-gnueabihf-toolchain.cmake b/ci/docker/toolchains/arm-linux-gnueabihf-toolchain.cmake
@@ -0,0 +1,27 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set(CMAKE_SYSTEM_NAME Linux)
+set(CMAKE_SYSTEM_PROCESSOR "armv7l")
+set(CMAKE_C_COMPILER arm-linux-gnueabihf-gcc)
+set(CMAKE_CXX_COMPILER arm-linux-gnueabihf-g++)
+set(CMAKE_FIND_ROOT_PATH "/usr/arm-linux-gnueabihf" "/usr/local/arm-linux-gnueabihf")
+
+set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
+set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
+set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
+set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)
diff --git a/ci/jenkins/Jenkins_steps.groovy b/ci/jenkins/Jenkins_steps.groovy
@@ -256,20 +256,6 @@ def compile_unix_full_gpu_mkldnn_cpp_test() {
     }]
 }
 
-def compile_unix_full_gpu_no_tvm_op() {
-    return ['GPU: CUDA10.1+cuDNN7 TVM_OP OFF': {
-      node(NODE_LINUX_CPU) {
-        ws('workspace/build-gpu-no-tvm-op') {
-          timeout(time: max_time, unit: 'MINUTES') {
-            utils.init_git()
-            utils.docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_cuda101_cudnn7_no_tvm_op', false)
-            utils.pack_lib('gpu_no_tvm_op', mx_lib_cpp_examples_no_tvm_op)
-          }
-        }
-      }
-    }]
-}
-
 def compile_unix_cmake_mkldnn_gpu() {
     return ['GPU: CMake MKLDNN': {
       node(NODE_LINUX_CPU) {
@@ -298,19 +284,6 @@ def compile_unix_cmake_gpu() {
     }]
 }
 
-def compile_unix_cmake_gpu_no_tvm_op() {
-    return ['GPU: CMake TVM_OP OFF': {
-      node(NODE_LINUX_CPU) {
-        ws('workspace/build-cmake-gpu-no-tvm-op') {
-          timeout(time: max_time, unit: 'MINUTES') {
-            utils.init_git()
-            utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_cmake_no_tvm_op', false)
-          }
-        }
-      }
-    }]
-}
-
 def compile_unix_cmake_gpu_no_rtc() {
     return ['GPU: CMake CUDA RTC OFF': {
         node(NODE_LINUX_CPU) {
@@ -765,22 +738,6 @@ def test_unix_python3_gpu() {
     }]
 }
 
-def test_unix_python3_gpu_no_tvm_op() {
-    return ['Python3: GPU TVM_OP OFF': {
-      node(NODE_LINUX_GPU) {
-        ws('workspace/ut-python3-gpu-no-tvm-op') {
-          try {
-            utils.unpack_and_init('gpu_no_tvm_op', mx_lib_cpp_examples_no_tvm_op)
-            python3_gpu_ut_cython('ubuntu_gpu_cu101')
-            utils.publish_test_coverage()
-          } finally {
-            utils.collect_test_results_unix('nosetests_gpu.xml', 'nosetests_python3_gpu.xml')
-          }
-        }
-      }
-    }]
-}
-
 def test_unix_python3_quantize_gpu() {
     return ['Python3: Quantize GPU': {
       node(NODE_LINUX_GPU_P3) {