From a1428987663e60b7b847a3797053ac994947ea89 Mon Sep 17 00:00:00 2001 From: Manu Seth <22492939+mseth10@users.noreply.github.com> Date: Tue, 6 Jul 2021 12:35:03 -0700 Subject: [PATCH 1/2] [v1.x] use centos7 base image for aarch64 build (#20392) * add centos7 aarch64 build * enable rh packages * update openssl curl version * install rhel7 armpl * find armpl cmake * test mkldnn build * test ninja * use armpl compiled with gcc-8 * enable py38 for unittests * check previously failing tests * create symlink for ninja * fix cd pypi * fix cd docker * update ssl for mac --- cd/mxnet_lib/Jenkins_pipeline.groovy | 2 +- cd/mxnet_lib/mxnet_lib_pipeline.groovy | 2 +- cd/python/docker/Dockerfile | 6 +- cd/python/docker/Jenkins_pipeline.groovy | 2 +- cd/python/pypi/Jenkins_pipeline.groovy | 4 +- .../Dockerfile.build.centos7_aarch64_cpu | 111 ++++++++++++++++++ .../Dockerfile.publish.ubuntu1804_aarch64_cpu | 39 ------ ci/docker/install/requirements_aarch64 | 2 - ci/docker/install/ubuntu_aarch64_publish.sh | 93 --------------- ci/docker/runtime_functions.sh | 24 +++- cmake/Modules/FindArmPL.cmake | 4 +- tests/python/unittest/test_ndarray.py | 3 - .../unittest/test_numpy_interoperability.py | 2 - tests/python/unittest/test_numpy_op.py | 2 - tests/python/unittest/test_profiler.py | 4 +- tools/dependencies/curl.sh | 6 +- tools/dependencies/openssl.sh | 2 +- tools/staticbuild/build_lib_cmake.sh | 6 +- 18 files changed, 150 insertions(+), 164 deletions(-) create mode 100644 ci/docker/Dockerfile.build.centos7_aarch64_cpu delete mode 100644 ci/docker/Dockerfile.publish.ubuntu1804_aarch64_cpu delete mode 100755 ci/docker/install/ubuntu_aarch64_publish.sh diff --git a/cd/mxnet_lib/Jenkins_pipeline.groovy b/cd/mxnet_lib/Jenkins_pipeline.groovy index f34988d1d1f1..ac372c621206 100644 --- a/cd/mxnet_lib/Jenkins_pipeline.groovy +++ b/cd/mxnet_lib/Jenkins_pipeline.groovy @@ -47,7 +47,7 @@ def build(mxnet_variant) { node(NODE_LINUX_AARCH64_CPU) { 
ws("workspace/mxnet_${libtype}/${mxnet_variant}/${env.BUILD_NUMBER}") { ci_utils.init_git() - ci_utils.docker_run('publish.ubuntu1804_aarch64_cpu', "build_static_libmxnet ${mxnet_variant}", false) + ci_utils.docker_run('centos7_aarch64_cpu', "build_static_libmxnet ${mxnet_variant}", false) ci_utils.pack_lib("mxnet_${mxnet_variant}", libmxnet_pipeline.get_stash(mxnet_variant)) } } diff --git a/cd/mxnet_lib/mxnet_lib_pipeline.groovy b/cd/mxnet_lib/mxnet_lib_pipeline.groovy index 5ca1bb1c6d00..9c154a911f97 100644 --- a/cd/mxnet_lib/mxnet_lib_pipeline.groovy +++ b/cd/mxnet_lib/mxnet_lib_pipeline.groovy @@ -76,7 +76,7 @@ def get_stash(mxnet_variant) { // The environment corresponds to the docker files in the 'docker' directory def get_environment(mxnet_variant) { if (mxnet_variant.startsWith("aarch64")) { - return "publish.ubuntu1804_aarch64_cpu" + return "centos7_aarch64_cpu" } else if (mxnet_variant.startsWith("cu")) { // Remove 'mkl' suffix from variant to properly format test environment return "ubuntu_gpu_${mxnet_variant.replace('mkl', '')}" diff --git a/cd/python/docker/Dockerfile b/cd/python/docker/Dockerfile index 28d85cff1f11..16d20832eb0c 100644 --- a/cd/python/docker/Dockerfile +++ b/cd/python/docker/Dockerfile @@ -36,12 +36,12 @@ RUN apt-get install -y libgomp1 ARG MXNET_VARIANT RUN if [ "$MXNET_VARIANT" = "aarch64_cpu" ] ; then echo "not installing libquadmath0 on aarch64" ; else apt-get install -y libquadmath0 ; fi -RUN if [ "$MXNET_VARIANT" = "aarch64_cpu" ] ; \ - then wget https://armkeil.blob.core.windows.net/developer/Files/downloads/hpc/arm-performance-libraries/21-0-0/Ubuntu18.04/arm-performance-libraries_21.0_Ubuntu-18.04_gcc-8.2.tar && \ +RUN if [ "$MXNET_VARIANT" = "aarch64_cpu" ] ; then \ + wget https://armkeil.blob.core.windows.net/developer/Files/downloads/hpc/arm-performance-libraries/21-0-0/Ubuntu18.04/arm-performance-libraries_21.0_Ubuntu-18.04_gcc-8.2.tar && \ tar -xvf arm-performance-libraries_21.0_Ubuntu-18.04_gcc-8.2.tar && \ 
arm-performance-libraries_21.0_Ubuntu-18.04_gcc-8.2/arm-performance-libraries_21.0_Ubuntu-18.04.sh -a; \ fi -ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/opt/arm/armpl_21.0_gcc-8.2/lib/ +ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/opt/arm/armpl_21.0_gcc-8.2/lib ARG MXNET_COMMIT_ID ENV MXNET_COMMIT_ID=${MXNET_COMMIT_ID} diff --git a/cd/python/docker/Jenkins_pipeline.groovy b/cd/python/docker/Jenkins_pipeline.groovy index bf3df36556b6..46fc64714cc3 100644 --- a/cd/python/docker/Jenkins_pipeline.groovy +++ b/cd/python/docker/Jenkins_pipeline.groovy @@ -42,7 +42,7 @@ def get_pipeline(mxnet_variant) { // The environment corresponds to the docker files in the 'docker' directory def get_environment(mxnet_variant) { if (mxnet_variant.startsWith('aarch64')) { - return "publish.ubuntu1804_aarch64_cpu" + return "centos7_aarch64_cpu" } if (mxnet_variant.startsWith('cu')) { return "ubuntu_gpu_${mxnet_variant}" diff --git a/cd/python/pypi/Jenkins_pipeline.groovy b/cd/python/pypi/Jenkins_pipeline.groovy index dfb3018b4500..8b2d37ff3090 100644 --- a/cd/python/pypi/Jenkins_pipeline.groovy +++ b/cd/python/pypi/Jenkins_pipeline.groovy @@ -48,7 +48,7 @@ def get_pipeline(mxnet_variant) { // The environment corresponds to the docker files in the 'docker' directory def get_environment(mxnet_variant) { if (mxnet_variant.startsWith('aarch64')) { - return "publish.ubuntu1804_aarch64_cpu" + return "centos7_aarch64_cpu" } if (mxnet_variant.startsWith('cu')) { return "ubuntu_gpu_${mxnet_variant}" @@ -73,7 +73,7 @@ def test(mxnet_variant) { // test wheel file def environment = get_environment(mxnet_variant) def nvidia_docker = mxnet_variant.startsWith('cu') - ci_utils.docker_run(environment, "cd_integration_test_pypi python3 ${nvidia_docker}", nvidia_docker) + ci_utils.docker_run(environment, "cd_integration_test_pypi ${mxnet_variant} ${nvidia_docker}", nvidia_docker) } } diff --git a/ci/docker/Dockerfile.build.centos7_aarch64_cpu b/ci/docker/Dockerfile.build.centos7_aarch64_cpu new file mode 100644 
index 000000000000..36415107ae6e --- /dev/null +++ b/ci/docker/Dockerfile.build.centos7_aarch64_cpu @@ -0,0 +1,111 @@ +# -*- mode: dockerfile -*- +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# Dockerfile for CentOS 7 AArch64 CPU build. +# Via the CentOS 7 Dockerfiles, we ensure MXNet continues to run fine on older systems. 
+ +FROM arm64v8/centos:7 + +WORKDIR /work/deps + +RUN yum -y check-update || true && \ + yum -y install epel-release centos-release-scl centos-release-scl-rh && \ + yum install -y \ + # Utilities + wget \ + unzip \ + patchelf \ + pandoc \ + # Development tools + git \ + make \ + ninja-build \ + automake \ + autoconf \ + libtool \ + protobuf-compiler \ + protobuf-devel \ + # CentOS Software Collections https://www.softwarecollections.org + devtoolset-10 \ + devtoolset-10-gcc \ + devtoolset-10-gcc-c++ \ + devtoolset-10-gcc-gfortran \ + rh-python38 \ + rh-python38-python-numpy \ + rh-python38-python-scipy \ + # Libraries + opencv-devel \ + openssl-devel \ + zeromq-devel \ + # Build-dependencies for ccache 3.7.9 + gperf \ + libb2-devel \ + libzstd-devel && \ + yum clean all + +# Make Red Hat Developer Toolset 10.0 and Python 3.8 Software Collections available by default +# during the following build steps in this Dockerfile +SHELL [ "/usr/bin/scl", "enable", "devtoolset-10", "rh-python38" ] + +# Install minimum required cmake version +RUN cd /usr/local/src && \ + wget -nv https://cmake.org/files/v3.20/cmake-3.20.5-linux-aarch64.sh && \ + sh cmake-3.20.5-linux-aarch64.sh --prefix=/usr/local --skip-license && \ + rm cmake-3.20.5-linux-aarch64.sh + +# ccache 3.7.9 has fixes for caching nvcc outputs +RUN cd /usr/local/src && \ + git clone --recursive https://github.com/ccache/ccache.git && \ + cd ccache && \ + git checkout v3.7.9 && \ + ./autogen.sh && \ + ./configure --disable-man && \ + make -j$(nproc) && \ + make install && \ + cd /usr/local/src && \ + rm -rf ccache + +# Arm Performance Libraries 21.0 +RUN cd /usr/local/src && \ + wget https://armkeil.blob.core.windows.net/developer/Files/downloads/hpc/arm-performance-libraries/21-0-0/RHEL7/arm-performance-libraries_21.0_RHEL-7_gcc-8.2.tar && \ + tar -xvf arm-performance-libraries_21.0_RHEL-7_gcc-8.2.tar && \ + arm-performance-libraries_21.0_RHEL-7_gcc-8.2/arm-performance-libraries_21.0_RHEL-7.sh -a && \ + rm -rf 
arm-performance-libraries_21.0_RHEL-7_gcc-8.2.tar arm-performance-libraries_21.0_RHEL-7_gcc-8.2 +ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/opt/arm/armpl_21.0_gcc-8.2/lib + +# Fix the en_DK.UTF-8 locale to test locale invariance +RUN localedef -i en_DK -f UTF-8 en_DK.UTF-8 + +# Python dependencies +RUN python3 -m pip install --upgrade pip +COPY install/requirements_aarch64 /work/ +RUN python3 -m pip install -r /work/requirements_aarch64 + +ARG USER_ID=0 +COPY install/centos7_adduser.sh /work/ +RUN /work/centos7_adduser.sh + +ENV PYTHONPATH=./python/ +# Verify that MXNet works correctly when the C locale is set to a locale that uses a comma as the +# decimal separator. Please see #16134 for an example of a bug caused by incorrect handling of +# number serialization and deserialization. +ENV LC_NUMERIC=en_DK.UTF-8 +WORKDIR /work/mxnet + +COPY runtime_functions.sh /work/ + diff --git a/ci/docker/Dockerfile.publish.ubuntu1804_aarch64_cpu b/ci/docker/Dockerfile.publish.ubuntu1804_aarch64_cpu deleted file mode 100644 index f4da407fc7d8..000000000000 --- a/ci/docker/Dockerfile.publish.ubuntu1804_aarch64_cpu +++ /dev/null @@ -1,39 +0,0 @@ -# -*- mode: dockerfile -*- -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-# -# Dockerfile to build and run MXNet on Ubuntu 18.04 for CPU - -FROM arm64v8/ubuntu:18.04 - -WORKDIR /work/deps - -COPY install/ubuntu_aarch64_publish.sh /work/ -RUN /work/ubuntu_aarch64_publish.sh -ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/opt/arm/armpl_21.0_gcc-8.2/lib/ -ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/gcc-8.5.0/lib64 -ENV PATH=${PATH}:/usr/local/gcc-8.5.0/bin - -ARG USER_ID=0 -ARG GROUP_ID=0 -COPY install/ubuntu_adduser.sh /work/ -RUN /work/ubuntu_adduser.sh - -COPY runtime_functions.sh /work/ - -WORKDIR /work/mxnet -ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/lib diff --git a/ci/docker/install/requirements_aarch64 b/ci/docker/install/requirements_aarch64 index c3662196a2fd..327a78f7d960 100644 --- a/ci/docker/install/requirements_aarch64 +++ b/ci/docker/install/requirements_aarch64 @@ -25,10 +25,8 @@ decorator==4.4.0 mock==2.0.0 nose==1.3.7 nose-timer==0.7.3 -numpy pylint==2.3.1 # pylint and astroid need to be aligned astroid==2.3.3 # pylint and astroid need to be aligned requests<2.19.0,>=2.18.4 -scipy setuptools coverage diff --git a/ci/docker/install/ubuntu_aarch64_publish.sh b/ci/docker/install/ubuntu_aarch64_publish.sh deleted file mode 100755 index 78165265187f..000000000000 --- a/ci/docker/install/ubuntu_aarch64_publish.sh +++ /dev/null @@ -1,93 +0,0 @@ -#!/usr/bin/env bash - -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# Build on Ubuntu 18.04 LTS for LINUX CPU/GPU -set -ex - -apt-get update -apt-get install -y software-properties-common -add-apt-repository ppa:ubuntu-toolchain-r/test -y -add-apt-repository ppa:openjdk-r/ppa -y # Java lib -apt-get update -apt-get install -y git \ - build-essential \ - ninja-build \ - libssl-dev \ - libcurl4-openssl-dev \ - ccache \ - unzip \ - libtool \ - curl \ - wget \ - sudo \ - gnupg \ - gnupg2 \ - gnupg-agent \ - libc6-lse \ - pandoc \ - python3 \ - python3-pip \ - automake \ - pkg-config \ - openjdk-8-jdk \ - patchelf - -# build gcc-8.5 from source -apt update -apt install -y flex bison -wget https://ftpmirror.gnu.org/gcc/gcc-8.5.0/gcc-8.5.0.tar.xz -tar xf gcc-8.5.0.tar.xz -cd gcc-8.5.0/ -sed -i contrib/download_prerequisites -e '/base_url=/s/ftp/http/' -contrib/download_prerequisites -cd .. -mkdir build && cd build -../gcc-8.5.0/configure -v --build=aarch64-linux-gnu --host=aarch64-linux-gnu --target=aarch64-linux-gnu --prefix=/usr/local/gcc-8.5.0 --enable-checking=release --enable-languages=c,c++,fortran --disable-multilib --program-suffix=-8.5 -make -j$(nproc) -sudo make install-strip -cd .. 
-rm -rf gcc-8.5.0.tar.xz -update-alternatives --install /usr/bin/gcc gcc /usr/local/gcc-8.5.0/bin/gcc-8.5 100 --slave /usr/bin/g++ g++ /usr/local/gcc-8.5.0/bin/g++-8.5 --slave /usr/bin/gcov gcov /usr/local/gcc-8.5.0/bin/gcov-8.5 --slave /usr/bin/gfortran gfortran /usr/local/gcc-8.5.0/bin/gfortran-8.5 - -curl -o apache-maven-3.3.9-bin.tar.gz -L http://www.eu.apache.org/dist/maven/maven-3/3.3.9/binaries/apache-maven-3.3.9-bin.tar.gz \ - || curl -o apache-maven-3.3.9-bin.tar.gz -L https://search.maven.org/remotecontent?filepath=org/apache/maven/apache-maven/3.3.9/apache-maven-3.3.9-bin.tar.gz - -tar xzf apache-maven-3.3.9-bin.tar.gz -mkdir /usr/local/maven -mv apache-maven-3.3.9/ /usr/local/maven/ -update-alternatives --install /usr/bin/mvn mvn /usr/local/maven/apache-maven-3.3.9/bin/mvn 1 -update-ca-certificates -f - -# the version of the pip shipped with ubuntu may be too lower, install a recent version here -python3 -m pip install --upgrade pip - -python3 -m pip install --upgrade --ignore-installed nose cpplint==1.3.0 pylint==2.3.1 nose-timer 'numpy<2.0.0,>1.16.0' 'requests<3,>=2.20.0' scipy boto3 - -# CMake 3.13.2+ is required -wget https://github.com/Kitware/CMake/releases/download/v3.16.5/cmake-3.16.5.tar.gz -tar -zxvf cmake-3.16.5.tar.gz -cd cmake-3.16.5 -./bootstrap -make -j$(nproc) -sudo make install - -# Download and set up Arm Performance Libraries -wget https://armkeil.blob.core.windows.net/developer/Files/downloads/hpc/arm-performance-libraries/21-0-0/Ubuntu18.04/arm-performance-libraries_21.0_Ubuntu-18.04_gcc-8.2.tar -tar -xvf arm-performance-libraries_21.0_Ubuntu-18.04_gcc-8.2.tar -arm-performance-libraries_21.0_Ubuntu-18.04_gcc-8.2/arm-performance-libraries_21.0_Ubuntu-18.04.sh -a diff --git a/ci/docker/runtime_functions.sh b/ci/docker/runtime_functions.sh index 156264aaaea5..8e1d31d6a0c5 100755 --- a/ci/docker/runtime_functions.sh +++ b/ci/docker/runtime_functions.sh @@ -984,6 +984,10 @@ cd_unittest_ubuntu() { local nose_cmd="nosetests-3.4" + if [[ 
${mxnet_variant} = aarch64_cpu ]]; then + source /opt/rh/rh-python38/enable + fi + $nose_cmd $NOSE_TIMER_ARGUMENTS --verbose tests/python/unittest $nose_cmd $NOSE_TIMER_ARGUMENTS --verbose tests/python/quantization @@ -2007,6 +2011,9 @@ build_static_libmxnet() { set -ex pushd . local mxnet_variant=${1:?"This function requires a python command as the first argument"} + if [[ ${mxnet_variant} = aarch64_cpu ]]; then + source /opt/rh/devtoolset-10/enable + fi CMAKE_STATICBUILD=1 source tools/staticbuild/build.sh ${mxnet_variant} popd @@ -2017,6 +2024,9 @@ cd_package_pypi() { set -ex pushd . local mxnet_variant=${1:?"This function requires a python command as the first argument"} + if [[ ${mxnet_variant} = aarch64_cpu ]]; then + source /opt/rh/rh-python38/enable + fi ./cd/python/pypi/pypi_package.sh ${mxnet_variant} popd } @@ -2024,25 +2034,27 @@ cd_package_pypi() { # Sanity checks wheel file cd_integration_test_pypi() { set -ex - local python_cmd=${1:?"This function requires a python command as the first argument"} + local mxnet_variant=${1:?"This function requires a python command as the first argument"} local gpu_enabled=${2:-"false"} local test_conv_params='' local mnist_params='' - local pip_cmd='pip3' - if [ "${gpu_enabled}" = "true" ]; then mnist_params="--gpu 0" test_conv_params="--gpu" fi + if [[ ${mxnet_variant} = aarch64_cpu ]]; then + source /opt/rh/rh-python38/enable + fi + # install mxnet wheel package - ${pip_cmd} install --user ./wheel_build/dist/*.whl + python3 -m pip install --user ./wheel_build/dist/*.whl # execute tests - ${python_cmd} /work/mxnet/tests/python/train/test_conv.py ${test_conv_params} - ${python_cmd} /work/mxnet/example/image-classification/train_mnist.py ${mnist_params} + python3 /work/mxnet/tests/python/train/test_conv.py ${test_conv_params} + python3 /work/mxnet/example/image-classification/train_mnist.py ${mnist_params} } # Publishes wheel to PyPI diff --git a/cmake/Modules/FindArmPL.cmake b/cmake/Modules/FindArmPL.cmake index 
5ff4e44832c6..49d8e096d74a 100644 --- a/cmake/Modules/FindArmPL.cmake +++ b/cmake/Modules/FindArmPL.cmake @@ -42,8 +42,8 @@ SET(ArmPL_LIBRARIES ${ArmPL_LIB} ${MATH_LIB} ${STRING_LIB} - /usr/local/gcc-8.5.0/lib64/libgfortran.so - /usr/lib/aarch64-linux-gnu/libm.so + libgfortran.so + libm.so ) SET(ArmPL_FOUND ON) diff --git a/tests/python/unittest/test_ndarray.py b/tests/python/unittest/test_ndarray.py index 3f93cbe30284..c8fbf35065b7 100644 --- a/tests/python/unittest/test_ndarray.py +++ b/tests/python/unittest/test_ndarray.py @@ -31,13 +31,11 @@ from mxnet.test_utils import np_reduce from mxnet.test_utils import same from mxnet.test_utils import random_sample, rand_shape_nd, random_arrays -from mxnet.test_utils import is_aarch64_run from mxnet import runtime from numpy.testing import assert_allclose, assert_array_equal, assert_array_almost_equal import mxnet.autograd from mxnet.base import integer_types from mxnet.ndarray.ndarray import py_slice -import unittest def check_with_uniform(uf, arg_shapes, dim=None, npuf=None, rmin=-10, type_list=[np.float32]): @@ -1285,7 +1283,6 @@ def test_output(): @with_seed() -@unittest.skipIf(is_aarch64_run(), "test fails on aarch64 - tracked in #20289") def test_ndarray_fluent(): has_grad = set(['flatten', 'expand_dims', 'flip', 'tile', 'transpose', 'sum', 'nansum', 'prod', 'nanprod', 'mean', 'max', 'min', 'reshape', 'broadcast_to', 'split', 'split_v2', diff --git a/tests/python/unittest/test_numpy_interoperability.py b/tests/python/unittest/test_numpy_interoperability.py index 63e74c746d5b..fd8abf1849be 100644 --- a/tests/python/unittest/test_numpy_interoperability.py +++ b/tests/python/unittest/test_numpy_interoperability.py @@ -28,7 +28,6 @@ from mxnet.test_utils import assert_almost_equal from mxnet.test_utils import use_np from mxnet.test_utils import is_op_runnable -from mxnet.test_utils import is_aarch64_run from common import assertRaises, with_seed, random_seed, setup_module, teardown from mxnet.numpy_dispatch_protocol 
import with_array_function_protocol, with_array_ufunc_protocol from mxnet.numpy_dispatch_protocol import _NUMPY_ARRAY_FUNCTION_LIST, _NUMPY_ARRAY_UFUNC_LIST @@ -3067,7 +3066,6 @@ def test_np_memory_array_function(): @with_seed() @use_np -@unittest.skipIf(is_aarch64_run(), "test fails on aarch64 - tracked in #20289") @with_array_function_protocol def test_np_array_function_protocol(): check_interoperability(_NUMPY_ARRAY_FUNCTION_LIST) diff --git a/tests/python/unittest/test_numpy_op.py b/tests/python/unittest/test_numpy_op.py index 6833cd3fb765..add0620be71d 100644 --- a/tests/python/unittest/test_numpy_op.py +++ b/tests/python/unittest/test_numpy_op.py @@ -34,7 +34,6 @@ from mxnet.test_utils import check_numeric_gradient, use_np, collapse_sum_like, effective_dtype from mxnet.test_utils import new_matrix_with_real_eigvals_nd from mxnet.test_utils import new_sym_matrix_with_real_eigvals_nd -from mxnet.test_utils import is_aarch64_run from common import assertRaises, with_seed import random from mxnet.test_utils import verify_generator, gen_buckets_probs_with_ppf @@ -3575,7 +3574,6 @@ def hybrid_forward(self, F, x): @with_seed() @use_np -@unittest.skipIf(is_aarch64_run(), "test fails on aarch64 - tracked in #20289") def test_np_delete(): class TestDelete(HybridBlock): def __init__(self, obj, axis=None): diff --git a/tests/python/unittest/test_profiler.py b/tests/python/unittest/test_profiler.py index 8e8209f5235b..ae7352c19301 100644 --- a/tests/python/unittest/test_profiler.py +++ b/tests/python/unittest/test_profiler.py @@ -61,10 +61,10 @@ def test_profiler(): for i in range(iter_num): if i == begin_profiling_iter: - t0 = time.clock() + t0 = time.process_time() profiler.set_state('run') if i == end_profiling_iter: - t1 = time.clock() + t1 = time.process_time() profiler.set_state('stop') executor.forward() c = executor.outputs[0] diff --git a/tools/dependencies/curl.sh b/tools/dependencies/curl.sh index 8e546146eeaa..948d4a2ae088 100755 --- 
a/tools/dependencies/curl.sh +++ b/tools/dependencies/curl.sh @@ -19,7 +19,7 @@ # This script builds the static library of libcurl that can be used as dependency of mxnet. set -ex -LIBCURL_VERSION=7.61.0 +LIBCURL_VERSION=7.77.0 if [[ ! -f $DEPS_PATH/lib/libcurl.a ]]; then # download and build libcurl >&2 echo "Building libcurl..." @@ -30,9 +30,9 @@ if [[ ! -f $DEPS_PATH/lib/libcurl.a ]]; then pushd . cd $DEPS_PATH/curl-$LIBCURL_VERSION if [[ $PLATFORM == 'linux' ]]; then - CONFIG_FLAG="" + CONFIG_FLAG="--with-openssl" elif [[ $PLATFORM == 'darwin' ]]; then - CONFIG_FLAG="--with-darwinssl" + CONFIG_FLAG="--with-secure-transport" fi ./configure $CONFIG_FLAG \ --with-zlib \ diff --git a/tools/dependencies/openssl.sh b/tools/dependencies/openssl.sh index 9709825d7e4a..697c73518308 100755 --- a/tools/dependencies/openssl.sh +++ b/tools/dependencies/openssl.sh @@ -19,7 +19,7 @@ # This script builds the static library of openssl that can be used as dependency of mxnet. set -ex -OPENSSL_VERSION=1.1.1b +OPENSSL_VERSION=1.1.1k if [[ ! -f $DEPS_PATH/lib/libssl.a ]] || [[ ! -f $DEPS_PATH/lib/libcrypto.a ]]; then # download and build openssl >&2 echo "Building openssl..." diff --git a/tools/staticbuild/build_lib_cmake.sh b/tools/staticbuild/build_lib_cmake.sh index a29c393f9a9f..e6c14e4bb7a7 100755 --- a/tools/staticbuild/build_lib_cmake.sh +++ b/tools/staticbuild/build_lib_cmake.sh @@ -32,7 +32,11 @@ git submodule update --init --recursive || true # Build libmxnet.so rm -rf build; mkdir build; cd build cmake -GNinja -C $cmake_config -DCMAKE_PREFIX_PATH=${DEPS_PATH} -DCMAKE_FIND_ROOT_PATH=${DEPS_PATH} .. -ninja +if [[ ! 
$ARCH == 'aarch64' ]]; then + ninja +else + ninja-build +fi cd - # Move to lib From 7fb6d152183f3983c04516a926dd5fbb461521ee Mon Sep 17 00:00:00 2001 From: waytrue17 <52505574+waytrue17@users.noreply.github.com> Date: Tue, 6 Jul 2021 16:13:44 -0700 Subject: [PATCH 2/2] [v1.x] Update CD pipeline to build with centos7 (#20409) Co-authored-by: Wei Chu --- cd/mxnet_lib/Jenkins_pipeline.groovy | 2 +- cd/mxnet_lib/mxnet_lib_pipeline.groovy | 4 +- cd/python/docker/Jenkins_pipeline.groovy | 4 +- cd/python/pypi/Jenkins_pipeline.groovy | 4 +- ci/Jenkinsfile_docker_cache | 1 + ci/Jenkinsfile_utils.groovy | 5 +- ci/build.py | 118 ++++++++++++------ ...d.centos7_cpu => Dockerfile.build.centos7} | 18 ++- ci/docker/Dockerfile.build.centos7_gpu | 43 ------- ci/docker/docker-compose.yml | 94 ++++++++++++++ ci/docker/install/centos7_core.sh | 5 + ci/docker/install/requirements | 2 +- ci/docker/runtime_functions.sh | 6 +- ci/docker_cache.py | 9 +- ci/jenkins/Jenkins_steps.groovy | 14 +-- tests/README.md | 17 ++- 16 files changed, 237 insertions(+), 109 deletions(-) rename ci/docker/{Dockerfile.build.centos7_cpu => Dockerfile.build.centos7} (60%) delete mode 100644 ci/docker/Dockerfile.build.centos7_gpu create mode 100644 ci/docker/docker-compose.yml diff --git a/cd/mxnet_lib/Jenkins_pipeline.groovy b/cd/mxnet_lib/Jenkins_pipeline.groovy index ac372c621206..31da48928c71 100644 --- a/cd/mxnet_lib/Jenkins_pipeline.groovy +++ b/cd/mxnet_lib/Jenkins_pipeline.groovy @@ -57,7 +57,7 @@ def build(mxnet_variant) { ci_utils.init_git() // Compiling in Ubuntu14.04 due to glibc issues. // This should be updates once we have clarity on this issue. 
- ci_utils.docker_run('publish.ubuntu1404_cpu', "build_static_libmxnet ${mxnet_variant}", false) + ci_utils.docker_run('centos7_cpu', "build_static_libmxnet ${mxnet_variant}", false) ci_utils.pack_lib("mxnet_${mxnet_variant}", libmxnet_pipeline.get_stash(mxnet_variant)) } } diff --git a/cd/mxnet_lib/mxnet_lib_pipeline.groovy b/cd/mxnet_lib/mxnet_lib_pipeline.groovy index 9c154a911f97..d9fb3d810bc5 100644 --- a/cd/mxnet_lib/mxnet_lib_pipeline.groovy +++ b/cd/mxnet_lib/mxnet_lib_pipeline.groovy @@ -79,9 +79,9 @@ def get_environment(mxnet_variant) { return "centos7_aarch64_cpu" } else if (mxnet_variant.startsWith("cu")) { // Remove 'mkl' suffix from variant to properly format test environment - return "ubuntu_gpu_${mxnet_variant.replace('mkl', '')}" + return "centos7_gpu_${mxnet_variant.replace('mkl', '')}" } - return "ubuntu_cpu" + return "centos7_cpu" } // Returns the variant appropriate jenkins node test in which diff --git a/cd/python/docker/Jenkins_pipeline.groovy b/cd/python/docker/Jenkins_pipeline.groovy index 46fc64714cc3..847756adef9c 100644 --- a/cd/python/docker/Jenkins_pipeline.groovy +++ b/cd/python/docker/Jenkins_pipeline.groovy @@ -45,9 +45,9 @@ def get_environment(mxnet_variant) { return "centos7_aarch64_cpu" } if (mxnet_variant.startsWith('cu')) { - return "ubuntu_gpu_${mxnet_variant}" + return "centos7_gpu_${mxnet_variant}" } - return "ubuntu_cpu" + return "centos7_cpu" } diff --git a/cd/python/pypi/Jenkins_pipeline.groovy b/cd/python/pypi/Jenkins_pipeline.groovy index 8b2d37ff3090..4e642e08fe53 100644 --- a/cd/python/pypi/Jenkins_pipeline.groovy +++ b/cd/python/pypi/Jenkins_pipeline.groovy @@ -51,9 +51,9 @@ def get_environment(mxnet_variant) { return "centos7_aarch64_cpu" } if (mxnet_variant.startsWith('cu')) { - return "ubuntu_gpu_${mxnet_variant}" + return "centos7_gpu_${mxnet_variant}" } - return "ubuntu_cpu" + return "centos7_cpu" } def build(mxnet_variant) { diff --git a/ci/Jenkinsfile_docker_cache b/ci/Jenkinsfile_docker_cache index 
f90bf0459f03..1be54a33d91e 100644 --- a/ci/Jenkinsfile_docker_cache +++ b/ci/Jenkinsfile_docker_cache @@ -38,6 +38,7 @@ core_logic: { timeout(time: total_timeout, unit: 'MINUTES') { utils.init_git() sh "ci/docker_cache.py --docker-registry ${env.DOCKER_ECR_REGISTRY}" + sh "cd ci && python3 ./docker_login.py --secret-name ${env.DOCKERHUB_SECRET_NAME} && docker-compose -f docker/docker-compose.yml build --parallel && docker-compose -f docker/docker-compose.yml push && docker logout" } } } diff --git a/ci/Jenkinsfile_utils.groovy b/ci/Jenkinsfile_utils.groovy index 3f774f052703..a758c72211cc 100644 --- a/ci/Jenkinsfile_utils.groovy +++ b/ci/Jenkinsfile_utils.groovy @@ -147,9 +147,10 @@ def collect_test_results_windows(original_file_name, new_file_name) { } -def docker_run(platform, function_name, use_nvidia, shared_mem = '500m', env_vars = "") { - def command = "ci/build.py %ENV_VARS% --docker-registry ${env.DOCKER_ECR_REGISTRY} %USE_NVIDIA% --platform %PLATFORM% --docker-build-retries 3 --shm-size %SHARED_MEM% /work/runtime_functions.sh %FUNCTION_NAME%" +def docker_run(platform, function_name, use_nvidia, shared_mem = '500m', env_vars = "", build_args = "") { + def command = "ci/build.py %ENV_VARS% %BUILD_ARGS% --docker-registry ${env.DOCKER_CACHE_REGISTRY} %USE_NVIDIA% --platform %PLATFORM% --docker-build-retries 3 --shm-size %SHARED_MEM% /work/runtime_functions.sh %FUNCTION_NAME%" command = command.replaceAll('%ENV_VARS%', env_vars.length() > 0 ? "-e ${env_vars}" : '') + command = command.replaceAll('%BUILD_ARGS%', env_vars.length() > 0 ? "${build_args}" : '') command = command.replaceAll('%USE_NVIDIA%', use_nvidia ? 
'--nvidiadocker' : '') command = command.replaceAll('%PLATFORM%', platform) command = command.replaceAll('%FUNCTION_NAME%', function_name) diff --git a/ci/build.py b/ci/build.py index 645eb96875e9..a316225bae29 100755 --- a/ci/build.py +++ b/ci/build.py @@ -38,18 +38,28 @@ from subprocess import check_call, check_output from typing import * +import yaml + from util import * +DOCKER_COMPOSE_WHITELIST = ('centos7_cpu', 'centos7_gpu_cu92', 'centos7_gpu_cu100', + 'centos7_gpu_cu101', 'centos7_gpu_cu102', 'centos7_gpu_cu110', + 'centos7_gpu_cu112') + +# Files for docker compose +DOCKER_COMPOSE_FILES = set(('docker/build.centos7')) def get_dockerfiles_path(): return "docker" -def get_platforms(path: str = get_dockerfiles_path()) -> List[str]: +def get_platforms(path: str = get_dockerfiles_path(), legacy_only=False) -> List[str]: """Get a list of architectures given our dockerfiles""" dockerfiles = glob.glob(os.path.join(path, "Dockerfile.*")) - dockerfiles = list(filter(lambda x: x[-1] != '~', dockerfiles)) - files = list(map(lambda x: re.sub(r"Dockerfile.(.*)", r"\1", x), dockerfiles)) + dockerfiles = set(filter(lambda x: x[-1] != '~', dockerfiles)) + files = set(map(lambda x: re.sub(r"Dockerfile.(.*)", r"\1", x), dockerfiles)) + if legacy_only: + files = files - DOCKER_COMPOSE_FILES platforms = list(map(lambda x: os.path.split(x)[1], sorted(files))) return platforms @@ -79,6 +89,11 @@ def _hash_file(ctx, filename): def get_docker_tag(platform: str, registry: str) -> str: """:return: docker tag to be used for the container""" + if platform in DOCKER_COMPOSE_WHITELIST: + with open("docker/docker-compose.yml", "r") as f: + compose_config = yaml.load(f.read(), yaml.SafeLoader) + return compose_config["services"][platform]["image"].replace('${DOCKER_CACHE_REGISTRY}', registry) + platform = platform if any(x in platform for x in ['build.', 'publish.']) else 'build.{}'.format(platform) if not registry: registry = "mxnet_local" @@ -106,41 +121,58 @@ def build_docker(platform: 
str, registry: str, num_retries: int, no_cache: bool, :return: Id of the top level image """ tag = get_docker_tag(platform=platform, registry=registry) - logging.info("Building docker container tagged '%s'", tag) - # - # We add a user with the same group as the executing non-root user so files created in the - # container match permissions of the local user. Same for the group. - # - # These variables are used in the docker files to create user and group with these ids. - # see: docker/install/ubuntu_adduser.sh - # - # cache-from is needed so we use the cached images tagged from the remote via - # docker pull see: docker_cache.load_docker_cache - # - # This also prevents using local layers for caching: https://github.com/moby/moby/issues/33002 - # So to use local caching, we should omit the cache-from by using --no-dockerhub-cache argument to this - # script. - # - # This doesn't work with multi head docker files. - # - cmd = ["docker", "build", - "-f", get_dockerfile(platform), - "--build-arg", "USER_ID={}".format(os.getuid()), - "--build-arg", "GROUP_ID={}".format(os.getgid())] - if no_cache: - cmd.append("--no-cache") - if cache_intermediate: - cmd.append("--rm=false") - elif registry: - cmd.extend(["--cache-from", tag]) - cmd.extend(["-t", tag, get_dockerfiles_path()]) + + # docker-compose + if platform in DOCKER_COMPOSE_WHITELIST: + logging.info('Building docker container tagged \'%s\' based on ci/docker/docker-compose.yml', tag) + # We add a user with the same group as the executing non-root user so files created in the + # container match permissions of the local user. Same for the group. 
+ cmd = ['docker-compose', '-f', 'docker/docker-compose.yml', 'build', + "--build-arg", "USER_ID={}".format(os.getuid()), + "--build-arg", "GROUP_ID={}".format(os.getgid())] + if cache_intermediate: + cmd.append('--no-rm') + cmd.append(platform) + else: + logging.info("Building docker container tagged '%s'", tag) + # + # We add a user with the same group as the executing non-root user so files created in the + # container match permissions of the local user. Same for the group. + # + # These variables are used in the docker files to create user and group with these ids. + # see: docker/install/ubuntu_adduser.sh + # + # cache-from is needed so we use the cached images tagged from the remote via + # docker pull see: docker_cache.load_docker_cache + # + # This also prevents using local layers for caching: https://github.com/moby/moby/issues/33002 + # So to use local caching, we should omit the cache-from by using --no-dockerhub-cache argument to this + # script. + # + # This doesn't work with multi head docker files. + # + cmd = ["docker", "build", + "-f", get_dockerfile(platform), + "--build-arg", "USER_ID={}".format(os.getuid()), + "--build-arg", "GROUP_ID={}".format(os.getgid())] + if no_cache: + cmd.append("--no-cache") + if cache_intermediate: + cmd.append("--rm=false") + elif registry: + cmd.extend(["--cache-from", tag]) + cmd.extend(["-t", tag, get_dockerfiles_path()]) + + env = os.environ.copy() + env["DOCKER_CACHE_REGISTRY"] = registry @retry(subprocess.CalledProcessError, tries=num_retries) - def run_cmd(): + def run_cmd(env=None): logging.info("Running command: '%s'", ' '.join(cmd)) - check_call(cmd) + check_call(cmd, env=env) + + run_cmd(env=env) - run_cmd() # Get image id by reading the tag. It's guaranteed (except race condition) that the tag exists. 
Otherwise, the # check_call would have failed image_id = _get_local_image_id(docker_tag=tag) @@ -258,9 +290,19 @@ def list_platforms() -> str: return "\nSupported platforms:\n{}".format('\n'.join(get_platforms())) -def load_docker_cache(tag, docker_registry) -> None: +def load_docker_cache(platform, tag, docker_registry) -> None: """Imports tagged container from the given docker registry""" if docker_registry: + if platform in DOCKER_COMPOSE_WHITELIST: + env = os.environ.copy() + env["DOCKER_CACHE_REGISTRY"] = docker_registry + cmd = ['docker-compose', '-f', 'docker/docker-compose.yml', 'pull', platform] + logging.info("Running command: 'DOCKER_CACHE_REGISTRY=%s %s'", docker_registry, ' '.join(cmd)) + check_call(cmd, env=env) + return + + env = os.environ.copy() + env["DOCKER_CACHE_REGISTRY"] = docker_registry # noinspection PyBroadException try: import docker_cache @@ -363,8 +405,8 @@ def main() -> int: elif args.platform: platform = args.platform tag = get_docker_tag(platform=platform, registry=args.docker_registry) - if args.docker_registry: - load_docker_cache(tag=tag, docker_registry=args.docker_registry) + if args.docker_registry: + load_docker_cache(platform=platform, tag=tag, docker_registry=args.docker_registry) if not args.run_only: build_docker(platform=platform, registry=args.docker_registry, num_retries=args.docker_build_retries, no_cache=args.no_cache, @@ -409,7 +451,7 @@ def main() -> int: logging.info("Artifacts will be produced in the build/ directory.") for platform in platforms: tag = get_docker_tag(platform=platform, registry=args.docker_registry) - load_docker_cache(tag=tag, docker_registry=args.docker_registry) + load_docker_cache(platform=platform, tag=tag, docker_registry=args.docker_registry) build_docker(platform, registry=args.docker_registry, num_retries=args.docker_build_retries, no_cache=args.no_cache, cache_intermediate=args.cache_intermediate) diff --git a/ci/docker/Dockerfile.build.centos7_cpu b/ci/docker/Dockerfile.build.centos7 
similarity index 60% rename from ci/docker/Dockerfile.build.centos7_cpu rename to ci/docker/Dockerfile.build.centos7 index 0cfa5a9f6e47..0114e9ba5bdb 100644 --- a/ci/docker/Dockerfile.build.centos7_cpu +++ b/ci/docker/Dockerfile.build.centos7 @@ -16,9 +16,22 @@ # specific language governing permissions and limitations # under the License. # -# Dockerfile to build and run MXNet on CentOS 7 for CPU +# +# Dockerfile for CentOS 7 based builds. +# Via the CentOS 7 Dockerfiles, we ensure MXNet continues to run fine on older systems. +# +# See docker-compose.yml for supported BASE_IMAGE ARGs and targets. -FROM centos:7 +#################################################################################################### +# The Dockerfile uses a dynamic BASE_IMAGE (for example centos:7, +# nvidia/cuda:10.2-cudnn7-devel-centos7 etc). +# On top of BASE_IMAGE we install all dependencies shared by all MXNet build +# environments into a "base" target. At the end of this file, we specialize +# "base" for specific usecases. The target built by docker can be selected via +# "--target" option or docker-compose.yml +#################################################################################################### +ARG BASE_IMAGE +FROM $BASE_IMAGE AS base WORKDIR /work/deps @@ -39,3 +52,4 @@ ENV PYTHONPATH=./python/ WORKDIR /work/mxnet COPY runtime_functions.sh /work/ + diff --git a/ci/docker/Dockerfile.build.centos7_gpu b/ci/docker/Dockerfile.build.centos7_gpu deleted file mode 100644 index 7e49e88b3a52..000000000000 --- a/ci/docker/Dockerfile.build.centos7_gpu +++ /dev/null @@ -1,43 +0,0 @@ -# -*- mode: dockerfile -*- -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -# Dockerfile to build and run MXNet on CentOS 7 for GPU - -FROM nvidia/cuda:10.1-devel-centos7 - -WORKDIR /work/deps - -COPY install/centos7_core.sh /work/ -RUN /work/centos7_core.sh -COPY install/centos7_ccache.sh /work/ -RUN /work/centos7_ccache.sh -COPY install/centos7_python.sh /work/ -RUN /work/centos7_python.sh - -ENV CUDNN_VERSION=7.6.0.64 -COPY install/centos7_cudnn.sh /work/ -RUN /work/centos7_cudnn.sh - -ARG USER_ID=0 -COPY install/centos7_adduser.sh /work/ -RUN /work/centos7_adduser.sh - -ENV PYTHONPATH=./python/ -WORKDIR /work/mxnet - -COPY runtime_functions.sh /work/ diff --git a/ci/docker/docker-compose.yml b/ci/docker/docker-compose.yml new file mode 100644 index 000000000000..0784987187a5 --- /dev/null +++ b/ci/docker/docker-compose.yml @@ -0,0 +1,94 @@ +# -*- mode: dockerfile -*- +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# We use the cache_from feature introduced in file form version 3.4 (released 2017-11-01) +version: "3.4" + +# For simplicity, only the centos7_cpu is commented. But the comments apply to +# all other services as well. +services: + ################################################################################################### + # Dockerfile.build.centos7 based images used for building on CentOS7. On + # CentOS7, we respectively test the oldest supported toolchain and dependency + # versions + ################################################################################################### + centos7_cpu: + # The resulting image will be named build.centos7_cpu:latest and will be + # pushed to the dockerhub user specified in the environment variable + # ${DOCKER_CACHE_REGISTRY} (typically "mxnetci") under this name + image: ${DOCKER_CACHE_REGISTRY}/build.centos7_cpu:latest + build: + context: . + dockerfile: Dockerfile.build.centos7 + # Use "base" target declared in Dockerfile.build.centos7 as "build.centos7_cpu:latest" + target: base + args: + # BASE_IMAGE is used to dynamically specify the FROM image in Dockerfile.build.centos7 + BASE_IMAGE: centos:7 + cache_from: + - ${DOCKER_CACHE_REGISTRY}/build.centos7_cpu:latest + centos7_gpu_cu100: + image: ${DOCKER_CACHE_REGISTRY}/build.centos7_gpu_cu100:latest + build: + context: .
+ dockerfile: Dockerfile.build.centos7 + target: base + args: + BASE_IMAGE: nvidia/cuda:10.0-cudnn7-devel-centos7 + cache_from: + - ${DOCKER_CACHE_REGISTRY}/build.centos7_gpu_cu100:latest + centos7_gpu_cu101: + image: ${DOCKER_CACHE_REGISTRY}/build.centos7_gpu_cu101:latest + build: + context: . + dockerfile: Dockerfile.build.centos7 + target: base + args: + BASE_IMAGE: nvidia/cuda:10.1-cudnn7-devel-centos7 + cache_from: + - ${DOCKER_CACHE_REGISTRY}/build.centos7_gpu_cu101:latest + centos7_gpu_cu102: + image: ${DOCKER_CACHE_REGISTRY}/build.centos7_gpu_cu102:latest + build: + context: . + dockerfile: Dockerfile.build.centos7 + target: base + args: + BASE_IMAGE: nvidia/cuda:10.2-cudnn8-devel-centos7 + cache_from: + - ${DOCKER_CACHE_REGISTRY}/build.centos7_gpu_cu102:latest + centos7_gpu_cu110: + image: ${DOCKER_CACHE_REGISTRY}/build.centos7_gpu_cu110:latest + build: + context: . + dockerfile: Dockerfile.build.centos7 + target: base + args: + BASE_IMAGE: nvidia/cuda:11.0-cudnn8-devel-centos7 + cache_from: + - ${DOCKER_CACHE_REGISTRY}/build.centos7_gpu_cu110:latest + centos7_gpu_cu112: + image: ${DOCKER_CACHE_REGISTRY}/build.centos7_gpu_cu112:latest + build: + context: . 
+ dockerfile: Dockerfile.build.centos7 + target: base + args: + BASE_IMAGE: nvidia/cuda:11.2.1-cudnn8-devel-centos7 + cache_from: + - ${DOCKER_CACHE_REGISTRY}/build.centos7_gpu_cu112:latest \ No newline at end of file diff --git a/ci/docker/install/centos7_core.sh b/ci/docker/install/centos7_core.sh index ae5cb719d38a..9b2452e78bcc 100755 --- a/ci/docker/install/centos7_core.sh +++ b/ci/docker/install/centos7_core.sh @@ -39,6 +39,11 @@ yum -y install make yum -y install wget yum -y install unzip yum -y install ninja-build +yum -y install automake +yum -y install patchelf +yum -y install nasm +yum -y install libtool +yum -y install dpkg-dev # CMake 3.13.2+ is required mkdir /opt/cmake && cd /opt/cmake diff --git a/ci/docker/install/requirements b/ci/docker/install/requirements index 1abdc0051a82..210914ac129f 100644 --- a/ci/docker/install/requirements +++ b/ci/docker/install/requirements @@ -32,4 +32,4 @@ astroid==2.3.3 # pylint and astroid need to be aligned requests<2.19.0,>=2.18.4 scipy==1.2.1 setuptools -coverage +coverage \ No newline at end of file diff --git a/ci/docker/runtime_functions.sh b/ci/docker/runtime_functions.sh index 8e1d31d6a0c5..c0047ce27a6d 100755 --- a/ci/docker/runtime_functions.sh +++ b/ci/docker/runtime_functions.sh @@ -1244,8 +1244,8 @@ unittest_ubuntu_cpu_julia10() { unittest_centos7_cpu() { set -ex cd /work/mxnet - python3.6 -m "nose" $NOSE_COVERAGE_ARGUMENTS $NOSE_TIMER_ARGUMENTS --with-xunit --xunit-file nosetests_unittest.xml --verbose tests/python/unittest - python3.6 -m "nose" $NOSE_COVERAGE_ARGUMENTS $NOSE_TIMER_ARGUMENTS --with-xunit --xunit-file nosetests_train.xml --verbose tests/python/train + python3 -m "nose" $NOSE_COVERAGE_ARGUMENTS $NOSE_TIMER_ARGUMENTS --with-xunit --xunit-file nosetests_unittest.xml --verbose tests/python/unittest + python3 -m "nose" $NOSE_COVERAGE_ARGUMENTS $NOSE_TIMER_ARGUMENTS --with-xunit --xunit-file nosetests_train.xml --verbose tests/python/train } unittest_centos7_gpu() { @@ -1253,7 +1253,7 @@ 
unittest_centos7_gpu() { cd /work/mxnet export CUDNN_VERSION=${CUDNN_VERSION:-7.0.3} export DMLC_LOG_STACK_TRACE_DEPTH=10 - python3.6 -m "nose" $NOSE_COVERAGE_ARGUMENTS $NOSE_TIMER_ARGUMENTS --with-xunit --xunit-file nosetests_gpu.xml --verbose tests/python/gpu + python3 -m "nose" $NOSE_COVERAGE_ARGUMENTS $NOSE_TIMER_ARGUMENTS --with-xunit --xunit-file nosetests_gpu.xml --verbose tests/python/gpu } integrationtest_ubuntu_cpu_onnx() { diff --git a/ci/docker_cache.py b/ci/docker_cache.py index da01314f5f8d..0ac96e76ece0 100755 --- a/ci/docker_cache.py +++ b/ci/docker_cache.py @@ -75,6 +75,13 @@ def _build_save_container(platform, registry, load_cache) -> Optional[str]: :param load_cache: Load cache before building :return: Platform if failed, None otherwise """ + # docker-compose + if platform in build_util.DOCKER_COMPOSE_WHITELIST: + build_util.build_docker(platform=platform, registry=registry, num_retries=10, no_cache=False) + push_cmd = ['docker-compose', 'push', platform] + subprocess.check_call(push_cmd) + return None + docker_tag = build_util.get_docker_tag(platform=platform, registry=registry) # Preload cache @@ -205,7 +212,7 @@ def script_name() -> str: args = parser.parse_args() - platforms = build_util.get_platforms() + platforms = build_util.get_platforms(legacy_only=True) if "dkr.ecr" in args.docker_registry: _ecr_login(args.docker_registry) diff --git a/ci/jenkins/Jenkins_steps.groovy b/ci/jenkins/Jenkins_steps.groovy index da6a74a50fd3..9f96c7ad5e6d 100644 --- a/ci/jenkins/Jenkins_steps.groovy +++ b/ci/jenkins/Jenkins_steps.groovy @@ -356,7 +356,7 @@ def compile_centos7_gpu() { ws('workspace/build-centos7-gpu') { timeout(time: max_time, unit: 'MINUTES') { utils.init_git() - utils.docker_run('centos7_gpu', 'build_centos7_gpu', false) + utils.docker_run('centos7_gpu_cu101', 'build_centos7_gpu', false) utils.pack_lib('centos7_gpu', mx_lib) } } @@ -642,7 +642,7 @@ def test_static_scala_cpu() { ws('workspace/ut-publish-scala-cpu') { timeout(time: max_time, 
unit: 'MINUTES') { utils.init_git() - utils.docker_run("publish.ubuntu1404_cpu", 'build_static_scala_cpu', false) + utils.docker_run('publish.ubuntu1404_cpu', 'build_static_scala_cpu', false) } } } @@ -655,7 +655,7 @@ def test_static_python_cpu() { ws('workspace/ut-publish-python-cpu') { timeout(time: max_time, unit: 'MINUTES') { utils.init_git() - utils.docker_run("publish.ubuntu1404_cpu", 'build_static_python_cpu', false) + utils.docker_run('publish.ubuntu1404_cpu', 'build_static_python_cpu', false) } } } @@ -668,7 +668,7 @@ def test_static_python_cpu_cmake() { ws('workspace/ut-publish-python-cpu') { timeout(time: max_time, unit: 'MINUTES') { utils.init_git() - utils.docker_run("publish.ubuntu1404_cpu", 'build_static_python_cpu_cmake', false) + utils.docker_run('publish.ubuntu1404_cpu', 'build_static_python_cpu_cmake', false) } } } @@ -681,7 +681,7 @@ def test_static_python_gpu() { ws('workspace/ut-publish-python-gpu') { timeout(time: max_time, unit: 'MINUTES') { utils.init_git() - utils.docker_run("publish.ubuntu1404_gpu", 'build_static_python_cu101', true) + utils.docker_run('publish.ubuntu1404_gpu', 'build_static_python_cu101', true) } } } @@ -694,7 +694,7 @@ def test_static_python_gpu_cmake() { ws('workspace/ut-publish-python-gpu') { timeout(time: max_time, unit: 'MINUTES') { utils.init_git() - utils.docker_run("publish.ubuntu1404_gpu", 'build_static_python_cu101_cmake', true) + utils.docker_run('publish.ubuntu1404_gpu', 'build_static_python_cu101_cmake', true) } } } @@ -1251,7 +1251,7 @@ def test_centos7_python3_gpu() { timeout(time: max_time, unit: 'MINUTES') { try { utils.unpack_and_init('centos7_gpu', mx_lib) - utils.docker_run('centos7_gpu', 'unittest_centos7_gpu', true) + utils.docker_run('centos7_gpu_cu101', 'unittest_centos7_gpu', true) utils.publish_test_coverage() } finally { utils.collect_test_results_unix('nosetests_gpu.xml', 'nosetests_python3_centos7_gpu.xml') diff --git a/tests/README.md b/tests/README.md index de5d8107a790..997ea36c3e28 100644 
--- a/tests/README.md +++ b/tests/README.md @@ -62,13 +62,20 @@ Ninja is a build tool (like make) that prioritizes building speed. If you will b ## Runing Python Tests Within Docker -1. To run tests inside docker run the following comamdn - ``` - ci/build.py --platform {PLATFORM} /work/runtime_functions.sh {RUNTIME_FUNCTION} - ``` +To run tests inside docker, you first need to install `docker` and `docker-compose` on your machine. + +On Ubuntu you may install them via `sudo apt-get install docker.io docker-compose` +and set them up via `sudo usermod $(whoami) -G docker -a`. + +Then, to run tests inside docker run the following command + +``` +ci/build.py --platform {PLATFORM} /work/runtime_functions.sh {RUNTIME_FUNCTION} +``` + An example for running python tests would be ``` -ci/build.py --platform build_ubuntu_cpu_mkldnn /work/runtime_functions.sh unittest_ubuntu_python3_cpu PYTHONPATH=./python/ nosetests-2.7 tests/python/unittest +ci/build.py --platform build_ubuntu_cpu_mkldnn /work/runtime_functions.sh unittest_ubuntu_python3_cpu ```