From e203f034e4cf78dc9fe87907c8863c86ad9fee9b Mon Sep 17 00:00:00 2001
From: Zhaoqi Zhu <zhaoqizhu96@gmail.com>
Date: Mon, 1 Mar 2021 16:21:28 -0800
Subject: [PATCH] [V1.8.x] Attemp to fix cd for v1.8.x (#19947)

* [v1.x] Migrate to use ECR as docker cache instead of dockerhub (#19654)

* [v1.x] Update CI build scripts to install python 3.6 from deadsnakes repo (#19788)

* Install python3.6 from deadsnakes repo, since 3.5 is EOL'd and get-pip.py no longer works with 3.5.

* Set symlink for python3 to point to newly installed 3.6 version.

* Setting symlink or using update-alternatives causes add-apt-repository to fail, so instead just set alias in environment to call the correct python version.

* Setup symlinks in /usr/local/bin, since it comes first in the path.

* Don't use absolute path for python3 executable, just use python3 from path.

Co-authored-by: Joe Evans <joeev@amazon.com>

* Disable unix-gpu-cu110 pipeline for v1.x build since we now build with cuda 11.0 in windows pipelines. (#19828)

Co-authored-by: Joe Evans <joeev@amazon.com>

* [v1.x] For ECR, ensure we sanitize region input from environment variable (#19882)

* Set default for cache_intermediate.

* Make sure we sanitize region extracted from registry, since we pass it to os.system.

Co-authored-by: Joe Evans <joeev@amazon.com>

* [v1.x] Address CI failures with docker timeouts (v2) (#19890)

* Add random sleep only, since retry attempts are already implemented.

* Reduce random sleep to 2-10 sec.

Co-authored-by: Joe Evans <joeev@amazon.com>

* [v1.x] CI fixes to make more stable and upgradable (#19895)

* Test moving pipelines from p3 to g4.

* Remove fallback codecov command - the existing (first) command works and the second always fails a few times before finally succeeding (and also doesn't support the -P parameter, which causes an error.)

* Stop using docker python client, since it still doesn't support latest nvidia 'gpus' attribute. Switch to using subprocess calls using list parameter (to avoid shell injections).

See https://github.com/docker/docker-py/issues/2395

* Remove old files.

* Fix comment

* Set default environment variables

* Fix GPU syntax.

* Use subprocess.run and redirect output to stdout, don't run docker in interactive mode.

* Check if codecov works without providing parameters now.

* Send docker stderr to sys.stderr

* Support both nvidia-docker configurations, first try '--gpus all', and if that fails, then try '--runtime nvidia'.

Co-authored-by: Joe Evans <joeev@amazon.com>

* fix cd

* fix cudnn version for cu10.2 buiuld

* WAR the dataloader issue with forked processes holding stale references (#19924)

* skip some tests

* fix ski[

* [v.1x] Attempt to fix v1.x cd by installing new cuda compt package (#19959)

* update cude compt for cd

* Update Dockerfile.build.ubuntu_gpu_cu102

* Update Dockerfile.build.ubuntu_gpu_cu102

* Update Dockerfile.build.ubuntu_gpu_cu110

* Update runtime_functions.sh

* Update Dockerfile.build.ubuntu_gpu_cu110

* Update Dockerfile.build.ubuntu_gpu_cu102

* update command

Co-authored-by: Joe Evans <joseph.evans@gmail.com>
Co-authored-by: Joe Evans <joeev@amazon.com>
Co-authored-by: Joe Evans <github@250hacks.net>
Co-authored-by: Przemyslaw Tredak <ptredak@nvidia.com>
---
 cd/python/docker/Dockerfile                   |  12 +-
 ci/Jenkinsfile_docker_cache                   |   2 +-
 ci/Jenkinsfile_utils.groovy                   |  17 +-
 ci/build.py                                   | 122 ++---
 ci/docker/Dockerfile.build.ubuntu_gpu_cu102   |   4 +
 ci/docker/Dockerfile.build.ubuntu_gpu_cu110   |   4 +
 ci/docker/install/ubuntu_python.sh            |   7 +-
 ci/docker/runtime_functions.sh                |  18 +-
 ci/docker_cache.py                            |  46 +-
 ci/jenkins/Jenkins_steps.groovy               |   6 +-
 ci/jenkins/Jenkinsfile_full                   |   1 -
 ci/safe_docker_run.py                         | 248 ----------
 ci/test_safe_docker_run.py                    | 427 ------------------
 python/mxnet/gluon/data/dataloader.py         |   4 +
 .../unittest/test_numpy_interoperability.py   |   2 +
 tests/python/unittest/test_numpy_op.py        |   1 +
 tools/setup_gpu_build_tools.sh                |   2 +-
 17 files changed, 145 insertions(+), 778 deletions(-)
 delete mode 100755 ci/safe_docker_run.py
 delete mode 100644 ci/test_safe_docker_run.py

diff --git a/cd/python/docker/Dockerfile b/cd/python/docker/Dockerfile
index ed97bdc8316a..accbe9bd1d97 100644
--- a/cd/python/docker/Dockerfile
+++ b/cd/python/docker/Dockerfile
@@ -23,11 +23,15 @@
 ARG BASE_IMAGE
 FROM ${BASE_IMAGE}
 
-ARG PYTHON_CMD=python3
 RUN apt-get update && \
-    apt-get install -y wget ${PYTHON_CMD}-dev gcc && \
-    wget https://bootstrap.pypa.io/get-pip.py && \
-    ${PYTHON_CMD} get-pip.py
+    apt-get install -y software-properties-common && \
+    add-apt-repository -y ppa:deadsnakes/ppa && \
+    apt-get update && \
+    apt-get install -y python3.7-dev python3.7-distutils virtualenv wget && \
+    ln -sf /usr/bin/python3.7 /usr/local/bin/python3 && \
+    wget -nv https://bootstrap.pypa.io/get-pip.py && \
+    python3 get-pip.py
+
 
 ARG MXNET_COMMIT_ID
 ENV MXNET_COMMIT_ID=${MXNET_COMMIT_ID}
diff --git a/ci/Jenkinsfile_docker_cache b/ci/Jenkinsfile_docker_cache
index 35e6ff9e7d56..f90bf0459f03 100644
--- a/ci/Jenkinsfile_docker_cache
+++ b/ci/Jenkinsfile_docker_cache
@@ -37,7 +37,7 @@ core_logic: {
       ws('workspace/docker_cache') {
         timeout(time: total_timeout, unit: 'MINUTES') {
           utils.init_git()
-          sh "ci/docker_cache.py --docker-registry ${env.DOCKER_CACHE_REGISTRY}"
+          sh "ci/docker_cache.py --docker-registry ${env.DOCKER_ECR_REGISTRY}"
         }
       }
     }
diff --git a/ci/Jenkinsfile_utils.groovy b/ci/Jenkinsfile_utils.groovy
index 8ecc7e193b97..523fad92cec2 100644
--- a/ci/Jenkinsfile_utils.groovy
+++ b/ci/Jenkinsfile_utils.groovy
@@ -112,20 +112,7 @@ def get_git_commit_hash() {
 }
 
 def publish_test_coverage() {
-    // CodeCovs auto detection has trouble with our CIs PR validation due the merging strategy
-    git_commit_hash = get_git_commit_hash()
-
-    if (env.CHANGE_ID) {
-      // PR execution
-      codecovArgs = "-B ${env.CHANGE_TARGET} -C ${git_commit_hash} -P ${env.CHANGE_ID}"
-    } else {
-      // Branch execution
-      codecovArgs = "-B ${env.BRANCH_NAME} -C ${git_commit_hash}"
-    }
-
-    // To make sure we never fail because test coverage reporting is not available
-    // Fall back to our own copy of the bash helper if it failed to download the public version
-    sh "(curl --retry 10 -s https://codecov.io/bash | bash -s - ${codecovArgs}) || (curl --retry 10 -s https://s3-us-west-2.amazonaws.com/mxnet-ci-prod-slave-data/codecov-bash.txt | bash -s - ${codecovArgs}) || true"
+    sh "curl -s https://codecov.io/bash | bash"
 }
 
 def collect_test_results_unix(original_file_name, new_file_name) {
@@ -160,7 +147,7 @@ def collect_test_results_windows(original_file_name, new_file_name) {
 
 
 def docker_run(platform, function_name, use_nvidia, shared_mem = '500m', env_vars = "") {
-  def command = "ci/build.py %ENV_VARS% --docker-registry ${env.DOCKER_CACHE_REGISTRY} %USE_NVIDIA% --platform %PLATFORM% --docker-build-retries 3 --shm-size %SHARED_MEM% /work/runtime_functions.sh %FUNCTION_NAME%"
+  def command = "ci/build.py %ENV_VARS% --docker-registry ${env.DOCKER_ECR_REGISTRY} %USE_NVIDIA% --platform %PLATFORM% --docker-build-retries 3 --shm-size %SHARED_MEM% /work/runtime_functions.sh %FUNCTION_NAME%"
   command = command.replaceAll('%ENV_VARS%', env_vars.length() > 0 ? "-e ${env_vars}" : '')
   command = command.replaceAll('%USE_NVIDIA%', use_nvidia ? '--nvidiadocker' : '')
   command = command.replaceAll('%PLATFORM%', platform)
diff --git a/ci/build.py b/ci/build.py
index 8c2a6e9ac20b..645eb96875e9 100755
--- a/ci/build.py
+++ b/ci/build.py
@@ -27,6 +27,8 @@
 
 import argparse
 import glob
+import hashlib
+import os
 import pprint
 import re
 import shutil
@@ -36,7 +38,6 @@
 from subprocess import check_call, check_output
 from typing import *
 
-from safe_docker_run import SafeDockerClient
 from util import *
 
 
@@ -52,13 +53,41 @@ def get_platforms(path: str = get_dockerfiles_path()) -> List[str]:
     platforms = list(map(lambda x: os.path.split(x)[1], sorted(files)))
     return platforms
 
+def _find_copied_files(dockerfile):
+    """
+    Creates a list of files copied into given dockerfile.
+    """
+    copied_files = []
+    basedir = os.path.dirname(dockerfile)
+    with open(dockerfile, "r") as f:
+        for line in f.readlines():
+            if line.startswith("COPY "):
+                copied_files.append(os.path.join(basedir, line.split(" ")[1]))
+    return copied_files
+
+def _hash_file(ctx, filename):
+    """
+    Add contents of passed file into passed hash context.
+    """
+    bufsiz = 16384
+    with open(filename,"rb") as f:
+        while True:
+            d = f.read(bufsiz)
+            if not d:
+                break
+            ctx.update(d)
 
 def get_docker_tag(platform: str, registry: str) -> str:
     """:return: docker tag to be used for the container"""
     platform = platform if any(x in platform for x in ['build.', 'publish.']) else 'build.{}'.format(platform)
     if not registry:
         registry = "mxnet_local"
-    return "{0}/{1}".format(registry, platform)
+    dockerfile = get_dockerfile(platform)
+    sha256 = hashlib.sha256()
+    _hash_file(sha256, dockerfile)
+    for f in _find_copied_files(dockerfile):
+        _hash_file(sha256, f)
+    return "{0}:{1}-{2}".format(registry, platform, sha256.hexdigest()[:12])
 
 
 def get_dockerfile(platform: str, path=get_dockerfiles_path()) -> str:
@@ -67,7 +96,7 @@ def get_dockerfile(platform: str, path=get_dockerfiles_path()) -> str:
 
 
 def build_docker(platform: str, registry: str, num_retries: int, no_cache: bool,
-                 cache_intermediate: bool) -> str:
+                 cache_intermediate: bool=False) -> str:
     """
     Build a container for the given platform
     :param platform: Platform
@@ -157,8 +186,7 @@ def default_ccache_dir() -> str:
     return os.path.join(os.path.expanduser("~"), ".ccache")
 
 
-def container_run(docker_client: SafeDockerClient,
-                  platform: str,
+def container_run(platform: str,
                   nvidia_runtime: bool,
                   docker_registry: str,
                   shared_memory_size: str,
@@ -167,17 +195,12 @@ def container_run(docker_client: SafeDockerClient,
                   environment: Dict[str, str],
                   dry_run: bool = False) -> int:
     """Run command in a container"""
-    container_wait_s = 600
-    #
-    # Environment setup
-    #
+    # set default environment variables
     environment.update({
         'CCACHE_MAXSIZE': '500G',
         'CCACHE_TEMPDIR': '/tmp/ccache',  # temp dir should be local and not shared
-        'CCACHE_DIR': '/work/ccache',  # this path is inside the container as /work/ccache is
-                                       # mounted
-        'CCACHE_LOGFILE': '/tmp/ccache.log',  # a container-scoped log, useful for ccache
-                                              # verification.
+        'CCACHE_DIR': '/work/ccache',  # this path is inside the container as /work/ccache is mounted
+        'CCACHE_LOGFILE': '/tmp/ccache.log',  # a container-scoped log, useful for ccache verification.
     })
     environment.update({k: os.environ[k] for k in ['CCACHE_MAXSIZE'] if k in os.environ})
 
@@ -189,13 +212,9 @@ def container_run(docker_client: SafeDockerClient,
     os.makedirs(local_ccache_dir, exist_ok=True)
     logging.info("Using ccache directory: %s", local_ccache_dir)
 
-    # Equivalent command
-    docker_cmd_list = [
-        "docker",
-        'run',
-        "--gpus all" if nvidia_runtime else "",
-        "--cap-add",
-        "SYS_PTRACE", # Required by ASAN
+    # Build docker command
+    docker_arg_list = [
+        "--cap-add", "SYS_PTRACE", # Required by ASAN
         '--rm',
         '--shm-size={}'.format(shared_memory_size),
         # mount mxnet root
@@ -211,40 +230,27 @@ def container_run(docker_client: SafeDockerClient,
         '-e', "CCACHE_DIR={}".format(environment['CCACHE_DIR']),
         # a container-scoped log, useful for ccache verification.
         '-e', "CCACHE_LOGFILE={}".format(environment['CCACHE_LOGFILE']),
-        '-ti',
-        tag]
-    docker_cmd_list.extend(command)
-    docker_cmd = ' \\\n\t'.join(docker_cmd_list)
-    logging.info("Running %s in container %s", command, tag)
-    logging.info("Executing the equivalent of:\n%s\n", docker_cmd)
+    ]
+    docker_arg_list += [tag]
+    docker_arg_list.extend(command)
+
+    def docker_run_cmd(cmd):
+        logging.info("Running %s in container %s", command, tag)
+        logging.info("Executing command:\n%s\n", ' \\\n\t'.join(cmd))
+        subprocess.run(cmd, stdout=sys.stdout, stderr=sys.stderr, check=True)
 
     if not dry_run:
-        #############################
-        #
-        signal.pthread_sigmask(signal.SIG_BLOCK, {signal.SIGINT, signal.SIGTERM})
-        # noinspection PyShadowingNames
-        runtime = None
-        if nvidia_runtime:
-            # noinspection PyShadowingNames
-            # runc is default (docker info | grep -i runtime)
-            runtime = 'nvidia'
-
-        return docker_client.run(
-            tag,
-            runtime=runtime,
-            command=command,
-            shm_size=shared_memory_size,
-            user='{}:{}'.format(os.getuid(), os.getgid()),
-            cap_add='SYS_PTRACE',
-            volumes={
-                mx_root:
-                    {'bind': '/work/mxnet', 'mode': 'rw'},
-                local_build_folder:
-                    {'bind': '/work/build', 'mode': 'rw'},
-                local_ccache_dir:
-                    {'bind': '/work/ccache', 'mode': 'rw'},
-            },
-            environment=environment)
+        if not nvidia_runtime:
+            docker_run_cmd(['docker', 'run'] + docker_arg_list)
+        else:
+            try:
+                docker_run_cmd(['docker', 'run', '--gpus', 'all'] + docker_arg_list)
+            except subprocess.CalledProcessError as e:
+                if e.returncode == 125:
+                    docker_run_cmd(['docker', 'run', '--runtime', 'nvidia'] + docker_arg_list)
+                else:
+                    raise
+
     return 0
 
 
@@ -348,7 +354,6 @@ def main() -> int:
     args = parser.parse_args()
 
     command = list(chain(*args.command))
-    docker_client = SafeDockerClient()
 
     environment = dict([(e.split('=')[:2] if '=' in e else (e, os.environ[e]))
                         for e in args.environment])
@@ -375,13 +380,13 @@ def main() -> int:
         ret = 0
         if command:
             ret = container_run(
-                docker_client=docker_client, platform=platform, nvidia_runtime=args.nvidiadocker,
+                platform=platform, nvidia_runtime=args.nvidiadocker,
                 shared_memory_size=args.shared_memory_size, command=command, docker_registry=args.docker_registry,
                 local_ccache_dir=args.ccache_dir, environment=environment)
         elif args.print_docker_run:
             command = []
             ret = container_run(
-                docker_client=docker_client, platform=platform, nvidia_runtime=args.nvidiadocker,
+                platform=platform, nvidia_runtime=args.nvidiadocker,
                 shared_memory_size=args.shared_memory_size, command=command, docker_registry=args.docker_registry,
                 local_ccache_dir=args.ccache_dir, dry_run=True, environment=environment)
         else:
@@ -389,7 +394,7 @@ def main() -> int:
             command = ["/work/mxnet/ci/docker/runtime_functions.sh", "build_{}".format(platform)]
             logging.info("No command specified, trying default build: %s", ' '.join(command))
             ret = container_run(
-                docker_client=docker_client, platform=platform, nvidia_runtime=args.nvidiadocker,
+                platform=platform, nvidia_runtime=args.nvidiadocker,
                 shared_memory_size=args.shared_memory_size, command=command, docker_registry=args.docker_registry,
                 local_ccache_dir=args.ccache_dir, environment=environment)
 
@@ -406,7 +411,8 @@ def main() -> int:
             tag = get_docker_tag(platform=platform, registry=args.docker_registry)
             load_docker_cache(tag=tag, docker_registry=args.docker_registry)
             build_docker(platform, registry=args.docker_registry,
-                         num_retries=args.docker_build_retries, no_cache=args.no_cache)
+                         num_retries=args.docker_build_retries, no_cache=args.no_cache,
+                         cache_intermediate=args.cache_intermediate)
             if args.build_only:
                 continue
             shutil.rmtree(buildir(), ignore_errors=True)
@@ -418,7 +424,7 @@ def main() -> int:
                 continue
             command = ["/work/mxnet/ci/docker/runtime_functions.sh", build_platform]
             container_run(
-                docker_client=docker_client, platform=platform, nvidia_runtime=args.nvidiadocker,
+                platform=platform, nvidia_runtime=args.nvidiadocker,
                 shared_memory_size=args.shared_memory_size, command=command, docker_registry=args.docker_registry,
                 local_ccache_dir=args.ccache_dir, environment=environment)
             shutil.move(buildir(), plat_buildir)
diff --git a/ci/docker/Dockerfile.build.ubuntu_gpu_cu102 b/ci/docker/Dockerfile.build.ubuntu_gpu_cu102
index 52d65aaba0f4..a929a486a2f3 100644
--- a/ci/docker/Dockerfile.build.ubuntu_gpu_cu102
+++ b/ci/docker/Dockerfile.build.ubuntu_gpu_cu102
@@ -69,6 +69,10 @@ ENV CUDNN_VERSION=7.6.5.32
 COPY install/ubuntu_cudnn.sh /work/
 RUN /work/ubuntu_cudnn.sh
 
+# update the cuda compatibity package because cd host uses nvidia driver 460
+RUN apt-get update && apt-get install -y cuda-compat-11-2
+RUN ln -sfn /usr/local/cuda-11.2 /usr/local/cuda
+
 # Always last
 ARG USER_ID=0
 ARG GROUP_ID=0
diff --git a/ci/docker/Dockerfile.build.ubuntu_gpu_cu110 b/ci/docker/Dockerfile.build.ubuntu_gpu_cu110
index 336b76e2a3e8..db1f606076b5 100644
--- a/ci/docker/Dockerfile.build.ubuntu_gpu_cu110
+++ b/ci/docker/Dockerfile.build.ubuntu_gpu_cu110
@@ -32,6 +32,10 @@ COPY install/ubuntu_python.sh /work/
 COPY install/requirements /work/
 RUN /work/ubuntu_python.sh
 
+# update the cuda compatibity package because cd host uses nvidia driver 460
+RUN apt-get update && apt-get install -y cuda-compat-11-2
+RUN ln -sfn /usr/local/cuda-11.2 /usr/local/cuda
+
 # Always last
 ARG USER_ID=0
 ARG GROUP_ID=0
diff --git a/ci/docker/install/ubuntu_python.sh b/ci/docker/install/ubuntu_python.sh
index b6792a286fad..d31a18d9c303 100755
--- a/ci/docker/install/ubuntu_python.sh
+++ b/ci/docker/install/ubuntu_python.sh
@@ -23,7 +23,12 @@
 set -ex
 # install libraries for mxnet's python package on ubuntu
 apt-get update || true
-apt-get install -y python-dev python3-dev virtualenv wget
+apt-get install -y software-properties-common
+add-apt-repository -y ppa:deadsnakes/ppa
+apt-get update || true
+apt-get install -y python3.6-dev virtualenv wget
+# setup symlink in /usr/local/bin to override python3 version
+ln -sf /usr/bin/python3.6 /usr/local/bin/python3
 
 # the version of the pip shipped with ubuntu may be too lower, install a recent version here
 wget -nv https://bootstrap.pypa.io/get-pip.py
diff --git a/ci/docker/runtime_functions.sh b/ci/docker/runtime_functions.sh
index 4d5a94df9c6a..c230ccdccd44 100755
--- a/ci/docker/runtime_functions.sh
+++ b/ci/docker/runtime_functions.sh
@@ -517,7 +517,7 @@ build_ubuntu_cpu_cmake_debug() {
     cmake \
         -DUSE_CUDA=OFF \
         -DUSE_TVM_OP=ON \
-        -DPython3_EXECUTABLE=/usr/bin/python3 \
+        -DPython3_EXECUTABLE=python3 \
         -DUSE_MKL_IF_AVAILABLE=OFF \
         -DUSE_OPENMP=OFF \
         -DUSE_OPENCV=ON \
@@ -538,7 +538,7 @@ build_ubuntu_cpu_cmake_no_tvm_op() {
     cmake \
         -DUSE_CUDA=OFF \
         -DUSE_TVM_OP=OFF \
-        -DPython3_EXECUTABLE=/usr/bin/python3 \
+        -DPython3_EXECUTABLE=python3 \
         -DUSE_MKL_IF_AVAILABLE=OFF \
         -DUSE_OPENMP=OFF \
         -DUSE_OPENCV=ON \
@@ -874,7 +874,7 @@ build_ubuntu_gpu_cmake_mkldnn() {
         -DUSE_CUDA=1                            \
         -DUSE_CUDNN=1                           \
         -DUSE_TVM_OP=0                          \
-        -DPython3_EXECUTABLE=/usr/bin/python3   \
+        -DPython3_EXECUTABLE=python3            \
         -DUSE_MKLML_MKL=1                       \
         -DCMAKE_BUILD_TYPE=Release              \
         -DMXNET_CUDA_ARCH="$CI_CMAKE_CUDA_ARCH" \
@@ -892,8 +892,8 @@ build_ubuntu_gpu_cmake() {
         -DUSE_SIGNAL_HANDLER=ON                 \
         -DUSE_CUDA=ON                           \
         -DUSE_CUDNN=ON                          \
-        -DUSE_TVM_OP=OFF                         \
-        -DPython3_EXECUTABLE=/usr/bin/python3   \
+        -DUSE_TVM_OP=OFF                        \
+        -DPython3_EXECUTABLE=python3            \
         -DUSE_MKL_IF_AVAILABLE=OFF              \
         -DUSE_MKLML_MKL=OFF                     \
         -DUSE_MKLDNN=OFF                        \
@@ -915,8 +915,8 @@ build_ubuntu_gpu_cmake_no_rtc() {
         -DUSE_SIGNAL_HANDLER=ON                 \
         -DUSE_CUDA=ON                           \
         -DUSE_CUDNN=ON                          \
-        -DUSE_TVM_OP=OFF                         \
-        -DPython3_EXECUTABLE=/usr/bin/python3   \
+        -DUSE_TVM_OP=OFF                        \
+        -DPython3_EXECUTABLE=python3            \
         -DUSE_MKL_IF_AVAILABLE=OFF              \
         -DUSE_MKLML_MKL=OFF                     \
         -DUSE_MKLDNN=ON                         \
@@ -956,8 +956,8 @@ build_ubuntu_gpu_large_tensor() {
         -DUSE_SIGNAL_HANDLER=ON                 \
         -DUSE_CUDA=ON                           \
         -DUSE_CUDNN=ON                          \
-        -DUSE_TVM_OP=OFF                         \
-        -DPython3_EXECUTABLE=/usr/bin/python3   \
+        -DUSE_TVM_OP=OFF                        \
+        -DPython3_EXECUTABLE=python3            \
         -DUSE_MKL_IF_AVAILABLE=OFF              \
         -DUSE_MKLML_MKL=OFF                     \
         -DUSE_MKLDNN=OFF                        \
diff --git a/ci/docker_cache.py b/ci/docker_cache.py
index 254d6237d6e2..da01314f5f8d 100755
--- a/ci/docker_cache.py
+++ b/ci/docker_cache.py
@@ -28,6 +28,7 @@
 import logging
 import os
 import subprocess
+import re
 import sys
 from typing import *
 
@@ -84,7 +85,7 @@ def _build_save_container(platform, registry, load_cache) -> Optional[str]:
     logging.debug('Building %s as %s', platform, docker_tag)
     try:
         # Increase the number of retries for building the cache.
-        image_id = build_util.build_docker(docker_binary='docker', platform=platform, registry=registry, num_retries=10, no_cache=False)
+        image_id = build_util.build_docker(platform=platform, registry=registry, num_retries=10, no_cache=False)
         logging.info('Built %s as %s', docker_tag, image_id)
 
         # Push cache to registry
@@ -96,15 +97,33 @@ def _build_save_container(platform, registry, load_cache) -> Optional[str]:
         # Error handling is done by returning the errorous platform name. This is necessary due to
         # Parallel being unable to handle exceptions
 
+ECR_LOGGED_IN = False
+def _ecr_login(registry):
+    """
+    Use the AWS CLI to get credentials to login to ECR.
+    """
+    # extract region from registry
+    global ECR_LOGGED_IN
+    if ECR_LOGGED_IN:
+        return
+    regionMatch = re.match(r'.*?\.dkr\.ecr\.([a-z]+\-[a-z]+\-\d+)\.amazonaws\.com', registry)
+    assert(regionMatch)
+    region = regionMatch.group(1)
+    logging.info("Logging into ECR region %s using aws-cli..", region)
+    os.system("$(aws ecr get-login --region "+region+" --no-include-email)")
+    ECR_LOGGED_IN = True
 
 def _upload_image(registry, docker_tag, image_id) -> None:
     """
-    Upload the passed image by id, tag it with docker tag and upload to S3 bucket
+    Upload the passed image by id, tag it with docker tag and upload to docker registry.
     :param registry: Docker registry name
     :param docker_tag: Docker tag
     :param image_id: Image id
     :return: None
     """
+    if "dkr.ecr" in registry:
+        _ecr_login(registry)
+
     # We don't have to retag the image since it is already in the right format
     logging.info('Uploading %s (%s) to %s', docker_tag, image_id, registry)
     push_cmd = ['docker', 'push', docker_tag]
@@ -125,6 +144,9 @@ def load_docker_cache(registry, docker_tag) -> None:
         return
     assert docker_tag
 
+    if "dkr.ecr" in registry:
+        _ecr_login(registry)
+
     logging.info('Loading Docker cache for %s from %s', docker_tag, registry)
     pull_cmd = ['docker', 'pull', docker_tag]
 
@@ -185,15 +207,19 @@ def script_name() -> str:
 
     platforms = build_util.get_platforms()
 
-    secret_name = os.environ['DOCKERHUB_SECRET_NAME']
-    endpoint_url = os.environ['DOCKERHUB_SECRET_ENDPOINT_URL']
-    region_name = os.environ['DOCKERHUB_SECRET_ENDPOINT_REGION']
-
-    try:
-        login_dockerhub(secret_name, endpoint_url, region_name)
+    if "dkr.ecr" in args.docker_registry:
+        _ecr_login(args.docker_registry)
         return build_save_containers(platforms=platforms, registry=args.docker_registry, load_cache=True)
-    finally:
-        logout_dockerhub()
+    else:
+        secret_name = os.environ['DOCKERHUB_SECRET_NAME']
+        endpoint_url = os.environ['DOCKERHUB_SECRET_ENDPOINT_URL']
+        region_name = os.environ['DOCKERHUB_SECRET_ENDPOINT_REGION']
+
+        try:
+            login_dockerhub(secret_name, endpoint_url, region_name)
+            return build_save_containers(platforms=platforms, registry=args.docker_registry, load_cache=True)
+        finally:
+            logout_dockerhub()
 
 
 if __name__ == '__main__':
diff --git a/ci/jenkins/Jenkins_steps.groovy b/ci/jenkins/Jenkins_steps.groovy
index 499cc8db79b6..1c5ebaace976 100644
--- a/ci/jenkins/Jenkins_steps.groovy
+++ b/ci/jenkins/Jenkins_steps.groovy
@@ -769,7 +769,7 @@ def test_unix_python3_gpu_cu110() {
 
 def test_unix_python3_quantize_gpu() {
     return ['Python3: Quantize GPU': {
-      node(NODE_LINUX_GPU_P3) {
+      node(NODE_LINUX_GPU_G4) {
         ws('workspace/ut-python3-quantize-gpu') {
           timeout(time: max_time, unit: 'MINUTES') {
             try {
@@ -787,7 +787,7 @@ def test_unix_python3_quantize_gpu() {
 
 def test_unix_python3_quantize_gpu_cu110() {
     return ['Python3+CUDA11.0: Quantize GPU': {
-      node(NODE_LINUX_GPU_P3) {
+      node(NODE_LINUX_GPU_G4) {
         ws('workspace/ut-python3-quantize-gpu') {
           timeout(time: max_time, unit: 'MINUTES') {
             try {
@@ -903,7 +903,7 @@ def test_unix_python3_mkldnn_nocudnn_gpu() {
 
 def test_unix_python3_tensorrt_gpu() {
     return ['Python3: TensorRT GPU': {
-      node(NODE_LINUX_GPU_P3) {
+      node(NODE_LINUX_GPU_G4) {
         ws('workspace/build-tensorrt') {
           timeout(time: max_time, unit: 'MINUTES') {
             try {
diff --git a/ci/jenkins/Jenkinsfile_full b/ci/jenkins/Jenkinsfile_full
index fcdb4c2a5eeb..415bd7b8dde0 100644
--- a/ci/jenkins/Jenkinsfile_full
+++ b/ci/jenkins/Jenkinsfile_full
@@ -31,7 +31,6 @@ def buildJobs = [
     'miscellaneous',
     'unix-cpu',
     'unix-gpu',
-    'unix-gpu-cu110',
     'website',
     'windows-cpu',
     'windows-gpu'
diff --git a/ci/safe_docker_run.py b/ci/safe_docker_run.py
deleted file mode 100755
index 97ece4aecd2f..000000000000
--- a/ci/safe_docker_run.py
+++ /dev/null
@@ -1,248 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-"""
-Docker command wrapper to guard against Zombie containers
-"""
-
-import argparse
-import atexit
-import logging
-import os
-import signal
-import sys
-from functools import reduce
-from itertools import chain
-from typing import Dict, Any
-
-import docker
-from docker.errors import NotFound
-from docker.models.containers import Container
-
-from util import config_logging
-
-DOCKER_STOP_TIMEOUT_SECONDS = 3
-CONTAINER_WAIT_SECONDS = 600
-
-
-class SafeDockerClient:
-    """
-    A wrapper around the docker client to ensure that no zombie containers are left hanging around
-    in case the script is not allowed to finish normally
-    """
-
-    @staticmethod
-    def _trim_container_id(cid):
-        """:return: trimmed container id"""
-        return cid[:12]
-
-    def __init__(self):
-        self._docker_client = docker.from_env()
-        self._containers = set()
-        self._docker_stop_timeout = DOCKER_STOP_TIMEOUT_SECONDS
-        self._container_wait_seconds = CONTAINER_WAIT_SECONDS
-
-        def signal_handler(signum, _):
-            signal.pthread_sigmask(signal.SIG_BLOCK, {signum})
-            logging.warning("Signal %d received, cleaning up...", signum)
-            self._clean_up()
-            logging.warning("done. Exiting with error.")
-            sys.exit(1)
-
-        atexit.register(self._clean_up)
-        signal.signal(signal.SIGTERM, signal_handler)
-        signal.signal(signal.SIGINT, signal_handler)
-
-    def _clean_up(self):
-        if self._containers:
-            logging.warning("Cleaning up containers")
-        else:
-            return
-        # noinspection PyBroadException
-        try:
-            stop_timeout = int(os.environ.get("DOCKER_STOP_TIMEOUT", self._docker_stop_timeout))
-        except Exception:
-            stop_timeout = 3
-        for container in self._containers:
-            try:
-                container.stop(timeout=stop_timeout)
-                logging.info("☠: stopped container %s", self._trim_container_id(container.id))
-                container.remove()
-                logging.info("🚽: removed container %s", self._trim_container_id(container.id))
-            except Exception as e:
-                logging.exception(e)
-        self._containers.clear()
-        logging.info("Cleaning up containers finished.")
-
-    def _add_container(self, container: Container) -> Container:
-        self._containers.add(container)
-        return container
-
-    def _remove_container(self, container: Container):
-        self._containers.remove(container)
-
-    def run(self, *args, **kwargs) -> int:
-        if "detach" in kwargs and kwargs.get("detach") is False:
-            raise ValueError("Can only safe run with 'detach' set to True")
-        else:
-            kwargs["detach"] = True
-
-        # These variables are passed to the container so the process tree killer can find runaway
-        # process inside the container
-        # https://wiki.jenkins.io/display/JENKINS/ProcessTreeKiller
-        # https://github.com/jenkinsci/jenkins/blob/578d6bacb33a5e99f149de504c80275796f0b231/core/src/main/java/hudson/model/Run.java#L2393
-        if "environment" not in kwargs:
-            kwargs["environment"] = {}
-
-        jenkins_env_vars = ["BUILD_NUMBER", "BUILD_ID", "BUILD_TAG"]
-        kwargs["environment"].update({k: os.environ[k] for k in jenkins_env_vars if k in os.environ})
-
-        ret = 0
-        try:
-            # Race condition:
-            # If the call to docker_client.containers.run is interrupted, it is possible that
-            # the container won't be cleaned up. We avoid this by temporarily masking the signals.
-            signal.pthread_sigmask(signal.SIG_BLOCK, {signal.SIGINT, signal.SIGTERM})
-            container = self._add_container(self._docker_client.containers.run(*args, **kwargs))
-            signal.pthread_sigmask(signal.SIG_UNBLOCK, {signal.SIGINT, signal.SIGTERM})
-            logging.info("Started container: %s", self._trim_container_id(container.id))
-            stream = container.logs(stream=True, stdout=True, stderr=True)
-            sys.stdout.flush()
-            for chunk in stream:
-                sys.stdout.buffer.write(chunk)
-                sys.stdout.buffer.flush()
-            sys.stdout.flush()
-            stream.close()
-
-            try:
-                logging.info("Waiting for status of container %s for %d s.",
-                             self._trim_container_id(container.id),
-                             self._container_wait_seconds)
-                wait_result = container.wait(timeout=self._container_wait_seconds)
-                logging.info("Container exit status: %s", wait_result)
-                ret = wait_result.get('StatusCode', 200)
-                if ret != 0:
-                    logging.error("Container exited with an error 😞")
-                    logging.info("Executed command for reproduction:\n\n%s\n", " ".join(sys.argv))
-                else:
-                    logging.info("Container exited with success 👍")
-                    logging.info("Executed command for reproduction:\n\n%s\n", " ".join(sys.argv))
-            except Exception as err:
-                logging.exception(err)
-                return 150
-
-            try:
-                logging.info("Stopping container: %s", self._trim_container_id(container.id))
-                container.stop()
-            except Exception as e:
-                logging.exception(e)
-                ret = 151
-
-            try:
-                logging.info("Removing container: %s", self._trim_container_id(container.id))
-                container.remove()
-            except Exception as e:
-                logging.exception(e)
-                ret = 152
-            self._remove_container(container)
-            containers = self._docker_client.containers.list()
-            if containers:
-                logging.info("Other running containers: %s", [self._trim_container_id(x.id) for x in containers])
-        except NotFound as e:
-            logging.info("Container was stopped before cleanup started: %s", e)
-
-        return ret
-
-
-def _volume_mount(volume_dfn: str) -> Dict[str, Any]:
-    """
-    Converts docker volume mount format, e.g. docker run --volume /local/path:/container/path:ro
-    to an object understood by the python docker library, e.g. {"local/path": {"bind": "/container/path", "mode": "ro"}}
-    This is used by the argparser for automatic conversion and input validation.
-    If the mode is not specified, 'rw' is assumed.
-    :param volume_dfn: A string to convert to a volume mount object in the format <local path>:<container path>[:ro|rw]
-    :return: An object in the form {"<local path>" : {"bind": "<container path>", "mode": "rw|ro"}}
-    """
-    if volume_dfn is None:
-        raise argparse.ArgumentTypeError("Missing value for volume definition")
-
-    parts = volume_dfn.split(":")
-
-    if len(parts) < 2 or len(parts) > 3:
-        raise argparse.ArgumentTypeError("Invalid volume definition {}".format(volume_dfn))
-
-    mode = "rw"
-    if len(parts) == 3:
-        mode = parts[2]
-
-    if mode not in ["rw", "ro"]:
-        raise argparse.ArgumentTypeError("Invalid volume mount mode {} in volume definition {}".format(mode, volume_dfn))
-
-    return {parts[0]: {"bind": parts[1], "mode": mode}}
-
-
-def main(command_line_arguments):
-    config_logging()
-
-    parser = argparse.ArgumentParser(
-        description="""Wrapper around docker run that protects against Zombie containers""", epilog="")
-
-    parser.add_argument("-u", "--user",
-                        help="Username or UID (format: <name|uid>[:<group|gid>])",
-                        default=None)
-
-    parser.add_argument("-v", "--volume",
-                        action='append',
-                        type=_volume_mount,
-                        help="Bind mount a volume",
-                        default=[])
-
-    parser.add_argument("--cap-add",
-                        help="Add Linux capabilities",
-                        action="append",
-                        type=str,
-                        default=[])
-
-    parser.add_argument("--runtime",
-                        help="Runtime to use for this container",
-                        default=None)
-
-    parser.add_argument("--name",
-                        help="Assign a name to the container",
-                        default=None)
-
-    parser.add_argument("image", metavar="IMAGE")
-    parser.add_argument("command", metavar="COMMAND")
-    parser.add_argument("args", nargs='*', metavar="ARG")
-
-    args = parser.parse_args(args=command_line_arguments)
-    docker_client = SafeDockerClient()
-    return docker_client.run(args.image, **{
-        "command": " ".join(list(chain([args.command] + args.args))),
-        "user": args.user,
-        "runtime": args.runtime,
-        "name": args.name,
-        "volumes": reduce(lambda dct, v: {**dct, **v}, args.volume, {}),
-        "cap_add": args.cap_add
-    })
-
-
-if __name__ == "__main__":
-    exit(main(sys.argv[1:]))
diff --git a/ci/test_safe_docker_run.py b/ci/test_safe_docker_run.py
deleted file mode 100644
index 433d42e8b2ea..000000000000
--- a/ci/test_safe_docker_run.py
+++ /dev/null
@@ -1,427 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-"""
-Safe docker run tests
-"""
-import itertools
-import os
-import signal
-import unittest
-from typing import Optional
-from unittest.mock import create_autospec, patch, call
-
-from docker import DockerClient
-from docker.models.containers import Container, ContainerCollection
-
-from safe_docker_run import SafeDockerClient, main
-
-
-def create_mock_container(status_code: int = 0):
-    """
-    Creates a mock docker container that exits with the specified status code
-    """
-    mock_container = create_autospec(Container, name="mock_container")
-    mock_container.wait.return_value = {
-        "StatusCode": status_code
-    }
-    return mock_container
-
-
-def create_mock_container_collection(container: Container):
-    """
-    Creates a mock ContainerCollection that return the supplied container when the 'run' method is called
-    """
-    mock_container_collection = create_autospec(ContainerCollection, name="mock_collection")
-    mock_container_collection.run.return_value = container
-    return mock_container_collection
-
-
-class MockDockerClient:
-    """
-    A mock DockerClient when docker.from_env is called
-    The supplied container will be returned when the client.containers.run method is called
-    """
-    def __init__(self, container: Container):
-        self._mock_client = create_autospec(DockerClient, name="mock_client")
-        self._mock_client.containers = create_mock_container_collection(container)
-        self._patch = patch("docker.from_env", return_value=self._mock_client)
-
-    def __enter__(self):
-        self._patch.start()
-        return self._mock_client
-
-    def __exit__(self, _, __, ___):
-        self._patch.stop()
-
-
-class TestSafeDockerRun(unittest.TestCase):
-
-    @patch("safe_docker_run.signal.pthread_sigmask")
-    @patch.dict(os.environ, {
-        "BUILD_NUMBER": "BUILD_NUMBER_5",
-        "BUILD_ID": "BUILD_ID_1",
-        "BUILD_TAG": "BUILD_TAG_7"
-    })
-    def test_run_successful(self, mock_pthread_sigmask):
-        """
-        Tests successful run
-        """
-        mock_container = create_mock_container()
-
-        with MockDockerClient(mock_container) as mock_client:
-            safe_docker = SafeDockerClient()
-
-            # Check return code is 0
-            assert safe_docker.run("image", "command") == 0
-
-            # Check call to container is correct
-            assert mock_client.containers.run.call_args_list == [
-                call("image", "command", detach=True, environment={
-                    "BUILD_NUMBER": "BUILD_NUMBER_5",
-                    "BUILD_ID": "BUILD_ID_1",
-                    "BUILD_TAG": "BUILD_TAG_7"
-                })
-            ]
-
-            # Check correct signals are blocked then unblocked
-            assert mock_pthread_sigmask.call_args_list == [
-                call(signal.SIG_BLOCK, {signal.SIGINT, signal.SIGTERM}),
-                call(signal.SIG_UNBLOCK, {signal.SIGINT, signal.SIGTERM})
-            ]
-
-            # Assert container is stopped and removed
-            assert mock_container.stop.call_count == 1
-            assert mock_container.remove.call_count == 1
-            assert len(safe_docker._containers) == 0
-
-    def test_run_detach(self):
-        """
-        Tests detach=True is passed to the underlying call by default
-        """
-        mock_container = create_mock_container()
-
-        # Test detach=True is passed in even if not specified
-        with MockDockerClient(mock_container) as mock_client:
-            safe_docker = SafeDockerClient()
-            assert safe_docker.run("image", "command") == 0
-            assert mock_client.containers.run.call_count == 1
-            _, kwargs = mock_client.containers.run.call_args
-            assert kwargs["detach"] is True
-
-        # Test passing in detach=True does not cause any issues
-        with MockDockerClient(mock_container) as mock_client:
-            safe_docker = SafeDockerClient()
-            assert safe_docker.run("image", "command", detach=True) == 0
-            assert mock_client.containers.run.call_count == 1
-            _, kwargs = mock_client.containers.run.call_args
-            assert kwargs["detach"] is True
-
-        # Test detach=False fails
-        with MockDockerClient(mock_container) as mock_client:
-            safe_docker = SafeDockerClient()
-            with self.assertRaises(ValueError):
-                safe_docker.run("image", "command", detach=False)
-                assert mock_client.containers.run.call_args_list == []
-
-    def test_jenkins_vars(self):
-        """
-        Tests jenkins environment variables are appropriately passed to the underlying docker run call
-        """
-        # NOTE: It's important that these variables are passed to the underlying docker container
-        # These variables are passed to the container so the process tree killer can find runaway
-        # process inside the container
-        # https://wiki.jenkins.io/display/JENKINS/ProcessTreeKiller
-        # https://github.com/jenkinsci/jenkins/blob/578d6bacb33a5e99f149de504c80275796f0b231/core/src/main/java/hudson/model/Run.java#L2393
-
-        jenkins_vars = {
-            "BUILD_NUMBER": "BUILD_NUMBER_5",
-            "BUILD_ID": "BUILD_ID_1",
-            "BUILD_TAG": "BUILD_TAG_7"
-        }
-        mock_container = create_mock_container()
-
-        # Test environment is empty if the jenkins vars are not present
-        with MockDockerClient(mock_container) as mock_client:
-            safe_docker = SafeDockerClient()
-            assert safe_docker.run("image", "command") == 0
-            assert mock_client.containers.run.call_count == 1
-            _, kwargs = mock_client.containers.run.call_args
-            assert kwargs["environment"] == {}
-
-        # Test environment contains jenkins env vars if they are present
-        with MockDockerClient(mock_container) as mock_client:
-            with patch.dict(os.environ, jenkins_vars):
-                safe_docker = SafeDockerClient()
-                assert safe_docker.run("image", "command") == 0
-                assert mock_client.containers.run.call_count == 1
-                _, kwargs = mock_client.containers.run.call_args
-                assert kwargs["environment"] == jenkins_vars
-
-        # Test jenkins env vars are added to callers env vars
-        user_env = {"key1": "value1", "key2": "value2"}
-        with MockDockerClient(mock_container) as mock_client:
-            with patch.dict(os.environ, jenkins_vars):
-                safe_docker = SafeDockerClient()
-                assert safe_docker.run("image", "command", environment=user_env) == 0
-                assert mock_client.containers.run.call_count == 1
-                _, kwargs = mock_client.containers.run.call_args
-                assert kwargs["environment"] == {**jenkins_vars, **user_env}
-
-    def test_run_args_kwargs_passed(self):
-        """
-        Tests args and kwargs are passed to the container run call
-        """
-        mock_container = create_mock_container()
-
-        # Test detach=True is passed in even if not specified
-        with MockDockerClient(mock_container) as mock_client:
-            safe_docker = SafeDockerClient()
-            assert safe_docker.run(
-                "image",
-                "command",
-                "another_arg",
-                str_param="value",
-                bool_param=True,
-                none_param=None,
-                int_param=5,
-                float_param=5.2,
-                list_param=["this", "is", "a", "list"],
-                map_param={
-                    "a": "5",
-                    "b": True,
-                    "c": 2
-                }) == 0
-            assert mock_client.containers.run.call_args_list == [
-                call(
-                    "image",
-                    "command",
-                    "another_arg",
-                    detach=True,
-                    environment={},
-                    str_param="value",
-                    bool_param=True,
-                    none_param=None,
-                    int_param=5,
-                    float_param=5.2,
-                    list_param=["this", "is", "a", "list"],
-                    map_param={
-                        "a": "5",
-                        "b": True,
-                        "c": 2
-                    }
-                )
-            ]
-
-    def test_container_returns_non_zero_status_code(self):
-        """
-        Tests non-zero code from container is returned and the container
-        is cleaned up
-        """
-        mock_container = create_mock_container(status_code=10)
-        with MockDockerClient(mock_container):
-            safe_docker = SafeDockerClient()
-            # check return code and that container gets cleaned up
-            assert safe_docker.run("image", "command") == 10
-            assert mock_container.stop.call_count == 1
-            assert mock_container.remove.call_count == 1
-            assert len(safe_docker._containers) == 0
-
-    def test_container_wait_raises_returns_150(self):
-        """
-        Tests 150 is returned if an error is raised when calling container.wait
-        """
-        mock_container = create_mock_container()
-        mock_container.wait.side_effect = RuntimeError("Something bad happened")
-        with MockDockerClient(mock_container):
-            safe_docker = SafeDockerClient()
-            assert safe_docker.run("image", "command") == 150
-
-    def test_container_stop_raises_returns_151(self):
-        """
-        Tests 151 is returned if an error is raised when calling container.stop
-        """
-        mock_container = create_mock_container()
-        mock_container.stop.side_effect = RuntimeError("Something bad happened")
-        with MockDockerClient(mock_container):
-            safe_docker = SafeDockerClient()
-            assert safe_docker.run("image", "command") == 151
-
-    def test_container_remove_raises_returns_152(self):
-        """
-        Tests 152 is returned if an error is raised when calling container.remove
-        """
-        mock_container = create_mock_container()
-        mock_container.remove.side_effect = RuntimeError("Something bad happened")
-        with MockDockerClient(mock_container):
-            safe_docker = SafeDockerClient()
-            assert safe_docker.run("image", "command") == 152
-
-    def test_main(self):
-        """
-        Tests main function against different command line arguments
-        """
-        tests = [
-            # ( supplied command line arguments, expected call )
-            (
-                ["image", "command"],
-                call("image", command="command", runtime=None, user=None, name=None, volumes={}, cap_add=[])
-            ),
-            (
-                ["image", "command", "arg1", "arg2"],
-                call("image", command="command arg1 arg2", runtime=None, user=None, name=None, volumes={}, cap_add=[])
-            ),
-            (
-                ["--runtime", "nvidia", "image", "command"],
-                call("image", command="command", runtime="nvidia", user=None, name=None, volumes={}, cap_add=[])
-            ),
-            (
-                ["--user", "1001:1001", "image", "command"],
-                call("image", command="command", runtime=None, user="1001:1001", name=None, volumes={}, cap_add=[])
-            ),
-            ([
-                "--volume", "/local/path1:/container/path1",
-                "--volume", "/local/path2:/container/path2:ro",
-                "image",
-                "command"
-            ], call("image", command="command", runtime=None, user=None, name=None, volumes={
-                "/local/path1": {
-                    "bind": "/container/path1",
-                    "mode": "rw"
-                },
-                "/local/path2": {
-                    "bind": "/container/path2",
-                    "mode": "ro"
-                }
-            }, cap_add=[])),
-            ([
-                "--runtime", "nvidia",
-                "-u", "1001:1001",
-                "-v", "/local/path1:/container/path1",
-                "-v", "/local/path2:/container/path2:ro",
-                "--cap-add", "bob",
-                "--cap-add", "jimmy",
-                "--name",
-                "container_name",
-                "image",
-                "command",
-                "arg1",
-                "arg2"
-            ], call(
-                "image",
-                command="command arg1 arg2",
-                runtime="nvidia",
-                user="1001:1001",
-                name="container_name",
-                volumes={
-                    "/local/path1": {
-                        "bind": "/container/path1",
-                        "mode": "rw"
-                    },
-                    "/local/path2": {
-                        "bind": "/container/path2",
-                        "mode": "ro"
-                    }
-                }, cap_add=["bob", "jimmy"])
-            )
-        ]
-
-        # Tests valid arguments
-        mock_docker = create_autospec(SafeDockerClient)
-        mock_docker.run.return_value = 0
-        with patch("safe_docker_run.SafeDockerClient", return_value=mock_docker):
-            for test in tests:
-                arguments, expected_call = test
-                main(arguments)
-                assert mock_docker.run.call_args == expected_call
-
-        # Tests invalid arguments
-        tests = [
-            [],
-            None,
-            ["image"],
-            # Test some bad volume mounts
-            ["-v", "bob", "image", "args"],
-            ["-v", "/local/path", "image", "args"],
-            ["-v", "/local/path:/container/path:blah", "image", "args"],
-            ["-v", "", "image", "args"],
-            ["-v", "a:b:c:d", "image", "args"]
-        ]
-
-        mock_docker = create_autospec(SafeDockerClient)
-        with patch("safe_docker_run.SafeDockerClient", return_value=mock_docker):
-            with self.assertRaises(SystemExit):
-                for test in tests:
-                    main(test)
-
-    def test_clean_up(self):
-        """
-        Tests container clean up in case of SIGTERM and SIGINT
-        """
-        import subprocess
-        import time
-        import docker.errors
-
-        docker_client = docker.from_env()
-        container_name = "safedockertestcontainer1234"
-
-        def get_container(name: str) -> Optional[Container]:
-            try:
-                return docker_client.containers.get(name)
-            except docker.errors.NotFound:
-                return None
-
-        def remove_container_if_exists(name: str):
-            container = get_container(name)
-            if container:
-                container.stop()
-                container.remove()
-
-        def wait_for_container(name: str) -> bool:
-            for _ in itertools.count(5):
-                if get_container(name):
-                    return True
-                time.sleep(1)
-            return False
-
-        # Clear any containers with container name
-        remove_container_if_exists(container_name)
-
-        # None => not signal is emitted - we should still finish with no containers at the end due
-        # to the atexit
-        for sig in [None, signal.SIGTERM, signal.SIGINT]:
-            # Execute the safe docker run script in a different process
-            proc = subprocess.Popen(['./safe_docker_run.py', "--name", container_name, "ubuntu:18.04", "sleep 10"])
-            # NOTE: we need to wait for the container to come up as not all operating systems support blocking signals
-            if wait_for_container(container_name) is False:
-                raise RuntimeError("Test container did not come up")
-
-            # Issue the signal and wait for the process to finish
-            if sig:
-                proc.send_signal(sig)
-            proc.wait()
-
-            # The container should no longer exist
-            assert get_container(container_name) is None
-
-
-if __name__ == '__main__':
-    import nose
-    nose.main()
diff --git a/python/mxnet/gluon/data/dataloader.py b/python/mxnet/gluon/data/dataloader.py
index d34148417355..c4d319fcf58b 100644
--- a/python/mxnet/gluon/data/dataloader.py
+++ b/python/mxnet/gluon/data/dataloader.py
@@ -607,6 +607,10 @@ def __init__(self, dataset, batch_size=None, shuffle=False, sampler=None,
         self._num_workers = num_workers if num_workers >= 0 else 0
         self._worker_pool = None
         self._prefetch = max(0, int(prefetch) if prefetch is not None else 2 * self._num_workers)
+        nd.waitall()
+        import gc
+        gc.collect()
+        nd.waitall()
         if self._num_workers > 0:
             if self._thread_pool:
                 self._worker_pool = ThreadPool(self._num_workers,
diff --git a/tests/python/unittest/test_numpy_interoperability.py b/tests/python/unittest/test_numpy_interoperability.py
index fd8abf1849be..b675aab344ec 100644
--- a/tests/python/unittest/test_numpy_interoperability.py
+++ b/tests/python/unittest/test_numpy_interoperability.py
@@ -3035,6 +3035,8 @@ def check_interoperability(op_list):
             continue
         if name in ['shares_memory', 'may_share_memory', 'empty_like']:  # skip list
             continue
+        if name in ['delete']: # https://github.com/apache/incubator-mxnet/issues/18600
+            continue
         if name in ['full_like', 'zeros_like', 'ones_like'] and \
                 StrictVersion(platform.python_version()) < StrictVersion('3.0.0'):
             continue
diff --git a/tests/python/unittest/test_numpy_op.py b/tests/python/unittest/test_numpy_op.py
index 4bdaf5203ef1..ff4c0ac2490c 100644
--- a/tests/python/unittest/test_numpy_op.py
+++ b/tests/python/unittest/test_numpy_op.py
@@ -3573,6 +3573,7 @@ def hybrid_forward(self, F, x):
 
 @with_seed()
 @use_np
+@unittest.skip('https://github.com/apache/incubator-mxnet/issues/18600')
 def test_np_delete():
     class TestDelete(HybridBlock):
         def __init__(self, obj, axis=None):
diff --git a/tools/setup_gpu_build_tools.sh b/tools/setup_gpu_build_tools.sh
index 05323f95a504..754022a9b516 100755
--- a/tools/setup_gpu_build_tools.sh
+++ b/tools/setup_gpu_build_tools.sh
@@ -42,7 +42,7 @@ elif [[ $VARIANT == cu102* ]]; then
     CUDA_VERSION='10.2.89-1'
     CUDA_PATCH_VERSION='10.2.2.89-1'
     LIBCUDA_VERSION='440.33.01-0ubuntu1'
-    LIBCUDNN_VERSION='7.6.5.32-1+cuda10.2'
+    LIBCUDNN_VERSION='8.0.4.30-1+cuda10.2'
     LIBNCCL_VERSION='2.5.6-1+cuda10.2'
 elif [[ $VARIANT == cu101* ]]; then
     CUDA_VERSION='10.1.105-1'