From 016b3b25c797782db93931ba41095550081ce53d Mon Sep 17 00:00:00 2001 From: Jake Lee Date: Fri, 17 May 2019 10:34:01 -0700 Subject: [PATCH] [Dependency Update] Upgrade CI to use latest cuDNN (#14950) * bump up the cuDNN version * fall back to fp32 * add fallback data type * remove the duplicate one * reenable the memory check * add comment * fix the linter * merge the conflict * retrigger CI --- ci/docker/Dockerfile.build.centos7_gpu | 2 +- ci/docker/Dockerfile.build.ubuntu_base_gpu | 2 ++ ci/docker/Dockerfile.build.ubuntu_build_cuda | 2 +- ci/docker/Dockerfile.build.ubuntu_gpu_cu100 | 3 +++ ci/docker/Dockerfile.build.ubuntu_gpu_cu90 | 4 ++++ ci/docker/Dockerfile.build.ubuntu_gpu_cu92 | 4 ++++ ci/docker/Dockerfile.build.ubuntu_nightly_gpu | 2 +- src/operator/rnn-inl.h | 15 ++++++++++++++- 8 files changed, 30 insertions(+), 4 deletions(-) diff --git a/ci/docker/Dockerfile.build.centos7_gpu b/ci/docker/Dockerfile.build.centos7_gpu index cf76f22a9f0a..0b2464fe8e36 100644 --- a/ci/docker/Dockerfile.build.centos7_gpu +++ b/ci/docker/Dockerfile.build.centos7_gpu @@ -29,7 +29,7 @@ RUN /work/centos7_ccache.sh COPY install/centos7_python.sh /work/ RUN /work/centos7_python.sh -ENV CUDNN_VERSION=7.3.1.20 +ENV CUDNN_VERSION=7.5.1.10 COPY install/centos7_cudnn.sh /work/ RUN /work/centos7_cudnn.sh diff --git a/ci/docker/Dockerfile.build.ubuntu_base_gpu b/ci/docker/Dockerfile.build.ubuntu_base_gpu index 476d882c6d76..adf9b2809f21 100644 --- a/ci/docker/Dockerfile.build.ubuntu_base_gpu +++ b/ci/docker/Dockerfile.build.ubuntu_base_gpu @@ -21,6 +21,8 @@ FROM nvidia/cuda:10.0-devel-ubuntu16.04 +ENV CUDNN_VERSION=7.5.1.10 + WORKDIR /work/deps RUN apt-get update && apt-get -y install sudo diff --git a/ci/docker/Dockerfile.build.ubuntu_build_cuda b/ci/docker/Dockerfile.build.ubuntu_build_cuda index 11fb5c0d1a62..0c1a3c33c121 100644 --- a/ci/docker/Dockerfile.build.ubuntu_build_cuda +++ b/ci/docker/Dockerfile.build.ubuntu_build_cuda @@ -23,7 +23,7 @@ FROM 
nvidia/cuda:10.0-devel-ubuntu16.04 -ENV CUDNN_VERSION=7.3.1.20 +ENV CUDNN_VERSION=7.5.1.10 WORKDIR /work/deps diff --git a/ci/docker/Dockerfile.build.ubuntu_gpu_cu100 b/ci/docker/Dockerfile.build.ubuntu_gpu_cu100 index edf0264b967e..c9908a5eda4e 100644 --- a/ci/docker/Dockerfile.build.ubuntu_gpu_cu100 +++ b/ci/docker/Dockerfile.build.ubuntu_gpu_cu100 @@ -77,6 +77,9 @@ ARG GROUP_ID=0 COPY install/ubuntu_adduser.sh /work/ RUN /work/ubuntu_adduser.sh +ENV CUDNN_VERSION=7.5.1.10 +COPY install/ubuntu_cudnn.sh /work/ +RUN /work/ubuntu_cudnn.sh COPY runtime_functions.sh /work/ diff --git a/ci/docker/Dockerfile.build.ubuntu_gpu_cu90 b/ci/docker/Dockerfile.build.ubuntu_gpu_cu90 index 2fb29774b0cf..16e8d998393a 100644 --- a/ci/docker/Dockerfile.build.ubuntu_gpu_cu90 +++ b/ci/docker/Dockerfile.build.ubuntu_gpu_cu90 @@ -77,6 +77,10 @@ ARG GROUP_ID=0 COPY install/ubuntu_adduser.sh /work/ RUN /work/ubuntu_adduser.sh +ENV CUDNN_VERSION=7.5.1.10 +COPY install/ubuntu_cudnn.sh /work/ +RUN /work/ubuntu_cudnn.sh + COPY runtime_functions.sh /work/ WORKDIR /work/mxnet diff --git a/ci/docker/Dockerfile.build.ubuntu_gpu_cu92 b/ci/docker/Dockerfile.build.ubuntu_gpu_cu92 index c297bf0b8a42..1618e68693e7 100644 --- a/ci/docker/Dockerfile.build.ubuntu_gpu_cu92 +++ b/ci/docker/Dockerfile.build.ubuntu_gpu_cu92 @@ -76,6 +76,10 @@ ARG GROUP_ID=0 COPY install/ubuntu_adduser.sh /work/ RUN /work/ubuntu_adduser.sh +ENV CUDNN_VERSION=7.5.1.10 +COPY install/ubuntu_cudnn.sh /work/ +RUN /work/ubuntu_cudnn.sh + COPY runtime_functions.sh /work/ WORKDIR /work/mxnet diff --git a/ci/docker/Dockerfile.build.ubuntu_nightly_gpu b/ci/docker/Dockerfile.build.ubuntu_nightly_gpu index cadb1dbaf771..82301bdbff6d 100644 --- a/ci/docker/Dockerfile.build.ubuntu_nightly_gpu +++ b/ci/docker/Dockerfile.build.ubuntu_nightly_gpu @@ -20,7 +20,7 @@ FROM nvidia/cuda:10.0-devel-ubuntu16.04 -ENV CUDNN_VERSION=7.3.1.20 +ENV CUDNN_VERSION=7.5.1.10 WORKDIR /work/deps diff --git a/src/operator/rnn-inl.h b/src/operator/rnn-inl.h index 
d164333953f2..e43b3c9b5131 100644 --- a/src/operator/rnn-inl.h +++ b/src/operator/rnn-inl.h @@ -1315,8 +1315,21 @@ class RNNOp { seed_)); // RNN descriptors + cudnnDataType_t dtype_with_fallback_; #if CUDNN_MAJOR >= 6 cudnnRNNAlgo_t rnn_algo = CUDNN_RNN_ALGO_STANDARD; + // On archs 50 and 52 (Maxwell), the GPU doesn't support native fp16 compute. + // Before cuDNN 7.5.0, when running fp16, cuDNN fell back to fp32 under the hood on Maxwell. + // That's no longer the case beginning with 7.5.0, so the fallback is added explicitly here. + #if __CUDA_ARCH__ < 530 && CUDNN_MAJOR >=7 && CUDNN_MINOR >= 5 + if (dtype_ == CUDNN_DATA_HALF) { + dtype_with_fallback_ = CUDNN_DATA_FLOAT; + } else { + dtype_with_fallback_ = dtype_; + } + #else + dtype_with_fallback_ = dtype_; + #endif CUDNN_CALL(cudnnSetRNNDescriptor_v6(s->dnn_handle_, rnn_desc_, param_.state_size, @@ -1326,7 +1339,7 @@ class RNNOp { direction_, mode_, rnn_algo, - dtype_)); + dtype_with_fallback_)); #else CUDNN_CALL(cudnnSetRNNDescriptor(rnn_desc_, param_.state_size,