This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

cuda/cuDNN lib version checking. Force cuDNN v7 usage. #15449

Merged: 5 commits, Jul 12, 2019
10 changes: 10 additions & 0 deletions docs/faq/env_var.md
@@ -242,6 +242,16 @@ If ctypes is used, it must be `mxnet._ctypes.ndarray.NDArrayBase`.
- If set to '0', disallows implicit type conversions to Float16 to use Tensor Cores
- If set to '1', allows CUDA ops like RNN and Convolution to use TensorCores even with Float32 input data by using implicit type casting to Float16. Only has an effect if `MXNET_CUDA_ALLOW_TENSOR_CORE` is `1`.

* MXNET_CUDA_VERSION_CHECKING
- 0(false) or 1(true) ```(default=1)```
- If set to '0', disallows various runtime checks of the cuda library version and associated warning messages.
- If set to '1', permits these checks (e.g. compile vs. link mismatch, old version no longer CI-tested)

* MXNET_CUDNN_VERSION_CHECKING
- 0(false) or 1(true) ```(default=1)```
- If set to '0', disallows various runtime checks of the cuDNN library version and associated warning messages.
- If set to '1', permits these checks (e.g. compile vs. link mismatch, old version no longer CI-tested)

* MXNET_GLUON_REPO
- Values: String ```(default='https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/')```
- The repository url to be used for Gluon datasets and pre-trained models.
8 changes: 4 additions & 4 deletions include/mxnet/resource.h
@@ -44,11 +44,11 @@ struct ResourceRequest {
kTempSpace,
/*! \brief common::RandGenerator<xpu> object, which can be used in GPU kernel functions */
kParallelRandom
#if MXNET_USE_CUDNN == 1 && CUDNN_MAJOR >= 7
#if MXNET_USE_CUDNN == 1
,
/*! \brief cudnnDropoutDescriptor_t object for GPU dropout kernel functions */
kCuDNNDropoutDesc
#endif // MXNET_USE_CUDNN == 1 && CUDNN_MAJOR >= 7
#endif // MXNET_USE_CUDNN == 1
};
/*! \brief type of resources */
Type type;
@@ -162,7 +162,7 @@ struct Resource {
reinterpret_cast<DType*>(get_space_internal(shape.Size() * sizeof(DType))),
shape, shape[ndim - 1], stream);
}
#if MXNET_USE_CUDNN == 1 && CUDNN_MAJOR >= 7
#if MXNET_USE_CUDNN == 1
/*!
* \brief Get cudnn dropout descriptor from shared state space.
*
@@ -175,7 +175,7 @@
mshadow::Stream<gpu> *stream,
const float dropout,
uint64_t seed) const;
#endif // MXNET_USE_CUDNN == 1 && CUDNN_MAJOR >= 7
#endif // MXNET_USE_CUDNN == 1

/*!
* \brief Get CPU space as mshadow Tensor in specified type.
105 changes: 105 additions & 0 deletions src/common/cuda_utils.cc
@@ -0,0 +1,105 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

/*!
* Copyright (c) 2019 by Contributors
* \file cuda_utils.cc
* \brief CUDA debugging utilities.
*/

#include <mxnet/base.h>
#include "cuda_utils.h"

#if MXNET_USE_CUDA == 1

namespace mxnet {
namespace common {
namespace cuda {

// The oldest version of cuda used in upstream MXNet CI testing, both for unix and windows.
// Users that have rebuilt MXNet against older versions will be advised with a warning to upgrade
// their systems to match the CI level. Minimally, users should rerun the CI locally.
#if defined(_MSC_VER)
#define MXNET_CI_OLDEST_CUDA_VERSION 9020
#else
#define MXNET_CI_OLDEST_CUDA_VERSION 10000
#endif

// Dynamic init here will emit a warning if runtime and compile-time cuda lib versions mismatch.
// Also if the user has recompiled their source to a version no longer tested by upstream CI.
bool cuda_version_check_performed = []() {
// Don't bother with checks if there are no GPUs visible (e.g. with CUDA_VISIBLE_DEVICES="")
if (dmlc::GetEnv("MXNET_CUDA_VERSION_CHECKING", true) && Context::GetGPUCount() > 0) {
Comment (Contributor):

@DickJC123 we have detected an error when running a GPU-compiled MXNet on a CPU machine: during the build, mxnet is loaded to generate the operator bindings. My colleague will file a ticket about this. It would be great to have your guidance on whether the underlying cudaGetDeviceCount can run without a driver, as the call is failing. Our thinking is that before this change we were not calling this CUDA function at load time. A possible solution is to add a function that checks whether GPUs are available, if the GPU count really cannot be queried without GPUs, which is a bit puzzling.

Comment (Contributor):

Filed an issue on the same. This is breaking our internal build flows: our build farm does not have GPU-enabled machines, and the GPU builds are also done on CPU machines with CUDA installed on them for build purposes.

// Not currently performing a runtime check of linked-against vs. compiled-against
// cuda runtime library, as major.minor must match for libmxnet.so to even load, per:
// https://docs.nvidia.com/deploy/cuda-compatibility/#binary-compatibility
if (CUDA_VERSION < MXNET_CI_OLDEST_CUDA_VERSION)
LOG(WARNING) << "Upgrade advisory: this mxnet has been built against cuda library version "
<< CUDA_VERSION << ", which is older than the oldest version tested by CI ("
<< MXNET_CI_OLDEST_CUDA_VERSION << "). "
<< "Set MXNET_CUDA_VERSION_CHECKING=0 to quiet this warning.";
}
return true;
}();

} // namespace cuda
} // namespace common
} // namespace mxnet

#endif // MXNET_USE_CUDA

#if MXNET_USE_CUDNN == 1

namespace mxnet {
namespace common {
namespace cudnn {

// The oldest version of CUDNN used in upstream MXNet CI testing, both for unix and windows.
// Users that have rebuilt MXNet against older versions will be advised with a warning to upgrade
// their systems to match the CI level. Minimally, users should rerun the CI locally.
#if defined(_MSC_VER)
#define MXNET_CI_OLDEST_CUDNN_VERSION 7600
#else
#define MXNET_CI_OLDEST_CUDNN_VERSION 7600
#endif

// Dynamic init here will emit a warning if runtime and compile-time cudnn lib versions mismatch.
// Also if the user has recompiled their source to a version no longer tested by upstream CI.
bool cudnn_version_check_performed = []() {
// Don't bother with checks if there are no GPUs visible (e.g. with CUDA_VISIBLE_DEVICES="")
if (dmlc::GetEnv("MXNET_CUDNN_VERSION_CHECKING", true) && Context::GetGPUCount() > 0) {
size_t linkedAgainstCudnnVersion = cudnnGetVersion();
if (linkedAgainstCudnnVersion != CUDNN_VERSION)
LOG(WARNING) << "cuDNN library mismatch: linked-against version " << linkedAgainstCudnnVersion
<< " != compiled-against version " << CUDNN_VERSION << ". "
<< "Set MXNET_CUDNN_VERSION_CHECKING=0 to quiet this warning.";
if (CUDNN_VERSION < MXNET_CI_OLDEST_CUDNN_VERSION)
LOG(WARNING) << "Upgrade advisory: this mxnet has been built against cuDNN library version "
<< CUDNN_VERSION << ", which is older than the oldest version tested by CI ("
<< MXNET_CI_OLDEST_CUDNN_VERSION << "). "
<< "Set MXNET_CUDNN_VERSION_CHECKING=0 to quiet this warning.";
}
return true;
}();

} // namespace cudnn
} // namespace common
} // namespace mxnet

#endif // MXNET_USE_CUDNN
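
On the reviewer discussion above (GPU-built MXNet being loaded on CPU-only build hosts): a minimal sketch of one possible mitigation, assuming the failure comes from cudaGetDeviceCount() erroring when no driver is present. The helper name is hypothetical and this is not part of the PR:

```cpp
#include <cuda_runtime.h>

// Hypothetical helper (not in this diff): report zero GPUs rather than failing
// when the device count cannot be queried, e.g. no NVIDIA driver on a
// CPU-only build machine that merely has the CUDA toolkit installed.
static int SafeGetGPUCount() {
  int count = 0;
  cudaError_t err = cudaGetDeviceCount(&count);
  // cudaErrorNoDevice / cudaErrorInsufficientDriver are expected in that
  // environment; treat them (and any other error) as "no GPUs visible".
  return (err == cudaSuccess) ? count : 0;
}
```

With such a guard in the load-time check, the version-check lambdas above would simply skip their warnings on driverless hosts instead of aborting.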
27 changes: 27 additions & 0 deletions src/common/cuda_utils.h
@@ -47,12 +47,20 @@ extern __cuda_fake_struct threadIdx;
extern __cuda_fake_struct blockIdx;
#endif

#define QUOTE(x) #x
#define QUOTEVALUE(x) QUOTE(x)

#if MXNET_USE_CUDA

#include <cuda_runtime.h>
#include <cublas_v2.h>
#include <curand.h>

#define STATIC_ASSERT_CUDA_VERSION_GE(min_version) \
static_assert(CUDA_VERSION >= min_version, "Compiled-against CUDA version " \
QUOTEVALUE(CUDA_VERSION) " is too old, please upgrade system to version " \
QUOTEVALUE(min_version) " or later.")

/*!
* \brief When compiling a __device__ function, check that the architecture is >= Kepler (3.0)
* Note that __CUDA_ARCH__ is not defined outside of a __device__ function
@@ -441,6 +449,25 @@ inline cublasMath_t SetCublasMathMode(cublasHandle_t blas_handle, cublasMath_t n

#include <cudnn.h>

// Creating CUDNN_VERSION_AS_STRING as follows avoids a static_assert error message that shows
// the formula for CUDNN_VERSION, i.e. "1000 * 7 + 100 * 6 + 0" rather than number "7600".
static_assert(CUDNN_PATCHLEVEL < 100 && CUDNN_MINOR < 10,
"CUDNN_VERSION_AS_STRING macro assumptions violated.");
#if CUDNN_PATCHLEVEL >= 10
#define CUDNN_VERSION_AS_STRING QUOTEVALUE(CUDNN_MAJOR) \
QUOTEVALUE(CUDNN_MINOR) \
QUOTEVALUE(CUDNN_PATCHLEVEL)
#else
#define CUDNN_VERSION_AS_STRING QUOTEVALUE(CUDNN_MAJOR) \
QUOTEVALUE(CUDNN_MINOR) \
"0" QUOTEVALUE(CUDNN_PATCHLEVEL)
#endif

#define STATIC_ASSERT_CUDNN_VERSION_GE(min_version) \
static_assert(CUDNN_VERSION >= min_version, "Compiled-against cuDNN version " \
CUDNN_VERSION_AS_STRING " is too old, please upgrade system to version " \
QUOTEVALUE(min_version) " or later.")

#define CUDNN_CALL(func) \
{ \
cudnnStatus_t e = (func); \
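
For context, a minimal sketch of how the new compile-time guards could be used from GPU operator code. The include path, function name, and version numbers are assumptions for illustration; the only concrete use added by this diff is the STATIC_ASSERT_CUDNN_VERSION_GE(7000) call in src/resource.cc shown below:

```cpp
#include "../common/cuda_utils.h"  // assumed relative path from an operator file

#if MXNET_USE_CUDA
void SomeOpRequiringRecentToolkits() {
  // Fails compilation with a readable message, e.g.
  // "Compiled-against CUDA version 9000 is too old, please upgrade system
  //  to version 10000 or later."
  STATIC_ASSERT_CUDA_VERSION_GE(10000);
#if MXNET_USE_CUDNN == 1
  // CUDNN_VERSION_AS_STRING makes the message read "7600" for cuDNN 7.6.0
  // instead of the raw formula "1000 * 7 + 100 * 6 + 0".
  STATIC_ASSERT_CUDNN_VERSION_GE(7600);
#endif
}
#endif  // MXNET_USE_CUDA
```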
22 changes: 12 additions & 10 deletions src/resource.cc
@@ -92,9 +92,9 @@ class ResourceManagerImpl : public ResourceManager {
gpu_temp_space_copy_ = dmlc::GetEnv("MXNET_GPU_TEMP_COPY", 1);
cpu_native_rand_copy_ = dmlc::GetEnv("MXNET_CPU_PARALLEL_RAND_COPY", 1);
gpu_native_rand_copy_ = dmlc::GetEnv("MXNET_GPU_PARALLEL_RAND_COPY", 4);
#if MXNET_USE_CUDNN == 1 && CUDNN_MAJOR >= 7
#if MXNET_USE_CUDNN == 1
gpu_cudnn_dropout_state_copy_ = dmlc::GetEnv("MXNET_GPU_CUDNN_DROPOUT_STATE_COPY", 4);
#endif // MXNET_USE_CUDNN == 1 && CUDNN_MAJOR >= 7
#endif // MXNET_USE_CUDNN == 1
engine_ref_ = Engine::_GetSharedRef();
storage_ref_ = Storage::_GetSharedRef();
cpu_rand_.reset(new ResourceRandom<cpu>(
@@ -113,9 +113,9 @@
gpu_rand_.Clear();
gpu_space_.Clear();
gpu_parallel_rand_.Clear();
#if MXNET_USE_CUDNN == 1 && CUDNN_MAJOR >= 7
#if MXNET_USE_CUDNN == 1
gpu_cudnn_dropout_state_.Clear();
#endif // MXNET_USE_CUDNN == 1 && CUDNN_MAJOR >= 7
#endif // MXNET_USE_CUDNN == 1
#endif
if (engine_ref_ != nullptr) {
engine_ref_ = nullptr;
@@ -153,14 +153,14 @@ class ResourceManagerImpl : public ResourceManager {
return new ResourceParallelRandom<gpu>(ctx, gpu_native_rand_copy_, global_seed_);
})->GetNext();
}
#if MXNET_USE_CUDNN == 1 && CUDNN_MAJOR >= 7
#if MXNET_USE_CUDNN == 1
case ResourceRequest::kCuDNNDropoutDesc: {
return gpu_cudnn_dropout_state_.Get(ctx.dev_id, [ctx, this]() {
return new ResourceTempSpace<ResourceRequest::kCuDNNDropoutDesc>(
ctx, gpu_cudnn_dropout_state_copy_);
})->GetNext();
}
#endif // MXNET_USE_CUDNN == 1 && CUDNN_MAJOR >= 7
#endif // MXNET_USE_CUDNN == 1
default: LOG(FATAL) << "Unknown supported type " << req.type;
}
#else
@@ -399,13 +399,13 @@ class ResourceManagerImpl : public ResourceManager {
common::LazyAllocArray<ResourceTempSpace<ResourceRequest::kTempSpace>> gpu_space_;
/*! \brief GPU parallel (on device) random number resources */
common::LazyAllocArray<ResourceParallelRandom<gpu> > gpu_parallel_rand_;
#if MXNET_USE_CUDNN == 1 && CUDNN_MAJOR >= 7
#if MXNET_USE_CUDNN == 1
/*! \brief number of copies in GPU cudnn dropout descriptor resources */
int gpu_cudnn_dropout_state_copy_;
/*! \brief GPU parallel (on device) random number resources */
common::LazyAllocArray<ResourceTempSpace<ResourceRequest::kCuDNNDropoutDesc>>
gpu_cudnn_dropout_state_;
#endif // MXNET_USE_CUDNN == 1 && CUDNN_MAJOR >= 7
#endif // MXNET_USE_CUDNN == 1
#endif
};
} // namespace resource
@@ -418,7 +418,7 @@ void* Resource::get_host_space_internal(size_t size) const {
return static_cast<resource::SpaceAllocator*>(ptr_)->GetHostSpace(size);
}

#if MXNET_USE_CUDNN == 1 && CUDNN_MAJOR >= 7
#if MXNET_USE_CUDNN == 1
void Resource::get_cudnn_dropout_desc(
cudnnDropoutDescriptor_t* dropout_desc,
mshadow::Stream<gpu> *stream,
@@ -442,14 +442,16 @@ void Resource::get_cudnn_dropout_desc(
dropout_state_size,
seed));
} else {
// cudnnRestoreDropoutDescriptor() introduced with cuDNN v7
STATIC_ASSERT_CUDNN_VERSION_GE(7000);
CUDNN_CALL(cudnnRestoreDropoutDescriptor(*dropout_desc, stream->dnn_handle_,
dropout,
state_space->handle.dptr,
state_space->handle.size,
seed));
}
}
#endif // MXNET_USE_CUDNN == 1 && CUDNN_MAJOR >= 7
#endif // MXNET_USE_CUDNN == 1

ResourceManager* ResourceManager::Get() {
typedef dmlc::ThreadLocalStore<resource::ResourceManagerImpl> inst;
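
To round out the resource changes, a hypothetical caller sketch for get_cudnn_dropout_desc(); the wrapper function below is invented, but the argument list matches the declaration in include/mxnet/resource.h from this diff. The first call on a given state space initializes the dropout states via cudnnSetDropoutDescriptor, and later calls take the cheaper cudnnRestoreDropoutDescriptor path (hence the cuDNN 7 static assert added above):

```cpp
#include <mxnet/base.h>
#include <mxnet/resource.h>

#if MXNET_USE_CUDNN == 1
#include <cudnn.h>

// Hypothetical wrapper: `res` is the resource obtained by requesting
// ResourceRequest::kCuDNNDropoutDesc; `desc` must already have been created
// with cudnnCreateDropoutDescriptor by the caller.
void FillDropoutDescriptor(const mxnet::Resource& res,
                           mshadow::Stream<mxnet::gpu>* stream,
                           cudnnDropoutDescriptor_t* desc,
                           float dropout_prob,
                           uint64_t seed) {
  res.get_cudnn_dropout_desc(desc, stream, dropout_prob, seed);
}
#endif  // MXNET_USE_CUDNN == 1
```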