From 8ab2d3b8bebcd8cdab417f9687c6052294ab7ecb Mon Sep 17 00:00:00 2001 From: Dick Carter Date: Tue, 22 Oct 2019 16:40:14 -0700 Subject: [PATCH 1/2] RNNOp to call cudaEventCreate lazily --- src/operator/rnn-inl.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/operator/rnn-inl.h b/src/operator/rnn-inl.h index cf6fe10fd328..ead7501a48b0 100644 --- a/src/operator/rnn-inl.h +++ b/src/operator/rnn-inl.h @@ -493,7 +493,6 @@ class RNNOp { CUDNN_CALL(cudnnCreateRNNDescriptor(&rnn_desc_)); CUDNN_CALL(cudnnCreateDropoutDescriptor(&dropout_desc_)); - CUDA_CALL(cudaEventCreateWithFlags(&dgrad_sync_event_, cudaEventDisableTiming)); #if MXNET_USE_CUDNN_GE_7200 CUDNN_CALL(cudnnCreateRNNDataDescriptor(&x_data_desc_)); @@ -538,7 +537,8 @@ class RNNOp { CUDNN_CALL(cudnnDestroyFilterDescriptor(dw_desc_)); CUDNN_CALL(cudnnDestroyRNNDescriptor(rnn_desc_)); CUDNN_CALL(cudnnDestroyDropoutDescriptor(dropout_desc_)); - CUDA_CALL(cudaEventDestroy(dgrad_sync_event_)); + if (dgrad_sync_event_created_) + CUDA_CALL(cudaEventDestroy(dgrad_sync_event_)); if (init_cudnn_) { for (size_t i = 0; i < x_desc_vec_.size(); ++i) { @@ -1502,6 +1502,10 @@ class RNNOp { if (CUDNN_VERSION <= 7604 && dgrad_sync_needed_) { // Without blocking the CPU, create a synchronization point of all current GPU activity. No // need to call cudaStreamWaitEvent- cudaEventRecord on the legacy default stream suffices. + if (!dgrad_sync_event_created_) { + CUDA_CALL(cudaEventCreateWithFlags(&dgrad_sync_event_, cudaEventDisableTiming)); + dgrad_sync_event_created_ = true; + } CUDA_CALL(cudaEventRecord(dgrad_sync_event_, cudaStreamLegacy)); } } @@ -1535,6 +1539,7 @@ class RNNOp { cudnnTensorFormat_t format_; cudaEvent_t dgrad_sync_event_; + bool dgrad_sync_event_created_ = false; bool dgrad_sync_needed_ = false; #endif // MXNET_USE_CUDNN bool init_space_, temp_init_space_; From edd5cc97f0911ffe46c3fd60d7c91d8909895d17 Mon Sep 17 00:00:00 2001 From: Dick Carter Date: Tue, 22 Oct 2019 19:07:48 -0700 Subject: [PATCH 2/2] Saw unrelated failure. Trigger CI.