diff --git a/src/operator/rnn-inl.h b/src/operator/rnn-inl.h
index 5eae413b078b..5652960baa43 100644
--- a/src/operator/rnn-inl.h
+++ b/src/operator/rnn-inl.h
@@ -409,10 +409,11 @@ class RNNOp {
   std::vector<mkldnn::memory> bias_memory;
   std::vector<mkldnn::memory> y_memory;
   std::vector<mkldnn::memory> hcy_memory;
+  size_t weights_version;
   bool has_cache;
   bool init_mem_;
   size_t reserve_mem_size_;
-  Storage::Handle mem_space_;
+  NDArray mem_space_;
 #endif
   explicit RNNOp(RNNParam param, Context ctx) {
     this->param_ = param;
@@ -522,12 +523,6 @@ class RNNOp {
   }

   ~RNNOp() {
-#if MXNET_USE_MKLDNN == 1
-    if (init_mem_) {
-      Storage::Get()->Free(mem_space_);
-      init_mem_ = false;
-    }
-#endif  // MXNET_USE_MKLDNN
 #if MXNET_USE_CUDNN == 1
     CUDNN_CALL(cudnnDestroyTensorDescriptor(hx_desc_));
     CUDNN_CALL(cudnnDestroyTensorDescriptor(cx_desc_));
@@ -560,17 +555,6 @@ class RNNOp {
     CUDNN_CALL(cudnnDestroyRNNDataDescriptor(dy_data_desc_));
 #endif  // MXNET_USE_CUDNN_GE_7200
 #endif  // MXNET_USE_CUDNN
-
-    if (ctx_.dev_type == kCPU) {
-      if (init_space_) {
-        Storage::Get()->Free(reserve_cpu_space_);
-        init_space_ = false;
-      }
-      if (temp_init_space_) {
-        Storage::Get()->Free(temp_cpu_space_);
-        temp_init_space_ = false;
-      }
-    }
   }

   void Forward(const OpContext &ctx, const std::vector<TBlob> &in_data,
@@ -855,37 +839,30 @@ class RNNOp {
 #endif  // MXNET_USE_CUDNN == 1 && defined(__CUDACC__)

     if (ctx_.dev_type == kCPU) {
+      // allocate temp space
+      const size_t work_cpu_space_size = GetRNNWorkspaceSize(param_.seq_length_, param_.batch_size_,
+                                                             param_.state_size, direction, param_.mode);
+      if (!temp_init_space_ || temp_cpu_space_size_ < work_cpu_space_size) {
+        temp_cpu_space_size_ = work_cpu_space_size;
+        temp_cpu_space_ = NDArray(TShape({static_cast<dim_t>(temp_cpu_space_size_)}), ctx_,
+            false, in_data[rnn_enum::kData].type_flag_);
+        temp_init_space_ = true;
+      }
+      DType* work_cpu_space = static_cast<DType*>(temp_cpu_space_.data().dptr_);
+
       if (ctx.is_train) {
-        // allocate temp space
-        const size_t work_cpu_space_size =
-            GetRNNWorkspaceSize(param_.seq_length_, param_.batch_size_,
-                                param_.state_size, direction, param_.mode);
-        if (temp_init_space_ && temp_cpu_space_size_ < work_cpu_space_size) {
-          Storage::Get()->Free(temp_cpu_space_);
-          temp_init_space_ = false;
-        }
-        if (!temp_init_space_) {
-          temp_cpu_space_ = Storage::Get()->Alloc
-              (work_cpu_space_size * sizeof(DType), Context::CPU());
-          temp_cpu_space_size_ = work_cpu_space_size;
-          temp_init_space_ = true;
-        }
-        DType* work_cpu_space = static_cast<DType*>(temp_cpu_space_.dptr);
+        // allocate reserve space
         const size_t r_size = GetRNNReserveSpaceSize(param_.num_layers, direction,
                                                      param_.seq_length_, param_.batch_size_,
                                                      param_.state_size, param_.mode);
-        if (init_space_ && reserve_cpu_space_size_ < r_size) {
-          Storage::Get()->Free(reserve_cpu_space_);
-          init_space_ = false;
-        }
-        if (!init_space_) {
-          reserve_cpu_space_ = Storage::Get()->Alloc(r_size * sizeof(DType), Context::CPU());
+        if (!init_space_ || reserve_cpu_space_size_ < r_size) {
           reserve_cpu_space_size_ = r_size;
+          reserve_cpu_space_ = NDArray(TShape({static_cast<dim_t>(reserve_cpu_space_size_)}), ctx_,
+              false, in_data[rnn_enum::kData].type_flag_);
           init_space_ = true;
         }
-
-        DType* reserve_space_ptr = static_cast<DType*>(reserve_cpu_space_.dptr);
+        DType* reserve_space_ptr = static_cast<DType*>(reserve_cpu_space_.data().dptr_);

         RNNForwardTraining<DType>(work_cpu_space,
                                   reserve_space_ptr,
@@ -945,20 +922,6 @@ class RNNOp {
 #endif  // MXNET_USE_MKLDNN == 1
       // Before integrating MKLDNN GRU fp32 inference
       // using below code for keep func being OK
-      const size_t work_cpu_space_size =
-          GetRNNWorkspaceSize(param_.seq_length_, param_.batch_size_,
-                              param_.state_size, direction, param_.mode);
-      if (temp_init_space_ && temp_cpu_space_size_ < work_cpu_space_size) {
-        Storage::Get()->Free(temp_cpu_space_);
-        temp_init_space_ = false;
-      }
-      if (!temp_init_space_) {
-        temp_cpu_space_ = Storage::Get()->Alloc
-            (work_cpu_space_size * sizeof(DType), Context::CPU());
-        temp_cpu_space_size_ = work_cpu_space_size;
-        temp_init_space_ = true;
-      }
-      DType* work_cpu_space = static_cast<DType*>(temp_cpu_space_.dptr);
       RNNForwardInference<DType>(work_cpu_space,
                                  param_.state_outputs,
                                  param_.num_layers,
@@ -1171,7 +1134,7 @@ class RNNOp {
       if (!temp_init_space_ || temp_cpu_space_size_ != work_cpu_space_size) {
         LOG(FATAL) << "Check temp init error";
       }
-      DType* work_cpu_space = static_cast<DType*>(temp_cpu_space_.dptr);
+      DType* work_cpu_space = static_cast<DType*>(temp_cpu_space_.data().dptr_);
       size_t r_size = GetRNNReserveSpaceSize(param_.num_layers, direction,
                                              param_.seq_length_, param_.batch_size_,
                                              param_.state_size, param_.mode);
@@ -1180,7 +1143,7 @@ class RNNOp {
         LOG(FATAL) << "Check forward init error";
       }

-      DType* reserve_space_ptr = static_cast<DType*>(reserve_cpu_space_.dptr);
+      DType* reserve_space_ptr = static_cast<DType*>(reserve_cpu_space_.data().dptr_);
       RNNBackward<DType>(work_cpu_space,
                          reserve_space_ptr,
                          param_.num_layers,
@@ -1551,7 +1514,7 @@ class RNNOp {
 #endif  // MXNET_USE_CUDNN
   bool init_space_, temp_init_space_;
   size_t reserve_cpu_space_size_, temp_cpu_space_size_;
-  Storage::Handle reserve_cpu_space_, temp_cpu_space_;
+  NDArray reserve_cpu_space_, temp_cpu_space_;
 };  // class RNNOp

 static OpStatePtr CreateRNNState(const nnvm::NodeAttrs &attrs,
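Note on the rnn-inl.h hunks above: the manually managed Storage::Handle buffers become NDArray members, so the destructor no longer needs explicit Storage::Get()->Free() calls; the NDArray's reference-counted storage is released automatically when the operator is destroyed, and a buffer is reallocated only when it is missing or too small. Below is a minimal standalone sketch of that grow-only caching pattern, using std::unique_ptr as a stand-in for NDArray; CachedBuffer and EnsureCapacity are illustrative names, not MXNet APIs.

// Sketch only: mirrors `if (!init_space_ || reserve_cpu_space_size_ < r_size)`.
#include <cstddef>
#include <memory>

class CachedBuffer {  // hypothetical; NDArray plays this role in the patch
 public:
  float* EnsureCapacity(size_t size) {
    // Reallocate only when the cached buffer is absent or too small;
    // the previous buffer is freed automatically by the owning handle.
    if (!buf_ || capacity_ < size) {
      buf_ = std::make_unique<float[]>(size);  // ~ NDArray(TShape({...}), ctx_, ...)
      capacity_ = size;
    }
    return buf_.get();  // ~ temp_cpu_space_.data().dptr_
  }

 private:
  std::unique_ptr<float[]> buf_;  // owning handle; no manual Free() in a destructor
  size_t capacity_ = 0;
};

Because ownership sits in the member itself, the early-return and exception paths that previously leaked the Storage::Handle now clean up for free.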
diff --git a/src/operator/rnn.cc b/src/operator/rnn.cc
index 86fb1c7d1ec6..fd016d6819b9 100644
--- a/src/operator/rnn.cc
+++ b/src/operator/rnn.cc
@@ -270,22 +270,24 @@ static void RNNStatefulComputeCPU(const OpStatePtr& state_ptr,
     const size_t r_size = GetMKLDNNRNNCacheMemorySize(L, D, T, N, I, H, param.mode);
     if (op.init_mem_ && op.reserve_mem_size_ < r_size) {
-      Storage::Get()->Free(op.mem_space_);
       op.init_mem_ = false;
     }
+    const size_t weights_version = inputs[rnn_enum::kParams].version();
     if (!op.init_mem_) {
-      op.mem_space_ = Storage::Get()->Alloc(
-          r_size * sizeof(DType),
-          Context::CPU());
+      op.mem_space_ = NDArray(TShape({static_cast<dim_t>(r_size)}), op.ctx_, false, dtype);
       op.reserve_mem_size_ = r_size;
       op.init_mem_ = true;
       op.has_cache = false;
+      // Assign weights_version
+      op.weights_version = weights_version;
     }
-    if (op.has_cache && op.x_memory.size() == 0) {
+    // Check if NDArray was changed.
+    if (op.weights_version != weights_version) {
       op.has_cache = false;
+      op.weights_version = weights_version;
     }
-    DType* workptr = static_cast<DType*>(op.mem_space_.dptr);
+    DType* workptr = static_cast<DType*>(op.mem_space_.data().dptr_);
     mkldnn::memory::dims src_layer_tz_0 = {T, N, I};
     mkldnn::memory::dims src_layer_tz = {T, N, D * H};
     mkldnn::memory::dims dst_layer_tz = {T, N, D * H};
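Note on the rnn.cc hunk above: the cached MKLDNN weight memories are now keyed on the parameter NDArray's version counter. Any write to the weights (for example, copy_params_from in the test below) bumps the version, so a stale cache is detected and rebuilt rather than silently reused. A self-contained sketch of this version-check pattern follows; VersionedArray, WeightCache, Sync, and RebuildCache are hypothetical names, whereas the patch itself compares NDArray::version() against op.weights_version.

// Sketch only: version-keyed cache invalidation, as in the hunk above.
#include <cstddef>
#include <cstdio>

struct VersionedArray {
  size_t version = 0;
  void Write() { ++version; }  // every mutation bumps the version counter
};

struct WeightCache {
  bool has_cache = false;
  size_t weights_version = 0;

  void Sync(const VersionedArray& params) {
    // Mirrors `if (op.weights_version != weights_version)` in the patch:
    // a version mismatch means the weights were rewritten, so the cached
    // reordered copies must be rebuilt before they are used again.
    if (!has_cache || weights_version != params.version) {
      RebuildCache();
      weights_version = params.version;
      has_cache = true;
    }
  }

  void RebuildCache() { std::puts("reorder weights into cache"); }
};

This replaces the old `op.has_cache && op.x_memory.size() == 0` heuristic, which never noticed in-place updates to an already-bound parameter array.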
diff --git a/tests/python/unittest/test_operator.py b/tests/python/unittest/test_operator.py
index f0e16b2729a2..bcf618f2784a 100644
--- a/tests/python/unittest/test_operator.py
+++ b/tests/python/unittest/test_operator.py
@@ -75,6 +75,45 @@ def check_rnn_consistency(cell1, cell2, T, N, I, H, grad_req, rtol=1e-2, atol=1e
     assert(mod2.get_input_grads()[0] == None)


+@with_seed()
+@assert_raises_cudnn_not_satisfied(min_version='5.1.10')
+def test_rnn_with_new_param():
+    rnn_modes = ['rnn_relu', 'rnn_tanh', 'gru', 'lstm']
+    ngates_ = [1, 1, 3, 4]
+    num_layers, input_size, seq_len, batch_size, state_size = 3, 128, 5, 64, 8
+    for bidirectional in [False, True]:
+        directions = 2 if bidirectional else 1
+        for mode, ngates in zip(rnn_modes, ngates_):
+            first_layer_size = (input_size * state_size + state_size * state_size + state_size * 2) * ngates
+            rest_layer_size = (state_size * directions * state_size + state_size * state_size + state_size * 2) \
+                              * ngates * (num_layers - 1)
+            param_size = (first_layer_size + rest_layer_size) * directions
+            sym = mx.sym.RNN(mode=mode, num_layers=num_layers, bidirectional=bidirectional,
+                             state_outputs=False, state_size=state_size, name='rnn')
+
+            bind_dict = {
+                'rnn_data': mx.ndarray.random.uniform(low=-1, high=1, shape=(seq_len, batch_size, input_size)),
+                'rnn_parameters': mx.ndarray.random.uniform(low=-1, high=1, shape=(param_size)),
+                'rnn_state': mx.ndarray.zeros(shape=(num_layers * directions, batch_size, state_size))
+            }
+            if mode == 'lstm':
+                bind_dict['rnn_state_cell'] = mx.ndarray.zeros(
+                    shape=(num_layers * directions, batch_size, state_size))
+
+            ex = sym.bind(default_context(), bind_dict)
+            ex.forward(is_train=True)
+            ex01 = ex.output_dict['rnn_output'].asnumpy()
+            ex.forward(is_train=False)
+            ex02 = ex.output_dict['rnn_output'].asnumpy()
+            assert_allclose(ex01, ex02, rtol=1e-2, atol=1e-4)
+            bind_dict['rnn_parameters'] = mx.ndarray.random.uniform(low=-1, high=1, shape=(param_size))
+            ex.copy_params_from(bind_dict)
+            ex.forward(is_train=True)
+            ex03 = ex.output_dict['rnn_output'].asnumpy()
+            ex.forward(is_train=False)
+            ex04 = ex.output_dict['rnn_output'].asnumpy()
+            assert_allclose(ex03, ex04, rtol=1e-2, atol=1e-4)
+

 @with_seed()
 @assert_raises_cudnn_not_satisfied(min_version='5.1.10')