MKL-DNN RNN checks NDArray version (apache#16071)
* MKL-DNN RNN checks NDArray version

* Add UT

* Use default_context()
zixuanweeei authored and gyshi committed Sep 7, 2019
1 parent ad1d278 commit bbe9148
Showing 3 changed files with 68 additions and 64 deletions.
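
In short: the MKL-DNN RNN operator caches MKL-DNN weight memory between invocations, but it previously tracked only a has_cache flag, so it could not detect that the parameters NDArray had been rewritten (for example through copy_params_from, as in the new unit test) and would keep using stale cached weights. The commit records the version counter of the kParams NDArray next to the cache and rebuilds the cache whenever that version changes; it also replaces the manually freed Storage::Handle scratch buffers with NDArray members. Below is a minimal sketch of the invalidation pattern only — CachedRNNState and BuildCache are hypothetical stand-ins for RNNOp and the primitive setup in RNNStatefulComputeCPU, while NDArray::version() is the real call used in the diff.

#include <mxnet/ndarray.h>

struct CachedRNNState {
  bool has_cache = false;      // MKL-DNN memory/primitives already prepared?
  size_t weights_version = 0;  // version of the weights NDArray the cache was built from
};

// Hypothetical: reorders the weights into MKL-DNN layout and builds primitives.
void BuildCache(CachedRNNState* op, const mxnet::NDArray& weights);

void ForwardWithCache(CachedRNNState* op, const mxnet::NDArray& weights) {
  const size_t current_version = weights.version();
  if (op->weights_version != current_version) {
    // The weights were written since the cache was built: invalidate and remember.
    op->has_cache = false;
    op->weights_version = current_version;
  }
  if (!op->has_cache) {
    BuildCache(op, weights);
    op->has_cache = true;
  }
  // ... execute the cached MKL-DNN RNN primitives ...
}
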
79 changes: 21 additions & 58 deletions src/operator/rnn-inl.h
@@ -409,10 +409,11 @@ class RNNOp {
std::vector<mkldnn::memory> bias_memory;
std::vector<mkldnn::memory> y_memory;
std::vector<mkldnn::memory> hcy_memory;
size_t weights_version;
bool has_cache;
bool init_mem_;
size_t reserve_mem_size_;
Storage::Handle mem_space_;
NDArray mem_space_;
#endif
explicit RNNOp(RNNParam param, Context ctx) {
this->param_ = param;
@@ -522,12 +523,6 @@ class RNNOp {
}

~RNNOp() {
#if MXNET_USE_MKLDNN == 1
if (init_mem_) {
Storage::Get()->Free(mem_space_);
init_mem_ = false;
}
#endif // MXNET_USE_MKLDNN
#if MXNET_USE_CUDNN == 1
CUDNN_CALL(cudnnDestroyTensorDescriptor(hx_desc_));
CUDNN_CALL(cudnnDestroyTensorDescriptor(cx_desc_));
@@ -560,17 +555,6 @@ class RNNOp {
CUDNN_CALL(cudnnDestroyRNNDataDescriptor(dy_data_desc_));
#endif // MXNET_USE_CUDNN_GE_7200
#endif // MXNET_USE_CUDNN

if (ctx_.dev_type == kCPU) {
if (init_space_) {
Storage::Get()->Free(reserve_cpu_space_);
init_space_ = false;
}
if (temp_init_space_) {
Storage::Get()->Free(temp_cpu_space_);
temp_init_space_ = false;
}
}
}

void Forward(const OpContext &ctx, const std::vector<TBlob> &in_data,
@@ -855,37 +839,30 @@ class RNNOp {
#endif // MXNET_USE_CUDNN == 1 && defined(__CUDACC__)

if (ctx_.dev_type == kCPU) {
// allocate temp space
const size_t work_cpu_space_size = GetRNNWorkspaceSize(param_.seq_length_, param_.batch_size_,
param_.state_size, direction, param_.mode);
if (!temp_init_space_ || temp_cpu_space_size_ < work_cpu_space_size) {
temp_cpu_space_size_ = work_cpu_space_size;
temp_cpu_space_ = NDArray(TShape({static_cast<dim_t>(temp_cpu_space_size_)}), ctx_,
false, in_data[rnn_enum::kData].type_flag_);
temp_init_space_ = true;
}
DType* work_cpu_space = static_cast<DType*>(temp_cpu_space_.data().dptr_);

if (ctx.is_train) {
// allocate temp space
const size_t work_cpu_space_size =
GetRNNWorkspaceSize(param_.seq_length_, param_.batch_size_,
param_.state_size, direction, param_.mode);
if (temp_init_space_ && temp_cpu_space_size_ < work_cpu_space_size) {
Storage::Get()->Free(temp_cpu_space_);
temp_init_space_ = false;
}
if (!temp_init_space_) {
temp_cpu_space_ = Storage::Get()->Alloc
(work_cpu_space_size * sizeof(DType), Context::CPU());
temp_cpu_space_size_ = work_cpu_space_size;
temp_init_space_ = true;
}
DType* work_cpu_space = static_cast<DType*>(temp_cpu_space_.dptr);
// allocate reserve space

const size_t r_size = GetRNNReserveSpaceSize(param_.num_layers, direction,
param_.seq_length_, param_.batch_size_,
param_.state_size, param_.mode);
if (init_space_ && reserve_cpu_space_size_ < r_size) {
Storage::Get()->Free(reserve_cpu_space_);
init_space_ = false;
}
if (!init_space_) {
reserve_cpu_space_ = Storage::Get()->Alloc(r_size * sizeof(DType), Context::CPU());
if (!init_space_ || reserve_cpu_space_size_ < r_size) {
reserve_cpu_space_size_ = r_size;
reserve_cpu_space_ = NDArray(TShape({static_cast<dim_t>(reserve_cpu_space_size_)}), ctx_,
false, in_data[rnn_enum::kData].type_flag_);
init_space_ = true;
}

DType* reserve_space_ptr = static_cast<DType*>(reserve_cpu_space_.dptr);
DType* reserve_space_ptr = static_cast<DType*>(reserve_cpu_space_.data().dptr_);

RNNForwardTraining<DType>(work_cpu_space,
reserve_space_ptr,
@@ -945,20 +922,6 @@ class RNNOp {
#endif // MXNET_USE_MKLDNN == 1
// Code path from before MKL-DNN GRU fp32 inference was integrated,
// kept below so this function continues to work.
const size_t work_cpu_space_size =
GetRNNWorkspaceSize(param_.seq_length_, param_.batch_size_,
param_.state_size, direction, param_.mode);
if (temp_init_space_ && temp_cpu_space_size_ < work_cpu_space_size) {
Storage::Get()->Free(temp_cpu_space_);
temp_init_space_ = false;
}
if (!temp_init_space_) {
temp_cpu_space_ = Storage::Get()->Alloc
(work_cpu_space_size * sizeof(DType), Context::CPU());
temp_cpu_space_size_ = work_cpu_space_size;
temp_init_space_ = true;
}
DType* work_cpu_space = static_cast<DType*>(temp_cpu_space_.dptr);
RNNForwardInference<DType>(work_cpu_space,
param_.state_outputs,
param_.num_layers,
@@ -1171,7 +1134,7 @@ class RNNOp {
if (!temp_init_space_ || temp_cpu_space_size_ != work_cpu_space_size) {
LOG(FATAL) << "Check temp init error";
}
DType* work_cpu_space = static_cast<DType*>(temp_cpu_space_.dptr);
DType* work_cpu_space = static_cast<DType*>(temp_cpu_space_.data().dptr_);
size_t r_size = GetRNNReserveSpaceSize(param_.num_layers, direction,
param_.seq_length_, param_.batch_size_,
param_.state_size, param_.mode);
@@ … @@
LOG(FATAL) << "Check forward init error";
}

DType* reserve_space_ptr = static_cast<DType*>(reserve_cpu_space_.dptr);
DType* reserve_space_ptr = static_cast<DType*>(reserve_cpu_space_.data().dptr_);
RNNBackward<DType>(work_cpu_space,
reserve_space_ptr,
param_.num_layers,
@@ -1551,7 +1514,7 @@ class RNNOp {
#endif // MXNET_USE_CUDNN
bool init_space_, temp_init_space_;
size_t reserve_cpu_space_size_, temp_cpu_space_size_;
Storage::Handle reserve_cpu_space_, temp_cpu_space_;
NDArray reserve_cpu_space_, temp_cpu_space_;
}; // class RNNOp

static OpStatePtr CreateRNNState(const nnvm::NodeAttrs &attrs,
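
A second thread in the rnn-inl.h changes above: the scratch buffers (mem_space_, temp_cpu_space_, reserve_cpu_space_) become NDArray members instead of Storage::Handle, so the destructor no longer frees them by hand and the allocation logic collapses to a single grow-on-demand branch. A rough sketch of that pattern follows; the helper name EnsureWorkspace is hypothetical, while the NDArray constructor is the same one the diff uses.

#include <mxnet/ndarray.h>

// Reallocate only when the buffer is missing or too small; the NDArray owns the
// memory, so there is no matching Storage::Get()->Free() anywhere.
template <typename DType>
DType* EnsureWorkspace(mxnet::NDArray* space, bool* initialized, size_t* stored_size,
                       size_t required, mxnet::Context ctx, int dtype_flag) {
  if (!*initialized || *stored_size < required) {
    *stored_size = required;
    *space = mxnet::NDArray(mxnet::TShape({static_cast<mxnet::dim_t>(required)}),
                            ctx, false /* delay_alloc */, dtype_flag);
    *initialized = true;
  }
  return static_cast<DType*>(space->data().dptr_);
}
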
14 changes: 8 additions & 6 deletions src/operator/rnn.cc
@@ -270,22 +270,24 @@ static void RNNStatefulComputeCPU(const OpStatePtr& state_ptr,

const size_t r_size = GetMKLDNNRNNCacheMemorySize(L, D, T, N, I, H, param.mode);
if (op.init_mem_ && op.reserve_mem_size_ < r_size) {
Storage::Get()->Free(op.mem_space_);
op.init_mem_ = false;
}
const size_t weights_version = inputs[rnn_enum::kParams].version();
if (!op.init_mem_) {
op.mem_space_ = Storage::Get()->Alloc(
r_size * sizeof(DType),
Context::CPU());
op.mem_space_ = NDArray(TShape({static_cast<dim_t>(r_size)}), op.ctx_, false, dtype);
op.reserve_mem_size_ = r_size;
op.init_mem_ = true;
op.has_cache = false;
// Assign weights_version
op.weights_version = weights_version;
}
if (op.has_cache && op.x_memory.size() == 0) {
// Check whether the weights NDArray has changed since the cache was built.
if (op.weights_version != weights_version) {
op.has_cache = false;
op.weights_version = weights_version;
}

DType* workptr = static_cast<DType*>(op.mem_space_.dptr);
DType* workptr = static_cast<DType*>(op.mem_space_.data().dptr_);
mkldnn::memory::dims src_layer_tz_0 = {T, N, I};
mkldnn::memory::dims src_layer_tz = {T, N, D * H};
mkldnn::memory::dims dst_layer_tz = {T, N, D * H};
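
A note on the design visible in the rnn.cc hunk above: comparing version counters is the cheap way to detect a parameter update — the operator never inspects the weight values themselves, and once has_cache is cleared the next forward call falls through to the existing cache-building path and rebuilds the cached MKL-DNN memory from the new parameters. The unit test added below exercises exactly this by calling copy_params_from with fresh parameters and re-running forward in both training and inference mode.
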
39 changes: 39 additions & 0 deletions tests/python/unittest/test_operator.py
@@ -75,6 +75,45 @@ def check_rnn_consistency(cell1, cell2, T, N, I, H, grad_req, rtol=1e-2, atol=1e
assert(mod2.get_input_grads()[0] == None)


@with_seed()
@assert_raises_cudnn_not_satisfied(min_version='5.1.10')
def test_rnn_with_new_param():
rnn_modes = ['rnn_relu', 'rnn_tanh', 'gru', 'lstm']
ngates_ = [1, 1, 3, 4]
num_layers, input_size, seq_len, batch_size, state_size = 3, 128, 5, 64, 8
for bidirectional in [False, True]:
directions = 2 if bidirectional else 1
for mode, ngates in zip(rnn_modes, ngates_):
first_layer_size = (input_size * state_size + state_size * state_size + state_size * 2) * ngates
rest_layer_size = (state_size * directions * state_size + state_size * state_size + state_size * 2) \
* ngates * (num_layers - 1)
param_size = (first_layer_size + rest_layer_size) * directions
sym = mx.sym.RNN(mode=mode, num_layers=num_layers, bidirectional=bidirectional,
state_outputs=False, state_size=state_size, name='rnn')

bind_dict = {
'rnn_data': mx.ndarray.random.uniform(low=-1, high=1, shape=(seq_len, batch_size, input_size)),
'rnn_parameters': mx.ndarray.random.uniform(low=-1, high=1, shape=(param_size)),
'rnn_state': mx.ndarray.zeros(shape=(num_layers * directions, batch_size, state_size))
}
if mode == 'lstm':
bind_dict['rnn_state_cell'] = mx.ndarray.zeros(
shape=(num_layers * directions, batch_size, state_size))

ex = sym.bind(default_context(), bind_dict)
ex.forward(is_train=True)
ex01 = ex.output_dict['rnn_output'].asnumpy()
ex.forward(is_train=False)
ex02 = ex.output_dict['rnn_output'].asnumpy()
assert_allclose(ex01, ex02, rtol=1e-2, atol=1e-4)
bind_dict['rnn_parameters'] = mx.ndarray.random.uniform(low=-1, high=1, shape=(param_size))
ex.copy_params_from(bind_dict)
ex.forward(is_train=True)
ex03 = ex.output_dict['rnn_output'].asnumpy()
ex.forward(is_train=False)
ex04 = ex.output_dict['rnn_output'].asnumpy()
assert_allclose(ex03, ex04, rtol=1e-2, atol=1e-4)


@with_seed()
@assert_raises_cudnn_not_satisfied(min_version='5.1.10')
