[BYOC] Fix incorrect conv2d padding handling of `dnnl with c source runtime` (apache#9097)

Co-authored-by: sunway <[email protected]>
2 people authored and ylc committed Sep 29, 2021
1 parent 715a6d7 commit c5db43d
Showing 5 changed files with 66 additions and 25 deletions.
src/relay/backend/contrib/dnnl/codegen.cc (4 changes: 3 additions & 1 deletion)
@@ -67,11 +67,13 @@ std::vector<std::string> Conv2d(const CallNode* call) {
args.push_back(std::to_string(s));
}

// Args: O, G, Ph, Pw, Kh, Kw, Sh, Sw
// Args: O, G, Ph0, Pw0, Ph1, Pw1, Kh, Kw, Sh, Sw
args.push_back(std::to_string(wshape[0]));
args.push_back(std::to_string(conv2d_attr->groups));
args.push_back(std::to_string(conv2d_attr->padding[0].as<IntImmNode>()->value));
args.push_back(std::to_string(conv2d_attr->padding[1].as<IntImmNode>()->value));
args.push_back(std::to_string(conv2d_attr->padding[2].as<IntImmNode>()->value));
args.push_back(std::to_string(conv2d_attr->padding[3].as<IntImmNode>()->value));
args.push_back(std::to_string(wshape[2]));
args.push_back(std::to_string(wshape[3]));
args.push_back(std::to_string(conv2d_attr->strides[0].as<IntImmNode>()->value));
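
For reference, Relay lays out a 4-element conv2d padding attribute as (top, left, bottom, right), so the values emitted above line up as Ph0 = pad top, Pw0 = pad left, Ph1 = pad bottom, Pw1 = pad right. A minimal sketch of the resulting argument order for the test case added below (padding=(1, 0, 1, 1), 54 output channels, one group, 3x3 kernel, stride 2); the variable names are illustrative, not the codegen's own:

padding = (1, 0, 1, 1)            # Relay order: (top, left, bottom, right)
args = [
    54, 1,                        # O (output channels), G (groups)
    padding[0], padding[1],       # Ph0 (top), Pw0 (left)
    padding[2], padding[3],       # Ph1 (bottom), Pw1 (right)
    3, 3,                         # Kh, Kw
    2, 2,                         # Sh, Sw
]
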
src/runtime/contrib/dnnl/dnnl.cc (37 changes: 21 additions & 16 deletions)
@@ -53,8 +53,9 @@ inline void read_from_dnnl_memory(void* handle, const memory& mem) {
}

void dnnl_conv2d_common(float* data, float* weights, float* bias, float* out, int p_N_, int p_C_,
int p_H_, int p_W_, int p_O_, int p_G_, int p_Ph_, int p_Pw_, int p_Kh_,
int p_Kw_, int p_Sh_, int p_Sw_, primitive_attr attr) {
int p_H_, int p_W_, int p_O_, int p_G_, int p_Ph0_, int p_Pw0_, int p_Ph1_,
int p_Pw1_, int p_Kh_, int p_Kw_, int p_Sh_, int p_Sw_,
primitive_attr attr) {
using tag = memory::format_tag;
using dt = memory::data_type;
engine eng(engine::kind::cpu, 0);
@@ -64,10 +65,11 @@ void dnnl_conv2d_common(float* data, float* weights, float* bias, float* out, in
memory::dims conv2d_weights_tz = {p_O_, p_C_, p_Kh_, p_Kw_};
if (p_G_ > 1) conv2d_weights_tz = {p_G_, 1, p_C_ / p_G_, p_Kh_, p_Kw_};
memory::dims conv2d_bias_tz = {p_O_};
memory::dims conv2d_dst_tz = {p_N_, p_O_, (p_H_ - p_Kh_ + 2 * p_Ph_ + p_Sh_) / p_Sh_,
(p_W_ - p_Kw_ + 2 * p_Pw_ + p_Sw_) / p_Sw_};
memory::dims conv2d_dst_tz = {p_N_, p_O_, (p_H_ - p_Kh_ + p_Ph0_ + p_Ph1_ + p_Sh_) / p_Sh_,
(p_W_ - p_Kw_ + p_Pw0_ + p_Pw1_ + p_Sw_) / p_Sw_};
memory::dims conv2d_strides = {p_Sh_, p_Sw_};
memory::dims conv2d_padding = {p_Ph_, p_Pw_};
memory::dims conv2d_padding0 = {p_Ph0_, p_Pw0_};
memory::dims conv2d_padding1 = {p_Ph1_, p_Pw1_};

auto user_src_memory = memory({{conv2d_src_tz}, dt::f32, tag::nchw}, eng, data);
auto user_weights_memory =
@@ -81,7 +83,7 @@ void dnnl_conv2d_common(float* data, float* weights, float* bias, float* out, in

auto conv2d_desc = convolution_forward::desc(
prop_kind::forward_inference, algorithm::convolution_direct, conv2d_src_md, conv2d_weights_md,
conv2d_bias_md, conv2d_dst_md, conv2d_strides, conv2d_padding, conv2d_padding);
conv2d_bias_md, conv2d_dst_md, conv2d_strides, conv2d_padding0, conv2d_padding1);
auto conv2d_prim_desc = convolution_forward::primitive_desc(conv2d_desc, attr, eng);

auto conv2d_src_memory = user_src_memory;
@@ -98,12 +100,12 @@ void dnnl_conv2d_common(float* data, float* weights, float* bias, float* out, in
}

extern "C" void dnnl_conv2d(float* data, float* weights, float* out, int p_N_, int p_C_, int p_H_,
int p_W_, int p_O_, int p_G_, int p_Ph_, int p_Pw_, int p_Kh_,
int p_Kw_, int p_Sh_, int p_Sw_) {
int p_W_, int p_O_, int p_G_, int p_Ph0_, int p_Pw0_, int p_Ph1_,
int p_Pw1_, int p_Kh_, int p_Kw_, int p_Sh_, int p_Sw_) {
primitive_attr attr;
std::vector<float> bias(p_O_, 0);
return dnnl_conv2d_common(data, weights, bias.data(), out, p_N_, p_C_, p_H_, p_W_, p_O_, p_G_,
p_Ph_, p_Pw_, p_Kh_, p_Kw_, p_Sh_, p_Sw_, attr);
p_Ph0_, p_Pw0_, p_Ph1_, p_Pw1_, p_Kh_, p_Kw_, p_Sh_, p_Sw_, attr);
}

primitive_attr create_attr_with_relu_post_op() {
@@ -117,20 +119,23 @@ primitive_attr create_attr_with_relu_post_op() {
}

extern "C" void dnnl_fused_conv2d_relu(float* data, float* weights, float* out, int p_N_, int p_C_,
int p_H_, int p_W_, int p_O_, int p_G_, int p_Ph_, int p_Pw_,
int p_Kh_, int p_Kw_, int p_Sh_, int p_Sw_) {
int p_H_, int p_W_, int p_O_, int p_G_, int p_Ph0_,
int p_Pw0_, int p_Ph1_, int p_Pw1_, int p_Kh_, int p_Kw_,
int p_Sh_, int p_Sw_) {
std::vector<float> bias(p_O_, 0);
return dnnl_conv2d_common(data, weights, bias.data(), out, p_N_, p_C_, p_H_, p_W_, p_O_, p_G_,
p_Ph_, p_Pw_, p_Kh_, p_Kw_, p_Sh_, p_Sw_,
p_Ph0_, p_Pw0_, p_Ph1_, p_Pw1_, p_Kh_, p_Kw_, p_Sh_, p_Sw_,
create_attr_with_relu_post_op());
}

extern "C" void dnnl_fused_conv2d_bias_relu(float* data, float* weights, float* bias, float* out,
int p_N_, int p_C_, int p_H_, int p_W_, int p_O_,
int p_G_, int p_Ph_, int p_Pw_, int p_Kh_, int p_Kw_,
int p_Sh_, int p_Sw_) {
return dnnl_conv2d_common(data, weights, bias, out, p_N_, p_C_, p_H_, p_W_, p_O_, p_G_, p_Ph_,
p_Pw_, p_Kh_, p_Kw_, p_Sh_, p_Sw_, create_attr_with_relu_post_op());
int p_G_, int p_Ph0_, int p_Pw0_, int p_Ph1_,
int p_Pw1_, int p_Kh_, int p_Kw_, int p_Sh_,
int p_Sw_) {
return dnnl_conv2d_common(data, weights, bias, out, p_N_, p_C_, p_H_, p_W_, p_O_, p_G_, p_Ph0_,
p_Pw0_, p_Ph1_, p_Pw1_, p_Kh_, p_Kw_, p_Sh_, p_Sw_,
create_attr_with_relu_post_op());
}

extern "C" void dnnl_dense(float* data, float* weight, float* out, int p_B_, int p_I_, int p_O_) {
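
Two things changed above: the destination shape now adds the leading and trailing padding of each spatial axis separately, and the two padding dims are passed to oneDNN's convolution_forward::desc as distinct leading/trailing padding arguments instead of reusing one symmetric value. A quick sanity check of the output-size arithmetic, as a standalone Python sketch using the shapes from the test added below:

def conv_out_dim(in_dim, kernel, pad_begin, pad_end, stride):
    # Same arithmetic as the conv2d_dst_tz computation above.
    return (in_dim - kernel + pad_begin + pad_end + stride) // stride

# data (1, 1, 99, 12), 3x3 kernel, stride 2, padding (top, left, bottom, right) = (1, 0, 1, 1)
assert conv_out_dim(99, 3, 1, 1, 2) == 50   # height: (99 - 3 + 1 + 1 + 2) // 2
assert conv_out_dim(12, 3, 0, 1, 2) == 6    # width:  (12 - 3 + 0 + 1 + 2) // 2
# Matches the (1, 54, 50, 6) output shape checked by the new test. The old code computed
# (H - Kh + 2 * Ph + Sh) / Sh, which is only correct when both sides are padded equally.
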
src/runtime/contrib/dnnl/dnnl_kernel.h (15 changes: 8 additions & 7 deletions)
@@ -36,19 +36,20 @@ namespace contrib {
using namespace dnnl;

extern "C" TVM_DLL void dnnl_conv2d(float* data, float* weights, float* out, int p_N_, int p_C_,
int p_H_, int p_W_, int p_O_, int p_G_, int p_Ph_, int p_Pw_,
int p_Kh_, int p_Kw_, int p_Sh_, int p_Sw_);
int p_H_, int p_W_, int p_O_, int p_G_, int p_Ph0_, int p_Pw0_,
int p_Ph1_, int p_Pw1_, int p_Kh_, int p_Kw_, int p_Sh_,
int p_Sw_);

extern "C" TVM_DLL void dnnl_fused_conv2d_relu(float* data, float* weights, float* out, int p_N_,
int p_C_, int p_H_, int p_W_, int p_O_, int p_G_,
int p_Ph_, int p_Pw_, int p_Kh_, int p_Kw_,
int p_Sh_, int p_Sw_);
int p_Ph0_, int p_Pw0_, int p_Ph1_, int p_Pw1_,
int p_Kh_, int p_Kw_, int p_Sh_, int p_Sw_);

extern "C" TVM_DLL void dnnl_fused_conv2d_bias_relu(float* data, float* weights, float* bias,
float* out, int p_N_, int p_C_, int p_H_,
int p_W_, int p_O_, int p_G_, int p_Ph_,
int p_Pw_, int p_Kh_, int p_Kw_, int p_Sh_,
int p_Sw_);
int p_W_, int p_O_, int p_G_, int p_Ph0_,
int p_Pw0_, int p_Ph1_, int p_Pw1_, int p_Kh_,
int p_Kw_, int p_Sh_, int p_Sw_);

extern "C" TVM_DLL void dnnl_dense(float* data, float* weight, float* out, int p_B_, int p_I_,
int p_O_);
tests/python/relay/test_external_codegen.py (33 changes: 33 additions & 0 deletions)
@@ -213,6 +213,39 @@ def constant_updater(expr, symbol):
tvm._ffi.registry.remove_global_func("relay.ext.ccompiler.constant_updater")


@pytest.mark.skipif(
    not tvm.get_global_func("relay.ext.dnnl", True),
    reason="skip because DNNL codegen is not available",
)
@parametrize_external_json_codegen_checks
def test_extern_dnnl_padding(check_result):
    dtype = "float32"
    ishape = (1, 1, 99, 12)
    w1shape = (54, 1, 3, 3)
    data0 = relay.var("data0", shape=(ishape), dtype=dtype)
    weight0 = relay.var("weight0", shape=(w1shape), dtype=dtype)
    out = relay.nn.conv2d(data0, weight0, kernel_size=(3, 3), strides=(2, 2), padding=(1, 0, 1, 1))
    f = relay.Function([data0, weight0], out)
    ref_mod = tvm.IRModule()
    ref_mod["main"] = f

    data1 = relay.var("data0", shape=(ishape), dtype=dtype)
    weight1 = relay.var("weight0", shape=(w1shape), dtype=dtype)
    f = set_external_func_attr(f, "dnnl", "dnnl_0")
    call = relay.Call(f, [data1, weight1])
    mod = tvm.IRModule.from_expr(call)

    i_data = np.random.uniform(0, 1, ishape).astype(dtype)
    w_data = np.random.uniform(0, 1, w1shape).astype(dtype)

    ref_res = relay.create_executor("graph", mod=ref_mod, device=tvm.cpu()).evaluate()(
        i_data, w_data
    )
    check_result(
        mod, {"data0": i_data, "weight0": w_data}, (1, 54, 50, 6), ref_res.numpy(), tol=1e-5
    )

@pytest.mark.skipif(
    not tvm.get_global_func("relay.ext.dnnl", True),
    reason="skip because DNNL codegen is not available",
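
The expected (1, 54, 50, 6) shape used by the test can also be cross-checked against Relay's own type inference, independent of the DNNL runtime. A small standalone sketch, assuming the same shapes and attributes as the test above:

import tvm
from tvm import relay

data = relay.var("data", shape=(1, 1, 99, 12), dtype="float32")
weight = relay.var("weight", shape=(54, 1, 3, 3), dtype="float32")
conv = relay.nn.conv2d(data, weight, kernel_size=(3, 3), strides=(2, 2), padding=(1, 0, 1, 1))
mod = tvm.IRModule.from_expr(relay.Function([data, weight], conv))
mod = relay.transform.InferType()(mod)
print(mod["main"].body.checked_type)  # Tensor[(1, 54, 50, 6), float32]
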
tests/python/relay/utils/external_codegen.py (2 changes: 1 addition & 1 deletion)
@@ -59,7 +59,7 @@ def parametrize_external_json_codegen_checks(test):

def update_lib(lib):
test_dir = os.path.dirname(os.path.realpath(os.path.expanduser(__file__)))
source_dir = os.path.join(test_dir, "..", "..", "..")
source_dir = os.path.join(test_dir, "..", "..", "..", "..")
contrib_path = os.path.join(source_dir, "src", "runtime", "contrib")

kwargs = {}
