[BYOC] Fix incorrect conv2d padding handling of `dnnl with c source runtime` (apache#9097)

Co-authored-by: sunway <[email protected]>
2 people authored and ylc committed Sep 29, 2021
1 parent 715a6d7 commit c5db43d
Showing 5 changed files with 66 additions and 25 deletions.
src/relay/backend/contrib/dnnl/codegen.cc (4 changes: 3 additions & 1 deletion)
@@ -67,11 +67,13 @@ std::vector<std::string> Conv2d(const CallNode* call) {
args.push_back(std::to_string(s));
}

// Args: O, G, Ph, Pw, Kh, Kw, Sh, Sw
// Args: O, G, Ph0, Pw0, Ph1, Pw1, Kh, Kw, Sh, Sw
args.push_back(std::to_string(wshape[0]));
args.push_back(std::to_string(conv2d_attr->groups));
args.push_back(std::to_string(conv2d_attr->padding[0].as<IntImmNode>()->value));
args.push_back(std::to_string(conv2d_attr->padding[1].as<IntImmNode>()->value));
args.push_back(std::to_string(conv2d_attr->padding[2].as<IntImmNode>()->value));
args.push_back(std::to_string(conv2d_attr->padding[3].as<IntImmNode>()->value));
args.push_back(std::to_string(wshape[2]));
args.push_back(std::to_string(wshape[3]));
args.push_back(std::to_string(conv2d_attr->strides[0].as<IntImmNode>()->value));
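
For reference, Relay lays out a 4-element conv2d padding attribute as (top, left, bottom, right), so the values emitted above line up as Ph0 = pad top, Pw0 = pad left, Ph1 = pad bottom, Pw1 = pad right. A minimal sketch of the resulting argument order for the test case added below (padding=(1, 0, 1, 1), 54 output channels, one group, 3x3 kernel, stride 2); the variable names are illustrative, not the codegen's own:

padding = (1, 0, 1, 1)            # Relay order: (top, left, bottom, right)
args = [
    54, 1,                        # O (output channels), G (groups)
    padding[0], padding[1],       # Ph0 (top), Pw0 (left)
    padding[2], padding[3],       # Ph1 (bottom), Pw1 (right)
    3, 3,                         # Kh, Kw
    2, 2,                         # Sh, Sw
]
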
src/runtime/contrib/dnnl/dnnl.cc (37 changes: 21 additions & 16 deletions)
@@ -53,8 +53,9 @@ inline void read_from_dnnl_memory(void* handle, const memory& mem) {
}

void dnnl_conv2d_common(float* data, float* weights, float* bias, float* out, int p_N_, int p_C_,
int p_H_, int p_W_, int p_O_, int p_G_, int p_Ph_, int p_Pw_, int p_Kh_,
int p_Kw_, int p_Sh_, int p_Sw_, primitive_attr attr) {
int p_H_, int p_W_, int p_O_, int p_G_, int p_Ph0_, int p_Pw0_, int p_Ph1_,
int p_Pw1_, int p_Kh_, int p_Kw_, int p_Sh_, int p_Sw_,
primitive_attr attr) {
using tag = memory::format_tag;
using dt = memory::data_type;
engine eng(engine::kind::cpu, 0);
@@ -64,10 +65,11 @@ void dnnl_conv2d_common(float* data, float* weights, float* bias, float* out, in
memory::dims conv2d_weights_tz = {p_O_, p_C_, p_Kh_, p_Kw_};
if (p_G_ > 1) conv2d_weights_tz = {p_G_, 1, p_C_ / p_G_, p_Kh_, p_Kw_};
memory::dims conv2d_bias_tz = {p_O_};
memory::dims conv2d_dst_tz = {p_N_, p_O_, (p_H_ - p_Kh_ + 2 * p_Ph_ + p_Sh_) / p_Sh_,
(p_W_ - p_Kw_ + 2 * p_Pw_ + p_Sw_) / p_Sw_};
memory::dims conv2d_dst_tz = {p_N_, p_O_, (p_H_ - p_Kh_ + p_Ph0_ + p_Ph1_ + p_Sh_) / p_Sh_,
(p_W_ - p_Kw_ + p_Pw0_ + p_Pw1_ + p_Sw_) / p_Sw_};
memory::dims conv2d_strides = {p_Sh_, p_Sw_};
memory::dims conv2d_padding = {p_Ph_, p_Pw_};
memory::dims conv2d_padding0 = {p_Ph0_, p_Pw0_};
memory::dims conv2d_padding1 = {p_Ph1_, p_Pw1_};

auto user_src_memory = memory({{conv2d_src_tz}, dt::f32, tag::nchw}, eng, data);
auto user_weights_memory =
@@ -81,7 +83,7 @@ void dnnl_conv2d_common(float* data, float* weights, float* bias, float* out, in

auto conv2d_desc = convolution_forward::desc(
prop_kind::forward_inference, algorithm::convolution_direct, conv2d_src_md, conv2d_weights_md,
conv2d_bias_md, conv2d_dst_md, conv2d_strides, conv2d_padding, conv2d_padding);
conv2d_bias_md, conv2d_dst_md, conv2d_strides, conv2d_padding0, conv2d_padding1);
auto conv2d_prim_desc = convolution_forward::primitive_desc(conv2d_desc, attr, eng);

auto conv2d_src_memory = user_src_memory;
@@ -98,12 +100,12 @@ void dnnl_conv2d_common(float* data, float* weights, float* bias, float* out, in
}

extern "C" void dnnl_conv2d(float* data, float* weights, float* out, int p_N_, int p_C_, int p_H_,
int p_W_, int p_O_, int p_G_, int p_Ph_, int p_Pw_, int p_Kh_,
int p_Kw_, int p_Sh_, int p_Sw_) {
int p_W_, int p_O_, int p_G_, int p_Ph0_, int p_Pw0_, int p_Ph1_,
int p_Pw1_, int p_Kh_, int p_Kw_, int p_Sh_, int p_Sw_) {
primitive_attr attr;
std::vector<float> bias(p_O_, 0);
return dnnl_conv2d_common(data, weights, bias.data(), out, p_N_, p_C_, p_H_, p_W_, p_O_, p_G_,
p_Ph_, p_Pw_, p_Kh_, p_Kw_, p_Sh_, p_Sw_, attr);
p_Ph0_, p_Pw0_, p_Ph1_, p_Pw1_, p_Kh_, p_Kw_, p_Sh_, p_Sw_, attr);
}

primitive_attr create_attr_with_relu_post_op() {
@@ -117,20 +119,23 @@ primitive_attr create_attr_with_relu_post_op() {
}

extern "C" void dnnl_fused_conv2d_relu(float* data, float* weights, float* out, int p_N_, int p_C_,
int p_H_, int p_W_, int p_O_, int p_G_, int p_Ph_, int p_Pw_,
int p_Kh_, int p_Kw_, int p_Sh_, int p_Sw_) {
int p_H_, int p_W_, int p_O_, int p_G_, int p_Ph0_,
int p_Pw0_, int p_Ph1_, int p_Pw1_, int p_Kh_, int p_Kw_,
int p_Sh_, int p_Sw_) {
std::vector<float> bias(p_O_, 0);
return dnnl_conv2d_common(data, weights, bias.data(), out, p_N_, p_C_, p_H_, p_W_, p_O_, p_G_,
p_Ph_, p_Pw_, p_Kh_, p_Kw_, p_Sh_, p_Sw_,
p_Ph0_, p_Pw0_, p_Ph1_, p_Pw1_, p_Kh_, p_Kw_, p_Sh_, p_Sw_,
create_attr_with_relu_post_op());
}

extern "C" void dnnl_fused_conv2d_bias_relu(float* data, float* weights, float* bias, float* out,
int p_N_, int p_C_, int p_H_, int p_W_, int p_O_,
int p_G_, int p_Ph_, int p_Pw_, int p_Kh_, int p_Kw_,
int p_Sh_, int p_Sw_) {
return dnnl_conv2d_common(data, weights, bias, out, p_N_, p_C_, p_H_, p_W_, p_O_, p_G_, p_Ph_,
p_Pw_, p_Kh_, p_Kw_, p_Sh_, p_Sw_, create_attr_with_relu_post_op());
int p_G_, int p_Ph0_, int p_Pw0_, int p_Ph1_,
int p_Pw1_, int p_Kh_, int p_Kw_, int p_Sh_,
int p_Sw_) {
return dnnl_conv2d_common(data, weights, bias, out, p_N_, p_C_, p_H_, p_W_, p_O_, p_G_, p_Ph0_,
p_Pw0_, p_Ph1_, p_Pw1_, p_Kh_, p_Kw_, p_Sh_, p_Sw_,
create_attr_with_relu_post_op());
}

extern "C" void dnnl_dense(float* data, float* weight, float* out, int p_B_, int p_I_, int p_O_) {
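
Two things changed above: the destination shape now adds the leading and trailing padding of each spatial axis separately, and the two padding dims are passed to oneDNN's convolution_forward::desc as distinct leading/trailing padding arguments instead of reusing one symmetric value. A quick sanity check of the output-size arithmetic, as a standalone Python sketch using the shapes from the test added below:

def conv_out_dim(in_dim, kernel, pad_begin, pad_end, stride):
    # Same arithmetic as the conv2d_dst_tz computation above.
    return (in_dim - kernel + pad_begin + pad_end + stride) // stride

# data (1, 1, 99, 12), 3x3 kernel, stride 2, padding (top, left, bottom, right) = (1, 0, 1, 1)
assert conv_out_dim(99, 3, 1, 1, 2) == 50   # height: (99 - 3 + 1 + 1 + 2) // 2
assert conv_out_dim(12, 3, 0, 1, 2) == 6    # width:  (12 - 3 + 0 + 1 + 2) // 2
# Matches the (1, 54, 50, 6) output shape checked by the new test. The old code computed
# (H - Kh + 2 * Ph + Sh) / Sh, which is only correct when both sides are padded equally.
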
src/runtime/contrib/dnnl/dnnl_kernel.h (15 changes: 8 additions & 7 deletions)
@@ -36,19 +36,20 @@ namespace contrib {
using namespace dnnl;

extern "C" TVM_DLL void dnnl_conv2d(float* data, float* weights, float* out, int p_N_, int p_C_,
int p_H_, int p_W_, int p_O_, int p_G_, int p_Ph_, int p_Pw_,
int p_Kh_, int p_Kw_, int p_Sh_, int p_Sw_);
int p_H_, int p_W_, int p_O_, int p_G_, int p_Ph0_, int p_Pw0_,
int p_Ph1_, int p_Pw1_, int p_Kh_, int p_Kw_, int p_Sh_,
int p_Sw_);

extern "C" TVM_DLL void dnnl_fused_conv2d_relu(float* data, float* weights, float* out, int p_N_,
int p_C_, int p_H_, int p_W_, int p_O_, int p_G_,
int p_Ph_, int p_Pw_, int p_Kh_, int p_Kw_,
int p_Sh_, int p_Sw_);
int p_Ph0_, int p_Pw0_, int p_Ph1_, int p_Pw1_,
int p_Kh_, int p_Kw_, int p_Sh_, int p_Sw_);

extern "C" TVM_DLL void dnnl_fused_conv2d_bias_relu(float* data, float* weights, float* bias,
float* out, int p_N_, int p_C_, int p_H_,
int p_W_, int p_O_, int p_G_, int p_Ph_,
int p_Pw_, int p_Kh_, int p_Kw_, int p_Sh_,
int p_Sw_);
int p_W_, int p_O_, int p_G_, int p_Ph0_,
int p_Pw0_, int p_Ph1_, int p_Pw1_, int p_Kh_,
int p_Kw_, int p_Sh_, int p_Sw_);

extern "C" TVM_DLL void dnnl_dense(float* data, float* weight, float* out, int p_B_, int p_I_,
int p_O_);
tests/python/relay/test_external_codegen.py (33 changes: 33 additions & 0 deletions)
@@ -213,6 +213,39 @@ def constant_updater(expr, symbol):
tvm._ffi.registry.remove_global_func("relay.ext.ccompiler.constant_updater")


@pytest.mark.skipif(
    not tvm.get_global_func("relay.ext.dnnl", True),
    reason="skip because DNNL codegen is not available",
)
@parametrize_external_json_codegen_checks
def test_extern_dnnl_padding(check_result):
    dtype = "float32"
    ishape = (1, 1, 99, 12)
    w1shape = (54, 1, 3, 3)
    data0 = relay.var("data0", shape=(ishape), dtype=dtype)
    weight0 = relay.var("weight0", shape=(w1shape), dtype=dtype)
    out = relay.nn.conv2d(data0, weight0, kernel_size=(3, 3), strides=(2, 2), padding=(1, 0, 1, 1))
    f = relay.Function([data0, weight0], out)
    ref_mod = tvm.IRModule()
    ref_mod["main"] = f

    data1 = relay.var("data0", shape=(ishape), dtype=dtype)
    weight1 = relay.var("weight0", shape=(w1shape), dtype=dtype)
    f = set_external_func_attr(f, "dnnl", "dnnl_0")
    call = relay.Call(f, [data1, weight1])
    mod = tvm.IRModule.from_expr(call)

    i_data = np.random.uniform(0, 1, ishape).astype(dtype)
    w_data = np.random.uniform(0, 1, w1shape).astype(dtype)

    ref_res = relay.create_executor("graph", mod=ref_mod, device=tvm.cpu()).evaluate()(
        i_data, w_data
    )
    check_result(
        mod, {"data0": i_data, "weight0": w_data}, (1, 54, 50, 6), ref_res.numpy(), tol=1e-5
    )

@pytest.mark.skipif(
    not tvm.get_global_func("relay.ext.dnnl", True),
    reason="skip because DNNL codegen is not available",
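
The expected (1, 54, 50, 6) shape used by the test can also be cross-checked against Relay's own type inference, independent of the DNNL runtime. A small standalone sketch, assuming the same shapes and attributes as the test above:

import tvm
from tvm import relay

data = relay.var("data", shape=(1, 1, 99, 12), dtype="float32")
weight = relay.var("weight", shape=(54, 1, 3, 3), dtype="float32")
conv = relay.nn.conv2d(data, weight, kernel_size=(3, 3), strides=(2, 2), padding=(1, 0, 1, 1))
mod = tvm.IRModule.from_expr(relay.Function([data, weight], conv))
mod = relay.transform.InferType()(mod)
print(mod["main"].body.checked_type)  # Tensor[(1, 54, 50, 6), float32]
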
tests/python/relay/utils/external_codegen.py (2 changes: 1 addition & 1 deletion)
@@ -59,7 +59,7 @@ def parametrize_external_json_codegen_checks(test):

def update_lib(lib):
test_dir = os.path.dirname(os.path.realpath(os.path.expanduser(__file__)))
source_dir = os.path.join(test_dir, "..", "..", "..")
source_dir = os.path.join(test_dir, "..", "..", "..", "..")
contrib_path = os.path.join(source_dir, "src", "runtime", "contrib")

kwargs = {}
