benchdnn: remove gpu2cpu reorder kind from service reorders
This is done in favor of validating cross-engine reorders separately, and
to stop hitting issues where the gpu2cpu internal scratchpad occupies most
of the device memory, resulting in OOM.
dzarukin authored and karturov committed Dec 14, 2023
1 parent 16720ea commit 84a8f57
Showing 2 changed files with 2 additions and 19 deletions.
12 changes: 1 addition & 11 deletions tests/benchdnn/dnnl_common.cpp
@@ -1081,17 +1081,7 @@ void add_md_size(const_dnnl_memory_desc_t md,
         // Correctness pass allocates additional tag::abx f32 memory.
         const bool compare_mem_factor = !check_mem_size_args.want_input
                 && check_mem_size_args.add_ref_size;
-        const auto compare_mem_size = compare_mem_factor * ref_md_size;
-        check_mem_size_args.total_size_cpu += compare_mem_size;
-        // In the `compare_norm(...)` method a service reorder introduced by
-        // `dnn_mem_t got_f32(...)` requires additional memory that should be
-        // taken into account ahead of time. For GPU engine a gpu2cpu reorder
-        // will be called which allocates additional internal scratchpad most of
-        // times to handle reorder, and then perform device2host copy.
-        // Scratchpad is a full copy of f32 destination memory allocated on GPU.
-        if (is_gpu() && compare_mem_size > 0) {
-            check_mem_size_args.sizes.push_back(compare_mem_size);
-        }
+        check_mem_size_args.total_size_cpu += compare_mem_factor * ref_md_size;
     }
 }

9 changes: 1 addition & 8 deletions tests/benchdnn/dnnl_memory.cpp
@@ -116,18 +116,11 @@ int execute_reorder(const dnn_mem_t &src, dnn_mem_t &dst,
     // succeeded, then create CPU memory objects wrapping the mapped pointers
     // of source and destination and execute a CPU reorder. If a CPU reorder
     // can't be created, then just execute a regular GPU reorder.
-    //
-    // This optimization is skipped when testing reorder, sum and concat
-    // primitives because they are used specifically to test GPU reorders.
 #if ((DNNL_GPU_RUNTIME == DNNL_RUNTIME_OCL) \
         || (DNNL_GPU_RUNTIME == DNNL_RUNTIME_SYCL)) \
         && DNNL_CPU_RUNTIME != DNNL_RUNTIME_NONE
-    bool is_reorder_related_driver = (driver_name == "reorder"
-            || driver_name == "sum" || driver_name == "concat");
     const auto &cpu_engine = get_cpu_engine();
-    if (!is_reorder_related_driver
-            && (src.engine_kind() == dnnl_gpu
-                    || dst.engine_kind() == dnnl_gpu)) {
+    if (src.engine_kind() == dnnl_gpu || dst.engine_kind() == dnnl_gpu) {

         dnnl_status_t status = dnnl_reorder_primitive_desc_create(
                 &r_pd_, src.md_, cpu_engine, dst.md_, cpu_engine, attr);
