From 7da364db460afda3147e1968fd072c08485dba14 Mon Sep 17 00:00:00 2001 From: "jag.Xu" Date: Fri, 17 Jan 2025 14:01:38 +0800 Subject: [PATCH] [GPU] fix memory conflict for multi iteration in loop. (#28487) The cause is shown in the graph below. The black, green, and dotted lines denote the primitive dependency, memory buffer reuse, and memory conflict (at the second iteration), respectively. The body of the loop is compiled as a separate model and is not aware of the data reuse via the backedge across multiple iterations. Tickets: [CVS-158017](https://jira.devtools.intel.com/browse/CVS-158017) --- src/plugins/intel_gpu/src/graph/primitive_inst.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp index 6e1af3f5429283..abfeabe2b6a149 100644 --- a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp +++ b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp @@ -2391,6 +2391,9 @@ memory::ptr primitive_inst::allocate_output(engine& _engine, if (_node.is_in_shape_of_subgraph()) reusable_across_network = false; + if (reusable_across_network && _node.get_program().is_body_program() && is_output_buffer && runtime_alloc) + reusable_across_network = false; + // For outputs, cpu prim we want to have lockable alloc type // Also if the successor of a node is an cpu, then memory needs to be lockable. bool is_cpu = _node.get_selected_impl() ? _node.get_selected_impl()->is_cpu() :