Skip to content

Commit

Permalink
Scratchpad reused for intermediate repackings
Browse files Browse the repository at this point in the history
  • Loading branch information
v-Golubev committed Nov 11, 2024
1 parent 4724749 commit 56e2acc
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 32 deletions.
50 changes: 26 additions & 24 deletions src/plugins/intel_cpu/src/nodes/subgraph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,8 +82,8 @@ class SubgraphStaticExecutor : public Subgraph::SubgraphExecutor {
const std::vector<ptrdiff_t>& start_offset_out,
const std::shared_ptr<CPURuntimeConfig>& snippet_config,
const BufferScratchpadAllocator& allocator,
const DnnlScratchPadPtr& scratchpad)
: SubgraphExecutor(snippet_attrs, snippet, start_offset_in, start_offset_out, snippet_config, allocator, scratchpad) {}
const dnnl::engine& engine)
: SubgraphExecutor(snippet_attrs, snippet, start_offset_in, start_offset_out, snippet_config, allocator, engine) {}

void exec_impl(const std::vector<MemoryPtr>& inMemPtrs, const std::vector<MemoryPtr>& outMemPtrs) override {
const auto& callable = m_schedule->get_callable<kernel>();
Expand Down Expand Up @@ -126,8 +126,8 @@ class SubgraphDynamicSpecializedExecutor : public Subgraph::SubgraphExecutor {
const std::vector<ptrdiff_t>& start_offset_out,
const std::shared_ptr<CPURuntimeConfig>& snippet_config,
const BufferScratchpadAllocator& allocator,
const DnnlScratchPadPtr& scratchpad)
: SubgraphExecutor(snippet_attrs, snippet, start_offset_in, start_offset_out, snippet_config, allocator, scratchpad) {
const dnnl::engine& engine)
: SubgraphExecutor(snippet_attrs, snippet, start_offset_in, start_offset_out, snippet_config, allocator, engine) {
buffer_offsets = snippet_config->buffer_cluster_offsets;
data_offsets = snippet_config->io_data_offsets;
loop_args = snippet_config->loop_args;
Expand Down Expand Up @@ -798,7 +798,7 @@ void Subgraph::prepareParams() {
start_offset_out,
snippet_config,
allocator,
context->getScratchPad());
getEngine());
} else {
// Static case:
// 1. Update runtime config to get static scheduling data (io data offsets, parallel domain) which will be compiled in JIT code
Expand All @@ -815,7 +815,7 @@ void Subgraph::prepareParams() {
start_offset_out,
snippet_config,
allocator,
context->getScratchPad());
getEngine());
}
};

Expand Down Expand Up @@ -905,8 +905,8 @@ Subgraph::SubgraphExecutor::SubgraphExecutor(const std::shared_ptr<Subgraph::Sub
const std::vector<ptrdiff_t>& start_offset_out,
const std::shared_ptr<CPURuntimeConfig>& snippet_config,
const BufferScratchpadAllocator& allocator,
const DnnlScratchPadPtr& scratchpad)
: m_schedule(snippet->get()), m_start_offset_in(start_offset_in), m_start_offset_out(start_offset_out), m_scratchpad(scratchpad) {
const dnnl::engine& engine)
: m_schedule(snippet->get()), m_start_offset_in(start_offset_in), m_start_offset_out(start_offset_out) {
OPENVINO_ASSERT(m_schedule, "Schedule is empty!");
OPENVINO_ASSERT(snippet_config, "Runtime Config is empty!");
init_parallel_domain(snippet_config, m_parallel_exec_domain);
Expand All @@ -916,12 +916,23 @@ Subgraph::SubgraphExecutor::SubgraphExecutor(const std::shared_ptr<Subgraph::Sub

m_buffer_scratchpad_size = snippet_config->buffer_scratchpad_size;
OPENVINO_ASSERT(!ov::snippets::utils::is_dynamic_value(m_buffer_scratchpad_size), "Undefined buffer scratchpad size!");
m_buffer_scratchpad = allocator(static_cast<size_t>(m_nthreads) * m_buffer_scratchpad_size);

// TODO: the memory already has to be created here, so the adjusted scratch size must be provided to the allocator beforehand
const auto internal_buffer_size = static_cast<size_t>(m_nthreads) * m_buffer_scratchpad_size;
const auto external_repacking_buffer_size =
std::accumulate(snippet_config->m_in_requested_descs.begin(),
snippet_config->m_in_requested_descs.end(),
size_t(0),
[](size_t sum, const std::pair<size_t, ov::intel_cpu::MemoryDescPtr>& requested_desc_elem) {
return sum + requested_desc_elem.second->getCurrentMemSize();
});
m_buffer_scratchpad = allocator(internal_buffer_size + external_repacking_buffer_size);

size_t offset = internal_buffer_size;
for (const auto& desc : snippet_config->m_in_requested_descs) {
const auto& requested_desc = desc.second;
m_in_requested_repackings.emplace(desc.first, RequestedRepacking(requested_desc, nullptr));
const void* data_ptr = m_buffer_scratchpad->getDataAs<uint8_t>() + offset;
m_in_requested_repackings[desc.first] = std::make_shared<Memory>(engine, requested_desc, data_ptr);
offset += requested_desc->getCurrentMemSize();
std::cout << "scratch_mem is created for requested desc " << desc.first << std::endl;
}

#if defined(__linux__) && defined(OPENVINO_ARCH_X86_64) && defined(SNIPPETS_DEBUG_CAPS)
Expand Down Expand Up @@ -1008,18 +1019,9 @@ void Subgraph::SubgraphExecutor::execute(dnnl::stream strm, std::vector<MemoryPt

void Subgraph::SubgraphExecutor::reorder_execute(dnnl::stream strm, std::vector<MemoryPtr> inMemPtrs, const std::vector<MemoryPtr>& outMemPtrs) {
for (auto& requested_repacking : m_in_requested_repackings) {
const auto& requested_desc = requested_repacking.second.requested_desc;
auto& scratch_mem = requested_repacking.second.scratch_mem;
if (requested_desc) {
if (!scratch_mem || !scratch_mem->getDesc().isCompatible(*requested_desc)) {
// TODO: move to prepareParams and investigate why the repacking is called on each iteration
// scratch_mem = m_scratchpad->createScratchPadMem(requested_desc);
scratch_mem = std::make_shared<Memory>(strm.get_engine(), requested_desc);
std::cout << "scratch_mem is created for requested desc " << requested_repacking.first << std::endl;
}
scratch_mem->load(*inMemPtrs[requested_repacking.first]);
inMemPtrs[requested_repacking.first] = scratch_mem;
}
const auto& scratch_mem = requested_repacking.second;
scratch_mem->load(*inMemPtrs[requested_repacking.first]);
inMemPtrs[requested_repacking.first] = scratch_mem;
}
exec_impl(inMemPtrs, outMemPtrs);
}
Expand Down
10 changes: 2 additions & 8 deletions src/plugins/intel_cpu/src/nodes/subgraph.h
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ class Subgraph::SubgraphExecutor {
const std::vector<ptrdiff_t>& start_offset_out,
const std::shared_ptr<CPURuntimeConfig>& snippet_config,
const BufferScratchpadAllocator& allocator,
const DnnlScratchPadPtr& scratchpad);
const dnnl::engine& engine);
virtual ~SubgraphExecutor() = default;

void execute(dnnl::stream strm, std::vector<MemoryPtr>& inMemPtrs, std::vector<MemoryPtr>& outMemPtrs);
Expand Down Expand Up @@ -171,13 +171,7 @@ class Subgraph::SubgraphExecutor {
private:
void reorder_execute(dnnl::stream strm, std::vector<MemoryPtr> inMemPtrs, const std::vector<MemoryPtr>& outMemPtrs);

struct RequestedRepacking {
RequestedRepacking(MemoryDescPtr desc, MemoryPtr memory) : requested_desc(desc), scratch_mem(memory) {}
MemoryDescPtr requested_desc = {};
MemoryPtr scratch_mem = {};
};
std::unordered_map<size_t, RequestedRepacking> m_in_requested_repackings = {};
DnnlScratchPadPtr m_scratchpad = {};
std::unordered_map<size_t, MemoryPtr> m_in_requested_repackings = {};
};

} // namespace node
Expand Down

0 comments on commit 56e2acc

Please sign in to comment.