Skip to content

Commit 0e14999

Browse files
author
LittleMouse
committed
[fix] Fix inference issues caused by memory synchronization
1 parent 7a97143 commit 0e14999

File tree

1 file changed

+15
-2
lines changed

1 file changed

+15
-2
lines changed

projects/llm_framework/main_llm/src/runner/ax_model_runner/ax_model_runner_ax650.cpp

Lines changed: 15 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -432,10 +432,23 @@ void ax_runner_ax650::deinit()
432 432

433 433
int ax_runner_ax650::inference()
434 434
{
435-
return AX_ENGINE_RunSync(m_handle->handle, &m_handle->io_data[0]);
435+
int ret = AX_ENGINE_RunSync(m_handle->handle, &m_handle->io_data[0]);
436+
for (size_t i = 0; i < get_num_outputs(); i++)
437+
{
438+
auto &tensor = get_output(i);
439+
AX_SYS_MinvalidateCache(tensor.phyAddr, tensor.pVirAddr, tensor.nSize);
440+
}
441+
return ret;
436 442
}
437 443

438 444
int ax_runner_ax650::inference(int grpid)
439 445
{
440-
return AX_ENGINE_RunGroupIOSync(m_handle->handle, m_handle->context, grpid, &m_handle->io_data[grpid]);
446+
int ret = AX_ENGINE_RunGroupIOSync(m_handle->handle, m_handle->context, grpid, &m_handle->io_data[grpid]);
447+
448+
for (size_t i = 0; i < get_num_outputs(); i++)
449+
{
450+
auto &tensor = get_output(grpid, i);
451+
AX_SYS_MinvalidateCache(tensor.phyAddr, tensor.pVirAddr, tensor.nSize);
452+
}
453+
return ret;
441 454
}

0 commit comments

Comments
 (0)