Fix ShiftPointer crash on exit when num_iters > 5 (#9150)

billmguo · facebook-github-bot · commit 972e0b0d47a0 · 2025-03-12T20:22:53.000-07:00
Summary:

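In the original implementation, running with num_iters > 5 crashes during exit. To fix this, `ShiftPointerIoMgr::reset_io` now also resets the prefill and KV input positions, clears the cached input/output tensors and KV-cache bookkeeping for both forward methods, and re-runs `prepare_prefill_io` / `prepare_kv_io` for the current eval mode. `reset_io` therefore takes the prefill and KV method metadata as arguments.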

Reviewed By: cccclai

Differential Revision: D70974749
diff --git a/examples/qualcomm/oss_scripts/llama/runner/io_manager.cpp b/examples/qualcomm/oss_scripts/llama/runner/io_manager.cpp
@@ -168,13 +168,53 @@ void ShiftPointerIoMgr::init_io() {
   }
 }
 
-void ShiftPointerIoMgr::reset_io() {
+void ShiftPointerIoMgr::reset_io(
+    const std::vector<executorch::runtime::Result<
+        executorch::runtime::MethodMeta>>& prefill_methods_meta,
+    const std::vector<
+        executorch::runtime::Result<executorch::runtime::MethodMeta>>&
+        kv_methods_meta) {
   IO* ptr = static_cast<IO*>(data_ptr_.get());
+  std::fill(ptr->prefill_input_pos.begin(), ptr->prefill_input_pos.end(), 0);
+  ptr->kv_input_pos = 0;
   std::fill(
       ptr->prefill_attention_mask.begin(),
       ptr->prefill_attention_mask.end(),
       0);
   std::fill(ptr->kv_attention_mask.begin(), ptr->kv_attention_mask.end(), 0);
+
+  input_tensors_[kv_forward_name_].clear();
+  input_tensors_[kv_forward_name_].resize(modules_.size());
+  output_tensors_[kv_forward_name_].clear();
+  output_tensors_[kv_forward_name_].resize(modules_.size());
+
+  k_cache_in_[kv_forward_name_].clear();
+  v_cache_in_[kv_forward_name_].clear();
+  k_cache_out_[kv_forward_name_].clear();
+  v_cache_out_[kv_forward_name_].clear();
+
+  input_tensors_[prefill_forward_name_].clear();
+  input_tensors_[prefill_forward_name_].resize(modules_.size());
+  output_tensors_[prefill_forward_name_].clear();
+  output_tensors_[prefill_forward_name_].resize(modules_.size());
+
+  k_cache_in_[prefill_forward_name_].clear();
+  v_cache_in_[prefill_forward_name_].clear();
+  k_cache_out_[prefill_forward_name_].clear();
+  v_cache_out_[prefill_forward_name_].clear();
+
+  switch (eval_mode_) {
+    case EvalMode::kKVCached:
+      prepare_kv_io(kv_methods_meta);
+      break;
+    case EvalMode::kHybrid:
+      prepare_prefill_io(prefill_methods_meta);
+      prepare_kv_io(kv_methods_meta);
+      break;
+    default:
+      ET_CHECK_MSG(false, "unsupported mode");
+      break;
+  }
 }
 void ShiftPointerIoMgr::prepare_kv_io(
     const std::vector<Result<MethodMeta>>& methods_meta) {
@@ -893,7 +933,12 @@ void SmartMaskIoMgr::init_io() {
   ptr->init_io_ptrs(shared_ptr, io_bytes_map);
 }
 
-void SmartMaskIoMgr::reset_io() {
+void SmartMaskIoMgr::reset_io(
+    const std::vector<executorch::runtime::Result<
+        executorch::runtime::MethodMeta>>& prefill_methods_meta,
+    const std::vector<
+        executorch::runtime::Result<executorch::runtime::MethodMeta>>&
+        kv_methods_meta) {
   IO* ptr = static_cast<IO*>(data_ptr_.get());
   int32_t prefill_attn_size = prefill_ar_len_ * context_len_;
   int32_t kv_attn_size = kv_ar_len_ * context_len_;
diff --git a/examples/qualcomm/oss_scripts/llama/runner/io_manager.h b/examples/qualcomm/oss_scripts/llama/runner/io_manager.h
@@ -33,7 +33,12 @@ class IoMgrBase {
       std::vector<std::shared_ptr<executorch::extension::Module>>& modules);
   virtual ~IoMgrBase();
   virtual void init_io() = 0;
-  virtual void reset_io() = 0;
+  virtual void reset_io(
+      const std::vector<executorch::runtime::Result<
+          executorch::runtime::MethodMeta>>& prefill_methods_meta,
+      const std::vector<
+          executorch::runtime::Result<executorch::runtime::MethodMeta>>&
+          kv_methods_meta) = 0;
   virtual void prepare_prefill_io(
       const std::vector<
           executorch::runtime::Result<executorch::runtime::MethodMeta>>&
@@ -98,7 +103,12 @@ class ShiftPointerIoMgr : public IoMgrBase {
       const bool use_int64_token);
 
   void init_io() override;
-  void reset_io() override;
+  void reset_io(
+      const std::vector<executorch::runtime::Result<
+          executorch::runtime::MethodMeta>>& prefill_methods_meta,
+      const std::vector<
+          executorch::runtime::Result<executorch::runtime::MethodMeta>>&
+          kv_methods_meta) override;
   void prepare_prefill_io(
       const std::vector<
           executorch::runtime::Result<executorch::runtime::MethodMeta>>&
@@ -201,7 +211,12 @@ class SmartMaskIoMgr : public IoMgrBase {
       const bool use_int64_token);
 
   void init_io() override;
-  void reset_io() override;
+  void reset_io(
+      const std::vector<executorch::runtime::Result<
+          executorch::runtime::MethodMeta>>& prefill_methods_meta,
+      const std::vector<
+          executorch::runtime::Result<executorch::runtime::MethodMeta>>&
+          kv_methods_meta) override;
   void prepare_prefill_io(
       const std::vector<
           executorch::runtime::Result<executorch::runtime::MethodMeta>>&
diff --git a/examples/qualcomm/oss_scripts/llama/runner/runner.cpp b/examples/qualcomm/oss_scripts/llama/runner/runner.cpp
@@ -447,7 +447,9 @@ Error Runner::generate(
   if (stats_callback) {
     stats_callback(stats_);
   }
-  io_mgr_->reset_io();
+  io_mgr_->reset_io(
+      get_methods_meta(prefill_forward_name_),
+      get_methods_meta(kv_forward_name_));
   prompt_.clear();
   return Error::Ok;
 }

Original file line number	Diff line number	Diff line change
`@@ -447,7 +447,9 @@ Error Runner::generate(`
`447`	`447`	`if (stats_callback) {`
`448`	`448`	`stats_callback(stats_);`
`449`	`449`	`}`
`450`		`- io_mgr_->reset_io();`
	`450`	`+ io_mgr_->reset_io(`
	`451`	`+ get_methods_meta(prefill_forward_name_),`
	`452`	`+ get_methods_meta(kv_forward_name_));`
`451`	`453`	`prompt_.clear();`
`452`	`454`	`return Error::Ok;`
`453`	`455`	`}`