Skip to content

Commit d1ac1c0

Browse files
authored
[KVCache] Fix the aux data syncing order of paged KV cache (#16988)
Fix the aux data syncing order of paged KV cache
1 parent 4403379 commit d1ac1c0

File tree

1 file changed

+11
-10
lines changed

1 file changed

+11
-10
lines changed

src/runtime/relax_vm/paged_kv_cache.cc

Lines changed: 11 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -1709,24 +1709,28 @@ class PagedAttentionKVCacheObj : public AttentionKVCacheObj {
17091709
// - Reset the copy.
17101710
aux_data_manager_->ResetCopy();
17111711

1712-
// 1. qo_indptr_on_depths
1712+
// 1. q_rope_position_map
1713+
// q_rope_position_map has to be synced first so that it has a 0 byte offset
1714+
ICHECK_EQ(q_rope_position_map_host_.size(), total_append_length);
1715+
q_rope_position_map_view_ = aux_data_manager_->CopyQRoPEPosMapAsync(&q_rope_position_map_host_);
1716+
// 2. qo_indptr_on_depths
17131717
for (int d = 0; d < num_depths_; ++d) {
17141718
qo_indptr_on_depths_view_[d] =
17151719
aux_data_manager_->CopyQOIndptrOnDepthAsync(&qo_indptr_on_depths_host_[d], d);
17161720
}
1717-
// 2. page_indptr_on_depths
1721+
// 3. page_indptr_on_depths
17181722
for (int d = 0; d < num_depths_; ++d) {
17191723
ICHECK_EQ(page_indptr_on_depths_host_[d].size(), qo_indptr_on_depths_host_[d].size());
17201724
page_indptr_on_depths_view_[d] =
17211725
aux_data_manager_->CopyPageIndptrOnDepthAsync(&page_indptr_on_depths_host_[d], d);
17221726
}
1723-
// 3. page_indices_on_depths
1727+
// 4. page_indices_on_depths
17241728
for (int d = 0; d < num_depths_; ++d) {
17251729
ICHECK_EQ(page_indices_on_depths_host_[d].size(), page_indptr_on_depths_host_[d].back());
17261730
page_indices_on_depths_view_[d] =
17271731
aux_data_manager_->CopyPageIndicesOnDepthAsync(&page_indices_on_depths_host_[d], d);
17281732
}
1729-
// 4. length_info_on_depths
1733+
// 5. length_info_on_depths
17301734
// last_page_len_on_depths_host_;
17311735
// sliding_window_offset_on_depths_host_;
17321736
// sink_size_on_depths_host_;
@@ -1746,23 +1750,20 @@ class PagedAttentionKVCacheObj : public AttentionKVCacheObj {
17461750
&sink_size_on_depths_host_[d], d);
17471751
}
17481752
}
1749-
// 5. k_rope_pos_offset_on_depths
1753+
// 6. k_rope_pos_offset_on_depths
17501754
for (int d = 0; d < num_depths_; ++d) {
17511755
ICHECK_EQ(k_rope_pos_offset_on_depths_host_[d].size() + 1,
17521756
qo_indptr_on_depths_host_[d].size());
17531757
k_rope_pos_offset_view_[d] = aux_data_manager_->CopyKRoPEPosOffsetOnDepthAsync(
17541758
&k_rope_pos_offset_on_depths_host_[d], d);
17551759
}
1756-
// 6. cur_append_lengths_indptr
1760+
// 7. cur_append_lengths_indptr
17571761
cur_append_length_indptr_view_ =
17581762
aux_data_manager_->CopyCurAppendLengthIndptrAsync(&cur_append_lengths_indptr_host_);
1759-
// 7. k_ragged_rope_pos_offset
1763+
// 8. k_ragged_rope_pos_offset
17601764
ICHECK_EQ(k_ragged_rope_pos_offset_host_.size(), num_sequences);
17611765
k_ragged_rope_pos_offset_view_ =
17621766
aux_data_manager_->CopyKRaggedRoPEPosOffsetAsync(&k_ragged_rope_pos_offset_host_);
1763-
// 8. q_rope_position_map
1764-
ICHECK_EQ(q_rope_position_map_host_.size(), total_append_length);
1765-
q_rope_position_map_view_ = aux_data_manager_->CopyQRoPEPosMapAsync(&q_rope_position_map_host_);
17661767
// 9. append_position_map
17671768
append_position_map_view_ =
17681769
aux_data_manager_->CopyAppendPositionMapAsync(&append_position_map_host_);

0 commit comments

Comments (0)