Skip to content

Commit 1e004a6

Browse files
author
Aditya K Kamath
committed
Merge branch 'pod_batched_new' of github.com:AKKamath/flashinfer into pod_batched_new
2 parents bc11239 + 5f1e346 commit 1e004a6

File tree

2 files changed

+2
-7
lines changed

2 files changed

+2
-7
lines changed

benchmarks/bench_mixed_attention.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -110,8 +110,8 @@ def run_bench(
110110
kv_indices_d = torch.arange(0, d_kv_indptr[-1], device=device, dtype=torch.int32)
111111
kv_indices_p = torch.arange(0, p_kv_indptr[-1], device=device, dtype=torch.int32)
112112

113-
last_page_len_d = (d_seq_lens_blocks - 1) % page_block_size + 1
114-
last_page_len_p = (p_seq_lens_blocks - 1) % page_block_size + 1
113+
last_page_len_d = (torch.tensor(d_kv_lens, device=device) - 1) % page_block_size + 1
114+
last_page_len_p = (torch.tensor(p_kv_lens, device=device) - 1) % page_block_size + 1
115115
wrapper_pod = flashinfer.BatchPODWithPagedKVCacheWrapper(
116116
workspace_buffer,
117117
kv_layout=kv_layout,

flashinfer/pod.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1077,12 +1077,7 @@ def run(
10771077
logits_soft_cap_p > 0, # use_logits_soft_cap
10781078
use_fp16_qk_reduction,
10791079
# Decode params
1080-
# q_d.dtype,
1081-
# self._cached_kv_data_type,
1082-
# self._cached_q_data_type,
10831080
self._indptr_type,
1084-
# head_dim, # head_dim_qk
1085-
# head_dim, # head_dim_vo
10861081
PosEncodingMode[pos_encoding_mode_d].value,
10871082
window_left_d != -1, # use_sliding_window
10881083
logits_soft_cap_d > 0, # use_logits_soft_cap

0 commit comments

Comments
 (0)