diff --git a/vllm/v1/attention/ops/triton_turboquant_decode.py b/vllm/v1/attention/ops/triton_turboquant_decode.py
index a789f9be7bb2..33591d6961ab 100644
--- a/vllm/v1/attention/ops/triton_turboquant_decode.py
+++ b/vllm/v1/attention/ops/triton_turboquant_decode.py
@@ -139,8 +139,12 @@ def _tq_decode_stage1(
 
         page_idx = kv_offs // BLOCK_SIZE
         page_off = kv_offs % BLOCK_SIZE
+        # Clamp masked-out (OOB) lanes to page index 0 before pointer arithmetic
+        # so Triton's bounds checker does not fire on them: the load mask only
+        # suppresses the memory access; addresses are computed for every lane.
+        safe_page_idx = tl.where(kv_mask, page_idx, 0)
         block_nums = tl.load(
-            Block_table_ptr + bt_base + page_idx,
+            Block_table_ptr + bt_base + safe_page_idx,
             mask=kv_mask,
             other=0,
         ).to(tl.int64)