diff --git a/vllm/v1/sample/ops/topk_topp_sampler.py b/vllm/v1/sample/ops/topk_topp_sampler.py
index 1fc5f6e8ccf0..f07fa17a9d42 100644
--- a/vllm/v1/sample/ops/topk_topp_sampler.py
+++ b/vllm/v1/sample/ops/topk_topp_sampler.py
@@ -291,7 +291,8 @@ def forward_xpu(
         )
         # The custom XPU sampler kernel consumes RNG values internally, so advance
         # the default generator's offset to keep future draws deterministic.
-        offset += logits.numel()
+        # PyTorch requires the offset to be a multiple of 4, so round up.
+        offset = (offset + logits.numel() + 3) // 4 * 4
         state.view(torch.int64)[1] = offset
         generator.set_state(state)
         return random_sampled, logits_to_return