diff --git a/vllm/v1/sample/ops/topk_topp_sampler.py b/vllm/v1/sample/ops/topk_topp_sampler.py index 1fc5f6e8ccf0..f07fa17a9d42 100644 --- a/vllm/v1/sample/ops/topk_topp_sampler.py +++ b/vllm/v1/sample/ops/topk_topp_sampler.py @@ -291,7 +291,8 @@ def forward_xpu( ) # The custom XPU sampler kernel consumes RNG values internally, so advance # the default generator's offset to keep future draws deterministic. - offset += logits.numel() + # pytorch: offset must be multiple of 4 + offset = (offset + logits.numel() + 3) // 4 * 4 state.view(torch.int64)[1] = offset generator.set_state(state) return random_sampled, logits_to_return