diff --git a/vllm/v1/sample/ops/topk_topp_sampler.py b/vllm/v1/sample/ops/topk_topp_sampler.py index 363b113f0a4f..cc3120233814 100644 --- a/vllm/v1/sample/ops/topk_topp_sampler.py +++ b/vllm/v1/sample/ops/topk_topp_sampler.py @@ -289,6 +289,11 @@ def forward_xpu( torch.ops.vllm.xpu_topk_topp_sampler( random_sampled, logits_to_return, logits, k, p, self.logprobs_mode, seeds ) + # The custom XPU sampler kernel consumes RNG values internally, so advance + # the default generator's offset to keep future draws deterministic. + offset += logits.numel() + state.view(torch.int64)[1] = offset + generator.set_state(state) return random_sampled, logits_to_return