From 7080adc328584886bc1369fca1d1c1d74597481c Mon Sep 17 00:00:00 2001 From: realliujiaxu Date: Sat, 27 Dec 2025 11:50:03 +0800 Subject: [PATCH 1/2] fix greedy temperature detection Signed-off-by: realliujiaxu --- vllm_ascend/sample/rejection_sampler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm_ascend/sample/rejection_sampler.py b/vllm_ascend/sample/rejection_sampler.py index b0e6f848a42..161b986811e 100644 --- a/vllm_ascend/sample/rejection_sampler.py +++ b/vllm_ascend/sample/rejection_sampler.py @@ -9,7 +9,7 @@ from vllm_ascend.sample.sampler import apply_top_k_top_p PLACEHOLDER_TOKEN_ID = -1 -GREEDY_TEMPERATURE = -1 +GREEDY_TEMPERATURE = 0 # Maximum number of speculative draft tokens allowed per request in a single # step. This value is chosen to be large enough to handle typical use cases. MAX_SPEC_LEN = 32 From 6ac40c69bd03f613bd6a3d950e3fe727353386e9 Mon Sep 17 00:00:00 2001 From: realliujiaxu Date: Sat, 27 Dec 2025 12:56:02 +0800 Subject: [PATCH 2/2] import GREEDY_TEMPERATURE from vllm Signed-off-by: realliujiaxu --- vllm_ascend/sample/rejection_sampler.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vllm_ascend/sample/rejection_sampler.py b/vllm_ascend/sample/rejection_sampler.py index 161b986811e..3361c6f27c5 100644 --- a/vllm_ascend/sample/rejection_sampler.py +++ b/vllm_ascend/sample/rejection_sampler.py @@ -4,12 +4,12 @@ import torch from vllm.triton_utils import HAS_TRITON, tl, triton from vllm.v1.sample.metadata import SamplingMetadata -from vllm.v1.sample.rejection_sampler import generate_uniform_probs +from vllm.v1.sample.rejection_sampler import (GREEDY_TEMPERATURE, + generate_uniform_probs) from vllm_ascend.sample.sampler import apply_top_k_top_p PLACEHOLDER_TOKEN_ID = -1 -GREEDY_TEMPERATURE = 0 # Maximum number of speculative draft tokens allowed per request in a single # step. This value is chosen to be large enough to handle typical use cases. MAX_SPEC_LEN = 32