From b9b3b00fb9637dfdcb6fa18af50ea9af7ad3e601 Mon Sep 17 00:00:00 2001
From: CaveNightingale
Date: Thu, 14 Aug 2025 15:48:43 +0800
Subject: [PATCH 1/2] fix non-working disaggregated prefill

Signed-off-by: CaveNightingale
---
 vllm_ascend/worker/model_runner_v1.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/vllm_ascend/worker/model_runner_v1.py b/vllm_ascend/worker/model_runner_v1.py
index a8355b36576..c878cd2c060 100644
--- a/vllm_ascend/worker/model_runner_v1.py
+++ b/vllm_ascend/worker/model_runner_v1.py
@@ -1858,8 +1858,13 @@ def kv_connector_no_forward(
             return EMPTY_MODEL_RUNNER_OUTPUT
 
         output = copy.copy(EMPTY_MODEL_RUNNER_OUTPUT)
-        output.finished_sending = finished_sending
-        output.finished_recving = finished_recving
+        if vllm_version_is("0.10.0"):
+            output.finished_sending = finished_sending
+            output.finished_recving = finished_recving
+        else:
+            output.kv_connector_output = KVConnectorOutput(
+                finished_sending=finished_sending,
+                finished_recving=finished_recving)
         return output
 
     @staticmethod

From 3c8d8388ef7d414bd5d69daaef99aa11e51d882a Mon Sep 17 00:00:00 2001
From: CaveNightingale
Date: Fri, 15 Aug 2025 11:32:33 +0800
Subject: [PATCH 2/2] fix D hanging after ~17 requests

Signed-off-by: CaveNightingale
---
 vllm_ascend/worker/model_runner_v1.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm_ascend/worker/model_runner_v1.py b/vllm_ascend/worker/model_runner_v1.py
index dbfd20b01f4..ebf76ebff99 100644
--- a/vllm_ascend/worker/model_runner_v1.py
+++ b/vllm_ascend/worker/model_runner_v1.py
@@ -1636,7 +1636,7 @@ def execute_model(
              finished_recving) = (self._process_reqs(scheduler_output,
                                                      intermediate_tensors))
             kv_connector_output = None
-            if finished_sending is not None and finished_recving is not None:
+            if finished_sending is not None or finished_recving is not None:
                 kv_connector_output = KVConnectorOutput(
                     finished_sending=finished_sending,
                     finished_recving=finished_recving)