From da1580eeea63e42bf40197ed4b1e4e63038fd91d Mon Sep 17 00:00:00 2001 From: wangli Date: Wed, 6 Aug 2025 15:40:18 +0800 Subject: [PATCH 1/4] fix env Signed-off-by: wangli --- vllm_ascend/envs.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/vllm_ascend/envs.py b/vllm_ascend/envs.py index dee6f5a5424..eb5e216cac0 100644 --- a/vllm_ascend/envs.py +++ b/vllm_ascend/envs.py @@ -128,6 +128,8 @@ "VLLM_ASCEND_ENABLE_TOPK_TOPP_OPTIMIZATION": lambda: bool( int(os.getenv("VLLM_ASCEND_ENABLE_TOPK_TOPP_OPTIMIZATION", '1'))), + "VLLM_ASCEND_ENABLE_CHUNK_MC2": + lambda: bool(int(os.getenv("VLLM_ASCEND_ENABLE_CHUNK_MC2", '0'))), # `LLMDataDistCMgrConnector` required variable. `DISAGGREGATED_PREFILL_RANK_TABLE_PATH` is # used for llmdatadist to build the communication topology for kv cache transfer, it is From a7f3993871220610142d06cf3f03a899b4edc2a5 Mon Sep 17 00:00:00 2001 From: wangli Date: Wed, 6 Aug 2025 15:52:24 +0800 Subject: [PATCH 2/4] rm env Signed-off-by: wangli --- vllm_ascend/worker/model_runner_v1.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/vllm_ascend/worker/model_runner_v1.py b/vllm_ascend/worker/model_runner_v1.py index b2f730a1b65..1e29d336d7b 100644 --- a/vllm_ascend/worker/model_runner_v1.py +++ b/vllm_ascend/worker/model_runner_v1.py @@ -617,12 +617,6 @@ def _get_forward_metadata_across_dp_and_pad( if self.dp_size == 1: return num_tokens, None, with_prefill, enable_dbo - if self.is_kv_producer and not envs_ascend.VLLM_ASCEND_ENABLE_CHUNK_MC2: - num_tokens_across_dp = torch.tensor([num_tokens] * self.dp_size, - device="cpu", - dtype=torch.int32) - return num_tokens, num_tokens_across_dp, True, enable_dbo - if self.is_kv_consumer and self.torchair_graph_enabled and len( self.torchair_graph_batch_sizes ) == 1 and not self.in_profile_run: From a966e5187617f0bc6d7583e442bb59bee19e1d35 Mon Sep 17 00:00:00 2001 From: wangli Date: Wed, 6 Aug 2025 15:52:45 +0800 Subject: [PATCH 3/4] revert Signed-off-by: wangli --- vllm_ascend/envs.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/vllm_ascend/envs.py b/vllm_ascend/envs.py index eb5e216cac0..dee6f5a5424 100644 --- a/vllm_ascend/envs.py +++ b/vllm_ascend/envs.py @@ -128,8 +128,6 @@ "VLLM_ASCEND_ENABLE_TOPK_TOPP_OPTIMIZATION": lambda: bool( int(os.getenv("VLLM_ASCEND_ENABLE_TOPK_TOPP_OPTIMIZATION", '1'))), - "VLLM_ASCEND_ENABLE_CHUNK_MC2": - lambda: bool(int(os.getenv("VLLM_ASCEND_ENABLE_CHUNK_MC2", '0'))), # `LLMDataDistCMgrConnector` required variable. `DISAGGREGATED_PREFILL_RANK_TABLE_PATH` is # used for llmdatadist to build the communication topology for kv cache transfer, it is From f232d088eeebcc1ac6f0357b790c86439e60b697 Mon Sep 17 00:00:00 2001 From: wangli Date: Wed, 6 Aug 2025 16:57:26 +0800 Subject: [PATCH 4/4] rm consumer Signed-off-by: wangli --- vllm_ascend/worker/model_runner_v1.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/vllm_ascend/worker/model_runner_v1.py b/vllm_ascend/worker/model_runner_v1.py index 1e29d336d7b..396bebd16e1 100644 --- a/vllm_ascend/worker/model_runner_v1.py +++ b/vllm_ascend/worker/model_runner_v1.py @@ -617,16 +617,6 @@ def _get_forward_metadata_across_dp_and_pad( if self.dp_size == 1: return num_tokens, None, with_prefill, enable_dbo - if self.is_kv_consumer and self.torchair_graph_enabled and len( - self.torchair_graph_batch_sizes - ) == 1 and not self.in_profile_run: - max_num_decode_tokens = self.torchair_graph_batch_sizes[0] - num_tokens_across_dp = torch.tensor([max_num_decode_tokens] * - self.dp_size, - device="cpu", - dtype=torch.int32) - return max_num_decode_tokens, num_tokens_across_dp, False, enable_dbo - maybe_padded_num_tokens = num_tokens num_tokens_across_dp, with_prefill, enable_dbo = self._get_forward_metadata_across_dp( num_tokens, with_prefill, enable_dbo)