From 0f005986e89bcc3fb312cc63f602ac5cac1f67ed Mon Sep 17 00:00:00 2001
From: jesse
Date: Tue, 3 Feb 2026 12:37:42 +0800
Subject: [PATCH 1/4] fix qwen3-asr

Signed-off-by: jesse
---
 vllm/model_executor/models/qwen3_asr.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/vllm/model_executor/models/qwen3_asr.py b/vllm/model_executor/models/qwen3_asr.py
index b27f710dbe13..0c95d1603526 100644
--- a/vllm/model_executor/models/qwen3_asr.py
+++ b/vllm/model_executor/models/qwen3_asr.py
@@ -125,6 +125,13 @@ def get_feature_extractor(self, **kwargs: object) -> WhisperFeatureExtractor:
     def get_supported_mm_limits(self) -> Mapping[str, int | None]:
         return {"audio": None}
 
+    def get_data_parser(self) -> MultiModalDataParser:
+        feature_extractor = self.get_feature_extractor()
+        return Qwen3ASRMultiModalDataParser(
+            target_sr=feature_extractor.sampling_rate,
+            expected_hidden_size=self._get_expected_hidden_size(),
+        )
+
 
 class Qwen3ASRDummyInputsBuilder(BaseDummyInputsBuilder[Qwen3ASRProcessingInfo]):
     def get_dummy_text(self, mm_counts: Mapping[str, int]) -> str:
@@ -195,10 +202,7 @@ class Qwen3ASRMultiModalProcessor(
     Qwen3OmniMoeThinkerMultiModalProcessor,
 ):
     def _get_data_parser(self) -> MultiModalDataParser:
-        feature_extractor = self.info.get_feature_extractor()
-        return Qwen3ASRMultiModalDataParser(
-            target_sr=feature_extractor.sampling_rate,
-        )
+        return self.info.get_data_parser()
 
     def _get_mm_fields_config(
         self,

From c938653d6cf6cb7f126dbbc7a5d8559f09303c73 Mon Sep 17 00:00:00 2001
From: jesse
Date: Tue, 3 Feb 2026 14:41:36 +0800
Subject: [PATCH 2/4] remove unused function

Signed-off-by: jesse
---
 vllm/model_executor/models/qwen3_asr.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/vllm/model_executor/models/qwen3_asr.py b/vllm/model_executor/models/qwen3_asr.py
index 0c95d1603526..e63e03e23e47 100644
--- a/vllm/model_executor/models/qwen3_asr.py
+++ b/vllm/model_executor/models/qwen3_asr.py
@@ -201,9 +201,6 @@ def _parse_audio_data(
 class Qwen3ASRMultiModalProcessor(
     Qwen3OmniMoeThinkerMultiModalProcessor,
 ):
-    def _get_data_parser(self) -> MultiModalDataParser:
-        return self.info.get_data_parser()
-
     def _get_mm_fields_config(
         self,
         hf_inputs: BatchFeature,

From 03ff0a31aacbbc2634e69cd16c23241b3b626738 Mon Sep 17 00:00:00 2001
From: jesse
Date: Tue, 3 Feb 2026 17:22:38 +0800
Subject: [PATCH 3/4] fix error

Signed-off-by: jesse
---
 vllm/multimodal/budget.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/multimodal/budget.py b/vllm/multimodal/budget.py
index 0cd2419ca109..050f5170b3b6 100644
--- a/vllm/multimodal/budget.py
+++ b/vllm/multimodal/budget.py
@@ -73,7 +73,7 @@ def __init__(
             )
 
         mm_max_toks_per_item = {
-            modality: all_mm_max_toks_per_item[modality]
+            modality: all_mm_max_toks_per_item.get(modality, 0)
             for modality in active_modalities
         }
 

From f453459afc70f2619569add9f7c759c02b183940 Mon Sep 17 00:00:00 2001
From: Cyrus Leung
Date: Tue, 3 Feb 2026 19:33:27 +0800
Subject: [PATCH 4/4] Apply suggestion from @DarkLight1337

Signed-off-by: DarkLight1337
Signed-off-by: Cyrus Leung
---
 vllm/multimodal/budget.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/multimodal/budget.py b/vllm/multimodal/budget.py
index 050f5170b3b6..0cd2419ca109 100644
--- a/vllm/multimodal/budget.py
+++ b/vllm/multimodal/budget.py
@@ -73,7 +73,7 @@ def __init__(
             )
 
         mm_max_toks_per_item = {
-            modality: all_mm_max_toks_per_item.get(modality, 0)
+            modality: all_mm_max_toks_per_item[modality]
             for modality in active_modalities
         }
 