diff --git a/tests/quantization/test_compressed_tensors.py b/tests/quantization/test_compressed_tensors.py index 6b95d9e346d..7e80b0ecfa6 100644 --- a/tests/quantization/test_compressed_tensors.py +++ b/tests/quantization/test_compressed_tensors.py @@ -32,6 +32,9 @@ CompressedTensorsW8A16Fp8, CompressedTensorsWNA16, ) +from vllm.model_executor.layers.quantization.compressed_tensors.utils import ( + find_matched_target, +) from vllm.model_executor.layers.quantization.input_quant_fp8 import QuantFP8 from vllm.model_executor.layers.quantization.utils.nvfp4_utils import ( cutlass_fp4_supported, @@ -635,6 +638,24 @@ def test_get_quant_method_returns_none_for_unmatched_parallel_lm_head(): ) +def test_find_matched_target_returns_none_on_no_match(): + result = find_matched_target( + layer_name="model.layers.0.self_attn.qkv_proj", + module=Mock(spec=torch.nn.Linear), + targets=["no_match_target"], + ) + assert result is None + + +def test_get_scheme_dict_returns_none_on_no_match(): + config = _make_ct_config(target="matched_layer") + result = config.get_scheme_dict( + layer=Mock(spec=torch.nn.Linear), + layer_name="model.layers.0.unmatched_layer", + ) + assert result is None + + @pytest.mark.skipif( not current_platform.is_cuda() or not current_platform.has_device_capability(75), reason="MXFP8 requires Turing (sm_75+) or newer.", diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors.py b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors.py index 8d16a143b10..2910e63678f 100644 --- a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors.py +++ b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors.py @@ -1,7 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -from contextlib import suppress from functools import partial from typing import TYPE_CHECKING, Any, Literal, cast @@ -747,13 +746,13 @@ def get_scheme( self.sparsity_ignore_list ) sparsity_scheme: SparsityCompressionConfig | None = None - with suppress(ValueError): - matched_target = find_matched_target( - layer_name=layer_name, - module=layer, - targets=sparsity_targets, - fused_mapping=self.packed_modules_mapping, - ) + matched_target = find_matched_target( + layer_name=layer_name, + module=layer, + targets=sparsity_targets, + fused_mapping=self.packed_modules_mapping, + ) + if matched_target is not None: sparsity_scheme = self.sparsity_scheme_map[matched_target] if self.supports_cutlass_24( @@ -821,10 +820,11 @@ def get_scheme_dict( targets=self.target_scheme_map.keys(), fused_mapping=self.packed_modules_mapping, ) - scheme_dict = self.target_scheme_map[matched_target] - if scheme_dict.get("format") is None: - scheme_dict["format"] = self.quant_format - return scheme_dict + if matched_target is not None: + scheme_dict = self.target_scheme_map[matched_target] + if scheme_dict.get("format") is None: + scheme_dict["format"] = self.quant_format + return scheme_dict return None diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/utils.py b/vllm/model_executor/layers/quantization/compressed_tensors/utils.py index 04c64d9bd56..def4797b139 100644 --- a/vllm/model_executor/layers/quantization/compressed_tensors/utils.py +++ b/vllm/model_executor/layers/quantization/compressed_tensors/utils.py @@ -115,7 +115,7 @@ def find_matched_target( module: Module, targets: Iterable[str], fused_mapping: Mapping[str, list[str]] = MappingProxyType({}), -) -> str: +) -> str | None: """ Helper function to look up which "target" in the compressed-tensors config that a layer corresponds to. @@ -150,12 +150,6 @@ def find_matched_target( or _match_fused_layer(layer_name, targets, fused_mapping) ) - if matched_target is None: - raise ValueError( - f"Unable to find matching target for {layer_name} in the " - "compressed-tensors config." - ) - return matched_target