From 079a651c224113fbdc972df30da2a3af90997a57 Mon Sep 17 00:00:00 2001 From: Kyle Sayers Date: Thu, 7 May 2026 10:09:35 -0400 Subject: [PATCH 1/3] allow non-explicit ignores Signed-off-by: Kyle Sayers --- .../compressed_tensors/compressed_tensors.py | 24 +++++++++---------- .../quantization/compressed_tensors/utils.py | 8 +------ 2 files changed, 13 insertions(+), 19 deletions(-) diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors.py b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors.py index 8d16a143b10a..2910e63678fe 100644 --- a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors.py +++ b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors.py @@ -1,7 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -from contextlib import suppress from functools import partial from typing import TYPE_CHECKING, Any, Literal, cast @@ -747,13 +746,13 @@ def get_scheme( self.sparsity_ignore_list ) sparsity_scheme: SparsityCompressionConfig | None = None - with suppress(ValueError): - matched_target = find_matched_target( - layer_name=layer_name, - module=layer, - targets=sparsity_targets, - fused_mapping=self.packed_modules_mapping, - ) + matched_target = find_matched_target( + layer_name=layer_name, + module=layer, + targets=sparsity_targets, + fused_mapping=self.packed_modules_mapping, + ) + if matched_target is not None: sparsity_scheme = self.sparsity_scheme_map[matched_target] if self.supports_cutlass_24( @@ -821,10 +820,11 @@ def get_scheme_dict( targets=self.target_scheme_map.keys(), fused_mapping=self.packed_modules_mapping, ) - scheme_dict = self.target_scheme_map[matched_target] - if scheme_dict.get("format") is None: - scheme_dict["format"] = self.quant_format - return scheme_dict + if matched_target is not None: + scheme_dict = self.target_scheme_map[matched_target] + if scheme_dict.get("format") is None: + scheme_dict["format"] = self.quant_format + return scheme_dict return None diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/utils.py b/vllm/model_executor/layers/quantization/compressed_tensors/utils.py index 04c64d9bd56f..def4797b1396 100644 --- a/vllm/model_executor/layers/quantization/compressed_tensors/utils.py +++ b/vllm/model_executor/layers/quantization/compressed_tensors/utils.py @@ -115,7 +115,7 @@ def find_matched_target( module: Module, targets: Iterable[str], fused_mapping: Mapping[str, list[str]] = MappingProxyType({}), -) -> str: +) -> str | None: """ Helper function to look up which "target" in the compressed-tensors config that a layer corresponds to. @@ -150,12 +150,6 @@ def find_matched_target( or _match_fused_layer(layer_name, targets, fused_mapping) ) - if matched_target is None: - raise ValueError( - f"Unable to find matching target for {layer_name} in the " - "compressed-tensors config." - ) - return matched_target From 68bad70adebef1a471de810c6e74cc640cbd3456 Mon Sep 17 00:00:00 2001 From: Kyle Sayers Date: Thu, 7 May 2026 11:57:40 -0400 Subject: [PATCH 2/3] add test Signed-off-by: Kyle Sayers --- tests/quantization/test_compressed_tensors.py | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tests/quantization/test_compressed_tensors.py b/tests/quantization/test_compressed_tensors.py index 6b95d9e346db..cc46067fded8 100644 --- a/tests/quantization/test_compressed_tensors.py +++ b/tests/quantization/test_compressed_tensors.py @@ -635,6 +635,28 @@ def test_get_quant_method_returns_none_for_unmatched_parallel_lm_head(): ) +def test_find_matched_target_returns_none_on_no_match(): + from vllm.model_executor.layers.quantization.compressed_tensors.utils import ( + find_matched_target, + ) + + result = find_matched_target( + layer_name="model.layers.0.self_attn.qkv_proj", + module=Mock(spec=torch.nn.Linear), + targets=["no_match_target"], + ) + assert result is None + + +def test_get_scheme_dict_returns_none_on_no_match(): + config = _make_ct_config(target="Linear") + result = config.get_scheme_dict( + layer=Mock(spec=torch.nn.Linear), + layer_name="model.layers.0.unmatched_layer", + ) + assert result is None + + @pytest.mark.skipif( not current_platform.is_cuda() or not current_platform.has_device_capability(75), reason="MXFP8 requires Turing (sm_75+) or newer.", From 1c2e89ebdaa53a8b650e51dbb3907773755c65e6 Mon Sep 17 00:00:00 2001 From: Kyle Sayers Date: Thu, 7 May 2026 12:02:59 -0400 Subject: [PATCH 3/3] fix ai test Signed-off-by: Kyle Sayers --- tests/quantization/test_compressed_tensors.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/tests/quantization/test_compressed_tensors.py b/tests/quantization/test_compressed_tensors.py index cc46067fded8..7e80b0ecfa61 100644 --- a/tests/quantization/test_compressed_tensors.py +++ b/tests/quantization/test_compressed_tensors.py @@ -32,6 +32,9 @@ CompressedTensorsW8A16Fp8, CompressedTensorsWNA16, ) +from vllm.model_executor.layers.quantization.compressed_tensors.utils import ( + find_matched_target, +) from vllm.model_executor.layers.quantization.input_quant_fp8 import QuantFP8 from vllm.model_executor.layers.quantization.utils.nvfp4_utils import ( cutlass_fp4_supported, @@ -636,10 +639,6 @@ def test_get_quant_method_returns_none_for_unmatched_parallel_lm_head(): def test_find_matched_target_returns_none_on_no_match(): - from vllm.model_executor.layers.quantization.compressed_tensors.utils import ( - find_matched_target, - ) - result = find_matched_target( layer_name="model.layers.0.self_attn.qkv_proj", module=Mock(spec=torch.nn.Linear), @@ -649,7 +648,7 @@ def test_find_matched_target_returns_none_on_no_match(): def test_get_scheme_dict_returns_none_on_no_match(): - config = _make_ct_config(target="Linear") + config = _make_ct_config(target="matched_layer") result = config.get_scheme_dict( layer=Mock(spec=torch.nn.Linear), layer_name="model.layers.0.unmatched_layer",