From 079a651c224113fbdc972df30da2a3af90997a57 Mon Sep 17 00:00:00 2001
From: Kyle Sayers <kylesayrs@gmail.com>
Date: Thu, 7 May 2026 10:09:35 -0400
Subject: [PATCH 1/3] allow non-explicit ignores (unmatched layers return None)

Signed-off-by: Kyle Sayers <kylesayrs@gmail.com>
---
 .../compressed_tensors/compressed_tensors.py  | 24 +++++++++----------
 .../quantization/compressed_tensors/utils.py  |  8 +------
 2 files changed, 13 insertions(+), 19 deletions(-)

diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors.py b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors.py
index 8d16a143b10a..2910e63678fe 100644
--- a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors.py
+++ b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors.py
@@ -1,7 +1,6 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
-from contextlib import suppress
 from functools import partial
 from typing import TYPE_CHECKING, Any, Literal, cast
 
@@ -747,13 +746,13 @@ def get_scheme(
             self.sparsity_ignore_list
         )
         sparsity_scheme: SparsityCompressionConfig | None = None
-        with suppress(ValueError):
-            matched_target = find_matched_target(
-                layer_name=layer_name,
-                module=layer,
-                targets=sparsity_targets,
-                fused_mapping=self.packed_modules_mapping,
-            )
+        matched_target = find_matched_target(
+            layer_name=layer_name,
+            module=layer,
+            targets=sparsity_targets,
+            fused_mapping=self.packed_modules_mapping,
+        )
+        if matched_target is not None:
             sparsity_scheme = self.sparsity_scheme_map[matched_target]
 
         if self.supports_cutlass_24(
@@ -821,10 +820,11 @@ def get_scheme_dict(
                 targets=self.target_scheme_map.keys(),
                 fused_mapping=self.packed_modules_mapping,
             )
-            scheme_dict = self.target_scheme_map[matched_target]
-            if scheme_dict.get("format") is None:
-                scheme_dict["format"] = self.quant_format
-            return scheme_dict
+            if matched_target is not None:
+                scheme_dict = self.target_scheme_map[matched_target]
+                if scheme_dict.get("format") is None:
+                    scheme_dict["format"] = self.quant_format
+                return scheme_dict
 
         return None
 
diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/utils.py b/vllm/model_executor/layers/quantization/compressed_tensors/utils.py
index 04c64d9bd56f..def4797b1396 100644
--- a/vllm/model_executor/layers/quantization/compressed_tensors/utils.py
+++ b/vllm/model_executor/layers/quantization/compressed_tensors/utils.py
@@ -115,7 +115,7 @@ def find_matched_target(
     module: Module,
     targets: Iterable[str],
     fused_mapping: Mapping[str, list[str]] = MappingProxyType({}),
-) -> str:
+) -> str | None:
     """
     Helper function to look up which "target" in the compressed-tensors
     config that a layer corresponds to.
@@ -150,12 +150,6 @@ def find_matched_target(
         or _match_fused_layer(layer_name, targets, fused_mapping)
     )
 
-    if matched_target is None:
-        raise ValueError(
-            f"Unable to find matching target for {layer_name} in the "
-            "compressed-tensors config."
-        )
-
     return matched_target
 
 

From 68bad70adebef1a471de810c6e74cc640cbd3456 Mon Sep 17 00:00:00 2001
From: Kyle Sayers <kylesayrs@gmail.com>
Date: Thu, 7 May 2026 11:57:40 -0400
Subject: [PATCH 2/3] add tests for None return when no target matches

Signed-off-by: Kyle Sayers <kylesayrs@gmail.com>
---
 tests/quantization/test_compressed_tensors.py | 22 +++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/tests/quantization/test_compressed_tensors.py b/tests/quantization/test_compressed_tensors.py
index 6b95d9e346db..cc46067fded8 100644
--- a/tests/quantization/test_compressed_tensors.py
+++ b/tests/quantization/test_compressed_tensors.py
@@ -635,6 +635,28 @@ def test_get_quant_method_returns_none_for_unmatched_parallel_lm_head():
     )
 
 
+def test_find_matched_target_returns_none_on_no_match():
+    from vllm.model_executor.layers.quantization.compressed_tensors.utils import (
+        find_matched_target,
+    )
+
+    result = find_matched_target(
+        layer_name="model.layers.0.self_attn.qkv_proj",
+        module=Mock(spec=torch.nn.Linear),
+        targets=["no_match_target"],
+    )
+    assert result is None
+
+
+def test_get_scheme_dict_returns_none_on_no_match():
+    config = _make_ct_config(target="Linear")
+    result = config.get_scheme_dict(
+        layer=Mock(spec=torch.nn.Linear),
+        layer_name="model.layers.0.unmatched_layer",
+    )
+    assert result is None
+
+
 @pytest.mark.skipif(
     not current_platform.is_cuda() or not current_platform.has_device_capability(75),
     reason="MXFP8 requires Turing (sm_75+) or newer.",

From 1c2e89ebdaa53a8b650e51dbb3907773755c65e6 Mon Sep 17 00:00:00 2001
From: Kyle Sayers <kylesayrs@gmail.com>
Date: Thu, 7 May 2026 12:02:59 -0400
Subject: [PATCH 3/3] fix ai test: hoist import, change config target

Signed-off-by: Kyle Sayers <kylesayrs@gmail.com>
---
 tests/quantization/test_compressed_tensors.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/tests/quantization/test_compressed_tensors.py b/tests/quantization/test_compressed_tensors.py
index cc46067fded8..7e80b0ecfa61 100644
--- a/tests/quantization/test_compressed_tensors.py
+++ b/tests/quantization/test_compressed_tensors.py
@@ -32,6 +32,9 @@
     CompressedTensorsW8A16Fp8,
     CompressedTensorsWNA16,
 )
+from vllm.model_executor.layers.quantization.compressed_tensors.utils import (
+    find_matched_target,
+)
 from vllm.model_executor.layers.quantization.input_quant_fp8 import QuantFP8
 from vllm.model_executor.layers.quantization.utils.nvfp4_utils import (
     cutlass_fp4_supported,
@@ -636,10 +639,6 @@ def test_get_quant_method_returns_none_for_unmatched_parallel_lm_head():
 
 
 def test_find_matched_target_returns_none_on_no_match():
-    from vllm.model_executor.layers.quantization.compressed_tensors.utils import (
-        find_matched_target,
-    )
-
     result = find_matched_target(
         layer_name="model.layers.0.self_attn.qkv_proj",
         module=Mock(spec=torch.nn.Linear),
@@ -649,7 +648,7 @@ def test_find_matched_target_returns_none_on_no_match():
 
 
 def test_get_scheme_dict_returns_none_on_no_match():
-    config = _make_ct_config(target="Linear")
+    config = _make_ct_config(target="matched_layer")
     result = config.get_scheme_dict(
         layer=Mock(spec=torch.nn.Linear),
         layer_name="model.layers.0.unmatched_layer",