vllm-project · sfeng33 · May 18, 2026 · May 17, 2026 · May 18, 2026
@@ -39,7 +39,6 @@
 )
 from vllm.model_executor.layers.quantization.compressed_tensors.schemes import (
     WNA16_SUPPORTED_BITS,
-    CompressedTensors24,
     CompressedTensorsScheme,
     CompressedTensorsW4A4Fp4,
     CompressedTensorsW4A4Mxfp4,
@@ -760,19 +759,8 @@ def get_scheme(
             input_quant=input_quant,
             sparsity_scheme=sparsity_scheme,
         ):
-            # Have a valid sparsity scheme
-            # Validate layer is supported by Cutlass 2:4 Kernel
-            model_compression_config = (
-                None
-                if sparsity_scheme is None or sparsity_scheme.format == "dense"
-                else self.config
-            )
-
-            scheme = CompressedTensors24(
-                quantized=weight_quant is not None or input_quant is not None,
-                weight_quant=weight_quant,
-                input_quant=input_quant,
-                model_compression_config=model_compression_config,
+            raise NotImplementedError(
+                "Sparse24 models are no longer supported by vLLM."
             )
         elif weight_quant is None:
             # Falling back to UnquantizedLinearMethod

@@ -13,17 +13,13 @@
 from .compressed_tensors_w8a16_fp8 import CompressedTensorsW8A16Fp8
 from .compressed_tensors_wNa16 import WNA16_SUPPORTED_BITS, CompressedTensorsWNA16
 
-# This avoids circular import error
-from .compressed_tensors_24 import CompressedTensors24  # isort: skip
-
 __all__ = [
     "CompressedTensorsScheme",
     "CompressedTensorsWNA16",
     "CompressedTensorsW8A16Fp8",
     "CompressedTensorsW8A8Int8",
     "CompressedTensorsW8A8Fp8",
     "WNA16_SUPPORTED_BITS",
-    "CompressedTensors24",
     "CompressedTensorsW4A16Fp4",
     "CompressedTensorsW4A4Mxfp4",
     "CompressedTensorsW4A4Fp4",