vllm-project · Isotr0py · Jun 3, 2026 · Jun 3, 2026
diff --git a/tests/test_gguf_utils.py b/tests/test_gguf_utils.py
@@ -43,6 +43,23 @@ def test_is_remote_gguf_extended_quant_types(self):
         assert not is_remote_gguf("repo/model:INVALID_M")
         assert not is_remote_gguf("repo/model:Q9_K_M")
 
+    def test_is_remote_gguf_file_type_only_quants(self):
+        """Verify is_remote_gguf accepts quants that exist only as file types.
+
+        IQ2_M / IQ3_M / IQ3_XS / MXFP4_MOE are GGUF file types (LlamaFileType)
+        with no GGML tensor type; bare and "UD-" prefixed forms are covered.
+        Regression test for https://github.com/vllm-project/vllm/issues/42734.
+        """
+        assert is_remote_gguf("unsloth/Qwen3.6-35B-A3B-GGUF:UD-IQ2_M")
+        assert is_remote_gguf("repo/model:IQ2_M")
+        assert is_remote_gguf("repo/model:IQ3_M")
+        assert is_remote_gguf("repo/model:IQ3_XS")
+        assert is_remote_gguf("repo/model:MXFP4_MOE")
+        assert is_remote_gguf("user/Model-GGUF:UD-IQ3_XS")
+        # Look-alike names that match no known quant type must stay rejected.
+        assert not is_remote_gguf("repo/model:IQ9_M")
+        assert not is_remote_gguf("repo/model:NOTATYPE")
+
     def test_is_remote_gguf_nonstandard_quant_type(self):
         """Test is_remote_gguf with non-standard quant types containing
         a known GGML type."""

diff --git a/vllm_gguf_plugin/gguf_utils.py b/vllm_gguf_plugin/gguf_utils.py
@@ -8,7 +8,7 @@
 
 import gguf
 import regex as re
-from gguf.constants import Keys, VisionProjectorType
+from gguf.constants import Keys, LlamaFileType, VisionProjectorType
 from gguf.quants import GGMLQuantizationType
 from transformers import Gemma3Config, PretrainedConfig, SiglipVisionConfig
 from vllm.logger import init_logger
@@ -88,10 +88,18 @@ def is_nonstandard_gguf_quant_type(quant_type: str) -> bool:
 def is_valid_gguf_quant_type(gguf_quant_type: str) -> bool:
     """Check if the quant type is a valid GGUF quant type.
 
-    Supports both exact GGML quant types (e.g., Q4_K, IQ1_S) and
-    extended naming conventions (e.g., Q4_K_M, Q3_K_S, Q5_K_L).
+    The quant type in a ``repo_id:quant_type`` reference is a GGUF *file*
+    type (``LlamaFileType``, members prefixed ``MOSTLY_``), which is distinct
+    from a GGML *tensor* type (``GGMLQuantizationType``). Some file types
+    (e.g. ``IQ2_M``, ``IQ3_XS``, ``MXFP4_MOE``) have no ``GGMLQuantizationType``
+    member, so accept either enum, plus extended naming conventions
+    (e.g. ``Q4_K_M`` -> ``Q4_K``).
     """
-    # Check for exact match first
+    # File type (LlamaFileType), e.g. IQ2_M / MXFP4_MOE with no tensor type
+    if getattr(LlamaFileType, f"MOSTLY_{gguf_quant_type}", None) is not None:
+        return True
+
+    # Exact GGML tensor type
     if getattr(GGMLQuantizationType, gguf_quant_type, None) is not None:
         return True