diff --git a/tests/test_gguf_utils.py b/tests/test_gguf_utils.py
index ecdd6bd..9aabdec 100644
--- a/tests/test_gguf_utils.py
+++ b/tests/test_gguf_utils.py
@@ -43,6 +43,23 @@ def test_is_remote_gguf_extended_quant_types(self):
         assert not is_remote_gguf("repo/model:INVALID_M")
         assert not is_remote_gguf("repo/model:Q9_K_M")
 
+    def test_is_remote_gguf_file_type_only_quants(self):
+        """Test is_remote_gguf with file-type-only quants (LlamaFileType).
+
+        IQ2_M / IQ3_M / IQ3_XS / MXFP4_MOE exist only as GGUF file types
+        (LlamaFileType), not as GGML tensor types. Regression test for
+        https://github.com/vllm-project/vllm/issues/42734.
+        """
+        assert is_remote_gguf("unsloth/Qwen3.6-35B-A3B-GGUF:UD-IQ2_M")
+        assert is_remote_gguf("repo/model:IQ2_M")
+        assert is_remote_gguf("repo/model:IQ3_M")
+        assert is_remote_gguf("repo/model:IQ3_XS")
+        assert is_remote_gguf("repo/model:MXFP4_MOE")
+        assert is_remote_gguf("user/Model-GGUF:UD-IQ3_XS")
+
+        assert not is_remote_gguf("repo/model:IQ9_M")
+        assert not is_remote_gguf("repo/model:NOTATYPE")
+
     def test_is_remote_gguf_nonstandard_quant_type(self):
         """Test is_remote_gguf with non-standard quant types containing
         a known GGML type."""
diff --git a/vllm_gguf_plugin/gguf_utils.py b/vllm_gguf_plugin/gguf_utils.py
index 5564ab1..f415631 100644
--- a/vllm_gguf_plugin/gguf_utils.py
+++ b/vllm_gguf_plugin/gguf_utils.py
@@ -8,7 +8,7 @@
 
 import gguf
 import regex as re
-from gguf.constants import Keys, VisionProjectorType
+from gguf.constants import Keys, LlamaFileType, VisionProjectorType
 from gguf.quants import GGMLQuantizationType
 from transformers import Gemma3Config, PretrainedConfig, SiglipVisionConfig
 from vllm.logger import init_logger
@@ -88,10 +88,18 @@ def is_nonstandard_gguf_quant_type(quant_type: str) -> bool:
 def is_valid_gguf_quant_type(gguf_quant_type: str) -> bool:
     """Check if the quant type is a valid GGUF quant type.
 
-    Supports both exact GGML quant types (e.g., Q4_K, IQ1_S) and
-    extended naming conventions (e.g., Q4_K_M, Q3_K_S, Q5_K_L).
+    The quant type in a ``repo_id:quant_type`` reference is a GGUF *file*
+    type (``LlamaFileType``, members prefixed ``MOSTLY_``), which is distinct
+    from a GGML *tensor* type (``GGMLQuantizationType``). Some file types
+    (e.g. ``IQ2_M``, ``IQ3_XS``, ``MXFP4_MOE``) have no ``GGMLQuantizationType``
+    member, so accept either enum, plus extended naming conventions
+    (e.g. ``Q4_K_M`` -> ``Q4_K``).
     """
-    # Check for exact match first
+    # File type (LlamaFileType), e.g. IQ2_M / MXFP4_MOE with no tensor type
+    if getattr(LlamaFileType, f"MOSTLY_{gguf_quant_type}", None) is not None:
+        return True
+
+    # Exact GGML tensor type
     if getattr(GGMLQuantizationType, gguf_quant_type, None) is not None:
         return True
 