Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions tests/test_gguf_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,23 @@ def test_is_remote_gguf_extended_quant_types(self):
assert not is_remote_gguf("repo/model:INVALID_M")
assert not is_remote_gguf("repo/model:Q9_K_M")

def test_is_remote_gguf_file_type_only_quants(self):
    """Check is_remote_gguf against quants that exist only as file types.

    Regression test for
    https://github.com/vllm-project/vllm/issues/42734.

    IQ2_M, IQ3_M, IQ3_XS and MXFP4_MOE are GGUF file types
    (LlamaFileType) without a corresponding GGML tensor type, so
    references using them must still be recognised as remote GGUF models.
    """
    # References whose quant suffix is a file-type-only quant, both bare
    # and with the "UD-" prefix.
    accepted_refs = (
        "unsloth/Qwen3.6-35B-A3B-GGUF:UD-IQ2_M",
        "repo/model:IQ2_M",
        "repo/model:IQ3_M",
        "repo/model:IQ3_XS",
        "repo/model:MXFP4_MOE",
        "user/Model-GGUF:UD-IQ3_XS",
    )
    for model_ref in accepted_refs:
        assert is_remote_gguf(model_ref)

    # Suffixes that match neither a file type nor a tensor type.
    rejected_refs = (
        "repo/model:IQ9_M",
        "repo/model:NOTATYPE",
    )
    for model_ref in rejected_refs:
        assert not is_remote_gguf(model_ref)

def test_is_remote_gguf_nonstandard_quant_type(self):
"""Test is_remote_gguf with non-standard quant types containing
a known GGML type."""
Expand Down
16 changes: 12 additions & 4 deletions vllm_gguf_plugin/gguf_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

import gguf
import regex as re
from gguf.constants import Keys, VisionProjectorType
from gguf.constants import Keys, LlamaFileType, VisionProjectorType
from gguf.quants import GGMLQuantizationType
from transformers import Gemma3Config, PretrainedConfig, SiglipVisionConfig
from vllm.logger import init_logger
Expand Down Expand Up @@ -88,10 +88,18 @@ def is_nonstandard_gguf_quant_type(quant_type: str) -> bool:
def is_valid_gguf_quant_type(gguf_quant_type: str) -> bool:
"""Check if the quant type is a valid GGUF quant type.

Supports both exact GGML quant types (e.g., Q4_K, IQ1_S) and
extended naming conventions (e.g., Q4_K_M, Q3_K_S, Q5_K_L).
The quant type in a ``repo_id:quant_type`` reference is a GGUF *file*
type (``LlamaFileType``, members prefixed ``MOSTLY_``), which is distinct
from a GGML *tensor* type (``GGMLQuantizationType``). Some file types
(e.g. ``IQ2_M``, ``IQ3_XS``, ``MXFP4_MOE``) have no ``GGMLQuantizationType``
member, so accept either enum, plus extended naming conventions
(e.g. ``Q4_K_M`` -> ``Q4_K``).
"""
# Check for exact match first
# File type (LlamaFileType), e.g. IQ2_M / MXFP4_MOE with no tensor type
if getattr(LlamaFileType, f"MOSTLY_{gguf_quant_type}", None) is not None:
return True

# Exact GGML tensor type
if getattr(GGMLQuantizationType, gguf_quant_type, None) is not None:
return True

Expand Down
Loading