Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 57 additions & 0 deletions tests/transformers_utils/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from vllm.transformers_utils.gguf_utils import (
is_gguf,
is_remote_gguf,
is_valid_gguf_quant_type,
split_remote_gguf,
)
from vllm.transformers_utils.utils import (
Expand Down Expand Up @@ -119,6 +120,16 @@ def test_is_remote_gguf_with_cloud_storage(self):
assert not is_remote_gguf("s3://repo/model:Q4_K")
assert not is_remote_gguf("gs://repo/model:Q8_0")

def test_vendor_prefixed_remote_gguf(self):
    """is_remote_gguf accepts repo_id:quant_type with a vendor-prefixed quant."""
    detected_models = (
        "unsloth/Qwen3.5-35B-A3B-GGUF:UD-Q4_K_XL",
        "repo/model:UD-Q4_K_M",
        "repo/model:UD-Q3_K_S",
        "repo/model:UD-F16",
    )
    for model in detected_models:
        assert is_remote_gguf(model)

    # A vendor prefix must not make an unknown quant type acceptable
    assert not is_remote_gguf("repo/model:UD-INVALID")


class TestSplitRemoteGGUF:
"""Test split_remote_gguf utility function."""
Expand Down Expand Up @@ -167,6 +178,14 @@ def test_split_remote_gguf_invalid(self):
with pytest.raises(ValueError, match="Wrong GGUF model"):
split_remote_gguf("s3://bucket/repo/model:Q2_K")

def test_split_vendor_prefixed_remote_gguf(self):
    """The quant type returned by split_remote_gguf keeps its vendor prefix."""
    model = "unsloth/Qwen3.5-35B-A3B-GGUF:UD-Q4_K_XL"
    repo_id, quant_type = split_remote_gguf(model)

    # The "UD-" prefix stays in the quant type and the repo id is unchanged
    assert repo_id == "unsloth/Qwen3.5-35B-A3B-GGUF"
    assert quant_type == "UD-Q4_K_XL"


class TestIsGGUF:
"""Test is_gguf utility function."""
Expand Down Expand Up @@ -218,3 +237,41 @@ def test_is_gguf_edge_cases(self):
# Cloud storage
assert not is_gguf("s3://bucket/repo/model:IQ1_S")
assert not is_gguf("gs://bucket/repo/model:Q2_K")

def test_vendor_prefixed_is_gguf(self):
    """is_gguf accepts remote GGUF references with vendor-prefixed quants."""
    recognized_models = (
        "unsloth/Qwen3.5-35B-A3B-GGUF:UD-Q4_K_XL",
        "repo/model:UD-Q4_K_M",
    )
    for model in recognized_models:
        assert is_gguf(model)

    # Unknown quant type behind a vendor prefix is still rejected
    assert not is_gguf("repo/model:UD-INVALID")


class TestIsValidGGUFQuantType:
    """Exercise is_valid_gguf_quant_type with vendor-prefixed names."""

    def test_vendor_prefixed_quant_types(self):
        """Accept "<vendor>-<quant type>" names and reject malformed ones."""
        accepted = (
            # Unsloth Dynamic (UD-) prefix with size suffix
            "UD-Q4_K_XL",
            "UD-Q4_K_M",
            "UD-Q3_K_S",
            "UD-Q5_K_L",
            "UD-Q6_K_XXS",
            # UD- prefix with exact GGML types (no size suffix)
            "UD-Q4_K",
            "UD-Q4_0",
            "UD-F16",
            "UD-BF16",
            "UD-IQ1_S",
            # Other hypothetical vendor prefixes should also work
            "XX-Q4_K_M",
        )
        rejected = (
            # Invalid base type after valid prefix should fail
            "UD-INVALID",
            "UD-Q9_K_M",
            "UD-",
            # Empty prefix should fail (e.g., "-Q4_K" is not a valid quant type)
            "-Q4_K",
            "-",
        )

        for quant_type in accepted:
            assert is_valid_gguf_quant_type(quant_type)

        for quant_type in rejected:
            assert not is_valid_gguf_quant_type(quant_type)
46 changes: 36 additions & 10 deletions vllm/transformers_utils/gguf_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,26 +54,51 @@ def is_remote_gguf(model: str | Path) -> bool:
# Size suffixes used by extended GGUF quant names (e.g., Q4_K_M). A quant type
# ending in one of these is also checked with the suffix removed (Q4_K_M -> Q4_K).
_GGUF_QUANT_SUFFIXES = ("_M", "_S", "_L", "_XL", "_XS", "_XXS")


def _is_base_gguf_quant_type(quant_type: str) -> bool:
    """Check if quant_type names a GGMLQuantizationType member, with an
    optional size suffix.

    An exact member name (e.g., Q4_K, IQ1_S) is accepted, as is a member
    name followed by one suffix from _GGUF_QUANT_SUFFIXES
    (e.g., Q4_K_M -> Q4_K).

    Args:
        quant_type: Quant type string to check.

    Returns:
        True if quant_type, or quant_type with one known suffix removed,
        is a member name of GGMLQuantizationType; False otherwise.
    """
    # Look names up in __members__ instead of using getattr(): getattr() also
    # resolves non-member attributes of the enum class (e.g. "mro" or
    # "__name__"), which would wrongly accept those strings as quant types.
    member_names = GGMLQuantizationType.__members__

    # Check for exact match first
    if quant_type in member_names:
        return True

    # Check for extended naming conventions (e.g., Q4_K_M -> Q4_K)
    return any(
        quant_type.endswith(suffix) and quant_type[: -len(suffix)] in member_names
        for suffix in _GGUF_QUANT_SUFFIXES
    )


def is_valid_gguf_quant_type(gguf_quant_type: str) -> bool:
    """Return whether gguf_quant_type is an accepted GGUF quant type.

    Two forms are accepted:

    - a base quant type as recognized by _is_base_gguf_quant_type, i.e. an
      exact GGML quant type (e.g., Q4_K, IQ1_S) or one with a size suffix
      (e.g., Q4_K_M, Q3_K_S, Q5_K_L);
    - a vendor-prefixed quant type (e.g., UD-Q4_K_XL): everything before the
      last hyphen is treated as the vendor prefix and the part after it must
      be a base quant type.

    Both the prefix and the part after the hyphen must be non-empty, so
    "-Q4_K" and "UD-" are rejected.
    """
    if _is_base_gguf_quant_type(gguf_quant_type):
        return True

    # rpartition gives an empty vendor part when there is no hyphen at all,
    # so one emptiness check covers "no hyphen", "leading hyphen" and
    # "trailing hyphen".
    vendor, _, base = gguf_quant_type.rpartition("-")
    if not vendor or not base:
        return False

    return _is_base_gguf_quant_type(base)


def split_remote_gguf(model: str | Path) -> tuple[str, str]:
"""Split the model into repo_id and quant type."""
model = str(model)
Expand All @@ -84,7 +109,8 @@ def split_remote_gguf(model: str | Path) -> tuple[str, str]:
f"Wrong GGUF model or invalid GGUF quant type: {model}.\n"
"- It should be in repo_id:quant_type format.\n"
f"- Valid base quant types: {GGMLQuantizationType._member_names_}\n"
f"- Extended suffixes also supported: {_GGUF_QUANT_SUFFIXES}",
f"- Extended suffixes also supported: {_GGUF_QUANT_SUFFIXES}\n"
"- Vendor-prefixed types also supported (e.g., UD-Q4_K_XL)",
)


Expand Down
Loading