diff --git a/tests/transformers_utils/test_utils.py b/tests/transformers_utils/test_utils.py
index 485c2efff77f..7f6356080af9 100644
--- a/tests/transformers_utils/test_utils.py
+++ b/tests/transformers_utils/test_utils.py
@@ -8,6 +8,7 @@
 from vllm.transformers_utils.gguf_utils import (
     is_gguf,
     is_remote_gguf,
+    is_valid_gguf_quant_type,
     split_remote_gguf,
 )
 from vllm.transformers_utils.utils import (
@@ -119,6 +120,16 @@ def test_is_remote_gguf_with_cloud_storage(self):
         assert not is_remote_gguf("s3://repo/model:Q4_K")
         assert not is_remote_gguf("gs://repo/model:Q8_0")
 
+    def test_vendor_prefixed_remote_gguf(self):
+        """Remote GGUF with vendor-prefixed quant types should be detected."""
+        assert is_remote_gguf("unsloth/Qwen3.5-35B-A3B-GGUF:UD-Q4_K_XL")
+        assert is_remote_gguf("repo/model:UD-Q4_K_M")
+        assert is_remote_gguf("repo/model:UD-Q3_K_S")
+        assert is_remote_gguf("repo/model:UD-F16")
+
+        # Invalid vendor-prefixed should still fail
+        assert not is_remote_gguf("repo/model:UD-INVALID")
+
 
 class TestSplitRemoteGGUF:
     """Test split_remote_gguf utility function."""
@@ -167,6 +178,14 @@ def test_split_remote_gguf_invalid(self):
         with pytest.raises(ValueError, match="Wrong GGUF model"):
             split_remote_gguf("s3://bucket/repo/model:Q2_K")
 
+    def test_split_vendor_prefixed_remote_gguf(self):
+        """split_remote_gguf should preserve full quant string including prefix."""
+        repo_id, quant_type = split_remote_gguf(
+            "unsloth/Qwen3.5-35B-A3B-GGUF:UD-Q4_K_XL"
+        )
+        assert repo_id == "unsloth/Qwen3.5-35B-A3B-GGUF"
+        assert quant_type == "UD-Q4_K_XL"
+
 
 class TestIsGGUF:
     """Test is_gguf utility function."""
@@ -218,3 +237,47 @@ def test_is_gguf_edge_cases(self):
         # Cloud storage
         assert not is_gguf("s3://bucket/repo/model:IQ1_S")
         assert not is_gguf("gs://bucket/repo/model:Q2_K")
+
+    def test_vendor_prefixed_is_gguf(self):
+        """is_gguf should recognize vendor-prefixed remote GGUF."""
+        assert is_gguf("unsloth/Qwen3.5-35B-A3B-GGUF:UD-Q4_K_XL")
+        assert is_gguf("repo/model:UD-Q4_K_M")
+        assert not is_gguf("repo/model:UD-INVALID")
+
+
+class TestIsValidGGUFQuantType:
+    """Test is_valid_gguf_quant_type utility function."""
+
+    def test_vendor_prefixed_quant_types(self):
+        """Vendor-prefixed quant types like UD-Q4_K_XL should be valid."""
+        # Unsloth Dynamic (UD-) prefix with size suffix
+        assert is_valid_gguf_quant_type("UD-Q4_K_XL")
+        assert is_valid_gguf_quant_type("UD-Q4_K_M")
+        assert is_valid_gguf_quant_type("UD-Q3_K_S")
+        assert is_valid_gguf_quant_type("UD-Q5_K_L")
+        assert is_valid_gguf_quant_type("UD-Q6_K_XXS")
+
+        # UD- prefix with exact GGML types (no size suffix)
+        assert is_valid_gguf_quant_type("UD-Q4_K")
+        assert is_valid_gguf_quant_type("UD-Q4_0")
+        assert is_valid_gguf_quant_type("UD-F16")
+        assert is_valid_gguf_quant_type("UD-BF16")
+        assert is_valid_gguf_quant_type("UD-IQ1_S")
+
+        # Other hypothetical vendor prefixes should also work
+        assert is_valid_gguf_quant_type("XX-Q4_K_M")
+
+        # Invalid base type after valid prefix should fail
+        assert not is_valid_gguf_quant_type("UD-INVALID")
+        assert not is_valid_gguf_quant_type("UD-Q9_K_M")
+        assert not is_valid_gguf_quant_type("UD-")
+
+        # Empty prefix should fail (e.g., "-Q4_K" is not a valid quant type)
+        assert not is_valid_gguf_quant_type("-Q4_K")
+        assert not is_valid_gguf_quant_type("-")
+
+    def test_non_member_enum_attributes_rejected(self):
+        """Attributes of the enum class that are not members are invalid."""
+        assert not is_valid_gguf_quant_type("__members__")
+        assert not is_valid_gguf_quant_type("__name__")
+        assert not is_valid_gguf_quant_type("UD-__members__")
diff --git a/vllm/transformers_utils/gguf_utils.py b/vllm/transformers_utils/gguf_utils.py
index 3faa5ee60e9f..be490160e0e6 100644
--- a/vllm/transformers_utils/gguf_utils.py
+++ b/vllm/transformers_utils/gguf_utils.py
@@ -54,26 +54,54 @@ def is_remote_gguf(model: str | Path) -> bool:
 _GGUF_QUANT_SUFFIXES = ("_M", "_S", "_L", "_XL", "_XS", "_XXS")
 
 
-def is_valid_gguf_quant_type(gguf_quant_type: str) -> bool:
-    """Check if the quant type is a valid GGUF quant type.
+def _is_base_gguf_quant_type(quant_type: str) -> bool:
+    """Check if quant_type matches a GGMLQuantizationType, with optional
+    size suffix.
 
-    Supports both exact GGML quant types (e.g., Q4_K, IQ1_S) and
-    extended naming conventions (e.g., Q4_K_M, Q3_K_S, Q5_K_L).
+    Checks for exact enum match first, then tries stripping known suffixes
+    (e.g., Q4_K_M -> Q4_K).
     """
-    # Check for exact match first
-    if getattr(GGMLQuantizationType, gguf_quant_type, None) is not None:
+    # Test membership via __members__ instead of getattr(): getattr() also
+    # resolves non-member attributes of the enum class (e.g. "__members__"),
+    # which would wrongly be accepted as quant types.
+    if quant_type in GGMLQuantizationType.__members__:
         return True
 
-    # Check for extended naming conventions (e.g., Q4_K_M -> Q4_K)
     for suffix in _GGUF_QUANT_SUFFIXES:
-        if gguf_quant_type.endswith(suffix):
-            base_type = gguf_quant_type[: -len(suffix)]
-            if getattr(GGMLQuantizationType, base_type, None) is not None:
+        if quant_type.endswith(suffix):
+            base_type = quant_type[: -len(suffix)]
+            if base_type in GGMLQuantizationType.__members__:
                 return True
 
     return False
 
 
+def is_valid_gguf_quant_type(gguf_quant_type: str) -> bool:
+    """Check if the quant type is a valid GGUF quant type.
+
+    Supports exact GGML quant types (e.g., Q4_K, IQ1_S), extended naming
+    conventions with size suffixes (e.g., Q4_K_M, Q3_K_S, Q5_K_L), and
+    vendor-prefixed quant types (e.g., UD-Q4_K_XL) where the prefix is
+    separated by a hyphen.
+
+    Vendor prefixes are stripped because GGML quant type names never
+    contain hyphens — any hyphen indicates a vendor prefix (e.g., "UD-"
+    for Unsloth Dynamic quantization).
+    """
+    if _is_base_gguf_quant_type(gguf_quant_type):
+        return True
+
+    # Try stripping vendor prefix (e.g., "UD-Q4_K_XL" -> "Q4_K_XL").
+    # GGML quant type names never contain hyphens, so a hyphen indicates
+    # a vendor prefix.
+    if "-" in gguf_quant_type:
+        prefix, remainder = gguf_quant_type.rsplit("-", 1)
+        if prefix and remainder:
+            return _is_base_gguf_quant_type(remainder)
+
+    return False
+
+
 def split_remote_gguf(model: str | Path) -> tuple[str, str]:
     """Split the model into repo_id and quant type."""
     model = str(model)
@@ -84,7 +112,8 @@ def split_remote_gguf(model: str | Path) -> tuple[str, str]:
         f"Wrong GGUF model or invalid GGUF quant type: {model}.\n"
         "- It should be in repo_id:quant_type format.\n"
         f"- Valid base quant types: {GGMLQuantizationType._member_names_}\n"
-        f"- Extended suffixes also supported: {_GGUF_QUANT_SUFFIXES}",
+        f"- Extended suffixes also supported: {_GGUF_QUANT_SUFFIXES}\n"
+        "- Vendor-prefixed types also supported (e.g., UD-Q4_K_XL)",
     )
 
 