vllm-project · hmellor · Mar 12, 2026 · Mar 12, 2026 · Mar 12, 2026 · Mar 12, 2026
@@ -6,7 +6,9 @@
 """
 
 import pytest
+from packaging.version import Version
 from transformers import BitsAndBytesConfig
+from transformers import __version__ as TRANSFORMERS_VERSION
 
 from tests.quantization.utils import is_quant_method_supported
 from vllm.platforms import current_platform
@@ -138,6 +140,12 @@ def test_load_pp_4bit_bnb_model(model_name, description) -> None:
     compare_two_settings(model_name, common_args, pp_args)
 
 
+@pytest.mark.skipif(
+    Version(TRANSFORMERS_VERSION) >= Version("5.0.0"),
+    reason="Need to add support for quantizing MoE experts with bnb"
+    " in transformers v5. See"
+    " https://github.com/bitsandbytes-foundation/bitsandbytes/issues/1849",
+)
 @pytest.mark.skipif(
     not is_quant_method_supported("bitsandbytes"),
     reason="bitsandbytes is not supported on this GPU type.",