diff --git a/vllm/model_executor/layers/quantization/quark/quark.py b/vllm/model_executor/layers/quantization/quark/quark.py
index 78c64bac6187..e1242cbdcb93 100644
--- a/vllm/model_executor/layers/quantization/quark/quark.py
+++ b/vllm/model_executor/layers/quantization/quark/quark.py
@@ -64,12 +64,30 @@ def __init__(
         self.dynamic_mxfp4_quant = False
 
-    def maybe_update_config(self, model_name: str, revision: str | None = None):
+    def maybe_update_config(
+        self,
+        model_name: str,
+        revision: str | None = None,
+        *,
+        trust_remote_code: bool = False,
+    ):
+        """Load the HF config for ``model_name`` into ``self.hf_config``.
+
+        Custom config code shipped with a checkpoint runs only when the caller
+        opts in through ``trust_remote_code``. There is deliberately no
+        automatic retry with ``trust_remote_code=True``: that would execute
+        checkpoint-supplied code without the user's consent.
+
+        Args:
+            model_name: Model identifier passed to ``get_config`` as ``model``.
+            revision: Optional revision passed to ``get_config``.
+            trust_remote_code: Passed to ``get_config``; defaults to ``False``.
+        """
         self.hf_config = get_config(
             model=model_name,
-            trust_remote_code=False,  # or get from model_config if available
+            trust_remote_code=trust_remote_code,
             revision=revision,
             config_format="auto",
         )
 
         quant_config = getattr(self.hf_config, "quantization_config", None)
         if quant_config is not None: