def paddleocr_vl_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
    """Patch the HfRunner to fix a ``create_causal_mask`` keyword mismatch.

    The PaddleOCR-VL HF model passes ``inputs_embeds`` to
    ``create_causal_mask``, but transformers renamed this parameter to
    ``input_embeds``. This replaces the ``create_causal_mask`` attribute of
    the module that defines ``hf_model.model.model``'s class with a wrapper
    that forwards the ``inputs_embeds`` keyword as ``input_embeds``.

    The patch is skipped (and ``hf_model`` returned untouched) when:

    * the defining module is not present in ``sys.modules``;
    * that module has no ``create_causal_mask`` attribute;
    * the attribute has already been wrapped by an earlier call; or
    * the current ``create_causal_mask`` signature already accepts
      ``inputs_embeds``, in which case renaming would break the call.

    Args:
        hf_model: Runner whose ``model.model`` object identifies the module
            to patch.

    Returns:
        The same ``hf_model`` instance that was passed in.
    """
    import functools
    import inspect
    import sys

    model_module = sys.modules.get(type(hf_model.model.model).__module__)
    if model_module is None:
        return hf_model

    original_create_causal_mask = getattr(model_module, "create_causal_mask", None)
    if original_create_causal_mask is None:
        return hf_model

    # The hook may run more than once per process; never wrap our own wrapper.
    if getattr(original_create_causal_mask, "_paddleocr_vl_patched", False):
        return hf_model

    try:
        parameters = inspect.signature(original_create_causal_mask).parameters
    except (TypeError, ValueError):
        # Signature is not introspectable; fall back to always renaming.
        parameters = None

    # If the callee natively accepts ``inputs_embeds`` there is no mismatch,
    # and renaming the keyword would itself raise a TypeError.
    if parameters is not None and "inputs_embeds" in parameters:
        return hf_model

    @functools.wraps(original_create_causal_mask)
    def patched_create_causal_mask(*args, **kwargs):
        if "inputs_embeds" in kwargs:
            kwargs["input_embeds"] = kwargs.pop("inputs_embeds")
        return original_create_causal_mask(*args, **kwargs)

    # Marker checked above to keep repeated patching idempotent.
    patched_create_causal_mask._paddleocr_vl_patched = True  # type: ignore[attr-defined]

    model_module.create_causal_mask = patched_create_causal_mask  # type: ignore[attr-defined]
    return hf_model