diff --git a/tests/generation/test_utils.py b/tests/generation/test_utils.py index 545b696d6737..cbcb0665eb30 100644 --- a/tests/generation/test_utils.py +++ b/tests/generation/test_utils.py @@ -1610,7 +1610,7 @@ def test_generate_from_inputs_embeds(self, _, num_beams): inputs_dict.pop("pixel_values_images", None) # 2.C - No easy fix, let's skip the check that compares the outputs from `input_ids` and `inputs_embeds` has_complex_embeds_computation = any( - model_name in model_class.__name__.lower() for model_name in ["moshi"] + model_name in model_class.__name__.lower() for model_name in ["moshi", "qwen2vl"] ) # 3 - `inputs_dict` doesn't contain `attention_mask`. When `attention_mask` is not passed to generate, # we infer it from `input_ids`. The last test case will fail if there is a pad token in the original input.