3 changes: 2 additions & 1 deletion tests/test_modeling_common.py
@@ -4711,6 +4711,7 @@ def test_custom_4d_attention_mask(self):
         normalized_1 = F.softmax(out_shared_prefix_last_tokens)
         torch.testing.assert_close(normalized_0, normalized_1, rtol=1e-3, atol=1e-4)

+    @is_flaky(max_attempts=10)  # TODO @raushan: this test is VERY flaky on some VLMs, like paligemma
     def test_static_cache_matches_dynamic(self):
         """
         Tests that generating with static cache give almost same results as with dynamic cache.
@@ -4747,7 +4748,7 @@ def test_static_cache_matches_dynamic(self):
             output_logits=True,
             return_dict_in_generate=True,
         )
-        self.assertTrue(torch.allclose(dynamic_out.logits[0], static_out.logits[0], rtol=1e-3, atol=1e-3))
+        self.assertTrue(torch.allclose(dynamic_out.logits[0], static_out.logits[0], rtol=1e-3, atol=1e-4))
Contributor Author: restores the original precision


     # For now, Let's focus only on GPU for `torch.compile`
     @slow
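
For context on the two changes above: `is_flaky` is a retry decorator from the transformers test utilities, and the tightened `atol` makes the dynamic-vs-static logits comparison stricter. Below is a minimal sketch, not the library's actual implementation, assuming `is_flaky(max_attempts=10)` simply re-runs a failing test up to ten times; the `torch.allclose` demo at the end uses made-up logit values to illustrate why restoring `atol` from 1e-3 to 1e-4 can flip a comparison from passing to failing.

# Illustrative sketch only (not the transformers implementation): a retry
# decorator approximating is_flaky(max_attempts=10), plus a toy tolerance check.
import functools

import torch


def flaky(max_attempts=10):
    """Re-run a failing test up to `max_attempts` times before giving up."""

    def decorator(test_fn):
        @functools.wraps(test_fn)
        def wrapper(*args, **kwargs):
            last_error = None
            for _ in range(max_attempts):
                try:
                    return test_fn(*args, **kwargs)
                except AssertionError as err:
                    last_error = err
            raise last_error

        return wrapper

    return decorator


# torch.allclose passes when |a - b| <= atol + rtol * |b| for every element,
# so tightening atol from 1e-3 to 1e-4 mainly affects small-magnitude logits.
dynamic_logits = torch.tensor([0.1000, 1.2000])  # hypothetical values
static_logits = torch.tensor([0.1004, 1.2004])
print(torch.allclose(dynamic_logits, static_logits, rtol=1e-3, atol=1e-3))  # True  (loose)
print(torch.allclose(dynamic_logits, static_logits, rtol=1e-3, atol=1e-4))  # False (strict)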