diff --git a/tests/attention/test_fmha_v2_prefill.py b/tests/attention/test_fmha_v2_prefill.py index c31aeb2b94..90d075c649 100644 --- a/tests/attention/test_fmha_v2_prefill.py +++ b/tests/attention/test_fmha_v2_prefill.py @@ -3,6 +3,10 @@ import math from typing import Optional, Tuple, Union +pytestmark = pytest.mark.skip( + reason="todo(jimmyzho): temporarily skip this test due to hangs" +) + import flashinfer from flashinfer.prefill import fmha_v2_prefill_deepseek from tests.utils_fp8 import to_float8 @@ -837,6 +841,10 @@ def test_trtllm_fmha_v2_prefill( and mask_mode == "SLIDING_WINDOW" ): pytest.skip("Skip due to bug in fp8 sliding window") + if mask_mode == "SLIDING_WINDOW": + pytest.skip("todo(jimmyzho): temporarily skip sliding window test due to hang") + if dtype == torch.float8_e4m3fn and o_dtype == torch.float8_e4m3fn: + pytest.skip("todo(jimmyzho): temporarily skip fp8 tests due to hang") run_trtllm_fmha_v2_prefill_case( input_layout=input_layout, batch_size=batch_size, @@ -955,7 +963,8 @@ def test_trtllm_fmha_v2_prefill_attention_sinks( if not is_sm90a_supported(torch.device("cuda")): pytest.skip("FMHA v2 requires SM90+ (Hopper) GPUs.") - + if mask_mode == "SLIDING_WINDOW": + pytest.skip("todo(jimmyzho): temporarily skip sliding window test due to hang") torch.manual_seed(42) device = torch.device("cuda")