diff --git a/tests/model_optimizations/test_tinygemm2.py b/tests/model_optimizations/test_tinygemm2.py
index 68793d8249..dee92d2800 100644
--- a/tests/model_optimizations/test_tinygemm2.py
+++ b/tests/model_optimizations/test_tinygemm2.py
@@ -3,6 +3,10 @@
 import torch.nn.functional as F
 
 from flashinfer.utils import get_compute_capability
 
+pytestmark = pytest.mark.skip(
+    reason="tinygemm2 hangs on CI H100 runners — investigation in progress"
+)
+
 def _skip_if_not_sm90():
     cc = get_compute_capability(torch.device("cuda"))
diff --git a/tests/moe/test_trtllm_gen_fused_moe.py b/tests/moe/test_trtllm_gen_fused_moe.py
index 127e35fa97..4654a8f963 100644
--- a/tests/moe/test_trtllm_gen_fused_moe.py
+++ b/tests/moe/test_trtllm_gen_fused_moe.py
@@ -3475,6 +3475,7 @@ def test_mxfp8_block_scale_moe_relu2_non_gated(
         weight_processing=weight_processing,
         activation_type=ActivationType.Relu2,
         cache_permute_indices=cache_permute_indices,
+        logits_dtype=torch.bfloat16,
         zero_hidden_states=zero_hidden_states,
     )
 
@@ -3510,6 +3511,7 @@ def test_mxfp8_block_scale_moe_relu2_deepseekv3_topk22(cache_permute_indices):
         },
         activation_type=ActivationType.Relu2,
         cache_permute_indices=cache_permute_indices,
+        logits_dtype=torch.float32,
     )
 
 
@@ -3598,6 +3600,7 @@ def test_fp8_block_scale_autotune_valid_configs(autotune_case, cache_permute_ind
         },
         activation_type=autotune_case["activation_type"],
         cache_permute_indices=cache_permute_indices,
+        logits_dtype=torch.float32,
         zero_hidden_states=False,
     )
 
@@ -3659,6 +3662,7 @@ def test_fp8_per_tensor_autotune_valid_configs_nonefp8(
         },
         activation_type=autotune_case["activation_type"],
         cache_permute_indices=cache_permute_indices,
+        logits_dtype=torch.bfloat16,
         zero_hidden_states=False,
     )