diff --git a/tests/model_optimizations/test_tinygemm2.py b/tests/model_optimizations/test_tinygemm2.py
index 68793d8249..dee92d2800 100644
--- a/tests/model_optimizations/test_tinygemm2.py
+++ b/tests/model_optimizations/test_tinygemm2.py
@@ -3,6 +3,10 @@
 import torch.nn.functional as F
 
 from flashinfer.utils import get_compute_capability
 
+pytestmark = pytest.mark.skip(
+    reason="tinygemm2 hangs on CI H100 runners — investigation in progress"
+)
+
 def _skip_if_not_sm90():
     cc = get_compute_capability(torch.device("cuda"))
diff --git a/tests/moe/test_trtllm_gen_fused_moe.py b/tests/moe/test_trtllm_gen_fused_moe.py
index 127e35fa97..4654a8f963 100644
--- a/tests/moe/test_trtllm_gen_fused_moe.py
+++ b/tests/moe/test_trtllm_gen_fused_moe.py
@@ -3475,6 +3475,7 @@ def test_mxfp8_block_scale_moe_relu2_non_gated(
         weight_processing=weight_processing,
         activation_type=ActivationType.Relu2,
         cache_permute_indices=cache_permute_indices,
+        logits_dtype=torch.bfloat16,
         zero_hidden_states=zero_hidden_states,
     )
 
@@ -3510,6 +3511,7 @@ def test_mxfp8_block_scale_moe_relu2_deepseekv3_topk22(cache_permute_indices):
         },
         activation_type=ActivationType.Relu2,
         cache_permute_indices=cache_permute_indices,
+        logits_dtype=torch.float32,
     )
 
 
@@ -3598,6 +3600,7 @@ def test_fp8_block_scale_autotune_valid_configs(autotune_case, cache_permute_ind
         },
         activation_type=autotune_case["activation_type"],
         cache_permute_indices=cache_permute_indices,
+        logits_dtype=torch.float32,
         zero_hidden_states=False,
     )
 
@@ -3659,6 +3662,7 @@ def test_fp8_per_tensor_autotune_valid_configs_nonefp8(
         },
         activation_type=autotune_case["activation_type"],
         cache_permute_indices=cache_permute_indices,
+        logits_dtype=torch.bfloat16,
         zero_hidden_states=False,
     )