diff --git a/tests/integration/test_lists/waives.txt b/tests/integration/test_lists/waives.txt index d36fdd239aa..f61ea65d4da 100644 --- a/tests/integration/test_lists/waives.txt +++ b/tests/integration/test_lists/waives.txt @@ -339,9 +339,8 @@ unittest/_torch/multi_gpu_modeling/test_llama4.py::test_llama4[pp1-ep1-disable_a accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16_4gpus_online_eplb[mtp_nextn=2] SKIP (https://nvbugs/5444687) accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus_online_eplb[fp8kv=True] SKIP (https://nvbugs/5444687) accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus[moe_backend=CUTLASS-mtp_nextn=0-pp4-fp8kv=True-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=False] SKIP (https://nvbugs/5565604) -unittest/_torch/modules/test_fused_moe.py::test_fused_moe_fp8_blockwise_wide_ep[MNNVL] SKIP (https://nvbugs/5565565) -unittest/_torch/modules/test_fused_moe.py::test_fused_moe_fp8_blockwise_wide_ep[NotEnabled] SKIP (https://nvbugs/5565565) unittest/_torch/multi_gpu_modeling/test_llama3.py::test_llama_3_3 SKIP (https://nvbugs/5565559) disaggregated/test_disaggregated_single_gpu.py::test_disaggregated_spec_dec_batch_slot_limit[False-False-EAGLE3-LLaMA3.1-Instruct-8B-Llama-3.1-8B-Instruct] SKIP (https://nvbugs/5565549) accuracy/test_llm_api_pytorch.py::TestMistralSmall24B::test_auto_dtype SKIP (https://nvbugs/5565530) accuracy/test_llm_api_pytorch.py::TestGemma3_27BInstruct::test_fp8_prequantized SKIP (https://nvbugs/5565521) +test_e2e.py::test_openai_chat_harmony SKIP (https://nvbugs/5575829) diff --git a/tests/unittest/_torch/modules/test_fused_moe.py b/tests/unittest/_torch/modules/test_fused_moe.py index 49a7e30e9ba..b33bb41886d 100644 --- a/tests/unittest/_torch/modules/test_fused_moe.py +++ b/tests/unittest/_torch/modules/test_fused_moe.py @@ -639,6 +639,7 @@ def set_tensor_value_4(x, num_row, num_cols): x.copy_(repeated) +@pytest.mark.skip(reason="https://nvbugs/5565565") @skip_pre_blackwell @pytest.mark.skipif(torch.cuda.device_count() < 4, reason="needs 4 GPUs to run this test") diff --git a/tests/unittest/_torch/thop/parallel/test_moe.py b/tests/unittest/_torch/thop/parallel/test_moe.py index 06fb7e97846..559aba5eb06 100644 --- a/tests/unittest/_torch/thop/parallel/test_moe.py +++ b/tests/unittest/_torch/thop/parallel/test_moe.py @@ -991,6 +991,7 @@ class TestMoeFp4: the default tactic selection works. This reduces unnecessary test runs for CI """ + @pytest.mark.skip(reason="https://nvbugs/5575841") @pytest.mark.parametrize("num_tokens", [1, 1024]) @pytest.mark.parametrize("hidden_size", [1024]) @pytest.mark.parametrize("intermediate_size", [1024, 768, 384, 192]) @@ -1076,6 +1077,7 @@ def test_autotune(self, num_tokens, hidden_size, intermediate_size, use_autotune=True, use_topk_as_input=False) + @pytest.mark.skip(reason="https://nvbugs/5575841") @pytest.mark.parametrize("num_tokens", [1, 150]) @pytest.mark.parametrize("hidden_size", [1024]) @pytest.mark.parametrize("intermediate_size", [1024])