From fd8716006ef2872ebbedb036a7b874b86e84c5ff Mon Sep 17 00:00:00 2001
From: qqiao
Date: Wed, 13 Aug 2025 03:35:59 -0700
Subject: [PATCH 1/2] Waive failed cases on main

Signed-off-by: qqiao
---
 tests/integration/test_lists/waives.txt           | 14 ++++++++++++++
 tests/unittest/_torch/test_attention_mla.py       |  6 ++++++
 tests/unittest/llmapi/test_llm_kv_cache_events.py |  1 +
 3 files changed, 21 insertions(+)

diff --git a/tests/integration/test_lists/waives.txt b/tests/integration/test_lists/waives.txt
index cb2d68fe503..49c875145e4 100644
--- a/tests/integration/test_lists/waives.txt
+++ b/tests/integration/test_lists/waives.txt
@@ -292,3 +292,17 @@ accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_multi_gpus[latency]
 accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_multi_gpus[latency_trtllmgen] SKIP (https://nvbugs/5445466)
 examples/test_qwen2audio.py::test_llm_qwen2audio_single_gpu[qwen2_audio_7b_instruct] SKIP (https://nvbugs/5447530)
 examples/test_nemotron_nas.py::test_nemotron_nas_summary_2gpu[DeciLM-7B] SKIP (https://nvbugs/5444636)
+examples/test_multimodal.py::test_llm_multimodal_general[Qwen2-VL-7B-Instruct-pp:1-tp:1-float16-bs:1-cpp_e2e:False-nb:4] SKIP (https://nvbugs/5453709)
+examples/test_multimodal.py::test_llm_fp8_multimodal_general[fp8-fp8-cnn_dailymail-Qwen2-VL-7B-Instruct-pp:1-tp:1-bfloat16-bs:1-cpp_e2e:False] SKIP (https://nvbugs/5453709)
+examples/test_multimodal.py::test_llm_multimodal_general[VILA1.5-3b-pp:1-tp:1-float16-bs:1-cpp_e2e:False-nb:1] SKIP (https://nvbugs/5453709)
+examples/test_llama.py::test_llm_llama_v2_1gpu_auto_parallel[llama-v2-7b-hf] SKIP (https://nvbugs/5453742)
+accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_nvfp4[dep4_latency_moe_cutlass-torch_compile=False] SKIP (https://nvbugs/5403818)
+accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_nvfp4[dep4_latency_moe_cutlass-torch_compile=True] SKIP (https://nvbugs/5403818)
+accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_nvfp4[dep4_latency_moe_trtllm-torch_compile=False] SKIP (https://nvbugs/5403818)
+accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_nvfp4[dep4_latency_moe_trtllm-torch_compile=True] SKIP (https://nvbugs/5403818)
+accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_nvfp4[tep4_latency_moe_cutlass-torch_compile=False] SKIP (https://nvbugs/5403818)
+accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_nvfp4[tep4_latency_moe_cutlass-torch_compile=True] SKIP (https://nvbugs/5403818)
+accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_nvfp4[tep4_latency_moe_trtllm-torch_compile=False] SKIP (https://nvbugs/5403818)
+accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_nvfp4[tep4_latency_moe_trtllm-torch_compile=True] SKIP (https://nvbugs/5403818)
+accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_multi_gpus[throughput_tp8] SKIP (https://nvbugs/5442827)
+test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-70B-FP8-llama-3.1-model/Llama-3.1-70B-Instruct-FP8] SKIP (https://nvbugs/5453992)
diff --git a/tests/unittest/_torch/test_attention_mla.py b/tests/unittest/_torch/test_attention_mla.py
index 182c80e34ef..cef85f34576 100644
--- a/tests/unittest/_torch/test_attention_mla.py
+++ b/tests/unittest/_torch/test_attention_mla.py
@@ -388,6 +388,12 @@ def test_attention_mla(scenario: Scenario, context_sequence_lengths: List[int],
     device = torch.device('cuda')
     dtype = scenario.dtype
     kv_cache_dtype = scenario.kv_cache_dtype
+
+    FAILED_CSL = [777, 912, 431, 42, 266, 989, 524]
+    if (kv_cache_dtype is torch.float8_e4m3fn
+            and context_sequence_lengths == FAILED_CSL):
+        pytest.skip("https://nvbugs/5453806")
+
     print(
         f"--------------------------------Test for scenario: {scenario} start--------------------------------"
     )
diff --git a/tests/unittest/llmapi/test_llm_kv_cache_events.py b/tests/unittest/llmapi/test_llm_kv_cache_events.py
index c75d42ae21a..f505bd03838 100644
--- a/tests/unittest/llmapi/test_llm_kv_cache_events.py
+++ b/tests/unittest/llmapi/test_llm_kv_cache_events.py
@@ -247,6 +247,7 @@ def test_llm_kv_events_api():
     check_events(llm, requests, sampling_params)
 
 
+@pytest.mark.skip(reason="https://nvbugs/5451407")
 @skip_single_gpu
 @pytest.mark.threadleak(enabled=False)
 def test_llm_api_attention_dp_kv_events():

From d60c913a37771d95d48af6da032178c09561c191 Mon Sep 17 00:00:00 2001
From: qqiao
Date: Wed, 13 Aug 2025 04:14:03 -0700
Subject: [PATCH 2/2] Waive failed tests in latest post-merge 2237

Signed-off-by: qqiao
---
 tests/integration/test_lists/waives.txt | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tests/integration/test_lists/waives.txt b/tests/integration/test_lists/waives.txt
index 49c875145e4..13b6e93bbb1 100644
--- a/tests/integration/test_lists/waives.txt
+++ b/tests/integration/test_lists/waives.txt
@@ -306,3 +306,7 @@ accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_nvfp4[tep4_latency_moe
 accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_nvfp4[tep4_latency_moe_trtllm-torch_compile=True] SKIP (https://nvbugs/5403818)
 accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_multi_gpus[throughput_tp8] SKIP (https://nvbugs/5442827)
 test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-70B-FP8-llama-3.1-model/Llama-3.1-70B-Instruct-FP8] SKIP (https://nvbugs/5453992)
+accuracy/test_llm_api_pytorch.py::TestMistralSmall24B::test_auto_dtype SKIP (https://nvbugs/5454875)
+accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_multi_gpus[throughput_tp8] SKIP (https://nvbugs/5445466)
+accuracy/test_llm_api_pytorch.py::TestQwen3_235B_A22B::test_nvfp4[latency_moe_cutlass] SKIP (https://nvbugs/5454898)
+accuracy/test_llm_api_pytorch.py::TestQwen3_235B_A22B::test_nvfp4[latency_moe_trtllm] SKIP (https://nvbugs/5454898)