Merged
37 changes: 1 addition & 36 deletions tests/integration/defs/accuracy/test_llm_api_pytorch.py
@@ -2131,42 +2131,7 @@ def test_nvfp4_multi_gpus_chunked_prefill(self, tp_size, pp_size, ep_size,
         task = GSM8K(self.MODEL_NAME)
         task.evaluate(llm)

-    def test_nvfp4_multi_gpus_corner_case(self):
-        """
-        This test exercises a corner case of the NVFP4 model.
-        When max_seq_len and max_num_tokens are set to the same value, there
-        are not enough KV blocks for the dummy requests during CUDA graph
-        warmup when the py_executor is created, before the KV cache size is
-        estimated. CUDA graph capture is then triggered during KV cache
-        estimation, which may cause errors. More info: https://nvbugs/5485325.
-        """
-        kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.80,
-                                        dtype="fp8",
-                                        enable_block_reuse=False)
-        pytorch_config = dict(disable_overlap_scheduler=False,
-                              cuda_graph_config=CudaGraphConfig(
-                                  enable_padding=True, max_batch_size=1024),
-                              moe_config=MoeConfig(backend="TRTLLM"))
-
-        mtp_config = MTPDecodingConfig(num_nextn_predict_layers=1)
-        with LLM(f"{llm_models_root()}/DeepSeek-R1/DeepSeek-R1-FP4",
-                 tensor_parallel_size=8,
-                 pipeline_parallel_size=1,
-                 moe_expert_parallel_size=8,
-                 kv_cache_config=kv_cache_config,
-                 **pytorch_config,
-                 enable_attention_dp=False,
-                 speculative_config=mtp_config,
-                 max_seq_len=5120,
-                 max_num_tokens=5120) as llm:
-
-            assert llm.args.quant_config.quant_algo == QuantAlgo.NVFP4
-
-            task = MMLU(self.MODEL_NAME)
-            task.evaluate(llm)
-            task = GSM8K(self.MODEL_NAME)
-            task.evaluate(llm)
-
+    @skip_pre_blackwell
     def test_nvfp4_multi_gpus_corner_case(self):
         """
         This test exercises a corner case of the NVFP4 model.
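The @skip_pre_blackwell marker added above, like the @skip_pre_hopper marker in the test_e2e.py hunk below, gates a test on GPU compute capability. A minimal sketch of how such pytest markers are typically defined, assuming a torch-based capability probe (an assumption; the repository's actual helpers may differ):

import pytest
import torch

def _sm_version() -> int:
    # Map the (major, minor) CUDA compute capability to an SM number,
    # e.g. (9, 0) -> 90 (Hopper), (10, 0) -> 100 (Blackwell).
    major, minor = torch.cuda.get_device_capability()
    return major * 10 + minor

# Hedged sketch, not the repo's actual definition. skipif evaluates its
# condition at collection time; checking torch.cuda.is_available() first
# short-circuits the capability query on CPU-only runners.
skip_pre_hopper = pytest.mark.skipif(
    not torch.cuda.is_available() or _sm_version() < 90,
    reason="requires compute capability 9.0 (Hopper) or newer")

skip_pre_blackwell = pytest.mark.skipif(
    not torch.cuda.is_available() or _sm_version() < 100,
    reason="requires compute capability 10.0 (Blackwell) or newer")

With markers like these, gated tests are still collected but are reported as skipped on older GPUs instead of failing.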
1 change: 1 addition & 0 deletions tests/integration/defs/test_e2e.py
@@ -1637,6 +1637,7 @@ def test_openai_perf_metrics(llm_root, llm_venv):
         str(test_root / "_test_openai_perf_metrics.py")])


+@skip_pre_hopper
 def test_openai_chat_harmony(llm_root, llm_venv):
     test_root = unittest_path() / "llmapi" / "apps"
     llm_venv.run_cmd(
1 change: 1 addition & 0 deletions tests/integration/test_lists/waives.txt
@@ -361,3 +361,4 @@ cpp/test_e2e.py::test_benchmarks[bart-90] SKIP (https://nvbugs/5550689)
 examples/test_nemotron_nas.py::test_nemotron_nano_8b_lora_torch[Llama-3.1-Nemotron-Nano-8B-v1] SKIP (https://nvbugs/5563469)
 unittest/bindings/test_executor_bindings.py::test_request_perf_metrics_draft SKIP (https://nvbugs/5565590)
 accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_1gpu[True-True-cutlass-auto] SKIP (https://nvbugs/5568676)
+test_e2e.py::test_ptp_quickstart_multimodal[qwen2-vl-7b-instruct-Qwen2-VL-7B-Instruct-image-False] SKIP (https://nvbugs/5547437)