Skip to content

Commit b555f1f

Browse files
authored
[None][chore] Add failed cases into waives.txt (#8229)
Signed-off-by: Xin He (SW-GPU) <[email protected]>
1 parent 795a051 commit b555f1f

File tree

3 files changed

+3
-36
lines changed

3 files changed

+3
-36
lines changed

tests/integration/defs/accuracy/test_llm_api_pytorch.py

Lines changed: 1 addition & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -2131,42 +2131,7 @@ def test_nvfp4_multi_gpus_chunked_prefill(self, tp_size, pp_size, ep_size,
21312131
task = GSM8K(self.MODEL_NAME)
21322132
task.evaluate(llm)
21332133

2134-
def test_nvfp4_multi_gpus_corner_case(self):
2135-
"""
2136-
This test is used to test the corner case of the NVFP4 model.
2137-
When using the same value for max_seq_len and max_num_tokens, there will be no
2138-
enough kv block for the dummy requests in CUDA graph warmup when creating
2139-
the py_executor before estimating kv cache. Then CUDA graph capture will be
2140-
triggered when estimating kv cache. This may cause some errors.
2141-
More info in https://nvbugs/5485325.
2142-
"""
2143-
kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.80,
2144-
dtype="fp8",
2145-
enable_block_reuse=False)
2146-
pytorch_config = dict(disable_overlap_scheduler=False,
2147-
cuda_graph_config=CudaGraphConfig(
2148-
enable_padding=True, max_batch_size=1024),
2149-
moe_config=MoeConfig(backend="TRTLLM"))
2150-
2151-
mtp_config = MTPDecodingConfig(num_nextn_predict_layers=1)
2152-
with LLM(f"{llm_models_root()}/DeepSeek-R1/DeepSeek-R1-FP4",
2153-
tensor_parallel_size=8,
2154-
pipeline_parallel_size=1,
2155-
moe_expert_parallel_size=8,
2156-
kv_cache_config=kv_cache_config,
2157-
**pytorch_config,
2158-
enable_attention_dp=False,
2159-
speculative_config=mtp_config,
2160-
max_seq_len=5120,
2161-
max_num_tokens=5120) as llm:
2162-
2163-
assert llm.args.quant_config.quant_algo == QuantAlgo.NVFP4
2164-
2165-
task = MMLU(self.MODEL_NAME)
2166-
task.evaluate(llm)
2167-
task = GSM8K(self.MODEL_NAME)
2168-
task.evaluate(llm)
2169-
2134+
@skip_pre_blackwell
21702135
def test_nvfp4_multi_gpus_corner_case(self):
21712136
"""
21722137
This test is used to test the corner case of the NVFP4 model.

tests/integration/defs/test_e2e.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1637,6 +1637,7 @@ def test_openai_perf_metrics(llm_root, llm_venv):
16371637
str(test_root / "_test_openai_perf_metrics.py")])
16381638

16391639

1640+
@skip_pre_hopper
16401641
def test_openai_chat_harmony(llm_root, llm_venv):
16411642
test_root = unittest_path() / "llmapi" / "apps"
16421643
llm_venv.run_cmd(

tests/integration/test_lists/waives.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -361,3 +361,4 @@ cpp/test_e2e.py::test_benchmarks[bart-90] SKIP (https://nvbugs/5550689)
361361
examples/test_nemotron_nas.py::test_nemotron_nano_8b_lora_torch[Llama-3.1-Nemotron-Nano-8B-v1] SKIP (https://nvbugs/5563469)
362362
unittest/bindings/test_executor_bindings.py::test_request_perf_metrics_draft SKIP (https://nvbugs/5565590)
363363
accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_1gpu[True-True-cutlass-auto] SKIP (https://nvbugs/5568676)
364+
test_e2e.py::test_ptp_quickstart_multimodal[qwen2-vl-7b-instruct-Qwen2-VL-7B-Instruct-image-False] SKIP (https://nvbugs/5547437)

0 commit comments

Comments (0)