Skip to content

Commit 715428c

Browse files
authored
test: add test cases for 0.19 release (#3608)
* fix test name
  Signed-off-by: Ivy Zhang <[email protected]>
* add quickstart test for nemotron-ultra
  Signed-off-by: Ivy Zhang <[email protected]>
* add rcca multi-node test case for deepseek-v3
  Signed-off-by: Ivy Zhang <[email protected]>
* add rcca info
  Signed-off-by: Ivy Zhang <[email protected]>
---------
Signed-off-by: Ivy Zhang <[email protected]>
Signed-off-by: Ivy Zhang <[email protected]>
1 parent 3471d6c commit 715428c

File tree

3 files changed

+32
-2
lines changed

3 files changed

+32
-2
lines changed

tests/integration/defs/test_e2e.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1517,6 +1517,31 @@ def test_ptq_quickstart_advanced_mtp(llm_root, llm_venv, model_name,
15171517
])
15181518

15191519

1520+
@pytest.mark.skip_less_device_memory(80000)
1521+
@pytest.mark.skip_less_device(8)
1522+
@pytest.mark.parametrize("model_name,model_path", [
1523+
pytest.param('DeepSeek-V3', 'DeepSeek-V3', marks=skip_pre_hopper),
1524+
])
1525+
def test_ptp_quickstart_advanced_deepseek_v3_2nodes_8gpus(
1526+
llm_root, llm_venv, model_name, model_path):
1527+
# "RCCA https://nvbugs/5163844"
1528+
print(f"Testing {model_name}.")
1529+
example_root = Path(os.path.join(llm_root, "examples", "pytorch"))
1530+
llm_venv.run_cmd([
1531+
str(example_root / "quickstart_advanced.py"),
1532+
"--enable_overlap_scheduler",
1533+
"--model_dir",
1534+
f"{llm_models_root()}/{model_path}",
1535+
"--moe_ep_size=8",
1536+
"--tp_size=16",
1537+
"--use_cuda_graph",
1538+
"--kv_cache_fraction=0.5",
1539+
"--max_batch_size=32",
1540+
"--max_num_tokens=2048",
1541+
"--kv_cache_enable_block_reuse",
1542+
])
1543+
1544+
15201545
@pytest.mark.parametrize("model_name,model_path,eagle_model_path", [
15211546
("Llama-3.1-8b-Instruct", "llama-3.1-model/Llama-3.1-8B-Instruct",
15221547
"EAGLE3-LLaMA3.1-Instruct-8B"),
@@ -1580,6 +1605,9 @@ def test_ptp_quickstart_advanced_deepseek_r1_8gpus(llm_root, llm_venv,
15801605
pytest.param('Mixtral-8x7B-NVFP4',
15811606
'nvfp4-quantized/Mixtral-8x7B-Instruct-v0.1',
15821607
marks=skip_pre_blackwell),
1608+
pytest.param('Nemotron-Ultra-253B',
1609+
'nemotron-nas/Llama-3_1-Nemotron-Ultra-253B-v1',
1610+
marks=skip_pre_hopper),
15831611
])
15841612
def test_ptp_quickstart_advanced_8gpus(llm_root, llm_venv, model_name,
15851613
model_path):

tests/integration/test_lists/qa/examples_test_list.txt

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -425,14 +425,14 @@ accuracy/test_llm_api.py::TestQwen2_5_7BInstruct::test_fp8
425425
accuracy/test_llm_api_pytorch.py::TestLlama3_1_8B::test_nvfp4
426426
accuracy/test_llm_api_pytorch.py::TestLlama3_3_70BInstruct::test_fp8_tp4
427427
accuracy/test_llm_api_pytorch.py::TestLlama3_3_70BInstruct::test_nvfp4_tp4
428-
accuracy/test_llm_api_pytorch.py::TestMistral_7B::test_auto_dtype
428+
accuracy/test_llm_api_pytorch.py::TestMistral7B::test_auto_dtype
429429
accuracy/test_llm_api_pytorch.py::TestMixtral8x7B::test_fp8_tp2
430430
accuracy/test_llm_api_pytorch.py::TestMixtral8x7B::test_nvfp4_tp2
431431
accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[]
432432
accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_fp8_block_scales[]
433433
accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[]
434434
accuracy/test_llm_api_pytorch.py::TestMinitron4BBaseInstruct::test_fp8_prequantized
435-
accuracy/test_llm_api_pytorch.py::TestNemotronNas::test_auto_dtype
435+
accuracy/test_llm_api_pytorch.py::TestNemotronNas::test_auto_dtype_tp8
436436
accuracy/test_llm_api_pytorch.py::TestQwen2_7BInstruct::test_auto_dtype
437437

438438
test_e2e.py::test_benchmark_sanity[bert_base] # 127.18s
@@ -465,6 +465,7 @@ test_e2e.py::test_ptp_quickstart_advanced_8gpus[Llama3.1-70B-FP8-llama-3.1-model
465465
test_e2e.py::test_ptp_quickstart_advanced_8gpus[Llama3.1-405B-FP8-llama-3.1-model/Llama-3.1-405B-Instruct-FP8]
466466
test_e2e.py::test_ptp_quickstart_advanced_8gpus[Mixtral-8x7B-BF16-Mixtral-8x7B-v0.1]
467467
test_e2e.py::test_ptp_quickstart_advanced_8gpus[Mixtral-8x7B-NVFP4-nvfp4-quantized/Mixtral-8x7B-Instruct-v0.1]
468+
test_e2e.py::test_ptp_quickstart_advanced_8gpus[Nemotron-Ultra-253B-nemotron-nas/Llama-3_1-Nemotron-Ultra-253B-v1]
468469
test_e2e.py::test_ptp_quickstart_advanced_deepseek_r1_8gpus[DeepSeek-R1-DeepSeek-R1/DeepSeek-R1]
469470
test_e2e.py::test_ptp_quickstart_multimodal[NVILA-8B-FP16-vila/NVILA-8B-image]
470471
test_e2e.py::test_ptp_quickstart_multimodal[NVILA-8B-FP16-vila/NVILA-8B-video]

tests/integration/test_lists/qa/llm_multinodes_function_test.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,5 +12,6 @@ examples/test_llama.py::test_llm_llama_v3_1_2nodes_8gpus[llama-3.1-405b-fp8-disa
1212
examples/test_llama.py::test_llm_llama_v3_1_2nodes_8gpus[llama-3.1-405b-fp8-disable_fp8-tp8pp2-infer]
1313
examples/test_mixtral.py::test_llm_mixtral_2nodes_8gpus[Mixtral-8x22B-v0.1-plugin-renormalize-tensor_parallel-build]
1414
examples/test_mixtral.py::test_llm_mixtral_2nodes_8gpus[Mixtral-8x22B-v0.1-plugin-renormalize-tensor_parallel-infer]
15+
test_e2e.py::test_ptp_quickstart_advanced_deepseek_v3_2nodes_8gpus[DeepSeek-V3-DeepSeek-V3]
1516
test_e2e.py::test_openai_multinodes_chat_tp16pp1
1617
test_e2e.py::test_openai_multinodes_chat_tp8pp2

0 commit comments

Comments
 (0)