diff --git a/tests/integration/defs/test_e2e.py b/tests/integration/defs/test_e2e.py
index c69384aacd0..b8d740a23b1 100644
--- a/tests/integration/defs/test_e2e.py
+++ b/tests/integration/defs/test_e2e.py
@@ -1517,6 +1517,31 @@ def test_ptq_quickstart_advanced_mtp(llm_root, llm_venv, model_name,
     ])
 
 
+@pytest.mark.skip_less_device_memory(80000)
+@pytest.mark.skip_less_device(8)
+@pytest.mark.parametrize("model_name,model_path", [
+    pytest.param('DeepSeek-V3', 'DeepSeek-V3', marks=skip_pre_hopper),
+])
+def test_ptp_quickstart_advanced_deepseek_v3_2nodes_8gpus(
+        llm_root, llm_venv, model_name, model_path):
+    # "RCCA https://nvbugs/5163844"
+    print(f"Testing {model_name}.")
+    example_root = Path(os.path.join(llm_root, "examples", "pytorch"))
+    llm_venv.run_cmd([
+        str(example_root / "quickstart_advanced.py"),
+        "--enable_overlap_scheduler",
+        "--model_dir",
+        f"{llm_models_root()}/{model_path}",
+        "--moe_ep_size=8",
+        "--tp_size=16",
+        "--use_cuda_graph",
+        "--kv_cache_fraction=0.5",
+        "--max_batch_size=32",
+        "--max_num_tokens=2048",
+        "--kv_cache_enable_block_reuse",
+    ])
+
+
 @pytest.mark.parametrize("model_name,model_path,eagle_model_path", [
     ("Llama-3.1-8b-Instruct", "llama-3.1-model/Llama-3.1-8B-Instruct",
      "EAGLE3-LLaMA3.1-Instruct-8B"),
@@ -1580,6 +1605,9 @@ def test_ptp_quickstart_advanced_deepseek_r1_8gpus(llm_root, llm_venv,
     pytest.param('Mixtral-8x7B-NVFP4',
                  'nvfp4-quantized/Mixtral-8x7B-Instruct-v0.1',
                  marks=skip_pre_blackwell),
+    pytest.param('Nemotron-Ultra-253B',
+                 'nemotron-nas/Llama-3_1-Nemotron-Ultra-253B-v1',
+                 marks=skip_pre_hopper),
 ])
 def test_ptp_quickstart_advanced_8gpus(llm_root, llm_venv, model_name,
                                        model_path):
diff --git a/tests/integration/test_lists/qa/examples_test_list.txt b/tests/integration/test_lists/qa/examples_test_list.txt
index c04b81f7a67..f392317cccc 100644
--- a/tests/integration/test_lists/qa/examples_test_list.txt
+++ b/tests/integration/test_lists/qa/examples_test_list.txt
@@ -425,14 +425,14 @@ accuracy/test_llm_api.py::TestQwen2_5_7BInstruct::test_fp8
 accuracy/test_llm_api_pytorch.py::TestLlama3_1_8B::test_nvfp4
 accuracy/test_llm_api_pytorch.py::TestLlama3_3_70BInstruct::test_fp8_tp4
 accuracy/test_llm_api_pytorch.py::TestLlama3_3_70BInstruct::test_nvfp4_tp4
-accuracy/test_llm_api_pytorch.py::TestMistral_7B::test_auto_dtype
+accuracy/test_llm_api_pytorch.py::TestMistral7B::test_auto_dtype
 accuracy/test_llm_api_pytorch.py::TestMixtral8x7B::test_fp8_tp2
 accuracy/test_llm_api_pytorch.py::TestMixtral8x7B::test_nvfp4_tp2
 accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[]
 accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_fp8_block_scales[]
 accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[]
 accuracy/test_llm_api_pytorch.py::TestMinitron4BBaseInstruct::test_fp8_prequantized
-accuracy/test_llm_api_pytorch.py::TestNemotronNas::test_auto_dtype
+accuracy/test_llm_api_pytorch.py::TestNemotronNas::test_auto_dtype_tp8
 accuracy/test_llm_api_pytorch.py::TestQwen2_7BInstruct::test_auto_dtype
 
 test_e2e.py::test_benchmark_sanity[bert_base] # 127.18s
@@ -465,6 +465,7 @@ test_e2e.py::test_ptp_quickstart_advanced_8gpus[Llama3.1-70B-FP8-llama-3.1-model
 test_e2e.py::test_ptp_quickstart_advanced_8gpus[Llama3.1-405B-FP8-llama-3.1-model/Llama-3.1-405B-Instruct-FP8]
 test_e2e.py::test_ptp_quickstart_advanced_8gpus[Mixtral-8x7B-BF16-Mixtral-8x7B-v0.1]
 test_e2e.py::test_ptp_quickstart_advanced_8gpus[Mixtral-8x7B-NVFP4-nvfp4-quantized/Mixtral-8x7B-Instruct-v0.1]
+test_e2e.py::test_ptp_quickstart_advanced_8gpus[Nemotron-Ultra-253B-nemotron-nas/Llama-3_1-Nemotron-Ultra-253B-v1]
 test_e2e.py::test_ptp_quickstart_advanced_deepseek_r1_8gpus[DeepSeek-R1-DeepSeek-R1/DeepSeek-R1]
 test_e2e.py::test_ptp_quickstart_multimodal[NVILA-8B-FP16-vila/NVILA-8B-image]
 test_e2e.py::test_ptp_quickstart_multimodal[NVILA-8B-FP16-vila/NVILA-8B-video]
diff --git a/tests/integration/test_lists/qa/llm_multinodes_function_test.txt b/tests/integration/test_lists/qa/llm_multinodes_function_test.txt
index 57a976ceb80..569d42f850a 100644
--- a/tests/integration/test_lists/qa/llm_multinodes_function_test.txt
+++ b/tests/integration/test_lists/qa/llm_multinodes_function_test.txt
@@ -12,5 +12,6 @@ examples/test_llama.py::test_llm_llama_v3_1_2nodes_8gpus[llama-3.1-405b-fp8-disa
 examples/test_llama.py::test_llm_llama_v3_1_2nodes_8gpus[llama-3.1-405b-fp8-disable_fp8-tp8pp2-infer]
 examples/test_mixtral.py::test_llm_mixtral_2nodes_8gpus[Mixtral-8x22B-v0.1-plugin-renormalize-tensor_parallel-build]
 examples/test_mixtral.py::test_llm_mixtral_2nodes_8gpus[Mixtral-8x22B-v0.1-plugin-renormalize-tensor_parallel-infer]
+test_e2e.py::test_ptp_quickstart_advanced_deepseek_v3_2nodes_8gpus[DeepSeek-V3-DeepSeek-V3]
 test_e2e.py::test_openai_multinodes_chat_tp16pp1
 test_e2e.py::test_openai_multinodes_chat_tp8pp2