diff --git a/tests/integration/defs/examples/test_mixtral.py b/tests/integration/defs/examples/test_mixtral.py index 4429eeafb3f..f7712574380 100644 --- a/tests/integration/defs/examples/test_mixtral.py +++ b/tests/integration/defs/examples/test_mixtral.py @@ -890,9 +890,52 @@ def test_llm_mixtral_1gpu_fp4_llmapi( @skip_post_blackwell +@pytest.mark.parametrize("model_name", ['mixtral-8x7b-v0.1-AWQ']) +def test_llm_mixtral_int4_awq_1gpu_summary(llama_example_root, + llm_datasets_root, model_name, + llm_rouge_root, llm_venv, cmodel_dir, + engine_dir, + qcache_dir_without_install_package): + models_root = llm_models_root() + model_dir = os.path.join(models_root, model_name) + ckpt_dir = os.path.join(cmodel_dir, model_name) + + print("Convert checkpoint...") + convert_cmd = [ + f"{llama_example_root}/convert_checkpoint.py", + "--model_dir", + model_dir, + "--output_dir", + ckpt_dir, + ] + venv_check_call(llm_venv, convert_cmd) + + print("Build engines...") + build_cmd = [ + "trtllm-build", + f"--checkpoint_dir={ckpt_dir}", + f"--output_dir={engine_dir}", + ] + check_call(" ".join(build_cmd), shell=True, env=llm_venv._new_env) + + print("Run inference") + summary_cmd = generate_summary_cmd(llama_example_root, + hf_model_dir=model_dir, + data_type="fp16", + tensorrt_llm_rouge1_threshold=19.5, + engine_dir=engine_dir, + dataset_dir=llm_datasets_root, + rouge_dir=llm_rouge_root) + + venv_check_call(llm_venv, summary_cmd) + + +@skip_post_blackwell +@pytest.mark.skip_less_device(2) +@pytest.mark.skip_less_device_memory(80000) @pytest.mark.parametrize( "model_name", ['mixtral-8x7b-v0.1-AWQ', 'Mixtral-8x7B-Instruct-v0.1']) -def test_llm_mixtral_int4_awq_1gpu_summary(llama_example_root, +def test_llm_mixtral_int4_awq_2gpu_summary(llama_example_root, + llm_datasets_root, model_name, + llm_rouge_root, llm_venv, cmodel_dir, + engine_dir, @@ -909,6 +952,8 @@ def test_llm_mixtral_int4_awq_1gpu_summary(llama_example_root, model_dir, "--output_dir", ckpt_dir, + "--tp_size", + "2", ] 
venv_check_call(llm_venv, convert_cmd) else: @@ -921,7 +966,7 @@ def test_llm_mixtral_int4_awq_1gpu_summary(llama_example_root, dtype="float16", qformat="int4_awq", quantize_dir=qcache_dir_without_install_package, - tp_size=1, + tp_size=2, calib_size=32) print("Build engines...") @@ -941,4 +986,5 @@ def test_llm_mixtral_int4_awq_1gpu_summary(llama_example_root, dataset_dir=llm_datasets_root, rouge_dir=llm_rouge_root) - venv_check_call(llm_venv, summary_cmd) + venv_mpi_check_call(llm_venv, ["mpirun", "-n", "2", "--allow-run-as-root"], + summary_cmd) diff --git a/tests/integration/test_lists/qa/examples_test_list.txt b/tests/integration/test_lists/qa/examples_test_list.txt index 604161f69a0..70db6577e6d 100644 --- a/tests/integration/test_lists/qa/examples_test_list.txt +++ b/tests/integration/test_lists/qa/examples_test_list.txt @@ -183,7 +183,7 @@ examples/test_mixtral.py::test_llm_mixtral_wo_2gpus_summary[Mixtral-8x7B-v0.1-in examples/test_mixtral.py::test_llm_mixtral_wo_2gpus_summary[Mixtral-8x7B-v0.1-int8-nb:4] examples/test_mixtral.py::test_llm_mixtral_1gpu_fp4_llmapi[Mixtral-8x7B-Instruct-v0.1] examples/test_mixtral.py::test_llm_mixtral_int4_awq_1gpu_summary[mixtral-8x7b-v0.1-AWQ] -examples/test_mixtral.py::test_llm_mixtral_int4_awq_1gpu_summary[Mixtral-8x7B-Instruct-v0.1] +examples/test_mixtral.py::test_llm_mixtral_int4_awq_2gpu_summary[Mixtral-8x7B-Instruct-v0.1] examples/test_multimodal.py::test_llm_multimodal_general[Phi-3-vision-128k-instruct-pp:1-tp:1-float16-bs:1-cpp_e2e:False-nb:1] examples/test_multimodal.py::test_llm_multimodal_general[Phi-3-vision-128k-instruct-pp:1-tp:1-float16-bs:8-cpp_e2e:False-nb:1] examples/test_multimodal.py::test_llm_multimodal_general[Phi-3.5-vision-instruct-pp:1-tp:1-float16-bs:1-cpp_e2e:False-nb:1]