NVIDIA · Barry-Delaney · Apr 21, 2025 · Apr 21, 2025
diff --git a/tests/integration/defs/examples/test_mixtral.py b/tests/integration/defs/examples/test_mixtral.py
@@ -888,9 +888,51 @@ def test_llm_mixtral_1gpu_fp4_llmapi(
     venv_check_call(llm_venv, mmlu_cmd)
 
 
+@pytest.mark.parametrize("model_name", ['mixtral-8x7b-v0.1-AWQ'])
+def test_llm_mixtral_int4_awq_1gpu_summary(llama_example_root,
+                                           llm_datasets_root, model_name,
+                                           llm_rouge_root, llm_venv, cmodel_dir,
+                                           engine_dir,
+                                           qcache_dir_without_install_package):
+    models_root = llm_models_root()
+    model_dir = os.path.join(models_root, model_name)
+    ckpt_dir = os.path.join(cmodel_dir, model_name)
+
+    print("Convert checkpoint...")
+    convert_cmd = [
+        f"{llama_example_root}/convert_checkpoint.py",
+        "--model_dir",
+        model_dir,
+        "--output_dir",
+        ckpt_dir,
+    ]
+    venv_check_call(llm_venv, convert_cmd)
+
+    print("Build engines...")
+    build_cmd = [
+        "trtllm-build",
+        f"--checkpoint_dir={ckpt_dir}",
+        f"--output_dir={engine_dir}",
+    ]
+    check_call(" ".join(build_cmd), shell=True, env=llm_venv._new_env)
+
+    print("Run inference")
+    summary_cmd = generate_summary_cmd(llama_example_root,
+                                       hf_model_dir=model_dir,
+                                       data_type="fp16",
+                                       tensorrt_llm_rouge1_threshold=19.5,
+                                       engine_dir=engine_dir,
+                                       dataset_dir=llm_datasets_root,
+                                       rouge_dir=llm_rouge_root)
+
+    venv_check_call(llm_venv, summary_cmd)
+
+
+@pytest.mark.skip_less_device(2)
+@pytest.mark.skip_less_device_memory(80000)
 @pytest.mark.parametrize(
     "model_name", ['mixtral-8x7b-v0.1-AWQ', 'Mixtral-8x7B-Instruct-v0.1'])
-def test_llm_mixtral_int4_awq_1gpu_summary(llama_example_root,
+def test_llm_mixtral_int4_awq_2gpu_summary(llama_example_root,
                                            llm_datasets_root, model_name,
                                            llm_rouge_root, llm_venv, cmodel_dir,
                                            engine_dir,
@@ -907,6 +949,8 @@ def test_llm_mixtral_int4_awq_1gpu_summary(llama_example_root,
             model_dir,
             "--output_dir",
             ckpt_dir,
+            "--tp_size",
+            "2",  # argv entries must be strings; a bare int breaks subprocess
         ]
         venv_check_call(llm_venv, convert_cmd)
     else:
@@ -919,7 +963,7 @@ def test_llm_mixtral_int4_awq_1gpu_summary(llama_example_root,
             dtype="float16",
             qformat="int4_awq",
             quantize_dir=qcache_dir_without_install_package,
-            tp_size=1,
+            tp_size=2,
             calib_size=32)
 
     print("Build engines...")
@@ -939,4 +983,5 @@ def test_llm_mixtral_int4_awq_1gpu_summary(llama_example_root,
                                        dataset_dir=llm_datasets_root,
                                        rouge_dir=llm_rouge_root)
 
-    venv_check_call(llm_venv, summary_cmd)
+    venv_mpi_check_call(llm_venv, ["mpirun", "-n", "2", "--allow-run-as-root"],
+                        summary_cmd)
diff --git a/tests/integration/test_lists/qa/examples_test_list.txt b/tests/integration/test_lists/qa/examples_test_list.txt
@@ -183,7 +183,7 @@ examples/test_mixtral.py::test_llm_mixtral_wo_2gpus_summary[Mixtral-8x7B-v0.1-in
 examples/test_mixtral.py::test_llm_mixtral_wo_2gpus_summary[Mixtral-8x7B-v0.1-int8-nb:4]
 examples/test_mixtral.py::test_llm_mixtral_1gpu_fp4_llmapi[Mixtral-8x7B-Instruct-v0.1]
 examples/test_mixtral.py::test_llm_mixtral_int4_awq_1gpu_summary[mixtral-8x7b-v0.1-AWQ]
-examples/test_mixtral.py::test_llm_mixtral_int4_awq_1gpu_summary[Mixtral-8x7B-Instruct-v0.1]
+examples/test_mixtral.py::test_llm_mixtral_int4_awq_2gpu_summary[Mixtral-8x7B-Instruct-v0.1]
 examples/test_multimodal.py::test_llm_multimodal_general[Phi-3-vision-128k-instruct-pp:1-tp:1-float16-bs:1-cpp_e2e:False-nb:1]
 examples/test_multimodal.py::test_llm_multimodal_general[Phi-3-vision-128k-instruct-pp:1-tp:1-float16-bs:8-cpp_e2e:False-nb:1]
 examples/test_multimodal.py::test_llm_multimodal_general[Phi-3.5-vision-instruct-pp:1-tp:1-float16-bs:1-cpp_e2e:False-nb:1]