From 6e02527287fc407b73919dd331dacf026982c962 Mon Sep 17 00:00:00 2001
From: Barry Kang <43644113+Barry-Delaney@users.noreply.github.com>
Date: Mon, 21 Apr 2025 03:21:50 +0000
Subject: [PATCH] Fix ModelOpt Mixtral AWQ OOM

ModelOpt int4_awq quantization of Mixtral-8x7B can run out of memory on a
single GPU. Keep a 1-GPU summary test that consumes the pre-quantized
mixtral-8x7b-v0.1-AWQ checkpoint, move the ModelOpt quantization path into
a new 2-GPU test that quantizes with tp_size=2 and runs inference under
mpirun, and update the QA test list accordingly.

Signed-off-by: Barry Kang <43644113+Barry-Delaney@users.noreply.github.com>
---
 .../integration/defs/examples/test_mixtral.py | 51 ++++++++++++++++++++++++++++++++++++++++++++++++---
 .../test_lists/qa/examples_test_list.txt      |  2 +-
 2 files changed, 49 insertions(+), 4 deletions(-)

diff --git a/tests/integration/defs/examples/test_mixtral.py b/tests/integration/defs/examples/test_mixtral.py
index fb54dcaa83b..addb4ee4600 100644
--- a/tests/integration/defs/examples/test_mixtral.py
+++ b/tests/integration/defs/examples/test_mixtral.py
@@ -888,9 +888,51 @@ def test_llm_mixtral_1gpu_fp4_llmapi(
     venv_check_call(llm_venv, mmlu_cmd)
 
 
+@pytest.mark.parametrize("model_name", ['mixtral-8x7b-v0.1-AWQ'])
+def test_llm_mixtral_int4_awq_1gpu_summary(llama_example_root,
+                                           llm_datasets_root, model_name,
+                                           llm_rouge_root, llm_venv, cmodel_dir,
+                                           engine_dir,
+                                           qcache_dir_without_install_package):
+    models_root = llm_models_root()
+    model_dir = os.path.join(models_root, model_name)
+    ckpt_dir = os.path.join(cmodel_dir, model_name)
+
+    print("Convert checkpoint...")
+    convert_cmd = [
+        f"{llama_example_root}/convert_checkpoint.py",
+        "--model_dir",
+        model_dir,
+        "--output_dir",
+        ckpt_dir,
+    ]
+    venv_check_call(llm_venv, convert_cmd)
+
+    print("Build engines...")
+    build_cmd = [
+        "trtllm-build",
+        f"--checkpoint_dir={ckpt_dir}",
+        f"--output_dir={engine_dir}",
+    ]
+    check_call(" ".join(build_cmd), shell=True, env=llm_venv._new_env)
+
+    print("Run inference")
+    summary_cmd = generate_summary_cmd(llama_example_root,
+                                       hf_model_dir=model_dir,
+                                       data_type="fp16",
+                                       tensorrt_llm_rouge1_threshold=19.5,
+                                       engine_dir=engine_dir,
+                                       dataset_dir=llm_datasets_root,
+                                       rouge_dir=llm_rouge_root)
+
+    venv_check_call(llm_venv, summary_cmd)
+
+
+@pytest.mark.skip_less_device(2)
+@pytest.mark.skip_less_device_memory(80000)
 @pytest.mark.parametrize(
     "model_name", ['mixtral-8x7b-v0.1-AWQ', 'Mixtral-8x7B-Instruct-v0.1'])
-def test_llm_mixtral_int4_awq_1gpu_summary(llama_example_root,
+def test_llm_mixtral_int4_awq_2gpu_summary(llama_example_root,
                                            llm_datasets_root, model_name,
                                            llm_rouge_root, llm_venv, cmodel_dir,
                                            engine_dir,
@@ -907,6 +949,8 @@ def test_llm_mixtral_int4_awq_1gpu_summary(llama_example_root,
             model_dir,
             "--output_dir",
             ckpt_dir,
+            "--tp_size",
+            2,
         ]
         venv_check_call(llm_venv, convert_cmd)
     else:
@@ -919,7 +963,7 @@ def test_llm_mixtral_int4_awq_1gpu_summary(llama_example_root,
             dtype="float16",
             qformat="int4_awq",
             quantize_dir=qcache_dir_without_install_package,
-            tp_size=1,
+            tp_size=2,
             calib_size=32)
 
     print("Build engines...")
@@ -939,4 +983,5 @@ def test_llm_mixtral_int4_awq_1gpu_summary(llama_example_root,
                                        dataset_dir=llm_datasets_root,
                                        rouge_dir=llm_rouge_root)
 
-    venv_check_call(llm_venv, summary_cmd)
+    venv_mpi_check_call(llm_venv, ["mpirun", "-n", "2", "--allow-run-as-root"],
+                        summary_cmd)
diff --git a/tests/integration/test_lists/qa/examples_test_list.txt b/tests/integration/test_lists/qa/examples_test_list.txt
index 1a18f33e6a2..6ad52dc11c1 100644
--- a/tests/integration/test_lists/qa/examples_test_list.txt
+++ b/tests/integration/test_lists/qa/examples_test_list.txt
@@ -183,7 +183,7 @@ examples/test_mixtral.py::test_llm_mixtral_wo_2gpus_summary[Mixtral-8x7B-v0.1-in
 examples/test_mixtral.py::test_llm_mixtral_wo_2gpus_summary[Mixtral-8x7B-v0.1-int8-nb:4]
 examples/test_mixtral.py::test_llm_mixtral_1gpu_fp4_llmapi[Mixtral-8x7B-Instruct-v0.1]
 examples/test_mixtral.py::test_llm_mixtral_int4_awq_1gpu_summary[mixtral-8x7b-v0.1-AWQ]
-examples/test_mixtral.py::test_llm_mixtral_int4_awq_1gpu_summary[Mixtral-8x7B-Instruct-v0.1]
+examples/test_mixtral.py::test_llm_mixtral_int4_awq_2gpu_summary[Mixtral-8x7B-Instruct-v0.1]
 examples/test_multimodal.py::test_llm_multimodal_general[Phi-3-vision-128k-instruct-pp:1-tp:1-float16-bs:1-cpp_e2e:False-nb:1]
 examples/test_multimodal.py::test_llm_multimodal_general[Phi-3-vision-128k-instruct-pp:1-tp:1-float16-bs:8-cpp_e2e:False-nb:1]
 examples/test_multimodal.py::test_llm_multimodal_general[Phi-3.5-vision-instruct-pp:1-tp:1-float16-bs:1-cpp_e2e:False-nb:1]
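
Note (editor's illustration, not part of the patch): after this change the two
AWQ summary variants are selected by the pytest node IDs shown in the QA test
list above. A minimal sketch of invoking them locally, assuming the repository
root as the working directory and a prepared model/dataset cache (both
assumptions about the local setup, not defined by this patch):

    # 1-GPU variant: consumes the pre-quantized mixtral-8x7b-v0.1-AWQ checkpoint
    pytest "tests/integration/defs/examples/test_mixtral.py::test_llm_mixtral_int4_awq_1gpu_summary[mixtral-8x7b-v0.1-AWQ]"

    # 2-GPU variant: ModelOpt int4_awq quantization with tp_size=2; the test
    # itself launches inference under "mpirun -n 2 --allow-run-as-root"
    pytest "tests/integration/defs/examples/test_mixtral.py::test_llm_mixtral_int4_awq_2gpu_summary[Mixtral-8x7B-Instruct-v0.1]"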