diff --git a/tests/lora/test_chatglm3_tp.py b/tests/lora/test_chatglm3_tp.py
index d8058c5f87a8..f4f151180dec 100644
--- a/tests/lora/test_chatglm3_tp.py
+++ b/tests/lora/test_chatglm3_tp.py
@@ -58,7 +58,6 @@ def test_chatglm3_lora(chatglm3_lora_files):
         max_loras=4,
         max_lora_rank=64,
         trust_remote_code=True,
-        enable_chunked_prefill=True,
     )

     output1 = do_sample(llm, chatglm3_lora_files, lora_id=1)
@@ -70,7 +69,6 @@ def test_chatglm3_lora(chatglm3_lora_files):


 @multi_gpu_test(num_gpus=4)
-@create_new_process_for_each_test()
 def test_chatglm3_lora_tp4(chatglm3_lora_files):
     llm = vllm.LLM(
         MODEL_PATH,
@@ -81,7 +79,6 @@ def test_chatglm3_lora_tp4(chatglm3_lora_files):
         tensor_parallel_size=4,
         trust_remote_code=True,
         fully_sharded_loras=False,
-        enable_chunked_prefill=True,
     )

     output1 = do_sample(llm, chatglm3_lora_files, lora_id=1)
@@ -93,7 +90,6 @@ def test_chatglm3_lora_tp4(chatglm3_lora_files):


 @multi_gpu_test(num_gpus=4)
-@create_new_process_for_each_test()
 def test_chatglm3_lora_tp4_fully_sharded_loras(chatglm3_lora_files):
     # https://github.com/NVIDIA/nccl/issues/1790, set a lower value for
     # gpu_memory_utilization here because NCCL >= 2.26.3 seems to use
@@ -107,7 +103,6 @@ def test_chatglm3_lora_tp4_fully_sharded_loras(chatglm3_lora_files):
         tensor_parallel_size=4,
         trust_remote_code=True,
         fully_sharded_loras=True,
-        enable_chunked_prefill=True,
         gpu_memory_utilization=0.85,
     )
     output1 = do_sample(llm, chatglm3_lora_files, lora_id=1)
diff --git a/tests/lora/test_llama_tp.py b/tests/lora/test_llama_tp.py
index 50fd63d35cde..e1d6a8674a01 100644
--- a/tests/lora/test_llama_tp.py
+++ b/tests/lora/test_llama_tp.py
@@ -113,7 +113,6 @@ def test_llama_lora(sql_lora_files):


 @multi_gpu_test(num_gpus=4)
-@create_new_process_for_each_test()
 def test_llama_lora_tp4(sql_lora_files):
     llm = vllm.LLM(
         MODEL_PATH,
@@ -127,7 +126,6 @@ def test_llama_lora_tp4(sql_lora_files):


 @multi_gpu_test(num_gpus=4)
-@create_new_process_for_each_test()
 def test_llama_lora_tp4_fully_sharded_loras(sql_lora_files):
     llm = vllm.LLM(
         MODEL_PATH,
@@ -142,7 +140,6 @@ def test_llama_lora_tp4_fully_sharded_loras(sql_lora_files):


 @multi_gpu_test(num_gpus=2)
-@create_new_process_for_each_test()
 def test_tp2_serialize_and_deserialize_lora(
     tmp_path, sql_lora_files, sql_lora_huggingface_id
 ):
diff --git a/tests/lora/test_minicpmv_tp.py b/tests/lora/test_minicpmv_tp.py
index ce98fe2f8613..1cf8ed602b6a 100644
--- a/tests/lora/test_minicpmv_tp.py
+++ b/tests/lora/test_minicpmv_tp.py
@@ -8,7 +8,7 @@
 from vllm.lora.request import LoRARequest
 from vllm.platforms import current_platform

-from ..utils import create_new_process_for_each_test
+from ..utils import multi_gpu_test

 MODEL_PATH = "openbmb/MiniCPM-Llama3-V-2_5"

@@ -88,7 +88,7 @@ def test_minicpmv_lora(minicpmv_lora_files):
     current_platform.is_rocm(),
     reason="MiniCPM-V dependency xformers incompatible with ROCm",
 )
-@create_new_process_for_each_test()
+@multi_gpu_test(num_gpus=4)
 def test_minicpmv_tp4_wo_fully_sharded_loras(minicpmv_lora_files):
     llm = vllm.LLM(
         MODEL_PATH,
@@ -112,7 +112,7 @@ def test_minicpmv_tp4_wo_fully_sharded_loras(minicpmv_lora_files):
     current_platform.is_rocm(),
     reason="MiniCPM-V dependency xformers incompatible with ROCm",
 )
-@create_new_process_for_each_test()
+@multi_gpu_test(num_gpus=4)
 def test_minicpmv_tp4_fully_sharded_loras(minicpmv_lora_files):
     llm = vllm.LLM(
         MODEL_PATH,