
Commit e35fca4

[TRTQA-2920][chore] improve hang tests (#6781)
Signed-off-by: Xin He (SW-GPU) <[email protected]>
1 parent 8845e0f commit e35fca4

2 files changed (+10 -3 lines)

tests/integration/defs/accuracy/test_disaggregated_serving.py

Lines changed: 7 additions & 1 deletion
@@ -505,18 +505,22 @@ def test_guided_decoding_with_eagle3(self, backend: str, mocker):
         task = JsonModeEval(self.MODEL_NAME)
         task.evaluate(llm)

-    @pytest.mark.skip_less_device(2)
     @pytest.mark.parametrize("tp,pp", [(1, 2), (2, 1), (2, 2)],
                              ids=["tp1pp2", "tp2pp1", "tp2pp2"])
     @pytest.mark.parametrize("testset", ["GSM8K", "MMLU"])
     def test_tp_pp_symmetric(self, tp, pp, testset):
+        if tp * pp * 2 > get_device_count():
+            pytest.skip(f"Not enough devices for tp={tp}*pp={pp} test")
         return run_parallel_test(self.MODEL_NAME, self.MODEL_PATH, pp, tp, pp,
                                  tp, get_accuracy_task(testset))

     @parametrize_with_ids("ctx_pp", [2, 4])
     @parametrize_with_ids("gen_tp", [1, 2])
     @pytest.mark.parametrize("testset", ["GSM8K", "MMLU"])
     def test_ctx_pp_gen_tp_asymmetric(self, ctx_pp, gen_tp, testset):
+        if ctx_pp * gen_tp * 2 > get_device_count():
+            pytest.skip(
+                f"Not enough devices for ctx_pp={ctx_pp}*gen_tp={gen_tp} test")
         return run_parallel_test(self.MODEL_NAME, self.MODEL_PATH, ctx_pp, 1, 1,
                                  gen_tp, get_accuracy_task(testset))
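
The guards added above skip at runtime when too few GPUs are visible instead of relying on a static marker. A minimal sketch of the same pattern follows (not part of this commit); `torch.cuda.device_count()` stands in for the suite's `get_device_count()` helper, and the factor of 2 is assumed to reflect the separate context and generation instances used by disaggregated serving.

# Sketch only: runtime device-count guard, with torch.cuda.device_count()
# standing in for the test suite's get_device_count() helper.
import pytest
import torch


def get_device_count() -> int:
    # Hypothetical helper; the real suite provides its own implementation.
    return torch.cuda.device_count()


@pytest.mark.parametrize("tp,pp", [(1, 2), (2, 1), (2, 2)])
def test_tp_pp_guard(tp, pp):
    # Assumption: one context instance plus one generation instance,
    # hence the factor of 2 in the device requirement.
    if tp * pp * 2 > get_device_count():
        pytest.skip(f"Not enough devices for tp={tp}*pp={pp} test")
    # ... the actual parallel accuracy test would run here ...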

@@ -527,6 +531,7 @@ class TestLlama4ScoutInstruct(LlmapiAccuracyTestHarness):
     MODEL_NAME = "meta-llama/Llama-4-Scout-17B-16E-Instruct"
     MODEL_PATH = f"{llm_models_root()}/llama4-models/Llama-4-Scout-17B-16E-Instruct"

+    @pytest.mark.skip_less_device(8)
     @pytest.mark.parametrize("overlap_scheduler", [False, True])
     def test_auto_dtype(self, overlap_scheduler):
         ctx_server_config = {"disable_overlap_scheduler": True}
@@ -565,6 +570,7 @@ class TestDeepSeekV3Lite(LlmapiAccuracyTestHarness):
     MODEL_NAME = "deepseek-ai/DeepSeek-V3-Lite"
     MODEL_PATH = f"{llm_models_root()}/DeepSeek-V3-Lite/bf16"

+    @pytest.mark.skip_less_device(8)
     @parametrize_with_ids("overlap_scheduler", [True, False])
     @parametrize_with_ids("mtp_nextn",
                           [0, pytest.param(2, marks=skip_pre_hopper)])
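
The `@pytest.mark.skip_less_device(8)` markers added to the Llama-4 Scout and DeepSeek-V3-Lite suites gate those tests on machines with at least eight GPUs. As a hypothetical illustration only (this is not the repository's actual conftest), such a marker can be honored from a collection hook:

# Hypothetical conftest.py sketch (not from this repository) showing one way
# a skip_less_device(n) marker could be enforced at collection time.
import pytest
import torch


def pytest_collection_modifyitems(config, items):
    available = torch.cuda.device_count()
    for item in items:
        marker = item.get_closest_marker("skip_less_device")
        if marker is not None and available < marker.args[0]:
            item.add_marker(
                pytest.mark.skip(
                    reason=f"needs {marker.args[0]} devices, found {available}"))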

tests/integration/defs/accuracy/test_llm_api_pytorch.py

Lines changed: 3 additions & 2 deletions
@@ -250,7 +250,7 @@ def test_eagle3(self, overlap_scheduler, eagle3_one_model):
                                               enable_padding=True),
         )
         kv_cache_config = KvCacheConfig(
-            enable_block_reuse=True
+            enable_block_reuse=True, free_gpu_memory_fraction=0.8
         ) # both one-model and two-model supports this feature

         eagle_model_dir = f"{llm_models_root()}/EAGLE3-LLaMA3.1-Instruct-8B"
@@ -280,7 +280,8 @@ def test_ngram(self):
             cuda_graph_config=CudaGraphConfig(batch_sizes=[1]),
         )

-        kv_cache_config = KvCacheConfig(enable_block_reuse=False)
+        kv_cache_config = KvCacheConfig(enable_block_reuse=False,
+                                        free_gpu_memory_fraction=0.8)

         spec_config = NGramDecodingConfig(
             max_draft_len=4,
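
Both hunks above set `free_gpu_memory_fraction=0.8` so the KV cache pool claims at most 80% of the remaining GPU memory, presumably to leave headroom for the draft/target models in these speculative-decoding tests and avoid the hangs this commit targets. A minimal sketch of the config, assuming the `tensorrt_llm.llmapi` import path these tests use elsewhere:

# Sketch only: a KV cache config with block reuse disabled and the cache pool
# capped at 80% of free GPU memory (import path assumed from the LLM API).
from tensorrt_llm.llmapi import KvCacheConfig

kv_cache_config = KvCacheConfig(enable_block_reuse=False,
                                free_gpu_memory_fraction=0.8)

Passing such a config to the test's LLM constructor bounds KV-cache growth rather than letting it take the default fraction of free memory.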
