Skip to content

Commit e05f79c

Browse files
committed
improve tests
Signed-off-by: Xin He (SW-GPU) <[email protected]>
1 parent 9c358c2 commit e05f79c

File tree

2 files changed

+5
-2
lines changed

2 files changed

+5
-2
lines changed

tests/integration/defs/accuracy/test_disaggregated_serving.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -523,6 +523,7 @@ def test_ctx_pp_gen_tp_asymmetric(self, ctx_pp, gen_tp, testset):
523523

524524
@pytest.mark.skip_less_device_memory(140000)
525525
@pytest.mark.timeout(3600)
526+
@pytest.mark.skip_less_device(8)
526527
class TestLlama4ScoutInstruct(LlmapiAccuracyTestHarness):
527528
MODEL_NAME = "meta-llama/Llama-4-Scout-17B-16E-Instruct"
528529
MODEL_PATH = f"{llm_models_root()}/llama4-models/Llama-4-Scout-17B-16E-Instruct"
@@ -561,6 +562,7 @@ def test_auto_dtype(self, overlap_scheduler):
561562

562563

563564
@pytest.mark.timeout(3600)
565+
@pytest.mark.skip_less_device(8)
564566
class TestDeepSeekV3Lite(LlmapiAccuracyTestHarness):
565567
MODEL_NAME = "deepseek-ai/DeepSeek-V3-Lite"
566568
MODEL_PATH = f"{llm_models_root()}/DeepSeek-V3-Lite/bf16"

tests/integration/defs/accuracy/test_llm_api_pytorch.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -250,7 +250,7 @@ def test_eagle3(self, overlap_scheduler, eagle3_one_model):
250250
enable_padding=True),
251251
)
252252
kv_cache_config = KvCacheConfig(
253-
enable_block_reuse=True
253+
enable_block_reuse=True, free_gpu_memory_fraction=0.8
254254
) # both one-model and two-model supports this feature
255255

256256
eagle_model_dir = f"{llm_models_root()}/EAGLE3-LLaMA3.1-Instruct-8B"
@@ -280,7 +280,8 @@ def test_ngram(self):
280280
cuda_graph_config=CudaGraphConfig(batch_sizes=[1]),
281281
)
282282

283-
kv_cache_config = KvCacheConfig(enable_block_reuse=False)
283+
kv_cache_config = KvCacheConfig(enable_block_reuse=False,
284+
free_gpu_memory_fraction=0.8)
284285

285286
spec_config = NGramDecodingConfig(
286287
max_draft_len=4,

0 commit comments

Comments
 (0)