Skip to content

Commit d9913e1

Browse files
committed
fix mem frac
Signed-off-by: Enwei Zhu <[email protected]>
1 parent 37ad65a commit d9913e1

File tree

1 file changed

+4
-0
lines changed

1 file changed

+4
-0
lines changed

tests/integration/defs/accuracy/test_llm_api_pytorch.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -325,13 +325,15 @@ def test_guided_decoding_4gpus(self, backend: str, mocker):
     @pytest.mark.parametrize("backend", ["xgrammar", "llguidance"])
     def test_guided_decoding_with_eagle3(self, backend: str, mocker):
         mocker.patch.dict(os.environ, {"TRTLLM_XGUIDANCE_LENIENT": "1"})
+        kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.8)
         spec_config = EagleDecodingConfig(
             max_draft_len=3,
             speculative_model_dir=
             f"{llm_models_root()}/EAGLE3-LLaMA3.1-Instruct-8B",
             eagle3_one_model=False)
         llm = LLM(self.MODEL_PATH,
                   guided_decoding_backend=backend,
+                  kv_cache_config=kv_cache_config,
                   speculative_config=spec_config,
                   disable_overlap_scheduler=True)
         with llm:
@@ -342,10 +344,12 @@ def test_guided_decoding_with_eagle3(self, backend: str, mocker):
     @pytest.mark.parametrize("backend", ["xgrammar", "llguidance"])
     def test_guided_decoding_with_ngram(self, backend: str, mocker):
         mocker.patch.dict(os.environ, {"TRTLLM_XGUIDANCE_LENIENT": "1"})
+        kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.8)
         spec_config = NGramDecodingConfig(max_draft_len=3,
                                           max_matching_ngram_size=3)
         llm = LLM(self.MODEL_PATH,
                   guided_decoding_backend=backend,
+                  kv_cache_config=kv_cache_config,
                   speculative_config=spec_config,
                   disable_overlap_scheduler=True)
         with llm:

0 commit comments

Comments
 (0)