Skip to content

Commit 93dc7ab

Browse files
committed
fix mem frac
Signed-off-by: Enwei Zhu <[email protected]>
1 parent 5b6c983 commit 93dc7ab

File tree

1 file changed

+4
-0
lines changed

1 file changed

+4
-0
lines changed

tests/integration/defs/accuracy/test_llm_api_pytorch.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -323,13 +323,15 @@ def test_guided_decoding_4gpus(self, backend: str, mocker):
323323
@pytest.mark.parametrize("backend", ["xgrammar", "llguidance"])
324324
def test_guided_decoding_with_eagle3(self, backend: str, mocker):
325325
mocker.patch.dict(os.environ, {"TRTLLM_XGUIDANCE_LENIENT": "1"})
326+
kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.8)
326327
spec_config = EagleDecodingConfig(
327328
max_draft_len=3,
328329
speculative_model_dir=
329330
f"{llm_models_root()}/EAGLE3-LLaMA3.1-Instruct-8B",
330331
eagle3_one_model=False)
331332
llm = LLM(self.MODEL_PATH,
332333
guided_decoding_backend=backend,
334+
kv_cache_config=kv_cache_config,
333335
speculative_config=spec_config,
334336
disable_overlap_scheduler=True)
335337
with llm:
@@ -340,10 +342,12 @@ def test_guided_decoding_with_eagle3(self, backend: str, mocker):
340342
@pytest.mark.parametrize("backend", ["xgrammar", "llguidance"])
341343
def test_guided_decoding_with_ngram(self, backend: str, mocker):
342344
mocker.patch.dict(os.environ, {"TRTLLM_XGUIDANCE_LENIENT": "1"})
345+
kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.8)
343346
spec_config = NGramDecodingConfig(max_draft_len=3,
344347
max_matching_ngram_size=3)
345348
llm = LLM(self.MODEL_PATH,
346349
guided_decoding_backend=backend,
350+
kv_cache_config=kv_cache_config,
347351
speculative_config=spec_config,
348352
disable_overlap_scheduler=True)
349353
with llm:

0 commit comments

Comments (0)