Skip to content

Commit 48eecac

Browse files
committed
add test
Signed-off-by: Enwei Zhu <[email protected]>
1 parent 19ea5aa commit 48eecac

File tree

3 files changed

+40
-4
lines changed

3 files changed

+40
-4
lines changed
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,6 @@
 meta-llama/Llama-3.1-8B-Instruct:
 - accuracy: 74.00
+- spec_dec_algo: Eagle
+  accuracy: 74.00
+- spec_dec_algo: NGram
+  accuracy: 74.00

tests/integration/defs/accuracy/test_llm_api_pytorch.py

Lines changed: 32 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -302,9 +302,7 @@ def test_ngram(self):
     @pytest.mark.parametrize("backend", ["xgrammar", "llguidance"])
     def test_guided_decoding(self, backend: str, mocker):
         mocker.patch.dict(os.environ, {"TRTLLM_XGUIDANCE_LENIENT": "1"})
-        llm = LLM(self.MODEL_PATH,
-                  guided_decoding_backend=backend,
-                  cuda_graph_config=CudaGraphConfig())
+        llm = LLM(self.MODEL_PATH, guided_decoding_backend=backend)
         with llm:
             task = JsonModeEval(self.MODEL_NAME)
             task.evaluate(llm)
@@ -316,12 +314,42 @@ def test_guided_decoding_4gpus(self, backend: str, mocker):
         mocker.patch.dict(os.environ, {"TRTLLM_XGUIDANCE_LENIENT": "1"})
         with LLM(self.MODEL_PATH,
                  guided_decoding_backend=backend,
-                 cuda_graph_config=CudaGraphConfig(),
                  tensor_parallel_size=2,
                  pipeline_parallel_size=2) as llm:
             task = JsonModeEval(self.MODEL_NAME)
             task.evaluate(llm)
 
+    @skip_pre_hopper
+    @pytest.mark.parametrize("backend", ["xgrammar", "llguidance"])
+    def test_guided_decoding_with_eagle3(self, backend: str, mocker):
+        mocker.patch.dict(os.environ, {"TRTLLM_XGUIDANCE_LENIENT": "1"})
+        spec_config = EagleDecodingConfig(
+            max_draft_len=3,
+            speculative_model_dir=
+            f"{llm_models_root()}/EAGLE3-LLaMA3.1-Instruct-8B",
+            eagle3_one_model=False)
+        llm = LLM(self.MODEL_PATH,
+                  guided_decoding_backend=backend,
+                  speculative_config=spec_config,
+                  disable_overlap_scheduler=True)
+        with llm:
+            task = JsonModeEval(self.MODEL_NAME)
+            task.evaluate(llm)
+
+    @skip_pre_hopper
+    @pytest.mark.parametrize("backend", ["xgrammar", "llguidance"])
+    def test_guided_decoding_with_ngram(self, backend: str, mocker):
+        mocker.patch.dict(os.environ, {"TRTLLM_XGUIDANCE_LENIENT": "1"})
+        spec_config = NGramDecodingConfig(max_draft_len=3,
+                                          max_matching_ngram_size=3)
+        llm = LLM(self.MODEL_PATH,
+                  guided_decoding_backend=backend,
+                  speculative_config=spec_config,
+                  disable_overlap_scheduler=True)
+        with llm:
+            task = JsonModeEval(self.MODEL_NAME)
+            task.evaluate(llm)
+
 
 class TestLlama3_2_1B(LlmapiAccuracyTestHarness):
     MODEL_NAME = "meta-llama/Llama-3.2-1B"

tests/integration/test_lists/test-db/l0_h100.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ l0_h100:
 - accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_bfloat16[attn_backend=TRTLLM-torch_compile=True]
 - accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_chunked_prefill[attn_backend=TRTLLM] TIMEOUT (90)
 - accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_guided_decoding[xgrammar]
+- accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_guided_decoding_with_eagle3[xgrammar]
 - accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8[fp8kv=False-attn_backend=TRTLLM-torch_compile=False]
 - accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8[fp8kv=False-attn_backend=TRTLLM-torch_compile=True]
 - accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8[fp8kv=True-attn_backend=TRTLLM-torch_compile=False]
@@ -208,6 +209,9 @@ l0_h100:
 - accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_fp8_block_scales[latency-torch_compile=False]
 - accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_fp8_block_scales[latency-torch_compile=True]
 - accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_guided_decoding[llguidance]
+- accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_guided_decoding_with_eagle3[llguidance]
+- accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_guided_decoding_with_ngram[xgrammar]
+- accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_guided_decoding_with_ngram[llguidance]
 - test_e2e.py::test_ptp_quickstart_multimodal[mistral-small-3.1-24b-instruct-Mistral-Small-3.1-24B-Instruct-2503-image-True]
 - test_e2e.py::test_ptp_quickstart_multimodal[mistral-small-3.1-24b-instruct-Mistral-Small-3.1-24B-Instruct-2503-mixture_text_image-True]
 - condition:
- condition:

0 commit comments

Comments
 (0)