@@ -302,9 +302,7 @@ def test_ngram(self):
     @pytest.mark.parametrize("backend", ["xgrammar", "llguidance"])
     def test_guided_decoding(self, backend: str, mocker):
         mocker.patch.dict(os.environ, {"TRTLLM_XGUIDANCE_LENIENT": "1"})
-        llm = LLM(self.MODEL_PATH,
-                  guided_decoding_backend=backend,
-                  cuda_graph_config=CudaGraphConfig())
+        llm = LLM(self.MODEL_PATH, guided_decoding_backend=backend)
         with llm:
             task = JsonModeEval(self.MODEL_NAME)
             task.evaluate(llm)
@@ -316,12 +314,42 @@ def test_guided_decoding_4gpus(self, backend: str, mocker):
         mocker.patch.dict(os.environ, {"TRTLLM_XGUIDANCE_LENIENT": "1"})
         with LLM(self.MODEL_PATH,
                  guided_decoding_backend=backend,
-                 cuda_graph_config=CudaGraphConfig(),
                  tensor_parallel_size=2,
                  pipeline_parallel_size=2) as llm:
             task = JsonModeEval(self.MODEL_NAME)
             task.evaluate(llm)
 
+    @skip_pre_hopper
+    @pytest.mark.parametrize("backend", ["xgrammar", "llguidance"])
+    def test_guided_decoding_with_eagle3(self, backend: str, mocker):
+        mocker.patch.dict(os.environ, {"TRTLLM_XGUIDANCE_LENIENT": "1"})
+        spec_config = EagleDecodingConfig(
+            max_draft_len=3,
+            speculative_model_dir=
+            f"{llm_models_root()}/EAGLE3-LLaMA3.1-Instruct-8B",
+            eagle3_one_model=False)
+        llm = LLM(self.MODEL_PATH,
+                  guided_decoding_backend=backend,
+                  speculative_config=spec_config,
+                  disable_overlap_scheduler=True)
+        with llm:
+            task = JsonModeEval(self.MODEL_NAME)
+            task.evaluate(llm)
+
+    @skip_pre_hopper
+    @pytest.mark.parametrize("backend", ["xgrammar", "llguidance"])
+    def test_guided_decoding_with_ngram(self, backend: str, mocker):
+        mocker.patch.dict(os.environ, {"TRTLLM_XGUIDANCE_LENIENT": "1"})
+        spec_config = NGramDecodingConfig(max_draft_len=3,
+                                          max_matching_ngram_size=3)
+        llm = LLM(self.MODEL_PATH,
+                  guided_decoding_backend=backend,
+                  speculative_config=spec_config,
+                  disable_overlap_scheduler=True)
+        with llm:
+            task = JsonModeEval(self.MODEL_NAME)
+            task.evaluate(llm)
+
 
 class TestLlama3_2_1B(LlmapiAccuracyTestHarness):
     MODEL_NAME = "meta-llama/Llama-3.2-1B"
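
For readers following along, here is a minimal, self-contained sketch of what the new tests exercise: a guided-decoding backend constraining output to a JSON schema while an n-gram speculator proposes draft tokens. This is an illustrative sketch under assumptions, not code from the diff; the model path, prompt, and schema are placeholders, and the imports reflect tensorrt_llm's public llmapi as best understood here.

# Hedged sketch (assumed llmapi surface): guided decoding combined with
# n-gram speculative decoding, mirroring test_guided_decoding_with_ngram.
from tensorrt_llm import LLM, SamplingParams
from tensorrt_llm.llmapi import GuidedDecodingParams, NGramDecodingConfig

spec_config = NGramDecodingConfig(max_draft_len=3, max_matching_ngram_size=3)
llm = LLM("meta-llama/Llama-3.1-8B-Instruct",  # placeholder model path
          guided_decoding_backend="xgrammar",
          speculative_config=spec_config,
          disable_overlap_scheduler=True)  # the new tests disable overlap scheduling
with llm:
    # Constrain generation to a JSON object matching this schema while the
    # n-gram speculator supplies draft tokens.
    schema = {"type": "object",
              "properties": {"status": {"type": "string"}},
              "required": ["status"]}
    params = SamplingParams(
        max_tokens=64,
        guided_decoding=GuidedDecodingParams(json=schema))
    output = llm.generate("Report the service status as JSON.", params)
    print(output.outputs[0].text)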