77This module defines the default health check payload for TRT-LLM backends. 
88""" 
99
10+ import  logging 
11+ 
1012from  dynamo .health_check  import  HealthCheckPayload 
1113
# Module-level logger, named after this module so log records can be
# filtered per module by the application's logging configuration.
logger = logging.getLogger(__name__)
15+ 
16+ 
def _get_bos_token_id_from_tokenizer(tokenizer) -> int:
    """
    Extract BOS token ID from the TRT-LLM tokenizer if available.

    Args:
        tokenizer: TRT-LLM tokenizer object, or None.

    Returns:
        BOS token ID from the tokenizer as an int, or 1 as fallback when the
        tokenizer is None, exposes no usable ``bos_token_id``, or the lookup
        or integer conversion fails.

    Note:
        The TransformersTokenizer class wraps a HuggingFace tokenizer.
        While TransformersTokenizer doesn't expose bos_token_id directly,
        the wrapped HuggingFace tokenizer (accessible via tokenizer.tokenizer) does.
    """
    if tokenizer is None:
        return 1

    try:
        # A missing ``tokenizer`` attribute yields None here, and looking up
        # ``bos_token_id`` on None also yields None, so both "no wrapped
        # tokenizer" and "no BOS token" fall through to the default below.
        inner_tokenizer = getattr(tokenizer, "tokenizer", None)
        raw_bos_token_id = getattr(inner_tokenizer, "bos_token_id", None)
        if raw_bos_token_id is not None:
            # Convert before logging so a value that cannot be converted is
            # never reported as "in use" right before falling back.
            bos_token_id = int(raw_bos_token_id)
            logger.info(
                "Using model's BOS token ID for health check: %s", bos_token_id
            )
            return bos_token_id
    except Exception as e:
        # Deliberately broad: this lookup is best-effort and must never
        # prevent the health check payload from being built.
        logger.debug("Failed to get BOS token from tokenizer: %s", e)

    logger.debug("Using default BOS token ID (1) for health check")
    return 1
49+ 
1250
1351class  TrtllmHealthCheckPayload (HealthCheckPayload ):
1452    """ 
@@ -17,14 +55,20 @@ class TrtllmHealthCheckPayload(HealthCheckPayload):
1755    Provides TRT-LLM defaults and inherits environment override support from base class. 
1856    """ 
1957
20-     def  __init__ (self ):
58+     def  __init__ (self ,  tokenizer = None ):
2159        """ 
2260        Initialize TRT-LLM health check payload with TRT-LLM-specific defaults. 
61+ 
62+         Args: 
63+             tokenizer: Optional TRT-LLM tokenizer to extract BOS token from. 
64+                        If provided, will attempt to use the model's actual BOS token. 
2365        """ 
66+         bos_token_id  =  _get_bos_token_id_from_tokenizer (tokenizer )
67+ 
2468        # Set TensorRT-LLM default payload - minimal request that completes quickly 
2569        # The handler expects token_ids, stop_conditions, and sampling_options 
2670        self .default_payload  =  {
27-             "token_ids" : [1 ],   # Single token for minimal processing 
71+             "token_ids" : [bos_token_id ], 
2872            "stop_conditions" : {
2973                "max_tokens" : 1 ,  # Generate only 1 token 
3074                "stop" : None ,
0 commit comments