77This module defines the default health check payload for TRT-LLM backends.
88"""
99
10+ import logging
11+
1012from dynamo .health_check import HealthCheckPayload
1113
14+ logger = logging .getLogger (__name__ )
15+
16+
17+ def _get_bos_token_id_from_tokenizer (tokenizer ) -> int :
18+ """
19+ Extract BOS token ID from the TRT-LLM tokenizer if available.
20+
21+ Args:
22+ tokenizer: TRT-LLM tokenizer object
23+
24+ Returns:
25+ BOS token ID from the tokenizer, or 1 as fallback
26+
27+ Note:
28+ The TransformersTokenizer class wraps a HuggingFace tokenizer.
29+ While TransformersTokenizer doesn't expose bos_token_id directly,
30+ the wrapped HuggingFace tokenizer (accessible via tokenizer.tokenizer) does.
31+ """
32+ if tokenizer is None :
33+ return 1
34+
35+ try :
36+ if hasattr (tokenizer , "tokenizer" ):
37+ inner_tokenizer = getattr (tokenizer , "tokenizer" )
38+ bos_token_id = getattr (inner_tokenizer , "bos_token_id" , None )
39+ if bos_token_id is not None :
40+ logger .info (
41+ f"Using model's BOS token ID for health check: { bos_token_id } "
42+ )
43+ return int (bos_token_id )
44+ except Exception as e :
45+ logger .debug (f"Failed to get BOS token from tokenizer: { e } " )
46+
47+ logger .debug ("Using default BOS token ID (1) for health check" )
48+ return 1
49+
1250
1351class TrtllmHealthCheckPayload (HealthCheckPayload ):
1452 """
@@ -17,14 +55,20 @@ class TrtllmHealthCheckPayload(HealthCheckPayload):
1755 Provides TRT-LLM defaults and inherits environment override support from base class.
1856 """
1957
20- def __init__ (self ):
58+ def __init__ (self , tokenizer = None ):
2159 """
2260 Initialize TRT-LLM health check payload with TRT-LLM-specific defaults.
61+
62+ Args:
63+ tokenizer: Optional TRT-LLM tokenizer to extract BOS token from.
64+ If provided, will attempt to use the model's actual BOS token.
2365 """
66+ bos_token_id = _get_bos_token_id_from_tokenizer (tokenizer )
67+
2468 # Set TensorRT-LLM default payload - minimal request that completes quickly
2569 # The handler expects token_ids, stop_conditions, and sampling_options
2670 self .default_payload = {
27- "token_ids" : [1 ], # Single token for minimal processing
71+ "token_ids" : [bos_token_id ],
2872 "stop_conditions" : {
2973 "max_tokens" : 1 , # Generate only 1 token
3074 "stop" : None ,
0 commit comments