Skip to content

Commit fe6dcf3

Browse files
tzulingk and kylehh
authored and committed
feat: Trtllm health check payload use bos_token_id (#3145)
Signed-off-by: [email protected] <[email protected]> Signed-off-by: Kyle H <[email protected]>
1 parent 3f9dc65 commit fe6dcf3

File tree

2 files changed

+47
-3
lines changed

2 files changed

+47
-3
lines changed

components/backends/trtllm/src/dynamo/trtllm/health_check.py

Lines changed: 46 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,46 @@
77
This module defines the default health check payload for TRT-LLM backends.
88
"""
99

10+
import logging
11+
1012
from dynamo.health_check import HealthCheckPayload
1113

14+
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
15+
16+
17+
def _get_bos_token_id_from_tokenizer(tokenizer) -> int:
    """
    Extract the BOS token ID from a tokenizer for use in the health check.

    The lookup is best-effort and never raises: any failure results in the
    fallback value so that building the health check payload cannot break
    worker startup.

    Args:
        tokenizer: TRT-LLM tokenizer object, a tokenizer that exposes
            ``bos_token_id`` directly, or None.

    Returns:
        The BOS token ID as an int, or 1 as fallback when no usable
        ``bos_token_id`` is found.

    Note:
        The TransformersTokenizer class wraps a HuggingFace tokenizer.
        While TransformersTokenizer doesn't expose bos_token_id directly,
        the wrapped HuggingFace tokenizer (accessible via tokenizer.tokenizer) does.
        The wrapped tokenizer is therefore consulted first; the tokenizer
        object itself is checked second so that tokenizers which do expose
        ``bos_token_id`` directly are also honoured.
    """
    default_bos_token_id = 1

    if tokenizer is None:
        return default_bos_token_id

    try:
        # Wrapped tokenizer first (keeps the original precedence), then the
        # object itself. A missing wrapper yields None, which simply has no
        # bos_token_id attribute and is skipped.
        candidates = (getattr(tokenizer, "tokenizer", None), tokenizer)
        for candidate in candidates:
            bos_token_id = getattr(candidate, "bos_token_id", None)
            if bos_token_id is None:
                continue
            # Convert before logging so a non-numeric value is reported as a
            # failure rather than announced as the ID in use.
            resolved = int(bos_token_id)
            logger.info(
                "Using model's BOS token ID for health check: %s", resolved
            )
            return resolved
    except Exception as e:
        # Deliberately broad: tokenizer attributes come from third-party
        # objects (possibly properties), and this lookup must stay best-effort.
        logger.debug("Failed to get BOS token from tokenizer: %s", e)

    logger.debug("Using default BOS token ID (1) for health check")
    return default_bos_token_id
49+
1250

1351
class TrtllmHealthCheckPayload(HealthCheckPayload):
1452
"""
@@ -17,14 +55,20 @@ class TrtllmHealthCheckPayload(HealthCheckPayload):
1755
Provides TRT-LLM defaults and inherits environment override support from base class.
1856
"""
1957

20-
def __init__(self):
58+
def __init__(self, tokenizer=None):
2159
"""
2260
Initialize TRT-LLM health check payload with TRT-LLM-specific defaults.
61+
62+
Args:
63+
tokenizer: Optional TRT-LLM tokenizer to extract BOS token from.
64+
If provided, will attempt to use the model's actual BOS token.
2365
"""
66+
bos_token_id = _get_bos_token_id_from_tokenizer(tokenizer)
67+
2468
# Set TensorRT-LLM default payload - minimal request that completes quickly
2569
# The handler expects token_ids, stop_conditions, and sampling_options
2670
self.default_payload = {
27-
"token_ids": [1], # Single token for minimal processing
71+
"token_ids": [bos_token_id],
2872
"stop_conditions": {
2973
"max_tokens": 1, # Generate only 1 token
3074
"stop": None,

components/backends/trtllm/src/dynamo/trtllm/main.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -318,7 +318,7 @@ async def init(runtime: DistributedRuntime, config: Config):
318318
)
319319

320320
# Get health check payload (checks env var and falls back to TensorRT-LLM default)
321-
health_check_payload = TrtllmHealthCheckPayload().to_dict()
321+
health_check_payload = TrtllmHealthCheckPayload(tokenizer=tokenizer).to_dict()
322322

323323
if config.publish_events_and_metrics and is_first_worker(config):
324324
# Initialize and pass in the publisher to the request handler to

0 commit comments

Comments
 (0)