@@ -234,7 +234,9 @@ async def start(self):
 
         If the engine is already running, do nothing.
         """
-        from vllm.entrypoints.chat_utils import resolve_chat_template_content_format
+        from vllm.entrypoints.chat_utils import (
+            resolve_chat_template_content_format as _resolve_chat_template_content_format,
+        )
 
         if self.running:
             # The engine is already running!
@@ -246,7 +248,21 @@ async def start(self):
         self.model_config = await self.engine.get_model_config()
 
         self._tokenizer = await self.engine.get_tokenizer()
+
+        def resolve_chat_template_content_format(model_config, **kwargs):
+            try:
+                return _resolve_chat_template_content_format(
+                    model_config=model_config, **kwargs
+                )
+            except TypeError:
+                # Legacy API before vLLM 0.9.0.
+                # TODO(#52975): Remove this try-except once vLLM <0.9.0 is no longer supported.
+                return _resolve_chat_template_content_format(
+                    trust_remote_code=model_config.trust_remote_code, **kwargs
+                )
+
         self._resolved_content_format = resolve_chat_template_content_format(
+            model_config=self.model_config,
             # Use HF to get the chat template so set it to None here.
             chat_template=None,
             # Default to None, change when it's needed.
@@ -255,7 +271,6 @@ async def start(self):
             # Let vLLM decide the content format.
             given_format="auto",
             tokenizer=self._tokenizer,
-            trust_remote_code=self.model_config.trust_remote_code,
         )
 
         logger.info("Started vLLM engine.")
@@ -505,7 +520,7 @@ async def prepare_request(
     ) -> GenerationRequest:
         from vllm.entrypoints.chat_utils import (
             parse_chat_messages_futures,
-            apply_hf_chat_template,
+            apply_hf_chat_template as _apply_hf_chat_template,
         )
 
         model_config = self.model_config
@@ -521,14 +536,25 @@ async def prepare_request(
         )
         mm_data = await mm_futures
 
+        def apply_hf_chat_template(model_config, **kwargs):
+            try:
+                return _apply_hf_chat_template(model_config=model_config, **kwargs)
+            except TypeError:
+                # Legacy API before vLLM 0.9.0.
+                # TODO(#52975): Remove this try-except once vLLM <0.9.0 is no longer supported.
+                return _apply_hf_chat_template(
+                    trust_remote_code=model_config.trust_remote_code, **kwargs
+                )
+
         prompt_text = apply_hf_chat_template(
+            model_config=model_config,
             tokenizer=self._tokenizer,
             conversation=conversation,
             chat_template=None,
             tools=None,
-            trust_remote_code=model_config.trust_remote_code,
             tokenize=False,
             # **kwargs for tokenizer.apply_chat_template
+            trust_remote_code=model_config.trust_remote_code,
             add_generation_prompt=True,
             continue_final_message=False,
         )
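
Both hunks apply the same backward-compatibility pattern: the upstream vLLM helper is imported under a private alias, and a local shim tries the newer keyword signature (`model_config=...`) first, falling back to the pre-0.9.0 signature (`trust_remote_code=...`) when a `TypeError` is raised. A minimal standalone sketch of that pattern, using a hypothetical `render_prompt` helper in place of the real vLLM functions:

```python
from types import SimpleNamespace

# Hypothetical stand-ins for a third-party helper whose keyword signature
# changed across releases; these are NOT real vLLM APIs.
def render_prompt_new(*, model_config, chat_template=None):
    return f"new API, trust_remote_code={model_config.trust_remote_code}"

def render_prompt_legacy(*, trust_remote_code, chat_template=None):
    return f"legacy API, trust_remote_code={trust_remote_code}"

# Whatever the aliased import resolved to at runtime (new or legacy variant).
_render_prompt = render_prompt_legacy

def render_prompt(model_config, **kwargs):
    """Compatibility shim: try the new keyword first, fall back on TypeError."""
    try:
        return _render_prompt(model_config=model_config, **kwargs)
    except TypeError:
        # Older releases only accepted a trust_remote_code flag.
        return _render_prompt(
            trust_remote_code=model_config.trust_remote_code, **kwargs
        )

model_config = SimpleNamespace(trust_remote_code=False)
print(render_prompt(model_config, chat_template=None))
# -> "legacy API, trust_remote_code=False"
```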