@@ -121,7 +121,7 @@ class ConversationMessage(TypedDict, total=False):
121121 role : Required [str ]
122122 """The role of the message's author."""
123123
124- content : Optional [str ]
124+ content : Union [ Optional [str ], List [ Dict [ str , str ]] ]
125125 """The contents of the message"""
126126
127127 tool_call_id : Optional [str ]
@@ -431,7 +431,7 @@ def _get_full_multimodal_text_prompt(placeholder_counts: Dict[str, int],
431431def _parse_chat_message_content_mm_part (
432432 part : ChatCompletionContentPartParam ) -> Tuple [str , str ]:
433433 """
434- Parses a given multi modal content part based on its type.
434+ Parses a given multi-modal content part based on its type.
435435
436436 Args:
437437 part: A dict containing the content part, with a potential 'type' field.
@@ -485,21 +485,26 @@ def _parse_chat_message_content_parts(
485485 role : str ,
486486 parts : Iterable [ChatCompletionContentPartParam ],
487487 mm_tracker : BaseMultiModalItemTracker ,
488+ chat_template_text_format : str ,
488489) -> List [ConversationMessage ]:
489490 content : List [Union [str , Dict [str , str ]]] = []
490491
491492 mm_parser = mm_tracker .create_parser ()
492- keep_multimodal_content = \
493+ wrap_dicts = \
493494 mm_tracker ._model_config .hf_config .model_type in \
494- MODEL_KEEP_MULTI_MODAL_CONTENT
495+ MODEL_KEEP_MULTI_MODAL_CONTENT or \
496+ (chat_template_text_format == "openai" )
495497
496498 for part in parts :
497499 parse_res = _parse_chat_message_content_part (
498- part , mm_parser , wrap_dicts = keep_multimodal_content )
500+ part ,
501+ mm_parser ,
502+ wrap_dicts = wrap_dicts ,
503+ )
499504 if parse_res :
500505 content .append (parse_res )
501506
502- if keep_multimodal_content :
507+ if wrap_dicts :
503508 # Parsing wraps images and texts as interleaved dictionaries
504509 return [ConversationMessage (role = role ,
505510 content = content )] # type: ignore
@@ -560,6 +565,7 @@ def _parse_chat_message_content_part(
560565def _parse_chat_message_content (
561566 message : ChatCompletionMessageParam ,
562567 mm_tracker : BaseMultiModalItemTracker ,
568+ chat_template_text_format : str ,
563569) -> List [ConversationMessage ]:
564570 role = message ["role" ]
565571 content = message .get ("content" )
@@ -575,6 +581,7 @@ def _parse_chat_message_content(
575581 role ,
576582 content , # type: ignore
577583 mm_tracker ,
584+ chat_template_text_format ,
578585 )
579586
580587 for result_msg in result :
@@ -618,7 +625,11 @@ def parse_chat_messages(
618625 mm_tracker = MultiModalItemTracker (model_config , tokenizer )
619626
620627 for msg in messages :
621- sub_messages = _parse_chat_message_content (msg , mm_tracker )
628+ sub_messages = _parse_chat_message_content (
629+ msg ,
630+ mm_tracker ,
631+ model_config .chat_template_text_format ,
632+ )
622633
623634 conversation .extend (sub_messages )
624635
@@ -636,7 +647,11 @@ def parse_chat_messages_futures(
636647 mm_tracker = AsyncMultiModalItemTracker (model_config , tokenizer )
637648
638649 for msg in messages :
639- sub_messages = _parse_chat_message_content (msg , mm_tracker )
650+ sub_messages = _parse_chat_message_content (
651+ msg ,
652+ mm_tracker ,
653+ model_config .chat_template_text_format ,
654+ )
640655
641656 conversation .extend (sub_messages )
642657
0 commit comments