Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions verl/experimental/agent_loop/agent_loop.py
Original file line number Diff line number Diff line change
Expand Up @@ -491,9 +491,7 @@ async def _agent_loop_postprocess(self, output, **kwargs) -> _InternalAgentLoopO
# Only support Qwen2VLImageProcessor for multi-modal processing currently
# TODO: support other multi-modal inputs
multi_modal_inputs = None
if self.processor is not None and "Qwen2VLImageProcessor" in self.processor.image_processor.__class__.__name__:
from verl.models.transformers.qwen2_vl import get_rope_index

if self.processor is not None:
images = getattr(output, "multi_modal_data", {}).get("image", None)
current_text = self.tokenizer.decode(input_ids.squeeze(0), skip_special_tokens=True)
multi_modal_inputs = self.processor(text=[current_text], images=images, return_tensors="pt")
Expand All @@ -502,7 +500,9 @@ async def _agent_loop_postprocess(self, output, **kwargs) -> _InternalAgentLoopO

# We must use dict(multi_modal_inputs) to convert BatchFeature values to a new dict
# because np.array() only keeps the keys for BatchFeature.
multi_modal_inputs = dict(multi_modal_inputs)
multi_modal_inputs = dict(multi_modal_inputs.convert_to_tensors("pt"))
if self.processor is not None and "Qwen2VLImageProcessor" in self.processor.image_processor.__class__.__name__:
from verl.models.transformers.qwen2_vl import get_rope_index

image_grid_thw = multi_modal_inputs.get("image_grid_thw")
video_grid_thw = multi_modal_inputs.get("video_grid_thw")
Expand Down
6 changes: 3 additions & 3 deletions verl/experimental/agent_loop/tool_agent_loop.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from verl.interactions.utils.interaction_registry import initialize_interactions_from_config
from verl.tools.schemas import ToolResponse
from verl.tools.utils.tool_registry import initialize_tools_from_config
from verl.utils.chat_template import initialize_system_prompt
from verl.utils.profiler import simple_timer
from verl.utils.rollout_trace import rollout_trace_op

Expand Down Expand Up @@ -108,9 +109,8 @@ def init_class(cls, config, tokenizer, processor, **kwargs):
cls.apply_chat_template_kwargs = config.data.get("apply_chat_template_kwargs", {})
cls.prompt_length = config.actor_rollout_ref.rollout.prompt_length
cls.response_length = config.actor_rollout_ref.rollout.response_length
cls.system_prompt = tokenizer.apply_chat_template(
[{}], add_generation_prompt=False, tokenize=True, **cls.apply_chat_template_kwargs
)
cls.system_prompt = initialize_system_prompt(cls.tokenizer, **cls.apply_chat_template_kwargs)

# Initialize interactions from config file
cls.interaction_config_file = config.actor_rollout_ref.rollout.multi_turn.interaction_config_path
if cls.interaction_config_file:
Expand Down
1 change: 1 addition & 0 deletions verl/trainer/config/_generated_ppo_megatron_trainer.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -506,6 +506,7 @@ reward_model:
path: ~/models/FsfairX-LLaMA3-RM-v0.1
external_lib: ${actor_rollout_ref.model.external_lib}
trust_remote_code: false
override_config: {}
micro_batch_size: null
micro_batch_size_per_gpu: null
max_length: null
Expand Down
1 change: 1 addition & 0 deletions verl/trainer/config/_generated_ppo_trainer.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -437,6 +437,7 @@ reward_model:
path: ~/models/FsfairX-LLaMA3-RM-v0.1
external_lib: ${actor_rollout_ref.model.external_lib}
trust_remote_code: false
override_config: {}
use_shm: false
use_remove_padding: false
use_fused_kernels: ${actor_rollout_ref.model.use_fused_kernels}
Expand Down
3 changes: 3 additions & 0 deletions verl/trainer/config/reward_model/reward_model.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@ model:
# Whether to enable loading a remote code model, default to False
trust_remote_code: False

# Options used to override the Hugging Face model config (empty by default)
override_config: {}

# [Deprecated] Global micro batch size
# will be deprecated, use micro_batch_size_per_gpu
micro_batch_size: null
Expand Down
28 changes: 28 additions & 0 deletions verl/utils/chat_template.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Copyright 2025 Bytedance Ltd. and/or its affiliates
import logging
import os

from jinja2 import TemplateError

logger = logging.getLogger(__name__)
logger.setLevel(os.getenv("VERL_LOGGING_LEVEL", "WARN"))


def initialize_system_prompt(tokenizer, **apply_chat_template_kwargs) -> list[int]:
    """Return the token IDs of the chat template's fixed system-prompt prefix.

    Applying the chat template to a single empty message (``[{}]``) renders
    only the template's constant prefix — for most templates, the default
    system prompt. Templates that dereference message fields strictly raise
    ``jinja2.TemplateError`` on the empty message; such templates have no
    extractable system prompt and an empty list is returned instead.

    Args:
        tokenizer: A tokenizer exposing ``apply_chat_template`` with a chat
            template configured.
        **apply_chat_template_kwargs: Extra keyword arguments forwarded to
            ``tokenizer.apply_chat_template``.

    Returns:
        Token IDs of the system-prompt prefix, or ``[]`` if the chat
        template rejects an empty message.
    """
    try:
        return tokenizer.apply_chat_template(
            [{}], add_generation_prompt=False, tokenize=True, **apply_chat_template_kwargs
        )
    except TemplateError as e:
        # Lazy %-args: the message is only formatted if this record is emitted.
        logger.warning("Chat template does not support system prompt: %s", e)
        return []