Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions verl/experimental/agent_loop/agent_loop.py
Original file line number Diff line number Diff line change
Expand Up @@ -491,9 +491,7 @@ async def _agent_loop_postprocess(self, output, **kwargs) -> _InternalAgentLoopO
# Only support Qwen2VLImageProcessor for multi-modal processing currently
# TODO: support other multi-modal inputs
multi_modal_inputs = None
if self.processor is not None and "Qwen2VLImageProcessor" in self.processor.image_processor.__class__.__name__:
from verl.models.transformers.qwen2_vl import get_rope_index

if self.processor is not None:
images = getattr(output, "multi_modal_data", {}).get("image", None)
current_text = self.tokenizer.decode(input_ids.squeeze(0), skip_special_tokens=True)
multi_modal_inputs = self.processor(text=[current_text], images=images, return_tensors="pt")
Expand All @@ -502,7 +500,9 @@ async def _agent_loop_postprocess(self, output, **kwargs) -> _InternalAgentLoopO

# We must use dict(multi_modal_inputs) to convert BatchFeature values to a new dict
# because np.array() only keeps the keys for BatchFeature.
multi_modal_inputs = dict(multi_modal_inputs)
multi_modal_inputs = dict(multi_modal_inputs.convert_to_tensors("pt"))
if self.processor is not None and "Qwen2VLImageProcessor" in self.processor.image_processor.__class__.__name__:
from verl.models.transformers.qwen2_vl import get_rope_index

image_grid_thw = multi_modal_inputs.get("image_grid_thw")
video_grid_thw = multi_modal_inputs.get("video_grid_thw")
Expand Down
6 changes: 3 additions & 3 deletions verl/experimental/agent_loop/tool_agent_loop.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from verl.interactions.utils.interaction_registry import initialize_interactions_from_config
from verl.tools.schemas import ToolResponse
from verl.tools.utils.tool_registry import initialize_tools_from_config
from verl.utils.chat_template import initialize_system_prompt
from verl.utils.profiler import simple_timer
from verl.utils.rollout_trace import rollout_trace_op

Expand Down Expand Up @@ -108,9 +109,8 @@ def init_class(cls, config, tokenizer, processor, **kwargs):
cls.apply_chat_template_kwargs = config.data.get("apply_chat_template_kwargs", {})
cls.prompt_length = config.actor_rollout_ref.rollout.prompt_length
cls.response_length = config.actor_rollout_ref.rollout.response_length
cls.system_prompt = tokenizer.apply_chat_template(
[{}], add_generation_prompt=False, tokenize=True, **cls.apply_chat_template_kwargs
)
cls.system_prompt = initialize_system_prompt(cls.tokenizer, **cls.apply_chat_template_kwargs)

# Initialize interactions from config file
cls.interaction_config_file = config.actor_rollout_ref.rollout.multi_turn.interaction_config_path
if cls.interaction_config_file:
Expand Down
1 change: 1 addition & 0 deletions verl/trainer/config/_generated_ppo_megatron_trainer.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -506,6 +506,7 @@ reward_model:
path: ~/models/FsfairX-LLaMA3-RM-v0.1
external_lib: ${actor_rollout_ref.model.external_lib}
trust_remote_code: false
override_config: {}
micro_batch_size: null
micro_batch_size_per_gpu: null
max_length: null
Expand Down
1 change: 1 addition & 0 deletions verl/trainer/config/_generated_ppo_trainer.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -437,6 +437,7 @@ reward_model:
path: ~/models/FsfairX-LLaMA3-RM-v0.1
external_lib: ${actor_rollout_ref.model.external_lib}
trust_remote_code: false
override_config: {}
use_shm: false
use_remove_padding: false
use_fused_kernels: ${actor_rollout_ref.model.use_fused_kernels}
Expand Down
3 changes: 3 additions & 0 deletions verl/trainer/config/reward_model/reward_model.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@ model:
# Whether to enable loading a remote code model, default to False
trust_remote_code: False

# Options used to override the Hugging Face model config (empty by default)
override_config: {}

# [Deprecated] Global micro batch size
# will be deprecated, use micro_batch_size_per_gpu
micro_batch_size: null
Expand Down
28 changes: 28 additions & 0 deletions verl/utils/chat_template.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Copyright 2025 Bytedance Ltd. and/or its affiliates
import logging
import os

from jinja2 import TemplateError

logger = logging.getLogger(__name__)
logger.setLevel(os.getenv("VERL_LOGGING_LEVEL", "WARN"))


def initialize_system_prompt(tokenizer, **apply_chat_template_kwargs) -> list[int]:
    """Return the token IDs of the chat template's fixed system-prompt prefix.

    Applying the chat template to a single empty message (``[{}]``) renders
    only the template's constant prefix — for most templates, the default
    system prompt. Templates that dereference message fields strictly raise
    ``jinja2.TemplateError`` on the empty message; such templates have no
    extractable system prompt and an empty list is returned instead.

    Args:
        tokenizer: A tokenizer exposing ``apply_chat_template`` with a chat
            template configured.
        **apply_chat_template_kwargs: Extra keyword arguments forwarded to
            ``tokenizer.apply_chat_template``.

    Returns:
        Token IDs of the system-prompt prefix, or ``[]`` if the chat
        template rejects an empty message.
    """
    try:
        return tokenizer.apply_chat_template(
            [{}], add_generation_prompt=False, tokenize=True, **apply_chat_template_kwargs
        )
    except TemplateError as e:
        # Lazy %-args: the message is only formatted if this record is emitted.
        logger.warning("Chat template does not support system prompt: %s", e)
        return []