From e93a1b7ad12f458d029a2e7a2d695b4c22f20e6d Mon Sep 17 00:00:00 2001 From: "peng.wu" Date: Wed, 26 Nov 2025 07:02:32 +0800 Subject: [PATCH 1/4] compat Signed-off-by: peng.wu --- verl/experimental/agent_loop/agent_loop.py | 8 ++++---- .../_generated_ppo_megatron_trainer.yaml | 1 + .../config/_generated_ppo_trainer.yaml | 1 + .../config/reward_model/reward_model.yaml | 3 +++ verl/utils/chat_template.py | 19 +++++++++++++++++++ 5 files changed, 28 insertions(+), 4 deletions(-) create mode 100644 verl/utils/chat_template.py diff --git a/verl/experimental/agent_loop/agent_loop.py b/verl/experimental/agent_loop/agent_loop.py index 03d5415134f..883534bdc1f 100644 --- a/verl/experimental/agent_loop/agent_loop.py +++ b/verl/experimental/agent_loop/agent_loop.py @@ -491,9 +491,7 @@ async def _agent_loop_postprocess(self, output, **kwargs) -> _InternalAgentLoopO # Only support Qwen2VLImageProcessor for multi-modal processing currently # TODO: support other multi-modal inputs multi_modal_inputs = None - if self.processor is not None and "Qwen2VLImageProcessor" in self.processor.image_processor.__class__.__name__: - from verl.models.transformers.qwen2_vl import get_rope_index - + if self.processor is not None: images = getattr(output, "multi_modal_data", {}).get("image", None) current_text = self.tokenizer.decode(input_ids.squeeze(0), skip_special_tokens=True) multi_modal_inputs = self.processor(text=[current_text], images=images, return_tensors="pt") @@ -502,7 +500,9 @@ async def _agent_loop_postprocess(self, output, **kwargs) -> _InternalAgentLoopO # We must use dict(multi_modal_inputs) to convert BatchFeature values to a new dict # because np.array() only keeps the keys for BatchFeature. - multi_modal_inputs = dict(multi_modal_inputs) + multi_modal_inputs = dict(multi_modal_inputs.convert_to_tensors("pt")) + if self.processor is not None and "Qwen2VLImageProcessor" in self.processor.image_processor.__class__.__name__: + from verl.models.transformers.qwen2_vl import get_rope_index image_grid_thw = multi_modal_inputs.get("image_grid_thw") video_grid_thw = multi_modal_inputs.get("video_grid_thw") diff --git a/verl/trainer/config/_generated_ppo_megatron_trainer.yaml b/verl/trainer/config/_generated_ppo_megatron_trainer.yaml index f92ba0586af..1c42f66b22d 100644 --- a/verl/trainer/config/_generated_ppo_megatron_trainer.yaml +++ b/verl/trainer/config/_generated_ppo_megatron_trainer.yaml @@ -506,6 +506,7 @@ reward_model: path: ~/models/FsfairX-LLaMA3-RM-v0.1 external_lib: ${actor_rollout_ref.model.external_lib} trust_remote_code: false + override_config: {} micro_batch_size: null micro_batch_size_per_gpu: null max_length: null diff --git a/verl/trainer/config/_generated_ppo_trainer.yaml b/verl/trainer/config/_generated_ppo_trainer.yaml index c9f88f013da..3ce021d675a 100644 --- a/verl/trainer/config/_generated_ppo_trainer.yaml +++ b/verl/trainer/config/_generated_ppo_trainer.yaml @@ -437,6 +437,7 @@ reward_model: path: ~/models/FsfairX-LLaMA3-RM-v0.1 external_lib: ${actor_rollout_ref.model.external_lib} trust_remote_code: false + override_config: {} use_shm: false use_remove_padding: false use_fused_kernels: ${actor_rollout_ref.model.use_fused_kernels} diff --git a/verl/trainer/config/reward_model/reward_model.yaml b/verl/trainer/config/reward_model/reward_model.yaml index e9ffc60fbc6..dde8a814f14 100644 --- a/verl/trainer/config/reward_model/reward_model.yaml +++ b/verl/trainer/config/reward_model/reward_model.yaml @@ -34,6 +34,9 @@ model: # Whether to enable loading a remote code model, default to False 
trust_remote_code: False + # override hf config + override_config: {} + # [Deprecated] Global micro batch size # will be deprecated, use micro_batch_size_per_gpu micro_batch_size: null diff --git a/verl/utils/chat_template.py b/verl/utils/chat_template.py new file mode 100644 index 00000000000..086b2bb77c3 --- /dev/null +++ b/verl/utils/chat_template.py @@ -0,0 +1,19 @@ +# Copyright 2025 Bytedance Ltd. and/or its affiliates +import logging +import os + +from jinja2 import TemplateError + +logger = logging.getLogger(__file__) +logger.setLevel(os.getenv("VERL_LOGGING_LEVEL", "WARN")) + + +def initialize_system_prompt(tokenizer, apply_chat_template_kwargs): + try: + # {% if loop.first and message['role'] != 'system' %} matches the system prompt + return tokenizer.apply_chat_template( + [{}], add_generation_prompt=False, tokenize=True, **apply_chat_template_kwargs + ) + except TemplateError as e: + print(f"chat_template not support system prompt: {e}") + return [] From 89d9925a7e07b833795a35884b106eea8843b762 Mon Sep 17 00:00:00 2001 From: "peng.wu" Date: Wed, 26 Nov 2025 07:06:21 +0800 Subject: [PATCH 2/4] allow no sys prompt Signed-off-by: peng.wu --- verl/experimental/agent_loop/tool_agent_loop.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/verl/experimental/agent_loop/tool_agent_loop.py b/verl/experimental/agent_loop/tool_agent_loop.py index 73ebaa35929..5f4fd78a661 100644 --- a/verl/experimental/agent_loop/tool_agent_loop.py +++ b/verl/experimental/agent_loop/tool_agent_loop.py @@ -27,6 +27,7 @@ from verl.interactions.utils.interaction_registry import initialize_interactions_from_config from verl.tools.schemas import ToolResponse from verl.tools.utils.tool_registry import initialize_tools_from_config +from verl.utils.chat_template import initialize_system_prompt from verl.utils.profiler import simple_timer from verl.utils.rollout_trace import rollout_trace_op @@ -108,9 +109,8 @@ def init_class(cls, config, tokenizer, processor, **kwargs): cls.apply_chat_template_kwargs = config.data.get("apply_chat_template_kwargs", {}) cls.prompt_length = config.actor_rollout_ref.rollout.prompt_length cls.response_length = config.actor_rollout_ref.rollout.response_length - cls.system_prompt = tokenizer.apply_chat_template( - [{}], add_generation_prompt=False, tokenize=True, **cls.apply_chat_template_kwargs - ) + cls.system_prompt = initialize_system_prompt(cls.tokenizer, cls.apply_chat_template_kwargs) + # Initialize interactions from config file cls.interaction_config_file = config.actor_rollout_ref.rollout.multi_turn.interaction_config_path if cls.interaction_config_file: From e3a1fabea10b44fbcd3678109c56f951dc0060e1 Mon Sep 17 00:00:00 2001 From: "peng.wu" Date: Wed, 26 Nov 2025 07:17:02 +0800 Subject: [PATCH 3/4] template Signed-off-by: peng.wu --- .../agent_loop/tool_agent_loop.py | 2 +- verl/utils/chat_template.py | 19 ++++++++++++++----- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/verl/experimental/agent_loop/tool_agent_loop.py b/verl/experimental/agent_loop/tool_agent_loop.py index 5f4fd78a661..80d1ee3d0c2 100644 --- a/verl/experimental/agent_loop/tool_agent_loop.py +++ b/verl/experimental/agent_loop/tool_agent_loop.py @@ -109,7 +109,7 @@ def init_class(cls, config, tokenizer, processor, **kwargs): cls.apply_chat_template_kwargs = config.data.get("apply_chat_template_kwargs", {}) cls.prompt_length = config.actor_rollout_ref.rollout.prompt_length cls.response_length = config.actor_rollout_ref.rollout.response_length - cls.system_prompt = 
initialize_system_prompt(cls.tokenizer, cls.apply_chat_template_kwargs) + cls.system_prompt = initialize_system_prompt(cls.tokenizer, **cls.apply_chat_template_kwargs) # Initialize interactions from config file cls.interaction_config_file = config.actor_rollout_ref.rollout.multi_turn.interaction_config_path diff --git a/verl/utils/chat_template.py b/verl/utils/chat_template.py index 086b2bb77c3..f4bc44e2187 100644 --- a/verl/utils/chat_template.py +++ b/verl/utils/chat_template.py @@ -4,16 +4,25 @@ from jinja2 import TemplateError -logger = logging.getLogger(__file__) +logger = logging.getLogger(__name__) logger.setLevel(os.getenv("VERL_LOGGING_LEVEL", "WARN")) -def initialize_system_prompt(tokenizer, apply_chat_template_kwargs): +def initialize_system_prompt(tokenizer, **apply_chat_template_kwargs) -> list[int]: + """ + Initialize system prompt tokens for chat templates that support them. + + Args: + tokenizer: The tokenizer with a chat template + **apply_chat_template_kwargs: Additional arguments for apply_chat_template + + Returns: + List of token IDs for the system prompt, or empty list if not supported + """ try: - # {% if loop.first and message['role'] != 'system' %} matches the system prompt return tokenizer.apply_chat_template( [{}], add_generation_prompt=False, tokenize=True, **apply_chat_template_kwargs ) - except TemplateError as e: - print(f"chat_template not support system prompt: {e}") + except (TemplateError, Exception) as e: + logger.warning(f"Chat template does not support system prompt: {e}") return [] From 75bc8e50705a3a8f98b8e6964732f5b808d2eebc Mon Sep 17 00:00:00 2001 From: "peng.wu" Date: Wed, 26 Nov 2025 07:27:30 +0800 Subject: [PATCH 4/4] resolve comments Signed-off-by: peng.wu --- verl/utils/chat_template.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/verl/utils/chat_template.py b/verl/utils/chat_template.py index f4bc44e2187..70b30452c01 100644 --- a/verl/utils/chat_template.py +++ b/verl/utils/chat_template.py @@ -23,6 +23,6 @@ def initialize_system_prompt(tokenizer, **apply_chat_template_kwargs) -> list[in return tokenizer.apply_chat_template( [{}], add_generation_prompt=False, tokenize=True, **apply_chat_template_kwargs ) - except (TemplateError, Exception) as e: + except TemplateError as e: logger.warning(f"Chat template does not support system prompt: {e}") return []
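
A minimal usage sketch of the initialize_system_prompt helper as it ends up
after PATCH 4/4. The checkpoint name below is illustrative only, not part of
this series; any Hugging Face tokenizer that ships a chat template behaves
the same way:

    from transformers import AutoTokenizer

    from verl.utils.chat_template import initialize_system_prompt

    # Illustrative model path; substitute your own checkpoint.
    tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-7B-Instruct")

    # Applying the template to a single empty message tokenizes only the
    # prefix the template emits unconditionally, i.e. the default system
    # prompt. Templates that instead raise jinja2.TemplateError on the
    # empty message make the helper fall back to an empty list.
    system_prompt_ids = initialize_system_prompt(tokenizer)
    print(tokenizer.decode(system_prompt_ids))

Extra chat-template kwargs pass straight through the ** signature, e.g.
initialize_system_prompt(tokenizer, enable_thinking=False) for templates
that accept such a flag.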