diff --git a/verl/experimental/fully_async_policy/agent_loop/agent_loop.py b/verl/experimental/fully_async_policy/agent_loop/agent_loop.py index 2502de62fff..5918624f031 100644 --- a/verl/experimental/fully_async_policy/agent_loop/agent_loop.py +++ b/verl/experimental/fully_async_policy/agent_loop/agent_loop.py @@ -17,7 +17,6 @@ from typing import Any, Optional import ray -import torch from omegaconf import DictConfig from verl.experimental.agent_loop.agent_loop import ( @@ -96,11 +95,10 @@ async def generate( final_output.token_ids.extend(output.token_ids) if output.log_probs is not None: final_output.log_probs.extend(output.log_probs) + # sglang returns routed_experts for the full sequence (prompt + all tokens), + # so on partial rollout resume the new output already covers all positions. if output.routed_experts is not None: - if final_output.routed_experts is None: - final_output.routed_experts = output.routed_experts - else: - final_output.routed_experts = torch.cat([final_output.routed_experts, output.routed_experts], dim=0) + final_output.routed_experts = output.routed_experts if output.num_preempted is not None: final_output.num_preempted += output.num_preempted final_output.stop_reason = output.stop_reason