From 5b54789053481b67a672f5ce69dced2190b283c0 Mon Sep 17 00:00:00 2001 From: ZhihaoSun Date: Thu, 2 Apr 2026 13:18:58 +0800 Subject: [PATCH 1/2] fix(trainer): supplement dfed770 by adding missing update_weights in sdk trainer to fix vllm engine weight loss and Ascend PositionEmbedding OOB error --- rllm/trainer/verl/agent_sdk_trainer.py | 1 + 1 file changed, 1 insertion(+) diff --git a/rllm/trainer/verl/agent_sdk_trainer.py b/rllm/trainer/verl/agent_sdk_trainer.py index 0d59b7c19..d46e978f0 100644 --- a/rllm/trainer/verl/agent_sdk_trainer.py +++ b/rllm/trainer/verl/agent_sdk_trainer.py @@ -175,6 +175,7 @@ def fit_agent(self): self.global_steps = 0 self._load_checkpoint() + self.checkpoint_manager.update_weights(self.global_steps) start_time = time.time() if self.config.trainer.get("val_before_train", True): From af297cac89ed84c408a326b4d58b39665b96f448 Mon Sep 17 00:00:00 2001 From: ZhihaoSun Date: Sat, 4 Apr 2026 11:04:01 +0800 Subject: [PATCH 2/2] additional fixes of sdk trainer --- rllm/trainer/verl/agent_sdk_trainer.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/rllm/trainer/verl/agent_sdk_trainer.py b/rllm/trainer/verl/agent_sdk_trainer.py index d46e978f0..7880f6f76 100644 --- a/rllm/trainer/verl/agent_sdk_trainer.py +++ b/rllm/trainer/verl/agent_sdk_trainer.py @@ -214,6 +214,7 @@ def fit_agent(self): with marked_timer("step", timing_raw): # generate trajectories final_gen_batch_output = self.generate_trajectories(batch=new_batch, timing_raw=timing_raw) + self.checkpoint_manager.sleep_replicas() # need to repeat to make shape match repeat_counts = final_gen_batch_output.meta_info["repeat_counts"] @@ -474,6 +475,9 @@ def fit_agent(self): with marked_timer("save_checkpoint", timing_raw, color="green"): self._save_checkpoint() + # update weights from trainer to rollout + with marked_timer("update_weights", timing_raw, color="red"): + self.checkpoint_manager.update_weights(self.global_steps) # Visualize some sample trajectories if batch is not None and len(batch) > 0: # Randomly select a few samples to visualize