diff --git a/verl/utils/torch_functional.py b/verl/utils/torch_functional.py index 726ca1d5609..9754d989344 100644 --- a/verl/utils/torch_functional.py +++ b/verl/utils/torch_functional.py @@ -473,7 +473,9 @@ def get_constant_schedule_with_warmup( last_epoch: int = -1, ): def lr_lambda(current_step): - return min(1, float(current_step) / float(max(1, num_warmup_steps))) + if current_step < num_warmup_steps: + return float(current_step) / float(max(1.0, num_warmup_steps)) + return 1.0 return LambdaLR(optimizer, lr_lambda, last_epoch) diff --git a/verl/workers/fsdp_workers.py b/verl/workers/fsdp_workers.py index fffce53c062..99422f6fcfc 100644 --- a/verl/workers/fsdp_workers.py +++ b/verl/workers/fsdp_workers.py @@ -584,9 +584,9 @@ def update_actor(self, data: DataProto): metrics["perf/max_memory_reserved_gb"] = torch.cuda.max_memory_reserved() / (1024**3) metrics["perf/cpu_memory_used_gb"] = psutil.virtual_memory().used / (1024**3) - self.actor_lr_scheduler.step() lr = self.actor_lr_scheduler.get_last_lr()[0] metrics["actor/lr"] = lr + self.actor_lr_scheduler.step() # TODO: here, we should return all metrics output = DataProto(meta_info={"metrics": metrics})