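Fix SGLang rollout memory release: move the one-time `_need_reload` release out of release_memory_occupation (where it called itself recursively) into resume_memory_occupation, disable the `Tool` import, and add a distributed barrier after Megatron offload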

崔博 · 崔博 · commit 13da23dd7b22 · 2025-07-01T07:19:59.000Z
diff --git a/verl/workers/rollout/sglang_rollout/sglang_rollout.py b/verl/workers/rollout/sglang_rollout/sglang_rollout.py
@@ -35,7 +35,8 @@
     ResumeMemoryOccupationReqInput,
     UpdateWeightsFromTensorReqInput,
 )
-from sglang.srt.openai_api.protocol import Tool
+
+# from sglang.srt.openai_api.protocol import Tool
 from sglang.srt.sampling.sampling_params import SamplingParams
 from sglang.srt.server_args import ServerArgs
 from sglang.srt.utils import (
@@ -135,9 +136,6 @@ def __init__(self, **kwargs):
 
     async def release_memory_occupation(self, tags: Optional[list[str]] = None):
         """Release GPU occupation temporarily."""
-        if self._need_reload:
-            await self.release_memory_occupation()
-            self._need_reload = False
         if tags is None:
             obj = ReleaseMemoryOccupationReqInput()
         else:
@@ -149,7 +147,9 @@ async def resume_memory_occupation(self, tags: Optional[list[str]] = None):
         # because __init__ is a sync method, it can not call the async release_memory_occupation
         # have to move release_memory_occupation from __init__ to here
         # For multi-stage awake, we run release weight and kv_cache when we resume weights for the first time.
-        await self.release_memory_occupation()
+        if self._need_reload:
+            await self.release_memory_occupation()
+            self._need_reload = False
 
         if tags is None:
             obj = ResumeMemoryOccupationReqInput()
diff --git a/verl/workers/sharding_manager/megatron_sglang.py b/verl/workers/sharding_manager/megatron_sglang.py
@@ -110,6 +110,7 @@ def offload_manager(self):
             if self.offload_param:
                 offload_megatron_model_to_cpu(self.actor_module)
             get_torch_device().empty_cache()
+            torch.distributed.barrier()
 
             if self.multi_stage_wake_up:
                 loop.run_until_complete(self.resume_memory(tags=["kv_cache"]))