Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions docs/source/reducing_memory_usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -293,3 +293,5 @@ training_args = RLOOConfig(..., vllm_enable_sleep_mode=True)

</hfoption>
</hfoptions>

Offloading the vLLM weights and cache helps keep GPU memory usage low, which can be particularly beneficial when training large models or using limited GPU resources. However, waking the vLLM engine from sleep mode introduces some host–device transfer latency, which may slightly impact training speed.
7 changes: 4 additions & 3 deletions trl/experimental/gold/gold_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,8 +90,8 @@ class GOLDConfig(SFTConfig):
Frequency (in training steps) to synchronize student model weights to vLLM engine. Set to 1 to sync after
every step.
vllm_enable_sleep_mode (`bool`, *optional*, defaults to `False`):
Whether to enable sleep mode for the student vLLM engine. If set to `True`, the engine will enter sleep
mode after each training step to save resources.
Enable vLLM sleep mode to offload student weights/cache during the optimizer step. Keeps GPU memory usage
low, but waking the engine adds host–device transfer latency.
"""

_VALID_DICT_FIELDS = TrainingArguments._VALID_DICT_FIELDS + ["teacher_model_init_kwargs"]
Expand Down Expand Up @@ -313,7 +313,8 @@ class GOLDConfig(SFTConfig):
vllm_enable_sleep_mode: bool = field(
default=False,
metadata={
"help": "Whether to enable sleep mode for the colocated vLLM engine. When `True`, the engine sleeps during the optimizer step and wakes for weight sync and generation."
"help": "Enable vLLM sleep mode to offload student weights/cache during the optimizer step. Keeps GPU "
"memory usage low, but waking the engine adds host–device transfer latency."
},
)
# Parameters that control the logging
Expand Down
8 changes: 4 additions & 4 deletions trl/trainer/grpo_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,8 +148,8 @@ class GRPOConfig(TrainingArguments):
`"colocate"`. If you are using `vllm_mode="server"`, this parameter must be passed separately when
launching the vLLM server via the `--vllm_tensor_parallel_size` flag.
vllm_enable_sleep_mode (`bool`, *optional*, defaults to `False`):
Whether to enable sleep mode for vLLM. If `True`, vLLM will sleep during the optimization step and be woken
for weight sync and generation.
Enable vLLM sleep mode to offload weights/cache during the optimizer step. Keeps GPU memory usage low, but
waking the engine adds host–device transfer latency.

> Parameters that control the training

Expand Down Expand Up @@ -455,8 +455,8 @@ class GRPOConfig(TrainingArguments):
vllm_enable_sleep_mode: bool = field(
default=False,
metadata={
"help": "Whether to enable sleep mode for vLLM. If `True`, vLLM will sleep during the optimization step "
"and woken for weight sync and generation."
"help": "Enable vLLM sleep mode to offload weights/cache during the optimizer step. Keeps GPU memory "
"usage low, but waking the engine adds host–device transfer latency."
},
)
vllm_guided_decoding_regex: Optional[str] = field(
Expand Down
8 changes: 4 additions & 4 deletions trl/trainer/rloo_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,8 +143,8 @@ class RLOOConfig(TrainingArguments):
`"colocate"`. If you are using `vllm_mode="server"`, this parameter must be passed separately when
launching the vLLM server via the `--vllm_tensor_parallel_size` flag.
vllm_enable_sleep_mode (`bool`, *optional*, defaults to `False`):
Whether to enable sleep mode for vLLM. If `True`, vLLM will sleep during the optimization step and be woken
for weight sync and generation.
Enable vLLM sleep mode to offload weights/cache during the optimizer step. Keeps GPU memory usage low, but
waking the engine adds host–device transfer latency.

> Parameters that control the training

Expand Down Expand Up @@ -386,8 +386,8 @@ class RLOOConfig(TrainingArguments):
vllm_enable_sleep_mode: bool = field(
default=False,
metadata={
"help": "Whether to enable sleep mode for vLLM. If `True`, vLLM will sleep during the optimization step "
"and woken for weight sync and generation."
"help": "Enable vLLM sleep mode to offload weights/cache during the optimizer step. Keeps GPU memory "
"usage low, but waking the engine adds host–device transfer latency."
},
)
vllm_guided_decoding_regex: Optional[str] = field(
Expand Down
Loading