update trl version (#3117)

modelscope · Feb 14, 2025 · b84854b · b84854b
1 parent e9503bb
commit b84854b
Show file tree

Hide file tree

Showing 11 changed files with 12 additions and 11 deletions.
diff --git a/README.md b/README.md
@@ -114,7 +114,7 @@ Running Environment:
 | transformers | >=4.33               | 4.48.3      |                                           |
 | modelscope   | >=1.19               |             |                                           |
 | peft         | >=0.11.0,<0.15.0     |             |                                           |
-| trl          | >=0.13,<0.16         | 0.14.0      | RLHF                                      |
+| trl          | >=0.13,<0.16         | 0.15        | RLHF                                      |
 | deepspeed    | >=0.14 |  | Training                                  |
 | vllm         | >=0.5.1              | 0.6.5       | Inference/Deployment/Evaluation           |
 | lmdeploy     | lmdeploy>=0.5,<0.6.5 | 0.6.4       | Inference/Deployment/Evaluation           |

diff --git a/README_CN.md b/README_CN.md
@@ -109,7 +109,7 @@ pip install -e .
 | transformers | >=4.33 | 4.48.3 ||
 | modelscope | >=1.19 |  ||
 | peft | >=0.11.0,<0.15.0 | ||
-| trl | >=0.13,<0.16 | 0.14.0 |RLHF|
+| trl | >=0.13,<0.16 | 0.15 |RLHF|
 | deepspeed | >=0.14 |  |训练|
 | vllm | >=0.5.1 | 0.6.5 |推理/部署/评测|
 | lmdeploy | lmdeploy>=0.5,<0.6.5 | 0.6.4 |推理/部署/评测|

diff --git a/docs/source/GetStarted/SWIFT安装.md b/docs/source/GetStarted/SWIFT安装.md
@@ -60,7 +60,7 @@ pip install ms-swift==2.*
 | transformers | >=4.33 | 4.48.3 ||
 | modelscope | >=1.19 |  ||
 | peft | >=0.11.0,<0.15.0 | ||
-| trl | >=0.13,<0.16 | 0.14.0 |RLHF|
+| trl | >=0.13,<0.16 | 0.15 |RLHF|
 | deepspeed | >=0.14 |  |训练|
 | vllm | >=0.5.1 | 0.6.5 |推理/部署/评测|
 | lmdeploy | lmdeploy>=0.5,<0.6.5 | 0.6.4 |推理/部署/评测|

diff --git a/docs/source/Instruction/GRPO.md b/docs/source/Instruction/GRPO.md
@@ -7,7 +7,7 @@
 环境安装
 ```bash
 pip install math_verify # reward function
-pip install git+https://github.com/huggingface/trl.git # trl>=0.15.0.dev0
+pip install "trl>=0.15,<0.16"
 ```
 
 **注意**：训练过程中 loss 接近0 是正常情况， 参考[issue](https://github.com/huggingface/open-r1/issues/239#issuecomment-2646297851)

diff --git a/docs/source_en/GetStarted/SWIFT-installation.md b/docs/source_en/GetStarted/SWIFT-installation.md
@@ -61,7 +61,7 @@ You can view the image [here](https://modelscope.cn/docs/intro/environment-setup
 | transformers | >=4.33               | 4.48.3      |                                           |
 | modelscope   | >=1.19               |             |                                           |
 | peft         | >=0.11.0,<0.15.0     |             |                                           |
-| trl          | >=0.13,<0.16         | 0.14.0      | RLHF                                      |
+| trl          | >=0.13,<0.16         | 0.15        | RLHF                                      |
 | deepspeed    | >=0.14 |  | Training                                  |
 | vllm         | >=0.5.1              | 0.6.5       | Inference/Deployment/Evaluation           |
 | lmdeploy     | lmdeploy>=0.5,<0.6.5 | 0.6.4       | Inference/Deployment/Evaluation           |

diff --git a/docs/source_en/Instruction/GRPO.md b/docs/source_en/Instruction/GRPO.md
@@ -8,7 +8,7 @@ environments
 
 ```bash
 pip install math_verify # reward function
-pip install git+https://github.com/huggingface/trl.git # trl>=0.15.0.dev0
+pip install "trl>=0.15,<0.16"
 ```
 
 **Note**: It is normal for the loss to approach zero during training. Refer to this [issue](https://github.com/huggingface/open-r1/issues/239#issuecomment-2646297851) for more details.

diff --git a/examples/train/grpo/full_vllm.sh b/examples/train/grpo/full_vllm.sh
@@ -1,6 +1,6 @@
 # One GPU is left for vLLM inference acceleration.
 # pip install math_verify # reward function
-# pip install git+https://github.com/huggingface/trl.git # trl>=0.15.0.dev0
+# pip install "trl>=0.15"
 # GPU memory: 8 * 80GiB
 
 CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \

diff --git a/examples/train/grpo/grpo.sh b/examples/train/grpo/grpo.sh
@@ -1,5 +1,5 @@
 # pip install math_verify # reward function
-# pip install git+https://github.com/huggingface/trl.git # trl>=0.15.0.dev0
+# pip install "trl>=0.15"
 # GPU memory: 80GiB
 # You can set `--reward_model` to use a reward model to provide rewards.
 CUDA_VISIBLE_DEVICES=0 \

diff --git a/examples/train/grpo/multi_node/multi_node1.sh b/examples/train/grpo/multi_node/multi_node1.sh
@@ -1,5 +1,5 @@
 # pip install math_verify # reward function
-# pip install git+https://github.com/huggingface/trl.git # trl>=0.15.0.dev0
+# pip install "trl>=0.15"
 export CUDA_VISIBLE_DEVICES=0,1,2,3
 export NNODES=2
 export NODE_RANK=0

diff --git a/examples/train/grpo/plugin/run_external_rm.sh b/examples/train/grpo/plugin/run_external_rm.sh
@@ -1,5 +1,5 @@
 # pip install math_verify # reward function
-# pip install git+https://github.com/huggingface/trl.git # trl>=0.15.0.dev0
+# pip install "trl>=0.15"
 # GPU memory: 80GiB
 
 CUDA_VISIBLE_DEVICES=0 \

diff --git a/swift/trainers/rlhf_trainer/grpo_trainer.py b/swift/trainers/rlhf_trainer/grpo_trainer.py
@@ -11,6 +11,7 @@
 import torch.nn as nn
 from accelerate.utils import broadcast_object_list, gather, gather_object
 from transformers import PreTrainedModel
+from transformers.utils.versions import require_version
 from trl import GRPOTrainer as HFGRPOTrainer
 from trl.models import unwrap_model_for_generation
 
@@ -38,7 +39,7 @@ def __init__(self,
                  reward_funcs: Optional[List[Union[str, Callable]]] = None,
                  *_args,
                  **kwargs):
-
+        require_version('trl>=0.15')
         args = kwargs['args']
 
         self.processing_class = kwargs.get('template').tokenizer