From fb0a5b034a0d92942e633f4326a4411f49ffd0f4 Mon Sep 17 00:00:00 2001 From: Behrooz Date: Sun, 2 Nov 2025 19:37:38 -0800 Subject: [PATCH 1/2] fix: Remove chat template setting from non-SFT trainer scripts Resolves #4404 - Remove SIMPLE_CHAT_TEMPLATE import from 7 trainer scripts - Remove chat template setting for non-SFT trainers (DPO, CPO, ORPO, PPO, Nash-MD, XPO, Online DPO) - Chat templates only make sense for SFT (instruction tuning), not for preference optimization or reward-based training - Scripts modified: - examples/scripts/online_dpo.py - examples/scripts/orpo.py - examples/scripts/cpo.py - examples/scripts/nash_md.py - examples/scripts/xpo.py - examples/scripts/ppo/ppo.py - examples/scripts/ppo/ppo_tldr.py --- examples/scripts/cpo.py | 3 --- examples/scripts/nash_md.py | 3 --- examples/scripts/online_dpo.py | 3 --- examples/scripts/orpo.py | 3 --- examples/scripts/ppo/ppo.py | 3 --- examples/scripts/ppo/ppo_tldr.py | 3 --- examples/scripts/xpo.py | 3 --- 7 files changed, 21 deletions(-) diff --git a/examples/scripts/cpo.py b/examples/scripts/cpo.py index 2d9049136c6..fef9cdf1247 100644 --- a/examples/scripts/cpo.py +++ b/examples/scripts/cpo.py @@ -64,7 +64,6 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, HfArgumentParser from trl import CPOConfig, CPOTrainer, ModelConfig, ScriptArguments, get_peft_config -from trl.trainer.utils import SIMPLE_CHAT_TEMPLATE # Enable logging in a Hugging Face Space @@ -90,8 +89,6 @@ # Dataset ################ dataset = load_dataset(script_args.dataset_name, name=script_args.dataset_config) - if tokenizer.chat_template is None: - tokenizer.chat_template = SIMPLE_CHAT_TEMPLATE ################ # Training diff --git a/examples/scripts/nash_md.py b/examples/scripts/nash_md.py index e3f1486c75d..fdb8ca09a3e 100644 --- a/examples/scripts/nash_md.py +++ b/examples/scripts/nash_md.py @@ -73,7 +73,6 @@ get_kbit_device_map, get_quantization_config, ) -from trl.trainer.utils import SIMPLE_CHAT_TEMPLATE # Enable logging in a Hugging Face Space @@ -128,8 +127,6 @@ ) if tokenizer.pad_token is None: tokenizer.pad_token = tokenizer.eos_token - if tokenizer.chat_template is None: - tokenizer.chat_template = SIMPLE_CHAT_TEMPLATE dataset = load_dataset(script_args.dataset_name, name=script_args.dataset_config) diff --git a/examples/scripts/online_dpo.py b/examples/scripts/online_dpo.py index 91569c8b4f1..4ed7afe884d 100644 --- a/examples/scripts/online_dpo.py +++ b/examples/scripts/online_dpo.py @@ -69,7 +69,6 @@ get_peft_config, get_quantization_config, ) -from trl.trainer.utils import SIMPLE_CHAT_TEMPLATE # Enable logging in a Hugging Face Space @@ -131,8 +130,6 @@ trust_remote_code=model_args.trust_remote_code, **model_kwargs, ) - if tokenizer.chat_template is None: - tokenizer.chat_template = SIMPLE_CHAT_TEMPLATE if tokenizer.pad_token_id is None: tokenizer.pad_token = tokenizer.eos_token diff --git a/examples/scripts/orpo.py b/examples/scripts/orpo.py index d392bb7bf1b..e256a4277ad 100644 --- a/examples/scripts/orpo.py +++ b/examples/scripts/orpo.py @@ -64,7 +64,6 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, HfArgumentParser from trl import ModelConfig, ORPOConfig, ORPOTrainer, ScriptArguments, get_peft_config -from trl.trainer.utils import SIMPLE_CHAT_TEMPLATE # Enable logging in a Hugging Face Space @@ -91,8 +90,6 @@ # Dataset ################ dataset = load_dataset(script_args.dataset_name, name=script_args.dataset_config) - if tokenizer.chat_template is None: - tokenizer.chat_template = SIMPLE_CHAT_TEMPLATE ################ # Training diff --git a/examples/scripts/ppo/ppo.py b/examples/scripts/ppo/ppo.py index 5dfcda55429..2f5471996c2 100644 --- a/examples/scripts/ppo/ppo.py +++ b/examples/scripts/ppo/ppo.py @@ -43,7 +43,6 @@ get_peft_config, get_quantization_config, ) -from trl.trainer.utils import SIMPLE_CHAT_TEMPLATE # Enable logging in a Hugging Face Space @@ -106,8 +105,6 @@ model_args.model_name_or_path, padding_side="left", trust_remote_code=model_args.trust_remote_code ) tokenizer.add_special_tokens({"pad_token": "[PAD]"}) - if tokenizer.chat_template is None: - tokenizer.chat_template = SIMPLE_CHAT_TEMPLATE value_model = AutoModelForSequenceClassification.from_pretrained( training_args.reward_model_path, trust_remote_code=model_args.trust_remote_code, num_labels=1 ) diff --git a/examples/scripts/ppo/ppo_tldr.py b/examples/scripts/ppo/ppo_tldr.py index 4ef1cf4e7b6..7962758ec40 100644 --- a/examples/scripts/ppo/ppo_tldr.py +++ b/examples/scripts/ppo/ppo_tldr.py @@ -43,7 +43,6 @@ get_peft_config, get_quantization_config, ) -from trl.trainer.utils import SIMPLE_CHAT_TEMPLATE # Enable logging in a Hugging Face Space @@ -113,8 +112,6 @@ model_args.model_name_or_path, padding_side="left", trust_remote_code=model_args.trust_remote_code ) tokenizer.add_special_tokens({"pad_token": "[PAD]"}) - if tokenizer.chat_template is None: - tokenizer.chat_template = SIMPLE_CHAT_TEMPLATE value_model = AutoModelForSequenceClassification.from_pretrained( training_args.reward_model_path, trust_remote_code=model_args.trust_remote_code, num_labels=1 ) diff --git a/examples/scripts/xpo.py b/examples/scripts/xpo.py index e4e7c6301a6..70c13226c5d 100644 --- a/examples/scripts/xpo.py +++ b/examples/scripts/xpo.py @@ -57,7 +57,6 @@ get_kbit_device_map, get_quantization_config, ) -from trl.trainer.utils import SIMPLE_CHAT_TEMPLATE # Enable logging in a Hugging Face Space @@ -113,8 +112,6 @@ ) if tokenizer.pad_token is None: tokenizer.pad_token = tokenizer.eos_token - if tokenizer.chat_template is None: - tokenizer.chat_template = SIMPLE_CHAT_TEMPLATE dataset = load_dataset(script_args.dataset_name, name=script_args.dataset_config) From ee5827cbe5ea579b8afe5913d8c017a1d1f7b01b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?= Date: Mon, 3 Nov 2025 15:26:46 +0000 Subject: [PATCH 2/2] same for test --- tests/test_cpo_trainer.py | 3 --- tests/test_gkd_trainer.py | 5 ----- tests/test_orpo_trainer.py | 3 --- tests/test_ppo_trainer.py | 4 ---- trl/trainer/utils.py | 3 --- 5 files changed, 18 deletions(-) diff --git a/tests/test_cpo_trainer.py b/tests/test_cpo_trainer.py index 19833a414ff..01c581eb53f 100644 --- a/tests/test_cpo_trainer.py +++ b/tests/test_cpo_trainer.py @@ -17,7 +17,6 @@ from transformers import AutoModelForCausalLM, AutoModelForSeq2SeqLM, AutoTokenizer from trl import CPOConfig, CPOTrainer -from trl.trainer.utils import SIMPLE_CHAT_TEMPLATE from .testing_utils import TrlTestCase, require_peft @@ -33,7 +32,6 @@ def setup_method(self): model_id = "trl-internal-testing/tiny-T5ForConditionalGeneration" self.t5_model = AutoModelForSeq2SeqLM.from_pretrained(model_id) self.t5_tokenizer = AutoTokenizer.from_pretrained(model_id) - self.t5_tokenizer.chat_template = SIMPLE_CHAT_TEMPLATE @pytest.mark.parametrize( "name, loss_type, config_name", @@ -41,7 +39,6 @@ def setup_method(self): ("qwen", "sigmoid", "standard_preference"), ("t5", "hinge", "standard_implicit_prompt_preference"), ("qwen", "ipo", "conversational_preference"), - ("t5", "ipo", "conversational_implicit_prompt_preference"), ("qwen", "simpo", "standard_preference"), ("t5", "simpo", "standard_implicit_prompt_preference"), ("qwen", "hinge", "conversational_preference"), diff --git a/tests/test_gkd_trainer.py b/tests/test_gkd_trainer.py index 0a13d9db12a..6516a88f94e 100644 --- a/tests/test_gkd_trainer.py +++ b/tests/test_gkd_trainer.py @@ -21,7 +21,6 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig from trl import GKDConfig, GKDTrainer -from trl.trainer.utils import SIMPLE_CHAT_TEMPLATE from .testing_utils import TrlTestCase, require_liger_kernel @@ -206,10 +205,6 @@ def setup_method(self): self.tokenizer = AutoTokenizer.from_pretrained(self.model_id) self.tokenizer.pad_token = self.tokenizer.eos_token - # Ensure the tokenizer has a chat template - if not hasattr(self.tokenizer, "chat_template") or self.tokenizer.chat_template is None: - self.tokenizer.chat_template = SIMPLE_CHAT_TEMPLATE - def test_gkd_trainer(self): training_args = GKDConfig( output_dir=self.tmp_dir, diff --git a/tests/test_orpo_trainer.py b/tests/test_orpo_trainer.py index f882cf756f8..d91654fc7ba 100644 --- a/tests/test_orpo_trainer.py +++ b/tests/test_orpo_trainer.py @@ -17,7 +17,6 @@ from transformers import AutoModelForCausalLM, AutoModelForSeq2SeqLM, AutoTokenizer from trl import ORPOConfig, ORPOTrainer -from trl.trainer.utils import SIMPLE_CHAT_TEMPLATE from .testing_utils import TrlTestCase, require_peft @@ -33,7 +32,6 @@ def setup_method(self): model_id = "trl-internal-testing/tiny-T5ForConditionalGeneration" self.t5_model = AutoModelForSeq2SeqLM.from_pretrained(model_id) self.t5_tokenizer = AutoTokenizer.from_pretrained(model_id) - self.t5_tokenizer.chat_template = SIMPLE_CHAT_TEMPLATE @pytest.mark.parametrize( "name, config_name", @@ -41,7 +39,6 @@ def setup_method(self): ("qwen", "standard_preference"), ("t5", "standard_implicit_prompt_preference"), ("qwen", "conversational_preference"), - ("t5", "conversational_implicit_prompt_preference"), ], ) def test_orpo_trainer(self, name, config_name): diff --git a/tests/test_ppo_trainer.py b/tests/test_ppo_trainer.py index 6e62e742115..317bab51351 100644 --- a/tests/test_ppo_trainer.py +++ b/tests/test_ppo_trainer.py @@ -19,7 +19,6 @@ from transformers.utils import is_peft_available from trl import PPOConfig, PPOTrainer -from trl.trainer.utils import SIMPLE_CHAT_TEMPLATE from .testing_utils import TrlTestCase, require_peft @@ -37,9 +36,6 @@ def setup_method(self): self.tokenizer = AutoTokenizer.from_pretrained(self.model_id, padding_side="left") self.tokenizer.add_special_tokens({"pad_token": "[PAD]"}) - if self.tokenizer.chat_template is None: - self.tokenizer.chat_template = SIMPLE_CHAT_TEMPLATE - # Add reward and value models as in ppo.py reward_model_id = "trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5" self.value_model = AutoModelForSequenceClassification.from_pretrained(reward_model_id, num_labels=1) diff --git a/trl/trainer/utils.py b/trl/trainer/utils.py index e8130d694d7..f9014b61506 100644 --- a/trl/trainer/utils.py +++ b/trl/trainer/utils.py @@ -738,9 +738,6 @@ def print_rich_table(df: pd.DataFrame) -> None: console.print(table) -SIMPLE_SFT_CHAT_TEMPLATE = "{% for message in messages %}{{' ' + message['content']}}{% endfor %}{{ eos_token }}" -# SIMPLE_SFT_CHAT_TEMPLATE simply ends things with an EOS token, this helps the SFT model learn to end the completions with EOS tokens - SIMPLE_CHAT_TEMPLATE = "{% for message in messages %}{{message['role'].capitalize() + ': ' + message['content'] + '\n\n'}}{% endfor %}{% if add_generation_prompt %}{{ 'Assistant:' }}{% endif %}"