From 1e8b331fa2c222a699cb3563f44f5702a7d6f50b Mon Sep 17 00:00:00 2001
From: Quentin Gallouédec
Date: Tue, 1 Oct 2024 18:44:03 +0000
Subject: [PATCH 01/12] `eval_strategy="steps" if eval_dataset else "no"`

---
 tests/test_bco_trainer.py | 2 +-
 tests/test_kto_trainer.py | 2 +-
 tests/test_sft_trainer.py | 2 --
 3 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/tests/test_bco_trainer.py b/tests/test_bco_trainer.py
index 14e97c1c82a..6aea7e993c1 100644
--- a/tests/test_bco_trainer.py
+++ b/tests/test_bco_trainer.py
@@ -64,7 +64,7 @@ def test_bco_trainer(self, name, pre_compute, eval_dataset, config_name):
             max_steps=3,
             gradient_accumulation_steps=1,
             learning_rate=9e-1,
-            eval_strategy="steps",
+            eval_strategy="steps" if eval_dataset else "no",
             beta=0.1,
             precompute_ref_log_probs=pre_compute,
             report_to="none",
diff --git a/tests/test_kto_trainer.py b/tests/test_kto_trainer.py
index 6529877092b..ae43bd0c85d 100644
--- a/tests/test_kto_trainer.py
+++ b/tests/test_kto_trainer.py
@@ -61,7 +61,7 @@ def test_kto_trainer(self, name, loss_type, pre_compute, eval_dataset):
             remove_unused_columns=False,
             gradient_accumulation_steps=1,
             learning_rate=9e-1,
-            eval_strategy="steps",
+            eval_strategy="steps" if eval_dataset else "no",
             beta=0.1,
             precompute_ref_log_probs=pre_compute,
             loss_type=loss_type,
diff --git a/tests/test_sft_trainer.py b/tests/test_sft_trainer.py
index 5b499b982c6..1057b815600 100644
--- a/tests/test_sft_trainer.py
+++ b/tests/test_sft_trainer.py
@@ -246,7 +246,6 @@ def test_sft_trainer_uncorrect_data(self):
         training_args = SFTConfig(
             output_dir=tmp_dir,
             dataloader_drop_last=True,
-            eval_strategy="steps",
             max_steps=2,
             eval_steps=1,
             save_steps=1,
@@ -1193,7 +1192,6 @@ def test_sft_trainer_skip_prepare_dataset_with_no_packing(self):
         training_args = SFTConfig(
             output_dir=tmp_dir,
             dataloader_drop_last=True,
-            eval_strategy="steps",
             max_steps=4,
             eval_steps=2,
             save_steps=2,

From 44558f84cc43e20254b567d608b44d059a14913b Mon Sep 17 00:00:00 2001
From: Quentin Gallouédec
Date: Tue, 1 Oct 2024 19:10:06 +0000
Subject: [PATCH 02/12] tmp skip test

---
 tests/test_dpo_trainer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_dpo_trainer.py b/tests/test_dpo_trainer.py
index 4518718f1cb..8f035297c4a 100644
--- a/tests/test_dpo_trainer.py
+++ b/tests/test_dpo_trainer.py
@@ -1049,7 +1049,7 @@ class DPOVisionTrainerTester(unittest.TestCase):
     @parameterized.expand(
         [
             ["trl-internal-testing/tiny-random-idefics2"],
-            ["trl-internal-testing/tiny-random-paligemma"],
+            # ["trl-internal-testing/tiny-random-paligemma"],  # temporarily disabled due to flaky tests
             ["trl-internal-testing/tiny-random-llava-1.5"],
         ]
     )

From a1ef7016286649fce10b3665159abcbfac2219e3 Mon Sep 17 00:00:00 2001
From: Quentin Gallouédec
Date: Tue, 1 Oct 2024 19:10:39 +0000
Subject: [PATCH 03/12] drop `eval_strategy` in `test_sft_trainer_uncorrect_data`

---
 tests/test_sft_trainer.py | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/tests/test_sft_trainer.py b/tests/test_sft_trainer.py
index 1057b815600..08a6656e940 100644
--- a/tests/test_sft_trainer.py
+++ b/tests/test_sft_trainer.py
@@ -264,7 +264,6 @@ def test_sft_trainer_uncorrect_data(self):
         training_args = SFTConfig(
             output_dir=tmp_dir,
             dataloader_drop_last=True,
-            eval_strategy="steps",
             max_steps=2,
             eval_steps=1,
             save_steps=1,
@@ -284,7 +283,6 @@ def test_sft_trainer_uncorrect_data(self):
         training_args = SFTConfig(
             output_dir=tmp_dir,
             dataloader_drop_last=True,
-            eval_strategy="steps",
             max_steps=2,
             eval_steps=1,
             save_steps=1,
@@ -302,7 +300,6 @@ def test_sft_trainer_uncorrect_data(self):
         training_args = SFTConfig(
             output_dir=tmp_dir,
             dataloader_drop_last=True,
-            eval_strategy="steps",
             max_steps=2,
             eval_steps=1,
             save_steps=1,
@@ -321,7 +318,6 @@ def test_sft_trainer_uncorrect_data(self):
         training_args = SFTConfig(
             output_dir=tmp_dir,
             dataloader_drop_last=True,
-            eval_strategy="steps",
             max_steps=2,
             eval_steps=1,
             save_steps=1,
@@ -339,7 +335,6 @@ def test_sft_trainer_uncorrect_data(self):
         training_args = SFTConfig(
             output_dir=tmp_dir,
             dataloader_drop_last=True,
-            eval_strategy="steps",
             max_steps=2,
             eval_steps=1,
             save_steps=1,
@@ -359,7 +354,6 @@ def test_sft_trainer_uncorrect_data(self):
         training_args = SFTConfig(
             output_dir=tmp_dir,
             dataloader_drop_last=True,
-            eval_strategy="steps",
             max_steps=2,
             eval_steps=1,
             save_steps=1,
@@ -381,7 +375,6 @@ def test_sft_trainer_uncorrect_data(self):
         training_args = SFTConfig(
             output_dir=tmp_dir,
             dataloader_drop_last=True,
-            eval_strategy="steps",
             max_steps=2,
             eval_steps=1,
             save_steps=1,
@@ -400,7 +393,6 @@ def test_sft_trainer_uncorrect_data(self):
         training_args = SFTConfig(
             output_dir=tmp_dir,
             dataloader_drop_last=True,
-            eval_strategy="steps",
             max_steps=2,
             eval_steps=1,
             save_steps=1,

From cb7fafa874b108ba91b29f15944b7c4a41705d6d Mon Sep 17 00:00:00 2001
From: Quentin Gallouédec
Date: Tue, 1 Oct 2024 19:30:24 +0000
Subject: [PATCH 04/12] remove eval strategy

---
 tests/test_sft_trainer.py | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/tests/test_sft_trainer.py b/tests/test_sft_trainer.py
index 08a6656e940..20d59a5e02c 100644
--- a/tests/test_sft_trainer.py
+++ b/tests/test_sft_trainer.py
@@ -439,7 +439,6 @@ def test_sft_trainer_with_model_num_train_epochs(self):
         training_args = SFTConfig(
             output_dir=tmp_dir,
             dataloader_drop_last=True,
-            eval_strategy="steps",
             max_steps=2,
             save_steps=1,
             num_train_epochs=2,
@@ -466,7 +465,6 @@ def test_sft_trainer_with_model_num_train_epochs(self):
         training_args = SFTConfig(
             output_dir=tmp_dir,
             dataloader_drop_last=True,
-            eval_strategy="steps",
             max_steps=2,
             save_steps=1,
             num_train_epochs=2,
@@ -518,7 +516,6 @@ def test_sft_trainer_with_model(self):
         training_args = SFTConfig(
             output_dir=tmp_dir,
             dataloader_drop_last=True,
-            eval_strategy="steps",
             max_steps=2,
             save_steps=1,
             per_device_train_batch_size=2,
@@ -545,7 +542,6 @@ def test_sft_trainer_with_model(self):
         training_args = SFTConfig(
             output_dir=tmp_dir,
             dataloader_drop_last=True,
-            eval_strategy="steps",
             max_steps=2,
             save_steps=1,
             per_device_train_batch_size=2,
@@ -572,7 +568,6 @@ def test_sft_trainer_with_model(self):
         training_args = SFTConfig(
             output_dir=tmp_dir,
             dataloader_drop_last=True,
-            eval_strategy="steps",
             max_steps=2,
             save_steps=1,
             per_device_train_batch_size=2,
@@ -596,7 +591,6 @@ def test_sft_trainer_with_model(self):
         training_args = SFTConfig(
             output_dir=tmp_dir,
             dataloader_drop_last=True,
-            eval_strategy="steps",
             max_steps=2,
             save_steps=1,
             per_device_train_batch_size=2,

From 8c4d30789d18a0f762c347dec25d5a18703cda8f Mon Sep 17 00:00:00 2001
From: Quentin Gallouédec
Date: Thu, 24 Oct 2024 15:45:28 +0000
Subject: [PATCH 05/12] Add parameterized test for generate method

---
 tests/test_modeling_value_head.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/tests/test_modeling_value_head.py b/tests/test_modeling_value_head.py
index ede1b916175..28d8fb6a7ed 100644
--- a/tests/test_modeling_value_head.py
+++ b/tests/test_modeling_value_head.py
@@ -19,7 +19,7 @@
 import pytest
 import torch
 from transformers import AutoModel, AutoModelForCausalLM, AutoModelForSeq2SeqLM
-
+from parameterized import parameterized
 from trl import AutoModelForCausalLMWithValueHead, AutoModelForSeq2SeqLMWithValueHead, create_reference_model
 
 
@@ -248,16 +248,16 @@ def test_dropout_kwargs(self):
         # Check if v head of the model has the same dropout as the config
         assert model.v_head.dropout.p == 0.5
 
-    def test_generate(self):
+    @parameterized.expand(ALL_CAUSAL_LM_MODELS)
+    def test_generate(self, model_name):
         r"""
         Test if `generate` works for every model
         """
-        for model_name in self.all_model_names:
-            model = self.trl_model_class.from_pretrained(model_name)
-            input_ids = torch.tensor([[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]])
+        model = self.trl_model_class.from_pretrained(model_name)
+        input_ids = torch.tensor([[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]])
 
-            # Just check if the generation works
-            _ = model.generate(input_ids)
+        # Just check if the generation works
+        _ = model.generate(input_ids)
 
     def test_raise_error_not_causallm(self):
         # Test with a model without a LM head

From 64b312afab8db062b267633b2117a1b27d81b483 Mon Sep 17 00:00:00 2001
From: Quentin Gallouédec
Date: Thu, 24 Oct 2024 15:47:05 +0000
Subject: [PATCH 06/12] Revert "`eval_strategy="steps" if eval_dataset else "no"`"

This reverts commit 1e8b331fa2c222a699cb3563f44f5702a7d6f50b.
---
 tests/test_bco_trainer.py | 2 +-
 tests/test_kto_trainer.py | 2 +-
 tests/test_sft_trainer.py | 2 ++
 3 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/tests/test_bco_trainer.py b/tests/test_bco_trainer.py
index 6aea7e993c1..14e97c1c82a 100644
--- a/tests/test_bco_trainer.py
+++ b/tests/test_bco_trainer.py
@@ -64,7 +64,7 @@ def test_bco_trainer(self, name, pre_compute, eval_dataset, config_name):
             max_steps=3,
             gradient_accumulation_steps=1,
             learning_rate=9e-1,
-            eval_strategy="steps" if eval_dataset else "no",
+            eval_strategy="steps",
             beta=0.1,
             precompute_ref_log_probs=pre_compute,
             report_to="none",
diff --git a/tests/test_kto_trainer.py b/tests/test_kto_trainer.py
index ae43bd0c85d..6529877092b 100644
--- a/tests/test_kto_trainer.py
+++ b/tests/test_kto_trainer.py
@@ -61,7 +61,7 @@ def test_kto_trainer(self, name, loss_type, pre_compute, eval_dataset):
             remove_unused_columns=False,
             gradient_accumulation_steps=1,
             learning_rate=9e-1,
-            eval_strategy="steps" if eval_dataset else "no",
+            eval_strategy="steps",
             beta=0.1,
             precompute_ref_log_probs=pre_compute,
             loss_type=loss_type,
diff --git a/tests/test_sft_trainer.py b/tests/test_sft_trainer.py
index 20d59a5e02c..8e847ef410c 100644
--- a/tests/test_sft_trainer.py
+++ b/tests/test_sft_trainer.py
@@ -246,6 +246,7 @@ def test_sft_trainer_uncorrect_data(self):
         training_args = SFTConfig(
             output_dir=tmp_dir,
             dataloader_drop_last=True,
+            eval_strategy="steps",
             max_steps=2,
             eval_steps=1,
             save_steps=1,
@@ -1178,6 +1179,7 @@ def test_sft_trainer_skip_prepare_dataset_with_no_packing(self):
         training_args = SFTConfig(
             output_dir=tmp_dir,
             dataloader_drop_last=True,
+            eval_strategy="steps",
             max_steps=4,
             eval_steps=2,
             save_steps=2,

From 830dbcc1a17b75d0f8e2168edd2e00314e42a158 Mon Sep 17 00:00:00 2001
From: Quentin Gallouédec
Date: Thu, 24 Oct 2024 15:47:14 +0000
Subject: [PATCH 07/12] Revert "tmp skip test"

This reverts commit 44558f84cc43e20254b567d608b44d059a14913b.
---
 tests/test_dpo_trainer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_dpo_trainer.py b/tests/test_dpo_trainer.py
index 8f035297c4a..4518718f1cb 100644
--- a/tests/test_dpo_trainer.py
+++ b/tests/test_dpo_trainer.py
@@ -1049,7 +1049,7 @@ class DPOVisionTrainerTester(unittest.TestCase):
     @parameterized.expand(
         [
             ["trl-internal-testing/tiny-random-idefics2"],
-            # ["trl-internal-testing/tiny-random-paligemma"],  # temporarily disabled due to flaky tests
+            ["trl-internal-testing/tiny-random-paligemma"],
             ["trl-internal-testing/tiny-random-llava-1.5"],
         ]
     )

From 605144b1550a70bf4cbc2c44a74772b8130e473e Mon Sep 17 00:00:00 2001
From: Quentin Gallouédec
Date: Thu, 24 Oct 2024 15:47:22 +0000
Subject: [PATCH 08/12] Revert "drop `eval_strategy` in `test_sft_trainer_uncorrect_data`"

This reverts commit a1ef7016286649fce10b3665159abcbfac2219e3.
---
 tests/test_sft_trainer.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/tests/test_sft_trainer.py b/tests/test_sft_trainer.py
index 8e847ef410c..4f0add7fd1a 100644
--- a/tests/test_sft_trainer.py
+++ b/tests/test_sft_trainer.py
@@ -265,6 +265,7 @@ def test_sft_trainer_uncorrect_data(self):
         training_args = SFTConfig(
             output_dir=tmp_dir,
             dataloader_drop_last=True,
+            eval_strategy="steps",
             max_steps=2,
             eval_steps=1,
             save_steps=1,
@@ -284,6 +285,7 @@ def test_sft_trainer_uncorrect_data(self):
         training_args = SFTConfig(
             output_dir=tmp_dir,
             dataloader_drop_last=True,
+            eval_strategy="steps",
             max_steps=2,
             eval_steps=1,
             save_steps=1,
@@ -301,6 +303,7 @@ def test_sft_trainer_uncorrect_data(self):
         training_args = SFTConfig(
             output_dir=tmp_dir,
             dataloader_drop_last=True,
+            eval_strategy="steps",
             max_steps=2,
             eval_steps=1,
             save_steps=1,
@@ -319,6 +322,7 @@ def test_sft_trainer_uncorrect_data(self):
         training_args = SFTConfig(
             output_dir=tmp_dir,
             dataloader_drop_last=True,
+            eval_strategy="steps",
             max_steps=2,
             eval_steps=1,
             save_steps=1,
@@ -336,6 +340,7 @@ def test_sft_trainer_uncorrect_data(self):
         training_args = SFTConfig(
             output_dir=tmp_dir,
             dataloader_drop_last=True,
+            eval_strategy="steps",
             max_steps=2,
             eval_steps=1,
             save_steps=1,
@@ -355,6 +360,7 @@ def test_sft_trainer_uncorrect_data(self):
         training_args = SFTConfig(
             output_dir=tmp_dir,
             dataloader_drop_last=True,
+            eval_strategy="steps",
             max_steps=2,
             eval_steps=1,
             save_steps=1,
@@ -376,6 +382,7 @@ def test_sft_trainer_uncorrect_data(self):
         training_args = SFTConfig(
             output_dir=tmp_dir,
             dataloader_drop_last=True,
+            eval_strategy="steps",
             max_steps=2,
             eval_steps=1,
             save_steps=1,
@@ -394,6 +401,7 @@ def test_sft_trainer_uncorrect_data(self):
         training_args = SFTConfig(
             output_dir=tmp_dir,
             dataloader_drop_last=True,
+            eval_strategy="steps",
             max_steps=2,
             eval_steps=1,
             save_steps=1,

From 01860b6e48c6ac053387223897acc324db474f5a Mon Sep 17 00:00:00 2001
From: Quentin Gallouédec
Date: Thu, 24 Oct 2024 15:47:31 +0000
Subject: [PATCH 09/12] Revert "remove eval strategy"

This reverts commit cb7fafa874b108ba91b29f15944b7c4a41705d6d.
---
 tests/test_sft_trainer.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tests/test_sft_trainer.py b/tests/test_sft_trainer.py
index 4f0add7fd1a..5b499b982c6 100644
--- a/tests/test_sft_trainer.py
+++ b/tests/test_sft_trainer.py
@@ -448,6 +448,7 @@ def test_sft_trainer_with_model_num_train_epochs(self):
         training_args = SFTConfig(
             output_dir=tmp_dir,
             dataloader_drop_last=True,
+            eval_strategy="steps",
             max_steps=2,
             save_steps=1,
             num_train_epochs=2,
@@ -474,6 +475,7 @@ def test_sft_trainer_with_model_num_train_epochs(self):
         training_args = SFTConfig(
             output_dir=tmp_dir,
             dataloader_drop_last=True,
+            eval_strategy="steps",
             max_steps=2,
             save_steps=1,
             num_train_epochs=2,
@@ -525,6 +527,7 @@ def test_sft_trainer_with_model(self):
         training_args = SFTConfig(
             output_dir=tmp_dir,
             dataloader_drop_last=True,
+            eval_strategy="steps",
             max_steps=2,
             save_steps=1,
             per_device_train_batch_size=2,
@@ -551,6 +554,7 @@ def test_sft_trainer_with_model(self):
         training_args = SFTConfig(
             output_dir=tmp_dir,
             dataloader_drop_last=True,
+            eval_strategy="steps",
             max_steps=2,
             save_steps=1,
             per_device_train_batch_size=2,
@@ -577,6 +581,7 @@ def test_sft_trainer_with_model(self):
         training_args = SFTConfig(
             output_dir=tmp_dir,
             dataloader_drop_last=True,
+            eval_strategy="steps",
             max_steps=2,
             save_steps=1,
             per_device_train_batch_size=2,
@@ -600,6 +605,7 @@ def test_sft_trainer_with_model(self):
         training_args = SFTConfig(
             output_dir=tmp_dir,
             dataloader_drop_last=True,
+            eval_strategy="steps",
             max_steps=2,
             save_steps=1,
             per_device_train_batch_size=2,

From e1d759c84786aae1253819487c98d1b4cd8189db Mon Sep 17 00:00:00 2001
From: Quentin Gallouédec
Date: Thu, 24 Oct 2024 15:48:18 +0000
Subject: [PATCH 10/12] style

---
 tests/test_modeling_value_head.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/test_modeling_value_head.py b/tests/test_modeling_value_head.py
index 28d8fb6a7ed..98f6f77a943 100644
--- a/tests/test_modeling_value_head.py
+++ b/tests/test_modeling_value_head.py
@@ -18,8 +18,9 @@
 
 import pytest
 import torch
-from transformers import AutoModel, AutoModelForCausalLM, AutoModelForSeq2SeqLM
 from parameterized import parameterized
+from transformers import AutoModel, AutoModelForCausalLM, AutoModelForSeq2SeqLM
+
 from trl import AutoModelForCausalLMWithValueHead, AutoModelForSeq2SeqLMWithValueHead, create_reference_model
 
 

From 08bcf9aeb089a2218935cd51121160ab5719d265 Mon Sep 17 00:00:00 2001
From: Quentin Gallouédec
Date: Thu, 24 Oct 2024 16:10:37 +0000
Subject: [PATCH 11/12] Refactor test_generate method in test_modeling_value_head.py

---
 tests/test_modeling_value_head.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/tests/test_modeling_value_head.py b/tests/test_modeling_value_head.py
index 98f6f77a943..81c51d41595 100644
--- a/tests/test_modeling_value_head.py
+++ b/tests/test_modeling_value_head.py
@@ -371,17 +371,17 @@ def test_dropout_kwargs(self):
         # Check if v head of the model has the same dropout as the config
         assert model.v_head.dropout.p == 0.5
 
-    def test_generate(self):
+    @parameterized.expand(ALL_SEQ2SEQ_MODELS)
+    def test_generate(self, model_name):
         r"""
         Test if `generate` works for every model
        """
-        for model_name in self.all_model_names:
-            model = self.trl_model_class.from_pretrained(model_name)
-            input_ids = torch.tensor([[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]])
-            decoder_input_ids = torch.tensor([[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]])
+        model = self.trl_model_class.from_pretrained(model_name)
+        input_ids = torch.tensor([[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]])
+        decoder_input_ids = torch.tensor([[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]])
 
-            # Just check if the generation works
-            _ = model.generate(input_ids, decoder_input_ids=decoder_input_ids)
+        # Just check if the generation works
+        _ = model.generate(input_ids, decoder_input_ids=decoder_input_ids)
 
     def test_raise_error_not_causallm(self):
         # Test with a model without a LM head

From c8937b643217028895795d1cc959cac9e29a5603 Mon Sep 17 00:00:00 2001
From: Quentin Gallouédec
Date: Thu, 24 Oct 2024 17:33:45 +0000
Subject: [PATCH 12/12] `max_new_tokens=9`

---
 tests/test_modeling_value_head.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/tests/test_modeling_value_head.py b/tests/test_modeling_value_head.py
index 81c51d41595..a1d4503540f 100644
--- a/tests/test_modeling_value_head.py
+++ b/tests/test_modeling_value_head.py
@@ -19,7 +19,7 @@
 import pytest
 import torch
 from parameterized import parameterized
-from transformers import AutoModel, AutoModelForCausalLM, AutoModelForSeq2SeqLM
+from transformers import AutoModel, AutoModelForCausalLM, AutoModelForSeq2SeqLM, GenerationConfig
 
 from trl import AutoModelForCausalLMWithValueHead, AutoModelForSeq2SeqLMWithValueHead, create_reference_model
 
@@ -254,11 +254,12 @@ def test_generate(self, model_name):
         r"""
         Test if `generate` works for every model
         """
+        generation_config = GenerationConfig(max_new_tokens=9)
         model = self.trl_model_class.from_pretrained(model_name)
         input_ids = torch.tensor([[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]])
 
         # Just check if the generation works
-        _ = model.generate(input_ids)
+        _ = model.generate(input_ids, generation_config=generation_config)
 
     def test_raise_error_not_causallm(self):
         # Test with a model without a LM head
@@ -376,12 +377,13 @@ def test_generate(self, model_name):
         r"""
         Test if `generate` works for every model
         """
+        generation_config = GenerationConfig(max_new_tokens=9)
         model = self.trl_model_class.from_pretrained(model_name)
         input_ids = torch.tensor([[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]])
         decoder_input_ids = torch.tensor([[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]])
 
         # Just check if the generation works
-        _ = model.generate(input_ids, decoder_input_ids=decoder_input_ids)
+        _ = model.generate(input_ids, decoder_input_ids=decoder_input_ids, generation_config=generation_config)
 
     def test_raise_error_not_causallm(self):
         # Test with a model without a LM head