diff --git a/.github/workflows/slow-tests.yml b/.github/workflows/slow-tests.yml index a302ec16041..c506231123c 100644 --- a/.github/workflows/slow-tests.yml +++ b/.github/workflows/slow-tests.yml @@ -47,7 +47,7 @@ jobs: run: | source .venv/bin/activate uv pip install ".[dev]" - uv pip install pytest-reportlog parameterized + uv pip install pytest-reportlog - name: Run slow SFT tests on single GPU if: always() @@ -95,7 +95,7 @@ jobs: run: | source .venv/bin/activate uv pip install ".[dev]" - uv pip install pytest-reportlog parameterized + uv pip install pytest-reportlog - name: Run slow SFT tests on Multi GPU if: always() diff --git a/pyproject.toml b/pyproject.toml index 787415bec79..c1a94413f64 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -71,7 +71,6 @@ scikit = [ "scikit-learn" ] test = [ - "parameterized", "pytest-cov", "pytest-rerunfailures==15.1", "pytest-xdist", @@ -112,7 +111,6 @@ dev = [ "bitsandbytes", # scikit: included in bco # test - "parameterized", "pytest-cov", "pytest-rerunfailures==15.1", "pytest-xdist", diff --git a/tests/slow/test_dpo_slow.py b/tests/slow/test_dpo_slow.py index 26feb388c6b..199f702bf23 100644 --- a/tests/slow/test_dpo_slow.py +++ b/tests/slow/test_dpo_slow.py @@ -13,13 +13,11 @@ # limitations under the License. import gc -import itertools import pytest import torch from accelerate.utils.memory import release_memory from datasets import load_dataset -from parameterized import parameterized from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig from transformers.testing_utils import backend_empty_cache, require_torch_accelerator, torch_device from transformers.utils import is_peft_available @@ -54,7 +52,9 @@ def teardown_method(self): backend_empty_cache(torch_device) gc.collect() - @parameterized.expand(list(itertools.product(MODELS_TO_TEST, DPO_LOSS_TYPES, DPO_PRECOMPUTE_LOGITS))) + @pytest.mark.parametrize("pre_compute_logits", DPO_PRECOMPUTE_LOGITS) + @pytest.mark.parametrize("loss_type", DPO_LOSS_TYPES) + @pytest.mark.parametrize("model_id", MODELS_TO_TEST) def test_dpo_bare_model(self, model_id, loss_type, pre_compute_logits): """ A test that tests the simple usage of `DPOTrainer` using a bare model in full precision. @@ -98,16 +98,10 @@ def test_dpo_bare_model(self, model_id, loss_type, pre_compute_logits): release_memory(model, trainer) - @parameterized.expand( - list( - itertools.product( - MODELS_TO_TEST, - DPO_LOSS_TYPES, - DPO_PRECOMPUTE_LOGITS, - GRADIENT_CHECKPOINTING_KWARGS, - ) - ) - ) + @pytest.mark.parametrize("gradient_checkpointing_kwargs", GRADIENT_CHECKPOINTING_KWARGS) + @pytest.mark.parametrize("pre_compute_logits", DPO_PRECOMPUTE_LOGITS) + @pytest.mark.parametrize("loss_type", DPO_LOSS_TYPES) + @pytest.mark.parametrize("model_id", MODELS_TO_TEST) @require_peft def test_dpo_peft_model(self, model_id, loss_type, pre_compute_logits, gradient_checkpointing_kwargs): """ @@ -160,16 +154,10 @@ def test_dpo_peft_model(self, model_id, loss_type, pre_compute_logits, gradient_ release_memory(model, trainer) - @parameterized.expand( - list( - itertools.product( - MODELS_TO_TEST, - DPO_LOSS_TYPES, - DPO_PRECOMPUTE_LOGITS, - GRADIENT_CHECKPOINTING_KWARGS, - ) - ) - ) + @pytest.mark.parametrize("gradient_checkpointing_kwargs", GRADIENT_CHECKPOINTING_KWARGS) + @pytest.mark.parametrize("pre_compute_logits", DPO_PRECOMPUTE_LOGITS) + @pytest.mark.parametrize("loss_type", DPO_LOSS_TYPES) + @pytest.mark.parametrize("model_id", MODELS_TO_TEST) @require_bitsandbytes @require_peft def test_dpo_peft_model_qlora(self, model_id, loss_type, pre_compute_logits, gradient_checkpointing_kwargs): diff --git a/tests/slow/test_grpo_slow.py b/tests/slow/test_grpo_slow.py index 7ba974423ca..745dc869028 100644 --- a/tests/slow/test_grpo_slow.py +++ b/tests/slow/test_grpo_slow.py @@ -23,7 +23,6 @@ from accelerate.utils.memory import release_memory from datasets import Dataset, Features, Image, Value, load_dataset from packaging.version import Version -from parameterized import parameterized from transformers import ( AutoModelForCausalLM, AutoModelForImageTextToText, @@ -64,7 +63,7 @@ def teardown_method(self): backend_empty_cache(torch_device) gc.collect() - @parameterized.expand(MODELS_TO_TEST) + @pytest.mark.parametrize("model_name", MODELS_TO_TEST) @require_liger_kernel def test_training_with_liger_grpo_loss(self, model_name): training_args = GRPOConfig( @@ -104,7 +103,7 @@ def test_training_with_liger_grpo_loss(self, model_name): release_memory(model, trainer) - @parameterized.expand(MODELS_TO_TEST) + @pytest.mark.parametrize("model_name", MODELS_TO_TEST) @require_liger_kernel @require_peft def test_training_with_liger_grpo_loss_and_peft(self, model_name): @@ -168,7 +167,7 @@ def test_training_with_liger_grpo_loss_and_peft(self, model_name): release_memory(model, trainer) - @parameterized.expand(MODELS_TO_TEST) + @pytest.mark.parametrize("model_name", MODELS_TO_TEST) def test_training_with_transformers_paged(self, model_name): """Test that training works with transformers paged implementation (requires GPU).""" if Version(transformers.__version__) < Version("4.57.0"): @@ -206,10 +205,11 @@ def test_training_with_transformers_paged(self, model_name): release_memory(model, trainer) - @parameterized.expand( + @pytest.mark.parametrize( + "model_name", [ - ("HuggingFaceTB/SmolVLM-Instruct",), # Only test the smaller model to avoid OOM - ] + "HuggingFaceTB/SmolVLM-Instruct", # Only test the smaller model to avoid OOM + ], ) @require_flash_attn @require_bitsandbytes diff --git a/tests/slow/test_sft_slow.py b/tests/slow/test_sft_slow.py index 13d9c7ce635..979d61218c1 100755 --- a/tests/slow/test_sft_slow.py +++ b/tests/slow/test_sft_slow.py @@ -13,13 +13,11 @@ # limitations under the License. import gc -import itertools import pytest import torch from accelerate.utils.memory import release_memory from datasets import load_dataset -from parameterized import parameterized from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig from transformers.testing_utils import ( backend_empty_cache, @@ -61,7 +59,8 @@ def teardown_method(self): backend_empty_cache(torch_device) gc.collect() - @parameterized.expand(list(itertools.product(MODELS_TO_TEST, PACKING_OPTIONS))) + @pytest.mark.parametrize("packing", PACKING_OPTIONS) + @pytest.mark.parametrize("model_name", MODELS_TO_TEST) def test_sft_trainer_str(self, model_name, packing): """ Simply tests if passing a simple str to `SFTTrainer` loads and runs the trainer as expected. @@ -85,7 +84,8 @@ def test_sft_trainer_str(self, model_name, packing): trainer.train() - @parameterized.expand(list(itertools.product(MODELS_TO_TEST, PACKING_OPTIONS))) + @pytest.mark.parametrize("packing", PACKING_OPTIONS) + @pytest.mark.parametrize("model_name", MODELS_TO_TEST) def test_sft_trainer_transformers(self, model_name, packing): """ Simply tests if passing a transformers model to `SFTTrainer` loads and runs the trainer as expected. @@ -115,7 +115,8 @@ def test_sft_trainer_transformers(self, model_name, packing): release_memory(model, trainer) - @parameterized.expand(list(itertools.product(MODELS_TO_TEST, PACKING_OPTIONS))) + @pytest.mark.parametrize("packing", PACKING_OPTIONS) + @pytest.mark.parametrize("model_name", MODELS_TO_TEST) @require_peft def test_sft_trainer_peft(self, model_name, packing): """ @@ -151,7 +152,8 @@ def test_sft_trainer_peft(self, model_name, packing): release_memory(model, trainer) - @parameterized.expand(list(itertools.product(MODELS_TO_TEST, PACKING_OPTIONS))) + @pytest.mark.parametrize("packing", PACKING_OPTIONS) + @pytest.mark.parametrize("model_name", MODELS_TO_TEST) def test_sft_trainer_transformers_mp(self, model_name, packing): """ Simply tests if passing a transformers model to `SFTTrainer` loads and runs the trainer as expected in mixed @@ -183,7 +185,9 @@ def test_sft_trainer_transformers_mp(self, model_name, packing): release_memory(model, trainer) - @parameterized.expand(list(itertools.product(MODELS_TO_TEST, PACKING_OPTIONS, GRADIENT_CHECKPOINTING_KWARGS))) + @pytest.mark.parametrize("gradient_checkpointing_kwargs", GRADIENT_CHECKPOINTING_KWARGS) + @pytest.mark.parametrize("packing", PACKING_OPTIONS) + @pytest.mark.parametrize("model_name", MODELS_TO_TEST) def test_sft_trainer_transformers_mp_gc(self, model_name, packing, gradient_checkpointing_kwargs): """ Simply tests if passing a transformers model to `SFTTrainer` loads and runs the trainer as expected in mixed @@ -217,7 +221,9 @@ def test_sft_trainer_transformers_mp_gc(self, model_name, packing, gradient_chec release_memory(model, trainer) - @parameterized.expand(list(itertools.product(MODELS_TO_TEST, PACKING_OPTIONS, GRADIENT_CHECKPOINTING_KWARGS))) + @pytest.mark.parametrize("gradient_checkpointing_kwargs", GRADIENT_CHECKPOINTING_KWARGS) + @pytest.mark.parametrize("packing", PACKING_OPTIONS) + @pytest.mark.parametrize("model_name", MODELS_TO_TEST) @require_peft def test_sft_trainer_transformers_mp_gc_peft(self, model_name, packing, gradient_checkpointing_kwargs): """ @@ -255,9 +261,10 @@ def test_sft_trainer_transformers_mp_gc_peft(self, model_name, packing, gradient release_memory(model, trainer) - @parameterized.expand( - list(itertools.product(MODELS_TO_TEST, PACKING_OPTIONS, GRADIENT_CHECKPOINTING_KWARGS, DEVICE_MAP_OPTIONS)) - ) + @pytest.mark.parametrize("device_map", DEVICE_MAP_OPTIONS) + @pytest.mark.parametrize("gradient_checkpointing_kwargs", GRADIENT_CHECKPOINTING_KWARGS) + @pytest.mark.parametrize("packing", PACKING_OPTIONS) + @pytest.mark.parametrize("model_name", MODELS_TO_TEST) @require_torch_multi_accelerator def test_sft_trainer_transformers_mp_gc_device_map( self, model_name, packing, gradient_checkpointing_kwargs, device_map @@ -294,7 +301,9 @@ def test_sft_trainer_transformers_mp_gc_device_map( release_memory(model, trainer) - @parameterized.expand(list(itertools.product(MODELS_TO_TEST, PACKING_OPTIONS, GRADIENT_CHECKPOINTING_KWARGS))) + @pytest.mark.parametrize("gradient_checkpointing_kwargs", GRADIENT_CHECKPOINTING_KWARGS) + @pytest.mark.parametrize("packing", PACKING_OPTIONS) + @pytest.mark.parametrize("model_name", MODELS_TO_TEST) @require_peft @require_bitsandbytes def test_sft_trainer_transformers_mp_gc_peft_qlora(self, model_name, packing, gradient_checkpointing_kwargs): @@ -335,7 +344,8 @@ def test_sft_trainer_transformers_mp_gc_peft_qlora(self, model_name, packing, gr release_memory(model, trainer) - @parameterized.expand(list(itertools.product(MODELS_TO_TEST, PACKING_OPTIONS))) + @pytest.mark.parametrize("packing", PACKING_OPTIONS) + @pytest.mark.parametrize("model_name", MODELS_TO_TEST) @require_peft @require_bitsandbytes def test_sft_trainer_with_chat_format_qlora(self, model_name, packing): @@ -375,7 +385,8 @@ def test_sft_trainer_with_chat_format_qlora(self, model_name, packing): release_memory(model, trainer) - @parameterized.expand(list(itertools.product(MODELS_TO_TEST, PACKING_OPTIONS))) + @pytest.mark.parametrize("packing", PACKING_OPTIONS) + @pytest.mark.parametrize("model_name", MODELS_TO_TEST) @require_liger_kernel def test_sft_trainer_with_liger(self, model_name, packing): """ @@ -419,7 +430,8 @@ def cleanup_liger_patches(trainer): finally: cleanup_liger_patches(trainer) - @parameterized.expand(list(itertools.product(MODELS_TO_TEST, PACKING_OPTIONS))) + @pytest.mark.parametrize("packing", PACKING_OPTIONS) + @pytest.mark.parametrize("model_name", MODELS_TO_TEST) @require_torch_accelerator def test_train_offloading(self, model_name, packing): """Test that activation offloading works with SFTTrainer.""" diff --git a/tests/test_bco_trainer.py b/tests/test_bco_trainer.py index 7b7f0414438..79febde2884 100644 --- a/tests/test_bco_trainer.py +++ b/tests/test_bco_trainer.py @@ -18,7 +18,6 @@ import torch from accelerate import Accelerator from datasets import load_dataset -from parameterized import parameterized from transformers import AutoModel, AutoModelForCausalLM, AutoTokenizer from transformers.utils import is_peft_available @@ -33,15 +32,16 @@ class TestBCOTrainer(TrlTestCase): - @parameterized.expand( + @pytest.mark.parametrize( + "config_name", [ - ("standard_preference",), - ("standard_implicit_prompt_preference",), - ("standard_unpaired_preference",), - ("conversational_preference",), - ("conversational_implicit_prompt_preference",), - ("conversational_unpaired_preference",), - ] + "standard_preference", + "standard_implicit_prompt_preference", + "standard_unpaired_preference", + "conversational_preference", + "conversational_implicit_prompt_preference", + "conversational_unpaired_preference", + ], ) @require_sklearn def test_train(self, config_name): diff --git a/tests/test_cpo_trainer.py b/tests/test_cpo_trainer.py index 56792f608dc..19833a414ff 100644 --- a/tests/test_cpo_trainer.py +++ b/tests/test_cpo_trainer.py @@ -11,11 +11,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - - +import pytest import torch from datasets import load_dataset -from parameterized import parameterized from transformers import AutoModelForCausalLM, AutoModelForSeq2SeqLM, AutoTokenizer from trl import CPOConfig, CPOTrainer @@ -37,7 +35,8 @@ def setup_method(self): self.t5_tokenizer = AutoTokenizer.from_pretrained(model_id) self.t5_tokenizer.chat_template = SIMPLE_CHAT_TEMPLATE - @parameterized.expand( + @pytest.mark.parametrize( + "name, loss_type, config_name", [ ("qwen", "sigmoid", "standard_preference"), ("t5", "hinge", "standard_implicit_prompt_preference"), @@ -46,7 +45,7 @@ def setup_method(self): ("qwen", "simpo", "standard_preference"), ("t5", "simpo", "standard_implicit_prompt_preference"), ("qwen", "hinge", "conversational_preference"), - ] + ], ) def test_cpo_trainer(self, name, loss_type, config_name): training_args = CPOConfig( @@ -93,13 +92,14 @@ def test_cpo_trainer(self, name, loss_type, config_name): if param.sum() != 0: # ignore 0 biases assert not torch.equal(param, new_param) - @parameterized.expand( + @pytest.mark.parametrize( + "config_name", [ - ("standard_preference",), - ("standard_implicit_prompt_preference",), - ("conversational_preference",), - ("conversational_implicit_prompt_preference",), - ] + "standard_preference", + "standard_implicit_prompt_preference", + "conversational_preference", + "conversational_implicit_prompt_preference", + ], ) @require_peft def test_cpo_trainer_with_lora(self, config_name): diff --git a/tests/test_data_utils.py b/tests/test_data_utils.py index eff62650bb4..72a4d3e993f 100644 --- a/tests/test_data_utils.py +++ b/tests/test_data_utils.py @@ -13,12 +13,11 @@ # limitations under the License. import copy -import itertools import textwrap from time import strftime +import pytest from datasets import Dataset, DatasetDict -from parameterized import parameterized from transformers import AutoProcessor, AutoTokenizer from trl.data_utils import ( @@ -247,11 +246,11 @@ class TestIsConversational(TrlTestCase): {"prompt": "The sky is", "completion": " blue.", "label": True}, ] - @parameterized.expand(itertools.product(conversational_examples)) + @pytest.mark.parametrize("example", conversational_examples) def test_conversational(self, example): assert is_conversational(example) - @parameterized.expand(itertools.product(non_conversational_examples)) + @pytest.mark.parametrize("example", non_conversational_examples) def test_non_conversational(self, example): assert not is_conversational(example) @@ -345,7 +344,8 @@ class TestApplyChatTemplate(TrlTestCase): {"prompt": "The sky is", "completion": " blue.", "label": True}, # Unpaired preference ] - @parameterized.expand(itertools.product(tokenizers, conversational_examples)) + @pytest.mark.parametrize("example", conversational_examples) + @pytest.mark.parametrize("tokenizer_id", tokenizers) def test_apply_chat_template(self, tokenizer_id, example): tokenizer = AutoTokenizer.from_pretrained(tokenizer_id) result = apply_chat_template(example, tokenizer) @@ -371,7 +371,8 @@ def test_apply_chat_template(self, tokenizer_id, example): assert result["label"] == example["label"] # both conversational and non-conversational examples - @parameterized.expand(itertools.product(tokenizers, conversational_examples + non_conversational_examples)) + @pytest.mark.parametrize("example", conversational_examples + non_conversational_examples) + @pytest.mark.parametrize("tokenizer_id", tokenizers) def test_maybe_apply_chat_template(self, tokenizer_id, example): tokenizer = AutoTokenizer.from_pretrained(tokenizer_id) result = maybe_apply_chat_template(example, tokenizer) diff --git a/tests/test_dpo_trainer.py b/tests/test_dpo_trainer.py index e33eba91c3d..7fdc6091960 100644 --- a/tests/test_dpo_trainer.py +++ b/tests/test_dpo_trainer.py @@ -20,7 +20,6 @@ import pytest import torch from datasets import Dataset, features, load_dataset -from parameterized import parameterized from transformers import ( AutoModelForCausalLM, AutoModelForImageTextToText, @@ -192,22 +191,23 @@ def test_train(self): if param.sum() != 0: # ignore 0 biases assert not torch.allclose(param, new_param, rtol=1e-12, atol=1e-12) - @parameterized.expand( + @pytest.mark.parametrize( + "loss_type", [ - ("sigmoid",), - ("hinge",), - ("ipo",), - ("exo_pair",), - ("nca_pair",), - ("robust",), - ("bco_pair",), - ("sppo_hard",), - ("aot",), - ("aot_pair",), - ("discopop",), - ("apo_zero",), - ("apo_down",), - ] + "sigmoid", + "hinge", + "ipo", + "exo_pair", + "nca_pair", + "robust", + "bco_pair", + "sppo_hard", + "aot", + "aot_pair", + "discopop", + "apo_zero", + "apo_down", + ], ) def test_train_loss_types(self, loss_type): model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" @@ -345,7 +345,7 @@ def test_wrong_loss_weights_length(self): loss_weights=[1.0, 0.5, 0.1], # Wrong length ) - @parameterized.expand([(None,), (0.5,)]) + @pytest.mark.parametrize("rpo_alpha", [None, 0.5]) def test_dpo_trainer_without_providing_ref_model(self, rpo_alpha): training_args = DPOConfig( output_dir=self.tmp_dir, @@ -692,7 +692,8 @@ def test_dpo_lora_bf16_autocast_llama(self): # save peft adapter trainer.save_model() - @parameterized.expand( + @pytest.mark.parametrize( + "loss_type, pre_compute, gen_during_eval", [ ("sigmoid", False, False), ("sigmoid", False, True), @@ -718,7 +719,7 @@ def test_dpo_lora_bf16_autocast_llama(self): ("robust", False, True), ("robust", True, False), ("robust", True, True), - ] + ], ) @require_bitsandbytes @require_peft @@ -1297,7 +1298,8 @@ def test_train_with_length_desensitization(self): if param.sum() != 0: # ignore 0 biases assert not torch.allclose(param, new_param, rtol=1e-12, atol=1e-12) - @parameterized.expand( + @pytest.mark.parametrize( + "beta, loss_type", [ (0.1, "sigmoid"), (0.1, "apo_zero"), @@ -1309,7 +1311,7 @@ def test_train_with_length_desensitization(self): (0.5, "apo_down"), (0.5, "sppo_hard"), (0.5, "nca_pair"), - ] + ], ) @require_liger_kernel @pytest.mark.skipif(not (sys.version_info >= (3, 10)), reason="Liger kernel is not supported on Python 3.9") @@ -1416,14 +1418,15 @@ def test_train_with_iterable_dataset(self): @require_vision class TestDPOVisionTrainer(TrlTestCase): - @parameterized.expand( + @pytest.mark.parametrize( + "model_id", [ - # ("trl-internal-testing/tiny-Idefics2ForConditionalGeneration",), device issue from transformers, see https://github.com/huggingface/transformers/pull/39975 - # ("trl-internal-testing/tiny-PaliGemmaForConditionalGeneration",), - ("trl-internal-testing/tiny-LlavaForConditionalGeneration",), - ("trl-internal-testing/tiny-LlavaNextForConditionalGeneration",), - ("trl-internal-testing/tiny-Gemma3ForConditionalGeneration",), - ] + # "trl-internal-testing/tiny-Idefics2ForConditionalGeneration", device issue from transformers, see https://github.com/huggingface/transformers/pull/39975 + # "trl-internal-testing/tiny-PaliGemmaForConditionalGeneration", + "trl-internal-testing/tiny-LlavaForConditionalGeneration", + "trl-internal-testing/tiny-LlavaNextForConditionalGeneration", + "trl-internal-testing/tiny-Gemma3ForConditionalGeneration", + ], ) def test_vdpo_trainer(self, model_id): # fmt: off @@ -1509,7 +1512,8 @@ def test_vdpo_trainer(self, model_id): class TestDPOConfig(TrlTestCase): - @parameterized.expand([(f_div_type, as_str) for f_div_type in list(FDivergenceType) for as_str in [False, True]]) + @pytest.mark.parametrize("as_string", [False, True]) + @pytest.mark.parametrize("f_divergence_type", list(FDivergenceType)) def test_f_divergence_type(self, f_divergence_type, as_string: bool): training_args = DPOConfig( output_dir=self.tmp_dir, diff --git a/tests/test_grpo_trainer.py b/tests/test_grpo_trainer.py index a89c5f9bac0..d27a4b1d415 100644 --- a/tests/test_grpo_trainer.py +++ b/tests/test_grpo_trainer.py @@ -17,7 +17,6 @@ import pytest import torch from datasets import load_dataset -from parameterized import parameterized from transformers import ( AutoModelForCausalLM, AutoModelForImageTextToText, @@ -124,7 +123,7 @@ def test_init_minimal(self): train_dataset=dataset, ) - @parameterized.expand([("standard_prompt_only",), ("conversational_prompt_only",)]) + @pytest.mark.parametrize("config_name", ["standard_prompt_only", "conversational_prompt_only"]) def test_training(self, config_name): dataset = load_dataset("trl-internal-testing/zen", config_name, split="train") @@ -154,7 +153,7 @@ def test_training(self, config_name): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." - @parameterized.expand([("bnpo",), ("dr_grpo",), ("dapo",)]) + @pytest.mark.parametrize("loss_type", ["bnpo", "dr_grpo", "dapo"]) def test_training_loss_types(self, loss_type): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -907,7 +906,7 @@ def test_training_vllm_with_additional_generation_kwargs(self): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." - @parameterized.expand([(False,), ("group",), ("batch",), (True,), ("none",)]) + @pytest.mark.parametrize("scale_rewards", [False, "group", "batch", True, "none"]) def test_training_scale_rewards(self, scale_rewards): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -1251,14 +1250,15 @@ def test_prepare_input_called_with_correct_data(self): for i in range(8, 16): assert mock_prepare.call_args_list[i].args[1] == expected_second_generation_batch - @parameterized.expand( + @pytest.mark.parametrize( + "model_id", [ - ("trl-internal-testing/tiny-Gemma3ForConditionalGeneration",), - ("trl-internal-testing/tiny-LlavaNextForConditionalGeneration",), - ("trl-internal-testing/tiny-Qwen2_5_VLForConditionalGeneration",), - ("trl-internal-testing/tiny-Qwen2VLForConditionalGeneration",), - # ("trl-internal-testing/tiny-SmolVLMForConditionalGeneration",), seems not to support bf16 properly - ] + "trl-internal-testing/tiny-Gemma3ForConditionalGeneration", + "trl-internal-testing/tiny-LlavaNextForConditionalGeneration", + "trl-internal-testing/tiny-Qwen2_5_VLForConditionalGeneration", + "trl-internal-testing/tiny-Qwen2VLForConditionalGeneration", + # "trl-internal-testing/tiny-SmolVLMForConditionalGeneration", seems not to support bf16 properly + ], ) @require_vision def test_training_vlm(self, model_id): @@ -1471,11 +1471,12 @@ def reward_func(completions, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." - @parameterized.expand( + @pytest.mark.parametrize( + "model_id", [ - ("trl-internal-testing/tiny-Qwen2_5_VLForConditionalGeneration",), - ("trl-internal-testing/tiny-Gemma3ForConditionalGeneration",), - ] + "trl-internal-testing/tiny-Qwen2_5_VLForConditionalGeneration", + "trl-internal-testing/tiny-Gemma3ForConditionalGeneration", + ], ) @require_vision @require_vllm diff --git a/tests/test_kto_trainer.py b/tests/test_kto_trainer.py index e2c325149f2..eee6366bf15 100644 --- a/tests/test_kto_trainer.py +++ b/tests/test_kto_trainer.py @@ -16,7 +16,6 @@ import pytest import torch from datasets import load_dataset -from parameterized import parameterized from transformers import AutoModelForCausalLM, AutoModelForSeq2SeqLM, AutoTokenizer from transformers.testing_utils import require_liger_kernel @@ -40,7 +39,8 @@ def setup_method(self): self.t5_ref_model = AutoModelForSeq2SeqLM.from_pretrained(model_id) self.t5_tokenizer = AutoTokenizer.from_pretrained(model_id) - @parameterized.expand( + @pytest.mark.parametrize( + "name, config_name, loss_type, pre_compute, eval_dataset", [ ("qwen", "standard_preference", "kto", True, True), # ("t5", "standard_implicit_prompt_preference", "kto", True, False), # KTO broken for enc-dec @@ -50,7 +50,7 @@ def setup_method(self): # ("t5", "conversational_unpaired_preference", "apo_zero_unpaired", True, False), ("qwen", "standard_unpaired_preference", "apo_zero_unpaired", False, True), # ("t5", "conversational_unpaired_preference", "apo_zero_unpaired", False, False), - ] + ], ) def test_kto_trainer(self, name, config_name, loss_type, pre_compute, eval_dataset): training_args = KTOConfig( diff --git a/tests/test_modeling_value_head.py b/tests/test_modeling_value_head.py index fd9d4ff0b3f..a8a462504d7 100644 --- a/tests/test_modeling_value_head.py +++ b/tests/test_modeling_value_head.py @@ -18,7 +18,6 @@ import torch import transformers from packaging import version -from parameterized import parameterized from transformers import AutoModelForCausalLM, AutoModelForSeq2SeqLM, GenerationConfig from trl import AutoModelForCausalLMWithValueHead, AutoModelForSeq2SeqLMWithValueHead, create_reference_model @@ -304,7 +303,7 @@ def test_dropout_kwargs(self): # Check if v head of the model has the same dropout as the config assert model.v_head.dropout.p == 0.5 - @parameterized.expand(ALL_CAUSAL_LM_MODELS) + @pytest.mark.parametrize("model_name", ALL_CAUSAL_LM_MODELS) def test_generate(self, model_name): r""" Test if `generate` works for every model @@ -436,7 +435,7 @@ def test_dropout_kwargs(self): # Check if v head of the model has the same dropout as the config assert model.v_head.dropout.p == 0.5 - @parameterized.expand(ALL_SEQ2SEQ_MODELS) + @pytest.mark.parametrize("model_name", ALL_SEQ2SEQ_MODELS) def test_generate(self, model_name): r""" Test if `generate` works for every model diff --git a/tests/test_nash_md_trainer.py b/tests/test_nash_md_trainer.py index d6026e73443..936db5bbdf5 100644 --- a/tests/test_nash_md_trainer.py +++ b/tests/test_nash_md_trainer.py @@ -11,10 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - - +import pytest from datasets import load_dataset -from parameterized import parameterized from transformers import AutoModelForCausalLM, AutoModelForSequenceClassification, AutoTokenizer from transformers.utils import is_peft_available @@ -36,7 +34,7 @@ def setup_method(self): self.tokenizer = AutoTokenizer.from_pretrained(self.model_id) self.tokenizer.pad_token = self.tokenizer.eos_token - @parameterized.expand([("standard_prompt_only",), ("conversational_prompt_only",)]) + @pytest.mark.parametrize("config_name", ["standard_prompt_only", "conversational_prompt_only"]) def test_nash_md_trainer_training(self, config_name): training_args = NashMDConfig( output_dir=self.tmp_dir, @@ -184,7 +182,7 @@ def test_training_pre_pefted_model_implicit_ref_with_reward_model(self): assert "train_loss" in trainer.state.log_history[-1] - @parameterized.expand([("standard_prompt_only",), ("conversational_prompt_only",)]) + @pytest.mark.parametrize("config_name", ["standard_prompt_only", "conversational_prompt_only"]) @require_llm_blender def test_nash_md_trainer_judge_training(self, config_name): training_args = NashMDConfig( diff --git a/tests/test_online_dpo_trainer.py b/tests/test_online_dpo_trainer.py index f8706770371..b742bec8d1a 100644 --- a/tests/test_online_dpo_trainer.py +++ b/tests/test_online_dpo_trainer.py @@ -16,7 +16,6 @@ import transformers from datasets import Dataset, features, load_dataset from packaging.version import Version -from parameterized import parameterized from transformers import AutoModelForCausalLM, AutoModelForSequenceClassification, AutoTokenizer from transformers.testing_utils import require_torch_accelerator from transformers.utils import is_peft_available, is_vision_available @@ -55,7 +54,7 @@ def setup_method(self): self.reward_tokenizer = AutoTokenizer.from_pretrained(self.reward_model_id) self.reward_tokenizer.pad_token = self.reward_tokenizer.eos_token - @parameterized.expand([("standard_prompt_only",), ("conversational_prompt_only",)]) + @pytest.mark.parametrize("config_name", ["standard_prompt_only", "conversational_prompt_only"]) def test_training(self, config_name): training_args = OnlineDPOConfig( output_dir=self.tmp_dir, @@ -244,7 +243,7 @@ def test_training_with_peft_model_and_peft_config(self): # Check if training loss is available assert "train_loss" in trainer.state.log_history[-1] - @parameterized.expand([("standard_prompt_only",), ("conversational_prompt_only",)]) + @pytest.mark.parametrize("config_name", ["standard_prompt_only", "conversational_prompt_only"]) @require_llm_blender def test_training_with_judge(self, config_name): training_args = OnlineDPOConfig( @@ -270,7 +269,7 @@ def test_training_with_judge(self, config_name): # Check if training loss is available assert "train_loss" in trainer.state.log_history[-1] - @parameterized.expand([("standard_prompt_only",), ("conversational_prompt_only",)]) + @pytest.mark.parametrize("config_name", ["standard_prompt_only", "conversational_prompt_only"]) @require_torch_accelerator @require_vllm @pytest.mark.slow @@ -425,7 +424,7 @@ def test_generation_config_setup(self): assert trainer.generation_config.max_new_tokens == 64 assert not trainer.generation_config.do_sample # From generation_kwargs - @parameterized.expand([("standard_prompt_only",), ("conversational_prompt_only",)]) + @pytest.mark.parametrize("config_name", ["standard_prompt_only", "conversational_prompt_only"]) @require_torch_accelerator def test_training_with_transformers_paged(self, config_name): if Version(transformers.__version__) < Version("4.57.0"): @@ -455,7 +454,7 @@ def test_training_with_transformers_paged(self, config_name): # Check if training loss is available assert "train_loss" in trainer.state.log_history[-1] - @parameterized.expand([("standard_prompt_only",), ("conversational_prompt_only",)]) + @pytest.mark.parametrize("config_name", ["standard_prompt_only", "conversational_prompt_only"]) def test_training_with_reward_funcs(self, config_name): def simple_reward_func(prompts, completions, completion_ids, **kwargs): return [0.5 for _ in prompts] @@ -490,11 +489,12 @@ def simple_reward_func(prompts, completions, completion_ids, **kwargs): @require_vision class TestOnlineDPOVisionTrainer(TrlTestCase): - @parameterized.expand( + @pytest.mark.parametrize( + "model_id", [ - ("trl-internal-testing/tiny-Idefics2ForConditionalGeneration",), - ("trl-internal-testing/tiny-LlavaForConditionalGeneration",), - ] + "trl-internal-testing/tiny-Idefics2ForConditionalGeneration", + "trl-internal-testing/tiny-LlavaForConditionalGeneration", + ], ) def test_online_dpo_vlm_trainer(self, model_id): dataset_dict = { diff --git a/tests/test_orpo_trainer.py b/tests/test_orpo_trainer.py index dedfc4c36c9..f882cf756f8 100644 --- a/tests/test_orpo_trainer.py +++ b/tests/test_orpo_trainer.py @@ -11,11 +11,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - - +import pytest import torch from datasets import load_dataset -from parameterized import parameterized from transformers import AutoModelForCausalLM, AutoModelForSeq2SeqLM, AutoTokenizer from trl import ORPOConfig, ORPOTrainer @@ -37,13 +35,14 @@ def setup_method(self): self.t5_tokenizer = AutoTokenizer.from_pretrained(model_id) self.t5_tokenizer.chat_template = SIMPLE_CHAT_TEMPLATE - @parameterized.expand( + @pytest.mark.parametrize( + "name, config_name", [ ("qwen", "standard_preference"), ("t5", "standard_implicit_prompt_preference"), ("qwen", "conversational_preference"), ("t5", "conversational_implicit_prompt_preference"), - ] + ], ) def test_orpo_trainer(self, name, config_name): training_args = ORPOConfig( @@ -88,13 +87,14 @@ def test_orpo_trainer(self, name, config_name): if param.sum() != 0: # ignore 0 biases assert not torch.equal(param, new_param) - @parameterized.expand( + @pytest.mark.parametrize( + "config_name", [ - ("standard_preference",), - ("standard_implicit_prompt_preference",), - ("conversational_preference",), - ("conversational_implicit_prompt_preference",), - ] + "standard_preference", + "standard_implicit_prompt_preference", + "conversational_preference", + "conversational_implicit_prompt_preference", + ], ) @require_peft def test_orpo_trainer_with_lora(self, config_name): diff --git a/tests/test_prm_trainer.py b/tests/test_prm_trainer.py index 16876c6df62..963df7b30f7 100644 --- a/tests/test_prm_trainer.py +++ b/tests/test_prm_trainer.py @@ -14,9 +14,9 @@ from unittest.mock import MagicMock +import pytest import torch from datasets import Dataset, load_dataset -from parameterized import parameterized from transformers import AutoModelForTokenClassification, AutoTokenizer, PreTrainedTokenizerBase from transformers.utils import is_peft_available @@ -209,7 +209,7 @@ def setup_method(self): self.model = AutoModelForTokenClassification.from_pretrained(model_id) self.tokenizer = AutoTokenizer.from_pretrained(model_id) - @parameterized.expand([True, False]) + @pytest.mark.parametrize("train_on_last_step_only", [True, False]) def test_train_full(self, train_on_last_step_only): dummy_dataset = load_dataset("trl-internal-testing/zen", "standard_stepwise_supervision", split="train") training_args = PRMConfig( diff --git a/tests/test_reward_trainer.py b/tests/test_reward_trainer.py index ab6d6656e99..e12ce18355d 100644 --- a/tests/test_reward_trainer.py +++ b/tests/test_reward_trainer.py @@ -17,7 +17,6 @@ import pytest import torch from datasets import load_dataset -from parameterized import parameterized from transformers import AutoModelForSequenceClassification, AutoTokenizer from transformers.utils import is_peft_available @@ -108,12 +107,13 @@ def test_collate_with_margin(self): class TestRewardTrainer(TrlTestCase): - @parameterized.expand( + @pytest.mark.parametrize( + "model_id", [ - ("trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5",), - ("trl-internal-testing/tiny-Qwen3MoeForSequenceClassification",), - ("trl-internal-testing/tiny-LlamaForSequenceClassification-3.2",), - ] + "trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", + "trl-internal-testing/tiny-Qwen3MoeForSequenceClassification", + "trl-internal-testing/tiny-LlamaForSequenceClassification-3.2", + ], ) def test_train(self, model_id): # Get the dataset @@ -137,13 +137,14 @@ def test_train(self, model_id): new_param = trainer.model.get_parameter(n) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" - @parameterized.expand( + @pytest.mark.parametrize( + "config_name", [ - ("standard_preference",), - ("conversational_preference",), - ("standard_implicit_prompt_preference",), - ("conversational_implicit_prompt_preference",), - ] + "standard_preference", + "conversational_preference", + "standard_implicit_prompt_preference", + "conversational_implicit_prompt_preference", + ], ) def test_train_dataset_types(self, config_name): # Get the dataset diff --git a/tests/test_rloo_trainer.py b/tests/test_rloo_trainer.py index c02f199e1c3..476fcfb0e72 100644 --- a/tests/test_rloo_trainer.py +++ b/tests/test_rloo_trainer.py @@ -17,7 +17,6 @@ import pytest import torch from datasets import load_dataset -from parameterized import parameterized from transformers import ( AutoModelForCausalLM, AutoModelForImageTextToText, @@ -45,7 +44,7 @@ def test_init_minimal(self): train_dataset=dataset, ) - @parameterized.expand([("standard_prompt_only",), ("conversational_prompt_only",)]) + @pytest.mark.parametrize("config_name", ["standard_prompt_only", "conversational_prompt_only"]) def test_training(self, config_name): dataset = load_dataset("trl-internal-testing/zen", config_name, split="train") @@ -1075,14 +1074,15 @@ def test_prepare_input_called_with_correct_data(self): for i in range(8, 16): assert mock_prepare.call_args_list[i].args[1] == expected_second_generation_batch - @parameterized.expand( + @pytest.mark.parametrize( + "model_id", [ - ("trl-internal-testing/tiny-Gemma3ForConditionalGeneration",), - ("trl-internal-testing/tiny-LlavaNextForConditionalGeneration",), - ("trl-internal-testing/tiny-Qwen2_5_VLForConditionalGeneration",), - ("trl-internal-testing/tiny-Qwen2VLForConditionalGeneration",), - # ("trl-internal-testing/tiny-SmolVLMForConditionalGeneration",), seems not to support bf16 properly - ] + "trl-internal-testing/tiny-Gemma3ForConditionalGeneration", + "trl-internal-testing/tiny-LlavaNextForConditionalGeneration", + "trl-internal-testing/tiny-Qwen2_5_VLForConditionalGeneration", + "trl-internal-testing/tiny-Qwen2VLForConditionalGeneration", + # "trl-internal-testing/tiny-SmolVLMForConditionalGeneration", seems not to support bf16 properly + ], ) @require_vision def test_training_vlm(self, model_id): @@ -1212,11 +1212,12 @@ def reward_func(completions, **kwargs): elif "base_layer" not in n: # We expect the peft params to be different (except for the base layer) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed." - @parameterized.expand( + @pytest.mark.parametrize( + "model_id", [ - ("trl-internal-testing/tiny-Qwen2_5_VLForConditionalGeneration",), - ("trl-internal-testing/tiny-Gemma3ForConditionalGeneration",), - ] + "trl-internal-testing/tiny-Qwen2_5_VLForConditionalGeneration", + "trl-internal-testing/tiny-Gemma3ForConditionalGeneration", + ], ) @require_vision @require_vllm diff --git a/tests/test_sft_trainer.py b/tests/test_sft_trainer.py index 87e47b911d2..21ff5386190 100644 --- a/tests/test_sft_trainer.py +++ b/tests/test_sft_trainer.py @@ -20,7 +20,6 @@ import transformers from datasets import load_dataset from packaging.version import parse as parse_version -from parameterized import parameterized from transformers import AutoModelForCausalLM, AutoTokenizer from transformers.testing_utils import require_flash_attn, require_liger_kernel from transformers.utils import is_peft_available @@ -250,12 +249,13 @@ def test_multiple_examples(self): class TestSFTTrainer(TrlTestCase): - @parameterized.expand( + @pytest.mark.parametrize( + "model_id", [ - ("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",), - ("trl-internal-testing/tiny-Qwen3MoeForCausalLM",), - ("trl-internal-testing/tiny-GptOssForCausalLM",), - ] + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + "trl-internal-testing/tiny-Qwen3MoeForCausalLM", + "trl-internal-testing/tiny-GptOssForCausalLM", + ], ) def test_train(self, model_id): # Get the dataset @@ -497,12 +497,13 @@ def test_train_dense_with_peft_config_lora(self): elif "base_layer" not in n: # We expect the peft parameters to be different (except for the base layer) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" - @parameterized.expand( + @pytest.mark.parametrize( + "peft_type", [ - ("prompt_tuning",), - ("prefix_tuning",), - ("prompt_encoder",), - ] + "prompt_tuning", + "prefix_tuning", + "prompt_encoder", + ], ) @require_peft def test_train_with_peft_config_prompt_tuning(self, peft_type): @@ -881,7 +882,7 @@ def test_train_padding_free(self): new_param = trainer.model.get_parameter(n) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" - @parameterized.expand([("bfd",), ("wrapped",)]) + @pytest.mark.parametrize("packing_strategy", ["bfd", "wrapped"]) @ignore_warnings(message="You are using packing, but the attention implementation is not.*", category=UserWarning) @ignore_warnings(message="Padding-free training is enabled, but the attention.*", category=UserWarning) def test_train_packing(self, packing_strategy): @@ -1321,17 +1322,18 @@ def test_tag_added_peft(self): for tag in ["sft", "trl"]: assert tag in trainer.model.model_tags - @parameterized.expand( + @pytest.mark.parametrize( + "model_id", [ - ("trl-internal-testing/tiny-Gemma3ForConditionalGeneration",), - # ("trl-internal-testing/tiny-Idefics2ForConditionalGeneration",), device issue from transformers, see https://github.com/huggingface/transformers/pull/39975 - # ("trl-internal-testing/tiny-Idefics3ForConditionalGeneration",), device issue from transformers, see https://github.com/huggingface/transformers/pull/39975 - ("trl-internal-testing/tiny-LlavaForConditionalGeneration",), - ("trl-internal-testing/tiny-LlavaNextForConditionalGeneration",), - ("trl-internal-testing/tiny-Qwen2VLForConditionalGeneration",), - ("trl-internal-testing/tiny-Qwen2_5_VLForConditionalGeneration",), - # ("trl-internal-testing/tiny-SmolVLMForConditionalGeneration",), device issue from transformers, see https://github.com/huggingface/transformers/pull/39975 - ] + "trl-internal-testing/tiny-Gemma3ForConditionalGeneration", + # "trl-internal-testing/tiny-Idefics2ForConditionalGeneration", device issue from transformers, see https://github.com/huggingface/transformers/pull/39975 + # "trl-internal-testing/tiny-Idefics3ForConditionalGeneration", device issue from transformers, see https://github.com/huggingface/transformers/pull/39975 + "trl-internal-testing/tiny-LlavaForConditionalGeneration", + "trl-internal-testing/tiny-LlavaNextForConditionalGeneration", + "trl-internal-testing/tiny-Qwen2VLForConditionalGeneration", + "trl-internal-testing/tiny-Qwen2_5_VLForConditionalGeneration", + # "trl-internal-testing/tiny-SmolVLMForConditionalGeneration", device issue from transformers, see https://github.com/huggingface/transformers/pull/39975 + ], ) @require_vision def test_train_vlm(self, model_id): diff --git a/tests/test_trainers_args.py b/tests/test_trainers_args.py index b76110d5f17..2005b54337c 100644 --- a/tests/test_trainers_args.py +++ b/tests/test_trainers_args.py @@ -11,10 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - - +import pytest from datasets import load_dataset -from parameterized import parameterized from transformers import AutoModelForCausalLM, AutoModelForSequenceClassification, AutoTokenizer from trl import ( @@ -246,7 +244,7 @@ def test_kto(self): assert trainer.args.ref_model_init_kwargs == {"trust_remote_code": True} assert trainer.args.dataset_num_proc == 4 - @parameterized.expand([(False,), (True,)]) + @pytest.mark.parametrize("mixtures_coef_list", [False, True]) def test_nash_md(self, mixtures_coef_list): model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" tokenizer = AutoTokenizer.from_pretrained(model_id) @@ -268,7 +266,7 @@ def test_nash_md(self, mixtures_coef_list): ) assert trainer.args.mixture_coef == (0.5 if not mixtures_coef_list else [0.5, 0.6]) - @parameterized.expand([(False,), (True,)]) + @pytest.mark.parametrize("beta_list", [False, True]) def test_online_dpo(self, beta_list): model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" tokenizer = AutoTokenizer.from_pretrained(model_id) @@ -372,7 +370,7 @@ def test_sft(self): assert trainer.args.dataset_kwargs["append_concat_token"] assert trainer.args.eval_packing - @parameterized.expand([(False,), (True,)]) + @pytest.mark.parametrize("alpha_list", [False, True]) def test_xpo(self, alpha_list): model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" tokenizer = AutoTokenizer.from_pretrained(model_id) diff --git a/tests/test_utils.py b/tests/test_utils.py index 60d9b9dcefb..b9590ebaddd 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -20,7 +20,6 @@ import pytest import torch from datasets import load_dataset -from parameterized import parameterized from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig from transformers.utils import is_peft_available @@ -629,14 +628,9 @@ def test_sampler_with_mini_repeat_count_and_batch_size_3(self): class TestEntropyFromLogits(TrlTestCase): - @parameterized.expand( - [ - (dtype, chunk_size, shape) - for dtype in (torch.float64, torch.float32, torch.float16, torch.bfloat16) - for chunk_size in (1, 16) - for shape in [(768,), (32, 768), (8, 16, 768), (2, 4, 8, 768)] - ] - ) + @pytest.mark.parametrize("shape", [(768,), (32, 768), (8, 16, 768), (2, 4, 8, 768)]) + @pytest.mark.parametrize("chunk_size", [1, 16]) + @pytest.mark.parametrize("dtype", [torch.float64, torch.float32, torch.float16, torch.bfloat16]) def test_entropy_from_logits_2_dims(self, dtype, chunk_size, shape): logits = torch.randn(*shape, dtype=dtype) if dtype in (torch.float64, torch.float32): @@ -803,7 +797,7 @@ def test_print_messages_with_tools(self, mock_stdout): class TestSelectiveLogSoftmax(TrlTestCase): - @parameterized.expand([(torch.float64,), (torch.float32,), (torch.float16,), (torch.bfloat16,)]) + @pytest.mark.parametrize("dtype", [torch.float64, torch.float32, torch.float16, torch.bfloat16]) def test_selective_log_softmax(self, dtype): """Test selective_log_softmax with logits of different dtypes""" vocab_size = 1024 diff --git a/tests/test_xpo_trainer.py b/tests/test_xpo_trainer.py index 4d41471187c..0c6f3ab02bf 100644 --- a/tests/test_xpo_trainer.py +++ b/tests/test_xpo_trainer.py @@ -11,10 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - - +import pytest from datasets import load_dataset -from parameterized import parameterized from transformers import AutoModelForCausalLM, AutoModelForSequenceClassification, AutoTokenizer from transformers.utils import is_peft_available @@ -36,7 +34,7 @@ def setup_method(self): self.tokenizer = AutoTokenizer.from_pretrained(self.model_id) self.tokenizer.pad_token = self.tokenizer.eos_token - @parameterized.expand([("standard_prompt_only",), ("conversational_prompt_only",)]) + @pytest.mark.parametrize("config_name", ["standard_prompt_only", "conversational_prompt_only"]) def test_xpo_trainer_training(self, config_name): training_args = XPOConfig( output_dir=self.tmp_dir, @@ -182,7 +180,7 @@ def test_training_pre_pefted_model_implicit_ref(self): assert "train_loss" in trainer.state.log_history[-1] - @parameterized.expand([("standard_prompt_only",), ("conversational_prompt_only",)]) + @pytest.mark.parametrize("config_name", ["standard_prompt_only", "conversational_prompt_only"]) @require_llm_blender def test_xpo_trainer_judge_training(self, config_name): training_args = XPOConfig(