huggingface · albertvillanova · Oct 21, 2025 · Oct 21, 2025 · Oct 21, 2025
diff --git a/.github/workflows/slow-tests.yml b/.github/workflows/slow-tests.yml
@@ -47,7 +47,7 @@ jobs:
         run: |
           source .venv/bin/activate
           uv pip install ".[dev]"
-          uv pip install pytest-reportlog parameterized
+          uv pip install pytest-reportlog
 
       - name: Run slow SFT tests on single GPU
         if: always()
@@ -95,7 +95,7 @@ jobs:
         run: |
           source .venv/bin/activate
           uv pip install ".[dev]"
-          uv pip install pytest-reportlog parameterized
+          uv pip install pytest-reportlog
 
       - name: Run slow SFT tests on Multi GPU
         if: always()

diff --git a/pyproject.toml b/pyproject.toml
@@ -71,7 +71,6 @@ scikit = [
     "scikit-learn"
 ]
 test = [
-    "parameterized",
     "pytest-cov",
     "pytest-rerunfailures==15.1",
     "pytest-xdist",
@@ -112,7 +111,6 @@ dev = [
     "bitsandbytes",
     # scikit: included in bco
     # test
-    "parameterized",
     "pytest-cov",
     "pytest-rerunfailures==15.1",
     "pytest-xdist",

diff --git a/tests/slow/test_dpo_slow.py b/tests/slow/test_dpo_slow.py
@@ -13,13 +13,11 @@
 # limitations under the License.
 
 import gc
-import itertools
 
 import pytest
 import torch
 from accelerate.utils.memory import release_memory
 from datasets import load_dataset
-from parameterized import parameterized
 from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
 from transformers.testing_utils import backend_empty_cache, require_torch_accelerator, torch_device
 from transformers.utils import is_peft_available
@@ -54,7 +52,9 @@ def teardown_method(self):
         backend_empty_cache(torch_device)
         gc.collect()
 
-    @parameterized.expand(list(itertools.product(MODELS_TO_TEST, DPO_LOSS_TYPES, DPO_PRECOMPUTE_LOGITS)))
+    @pytest.mark.parametrize("pre_compute_logits", DPO_PRECOMPUTE_LOGITS)
+    @pytest.mark.parametrize("loss_type", DPO_LOSS_TYPES)
+    @pytest.mark.parametrize("model_id", MODELS_TO_TEST)
     def test_dpo_bare_model(self, model_id, loss_type, pre_compute_logits):
         """
         A test that tests the simple usage of `DPOTrainer` using a bare model in full precision.
@@ -98,16 +98,10 @@ def test_dpo_bare_model(self, model_id, loss_type, pre_compute_logits):
 
         release_memory(model, trainer)
 
-    @parameterized.expand(
-        list(
-            itertools.product(
-                MODELS_TO_TEST,
-                DPO_LOSS_TYPES,
-                DPO_PRECOMPUTE_LOGITS,
-                GRADIENT_CHECKPOINTING_KWARGS,
-            )
-        )
-    )
+    @pytest.mark.parametrize("gradient_checkpointing_kwargs", GRADIENT_CHECKPOINTING_KWARGS)
+    @pytest.mark.parametrize("pre_compute_logits", DPO_PRECOMPUTE_LOGITS)
+    @pytest.mark.parametrize("loss_type", DPO_LOSS_TYPES)
+    @pytest.mark.parametrize("model_id", MODELS_TO_TEST)
     @require_peft
     def test_dpo_peft_model(self, model_id, loss_type, pre_compute_logits, gradient_checkpointing_kwargs):
         """
@@ -160,16 +154,10 @@ def test_dpo_peft_model(self, model_id, loss_type, pre_compute_logits, gradient_
 
         release_memory(model, trainer)
 
-    @parameterized.expand(
-        list(
-            itertools.product(
-                MODELS_TO_TEST,
-                DPO_LOSS_TYPES,
-                DPO_PRECOMPUTE_LOGITS,
-                GRADIENT_CHECKPOINTING_KWARGS,
-            )
-        )
-    )
+    @pytest.mark.parametrize("gradient_checkpointing_kwargs", GRADIENT_CHECKPOINTING_KWARGS)
+    @pytest.mark.parametrize("pre_compute_logits", DPO_PRECOMPUTE_LOGITS)
+    @pytest.mark.parametrize("loss_type", DPO_LOSS_TYPES)
+    @pytest.mark.parametrize("model_id", MODELS_TO_TEST)
     @require_bitsandbytes
     @require_peft
     def test_dpo_peft_model_qlora(self, model_id, loss_type, pre_compute_logits, gradient_checkpointing_kwargs):

diff --git a/tests/slow/test_grpo_slow.py b/tests/slow/test_grpo_slow.py
@@ -23,7 +23,6 @@
 from accelerate.utils.memory import release_memory
 from datasets import Dataset, Features, Image, Value, load_dataset
 from packaging.version import Version
-from parameterized import parameterized
 from transformers import (
     AutoModelForCausalLM,
     AutoModelForImageTextToText,
@@ -64,7 +63,7 @@ def teardown_method(self):
         backend_empty_cache(torch_device)
         gc.collect()
 
-    @parameterized.expand(MODELS_TO_TEST)
+    @pytest.mark.parametrize("model_name", MODELS_TO_TEST)
     @require_liger_kernel
     def test_training_with_liger_grpo_loss(self, model_name):
         training_args = GRPOConfig(
@@ -104,7 +103,7 @@ def test_training_with_liger_grpo_loss(self, model_name):
 
         release_memory(model, trainer)
 
-    @parameterized.expand(MODELS_TO_TEST)
+    @pytest.mark.parametrize("model_name", MODELS_TO_TEST)
     @require_liger_kernel
     @require_peft
     def test_training_with_liger_grpo_loss_and_peft(self, model_name):
@@ -168,7 +167,7 @@ def test_training_with_liger_grpo_loss_and_peft(self, model_name):
 
         release_memory(model, trainer)
 
-    @parameterized.expand(MODELS_TO_TEST)
+    @pytest.mark.parametrize("model_name", MODELS_TO_TEST)
     def test_training_with_transformers_paged(self, model_name):
         """Test that training works with transformers paged implementation (requires GPU)."""
         if Version(transformers.__version__) < Version("4.57.0"):
@@ -206,10 +205,11 @@ def test_training_with_transformers_paged(self, model_name):
 
         release_memory(model, trainer)
 
-    @parameterized.expand(
+    @pytest.mark.parametrize(
+        "model_name",
         [
-            ("HuggingFaceTB/SmolVLM-Instruct",),  # Only test the smaller model to avoid OOM
-        ]
+            "HuggingFaceTB/SmolVLM-Instruct",  # Only test the smaller model to avoid OOM
+        ],
     )
     @require_flash_attn
     @require_bitsandbytes

diff --git a/tests/slow/test_sft_slow.py b/tests/slow/test_sft_slow.py
@@ -13,13 +13,11 @@
 # limitations under the License.
 
 import gc
-import itertools
 
 import pytest
 import torch
 from accelerate.utils.memory import release_memory
 from datasets import load_dataset
-from parameterized import parameterized
 from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
 from transformers.testing_utils import (
     backend_empty_cache,
@@ -61,7 +59,8 @@ def teardown_method(self):
         backend_empty_cache(torch_device)
         gc.collect()
 
-    @parameterized.expand(list(itertools.product(MODELS_TO_TEST, PACKING_OPTIONS)))
+    @pytest.mark.parametrize("packing", PACKING_OPTIONS)
+    @pytest.mark.parametrize("model_name", MODELS_TO_TEST)
     def test_sft_trainer_str(self, model_name, packing):
         """
         Simply tests if passing a simple str to `SFTTrainer` loads and runs the trainer as expected.
@@ -85,7 +84,8 @@ def test_sft_trainer_str(self, model_name, packing):
 
         trainer.train()
 
-    @parameterized.expand(list(itertools.product(MODELS_TO_TEST, PACKING_OPTIONS)))
+    @pytest.mark.parametrize("packing", PACKING_OPTIONS)
+    @pytest.mark.parametrize("model_name", MODELS_TO_TEST)
     def test_sft_trainer_transformers(self, model_name, packing):
         """
         Simply tests if passing a transformers model to `SFTTrainer` loads and runs the trainer as expected.
@@ -115,7 +115,8 @@ def test_sft_trainer_transformers(self, model_name, packing):
 
         release_memory(model, trainer)
 
-    @parameterized.expand(list(itertools.product(MODELS_TO_TEST, PACKING_OPTIONS)))
+    @pytest.mark.parametrize("packing", PACKING_OPTIONS)
+    @pytest.mark.parametrize("model_name", MODELS_TO_TEST)
     @require_peft
     def test_sft_trainer_peft(self, model_name, packing):
         """
@@ -151,7 +152,8 @@ def test_sft_trainer_peft(self, model_name, packing):
 
         release_memory(model, trainer)
 
-    @parameterized.expand(list(itertools.product(MODELS_TO_TEST, PACKING_OPTIONS)))
+    @pytest.mark.parametrize("packing", PACKING_OPTIONS)
+    @pytest.mark.parametrize("model_name", MODELS_TO_TEST)
     def test_sft_trainer_transformers_mp(self, model_name, packing):
         """
         Simply tests if passing a transformers model to `SFTTrainer` loads and runs the trainer as expected in mixed
@@ -183,7 +185,9 @@ def test_sft_trainer_transformers_mp(self, model_name, packing):
 
         release_memory(model, trainer)
 
-    @parameterized.expand(list(itertools.product(MODELS_TO_TEST, PACKING_OPTIONS, GRADIENT_CHECKPOINTING_KWARGS)))
+    @pytest.mark.parametrize("gradient_checkpointing_kwargs", GRADIENT_CHECKPOINTING_KWARGS)
+    @pytest.mark.parametrize("packing", PACKING_OPTIONS)
+    @pytest.mark.parametrize("model_name", MODELS_TO_TEST)
     def test_sft_trainer_transformers_mp_gc(self, model_name, packing, gradient_checkpointing_kwargs):
         """
         Simply tests if passing a transformers model to `SFTTrainer` loads and runs the trainer as expected in mixed
@@ -217,7 +221,9 @@ def test_sft_trainer_transformers_mp_gc(self, model_name, packing, gradient_chec
 
         release_memory(model, trainer)
 
-    @parameterized.expand(list(itertools.product(MODELS_TO_TEST, PACKING_OPTIONS, GRADIENT_CHECKPOINTING_KWARGS)))
+    @pytest.mark.parametrize("gradient_checkpointing_kwargs", GRADIENT_CHECKPOINTING_KWARGS)
+    @pytest.mark.parametrize("packing", PACKING_OPTIONS)
+    @pytest.mark.parametrize("model_name", MODELS_TO_TEST)
     @require_peft
     def test_sft_trainer_transformers_mp_gc_peft(self, model_name, packing, gradient_checkpointing_kwargs):
         """
@@ -255,9 +261,10 @@ def test_sft_trainer_transformers_mp_gc_peft(self, model_name, packing, gradient
 
         release_memory(model, trainer)
 
-    @parameterized.expand(
-        list(itertools.product(MODELS_TO_TEST, PACKING_OPTIONS, GRADIENT_CHECKPOINTING_KWARGS, DEVICE_MAP_OPTIONS))
-    )
+    @pytest.mark.parametrize("device_map", DEVICE_MAP_OPTIONS)
+    @pytest.mark.parametrize("gradient_checkpointing_kwargs", GRADIENT_CHECKPOINTING_KWARGS)
+    @pytest.mark.parametrize("packing", PACKING_OPTIONS)
+    @pytest.mark.parametrize("model_name", MODELS_TO_TEST)
     @require_torch_multi_accelerator
     def test_sft_trainer_transformers_mp_gc_device_map(
         self, model_name, packing, gradient_checkpointing_kwargs, device_map
@@ -294,7 +301,9 @@ def test_sft_trainer_transformers_mp_gc_device_map(
 
         release_memory(model, trainer)
 
-    @parameterized.expand(list(itertools.product(MODELS_TO_TEST, PACKING_OPTIONS, GRADIENT_CHECKPOINTING_KWARGS)))
+    @pytest.mark.parametrize("gradient_checkpointing_kwargs", GRADIENT_CHECKPOINTING_KWARGS)
+    @pytest.mark.parametrize("packing", PACKING_OPTIONS)
+    @pytest.mark.parametrize("model_name", MODELS_TO_TEST)
     @require_peft
     @require_bitsandbytes
     def test_sft_trainer_transformers_mp_gc_peft_qlora(self, model_name, packing, gradient_checkpointing_kwargs):
@@ -335,7 +344,8 @@ def test_sft_trainer_transformers_mp_gc_peft_qlora(self, model_name, packing, gr
 
         release_memory(model, trainer)
 
-    @parameterized.expand(list(itertools.product(MODELS_TO_TEST, PACKING_OPTIONS)))
+    @pytest.mark.parametrize("packing", PACKING_OPTIONS)
+    @pytest.mark.parametrize("model_name", MODELS_TO_TEST)
     @require_peft
     @require_bitsandbytes
     def test_sft_trainer_with_chat_format_qlora(self, model_name, packing):
@@ -375,7 +385,8 @@ def test_sft_trainer_with_chat_format_qlora(self, model_name, packing):
 
         release_memory(model, trainer)
 
-    @parameterized.expand(list(itertools.product(MODELS_TO_TEST, PACKING_OPTIONS)))
+    @pytest.mark.parametrize("packing", PACKING_OPTIONS)
+    @pytest.mark.parametrize("model_name", MODELS_TO_TEST)
     @require_liger_kernel
     def test_sft_trainer_with_liger(self, model_name, packing):
         """
@@ -419,7 +430,8 @@ def cleanup_liger_patches(trainer):
         finally:
             cleanup_liger_patches(trainer)
 
-    @parameterized.expand(list(itertools.product(MODELS_TO_TEST, PACKING_OPTIONS)))
+    @pytest.mark.parametrize("packing", PACKING_OPTIONS)
+    @pytest.mark.parametrize("model_name", MODELS_TO_TEST)
     @require_torch_accelerator
     def test_train_offloading(self, model_name, packing):
         """Test that activation offloading works with SFTTrainer."""

diff --git a/tests/test_bco_trainer.py b/tests/test_bco_trainer.py
@@ -18,7 +18,6 @@
 import torch
 from accelerate import Accelerator
 from datasets import load_dataset
-from parameterized import parameterized
 from transformers import AutoModel, AutoModelForCausalLM, AutoTokenizer
 from transformers.utils import is_peft_available
 
@@ -33,15 +32,16 @@
 
 
 class TestBCOTrainer(TrlTestCase):
-    @parameterized.expand(
+    @pytest.mark.parametrize(
+        "config_name",
         [
-            ("standard_preference",),
-            ("standard_implicit_prompt_preference",),
-            ("standard_unpaired_preference",),
-            ("conversational_preference",),
-            ("conversational_implicit_prompt_preference",),
-            ("conversational_unpaired_preference",),
-        ]
+            "standard_preference",
+            "standard_implicit_prompt_preference",
+            "standard_unpaired_preference",
+            "conversational_preference",
+            "conversational_implicit_prompt_preference",
+            "conversational_unpaired_preference",
+        ],
     )
     @require_sklearn
     def test_train(self, config_name):

diff --git a/tests/test_cpo_trainer.py b/tests/test_cpo_trainer.py
@@ -11,11 +11,9 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
-
+import pytest
 import torch
 from datasets import load_dataset
-from parameterized import parameterized
 from transformers import AutoModelForCausalLM, AutoModelForSeq2SeqLM, AutoTokenizer
 
 from trl import CPOConfig, CPOTrainer
@@ -37,7 +35,8 @@ def setup_method(self):
         self.t5_tokenizer = AutoTokenizer.from_pretrained(model_id)
         self.t5_tokenizer.chat_template = SIMPLE_CHAT_TEMPLATE
 
-    @parameterized.expand(
+    @pytest.mark.parametrize(
+        "name, loss_type, config_name",
         [
             ("qwen", "sigmoid", "standard_preference"),
             ("t5", "hinge", "standard_implicit_prompt_preference"),
@@ -46,7 +45,7 @@ def setup_method(self):
             ("qwen", "simpo", "standard_preference"),
             ("t5", "simpo", "standard_implicit_prompt_preference"),
             ("qwen", "hinge", "conversational_preference"),
-        ]
+        ],
     )
     def test_cpo_trainer(self, name, loss_type, config_name):
         training_args = CPOConfig(
@@ -93,13 +92,14 @@ def test_cpo_trainer(self, name, loss_type, config_name):
             if param.sum() != 0:  # ignore 0 biases
                 assert not torch.equal(param, new_param)
 
-    @parameterized.expand(
+    @pytest.mark.parametrize(
+        "config_name",
         [
-            ("standard_preference",),
-            ("standard_implicit_prompt_preference",),
-            ("conversational_preference",),
-            ("conversational_implicit_prompt_preference",),
-        ]
+            "standard_preference",
+            "standard_implicit_prompt_preference",
+            "conversational_preference",
+            "conversational_implicit_prompt_preference",
+        ],
     )
     @require_peft
     def test_cpo_trainer_with_lora(self, config_name):