diff --git a/.github/workflows/slow-tests.yml b/.github/workflows/slow-tests.yml
index a302ec16041..c506231123c 100644
--- a/.github/workflows/slow-tests.yml
+++ b/.github/workflows/slow-tests.yml
@@ -47,7 +47,7 @@ jobs:
         run: |
           source .venv/bin/activate
           uv pip install ".[dev]"
-          uv pip install pytest-reportlog parameterized
+          uv pip install pytest-reportlog
 
       - name: Run slow SFT tests on single GPU
         if: always()
@@ -95,7 +95,7 @@ jobs:
         run: |
           source .venv/bin/activate
           uv pip install ".[dev]"
-          uv pip install pytest-reportlog parameterized
+          uv pip install pytest-reportlog
 
       - name: Run slow SFT tests on Multi GPU
         if: always()
diff --git a/pyproject.toml b/pyproject.toml
index 787415bec79..c1a94413f64 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -71,7 +71,6 @@ scikit = [
     "scikit-learn"
 ]
 test = [
-    "parameterized",
     "pytest-cov",
     "pytest-rerunfailures==15.1",
     "pytest-xdist",
@@ -112,7 +111,6 @@ dev = [
     "bitsandbytes",
     # scikit: included in bco
     # test
-    "parameterized",
     "pytest-cov",
     "pytest-rerunfailures==15.1",
     "pytest-xdist",
diff --git a/tests/slow/test_dpo_slow.py b/tests/slow/test_dpo_slow.py
index 26feb388c6b..199f702bf23 100644
--- a/tests/slow/test_dpo_slow.py
+++ b/tests/slow/test_dpo_slow.py
@@ -13,13 +13,11 @@
 # limitations under the License.
 
 import gc
-import itertools
 
 import pytest
 import torch
 from accelerate.utils.memory import release_memory
 from datasets import load_dataset
-from parameterized import parameterized
 from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
 from transformers.testing_utils import backend_empty_cache, require_torch_accelerator, torch_device
 from transformers.utils import is_peft_available
@@ -54,7 +52,9 @@ def teardown_method(self):
         backend_empty_cache(torch_device)
         gc.collect()
 
-    @parameterized.expand(list(itertools.product(MODELS_TO_TEST, DPO_LOSS_TYPES, DPO_PRECOMPUTE_LOGITS)))
+    @pytest.mark.parametrize("pre_compute_logits", DPO_PRECOMPUTE_LOGITS)
+    @pytest.mark.parametrize("loss_type", DPO_LOSS_TYPES)
+    @pytest.mark.parametrize("model_id", MODELS_TO_TEST)
     def test_dpo_bare_model(self, model_id, loss_type, pre_compute_logits):
         """
         A test that tests the simple usage of `DPOTrainer` using a bare model in full precision.
@@ -98,16 +98,10 @@ def test_dpo_bare_model(self, model_id, loss_type, pre_compute_logits):
 
         release_memory(model, trainer)
 
-    @parameterized.expand(
-        list(
-            itertools.product(
-                MODELS_TO_TEST,
-                DPO_LOSS_TYPES,
-                DPO_PRECOMPUTE_LOGITS,
-                GRADIENT_CHECKPOINTING_KWARGS,
-            )
-        )
-    )
+    @pytest.mark.parametrize("gradient_checkpointing_kwargs", GRADIENT_CHECKPOINTING_KWARGS)
+    @pytest.mark.parametrize("pre_compute_logits", DPO_PRECOMPUTE_LOGITS)
+    @pytest.mark.parametrize("loss_type", DPO_LOSS_TYPES)
+    @pytest.mark.parametrize("model_id", MODELS_TO_TEST)
     @require_peft
     def test_dpo_peft_model(self, model_id, loss_type, pre_compute_logits, gradient_checkpointing_kwargs):
         """
@@ -160,16 +154,10 @@ def test_dpo_peft_model(self, model_id, loss_type, pre_compute_logits, gradient_
 
         release_memory(model, trainer)
 
-    @parameterized.expand(
-        list(
-            itertools.product(
-                MODELS_TO_TEST,
-                DPO_LOSS_TYPES,
-                DPO_PRECOMPUTE_LOGITS,
-                GRADIENT_CHECKPOINTING_KWARGS,
-            )
-        )
-    )
+    @pytest.mark.parametrize("gradient_checkpointing_kwargs", GRADIENT_CHECKPOINTING_KWARGS)
+    @pytest.mark.parametrize("pre_compute_logits", DPO_PRECOMPUTE_LOGITS)
+    @pytest.mark.parametrize("loss_type", DPO_LOSS_TYPES)
+    @pytest.mark.parametrize("model_id", MODELS_TO_TEST)
     @require_bitsandbytes
     @require_peft
     def test_dpo_peft_model_qlora(self, model_id, loss_type, pre_compute_logits, gradient_checkpointing_kwargs):
diff --git a/tests/slow/test_grpo_slow.py b/tests/slow/test_grpo_slow.py
index 7ba974423ca..745dc869028 100644
--- a/tests/slow/test_grpo_slow.py
+++ b/tests/slow/test_grpo_slow.py
@@ -23,7 +23,6 @@
 from accelerate.utils.memory import release_memory
 from datasets import Dataset, Features, Image, Value, load_dataset
 from packaging.version import Version
-from parameterized import parameterized
 from transformers import (
     AutoModelForCausalLM,
     AutoModelForImageTextToText,
@@ -64,7 +63,7 @@ def teardown_method(self):
         backend_empty_cache(torch_device)
         gc.collect()
 
-    @parameterized.expand(MODELS_TO_TEST)
+    @pytest.mark.parametrize("model_name", MODELS_TO_TEST)
     @require_liger_kernel
     def test_training_with_liger_grpo_loss(self, model_name):
         training_args = GRPOConfig(
@@ -104,7 +103,7 @@ def test_training_with_liger_grpo_loss(self, model_name):
 
         release_memory(model, trainer)
 
-    @parameterized.expand(MODELS_TO_TEST)
+    @pytest.mark.parametrize("model_name", MODELS_TO_TEST)
     @require_liger_kernel
     @require_peft
     def test_training_with_liger_grpo_loss_and_peft(self, model_name):
@@ -168,7 +167,7 @@ def test_training_with_liger_grpo_loss_and_peft(self, model_name):
 
         release_memory(model, trainer)
 
-    @parameterized.expand(MODELS_TO_TEST)
+    @pytest.mark.parametrize("model_name", MODELS_TO_TEST)
     def test_training_with_transformers_paged(self, model_name):
         """Test that training works with transformers paged implementation (requires GPU)."""
         if Version(transformers.__version__) < Version("4.57.0"):
@@ -206,10 +205,11 @@ def test_training_with_transformers_paged(self, model_name):
 
         release_memory(model, trainer)
 
-    @parameterized.expand(
+    @pytest.mark.parametrize(
+        "model_name",
         [
-            ("HuggingFaceTB/SmolVLM-Instruct",),  # Only test the smaller model to avoid OOM
-        ]
+            "HuggingFaceTB/SmolVLM-Instruct",  # Only test the smaller model to avoid OOM
+        ],
     )
     @require_flash_attn
     @require_bitsandbytes
diff --git a/tests/slow/test_sft_slow.py b/tests/slow/test_sft_slow.py
index 13d9c7ce635..979d61218c1 100755
--- a/tests/slow/test_sft_slow.py
+++ b/tests/slow/test_sft_slow.py
@@ -13,13 +13,11 @@
 # limitations under the License.
 
 import gc
-import itertools
 
 import pytest
 import torch
 from accelerate.utils.memory import release_memory
 from datasets import load_dataset
-from parameterized import parameterized
 from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
 from transformers.testing_utils import (
     backend_empty_cache,
@@ -61,7 +59,8 @@ def teardown_method(self):
         backend_empty_cache(torch_device)
         gc.collect()
 
-    @parameterized.expand(list(itertools.product(MODELS_TO_TEST, PACKING_OPTIONS)))
+    @pytest.mark.parametrize("packing", PACKING_OPTIONS)
+    @pytest.mark.parametrize("model_name", MODELS_TO_TEST)
     def test_sft_trainer_str(self, model_name, packing):
         """
         Simply tests if passing a simple str to `SFTTrainer` loads and runs the trainer as expected.
@@ -85,7 +84,8 @@ def test_sft_trainer_str(self, model_name, packing):
 
         trainer.train()
 
-    @parameterized.expand(list(itertools.product(MODELS_TO_TEST, PACKING_OPTIONS)))
+    @pytest.mark.parametrize("packing", PACKING_OPTIONS)
+    @pytest.mark.parametrize("model_name", MODELS_TO_TEST)
     def test_sft_trainer_transformers(self, model_name, packing):
         """
         Simply tests if passing a transformers model to `SFTTrainer` loads and runs the trainer as expected.
@@ -115,7 +115,8 @@ def test_sft_trainer_transformers(self, model_name, packing):
 
         release_memory(model, trainer)
 
-    @parameterized.expand(list(itertools.product(MODELS_TO_TEST, PACKING_OPTIONS)))
+    @pytest.mark.parametrize("packing", PACKING_OPTIONS)
+    @pytest.mark.parametrize("model_name", MODELS_TO_TEST)
     @require_peft
     def test_sft_trainer_peft(self, model_name, packing):
         """
@@ -151,7 +152,8 @@ def test_sft_trainer_peft(self, model_name, packing):
 
         release_memory(model, trainer)
 
-    @parameterized.expand(list(itertools.product(MODELS_TO_TEST, PACKING_OPTIONS)))
+    @pytest.mark.parametrize("packing", PACKING_OPTIONS)
+    @pytest.mark.parametrize("model_name", MODELS_TO_TEST)
     def test_sft_trainer_transformers_mp(self, model_name, packing):
         """
         Simply tests if passing a transformers model to `SFTTrainer` loads and runs the trainer as expected in mixed
@@ -183,7 +185,9 @@ def test_sft_trainer_transformers_mp(self, model_name, packing):
 
         release_memory(model, trainer)
 
-    @parameterized.expand(list(itertools.product(MODELS_TO_TEST, PACKING_OPTIONS, GRADIENT_CHECKPOINTING_KWARGS)))
+    @pytest.mark.parametrize("gradient_checkpointing_kwargs", GRADIENT_CHECKPOINTING_KWARGS)
+    @pytest.mark.parametrize("packing", PACKING_OPTIONS)
+    @pytest.mark.parametrize("model_name", MODELS_TO_TEST)
     def test_sft_trainer_transformers_mp_gc(self, model_name, packing, gradient_checkpointing_kwargs):
         """
         Simply tests if passing a transformers model to `SFTTrainer` loads and runs the trainer as expected in mixed
@@ -217,7 +221,9 @@ def test_sft_trainer_transformers_mp_gc(self, model_name, packing, gradient_chec
 
         release_memory(model, trainer)
 
-    @parameterized.expand(list(itertools.product(MODELS_TO_TEST, PACKING_OPTIONS, GRADIENT_CHECKPOINTING_KWARGS)))
+    @pytest.mark.parametrize("gradient_checkpointing_kwargs", GRADIENT_CHECKPOINTING_KWARGS)
+    @pytest.mark.parametrize("packing", PACKING_OPTIONS)
+    @pytest.mark.parametrize("model_name", MODELS_TO_TEST)
     @require_peft
     def test_sft_trainer_transformers_mp_gc_peft(self, model_name, packing, gradient_checkpointing_kwargs):
         """
@@ -255,9 +261,10 @@ def test_sft_trainer_transformers_mp_gc_peft(self, model_name, packing, gradient
 
         release_memory(model, trainer)
 
-    @parameterized.expand(
-        list(itertools.product(MODELS_TO_TEST, PACKING_OPTIONS, GRADIENT_CHECKPOINTING_KWARGS, DEVICE_MAP_OPTIONS))
-    )
+    @pytest.mark.parametrize("device_map", DEVICE_MAP_OPTIONS)
+    @pytest.mark.parametrize("gradient_checkpointing_kwargs", GRADIENT_CHECKPOINTING_KWARGS)
+    @pytest.mark.parametrize("packing", PACKING_OPTIONS)
+    @pytest.mark.parametrize("model_name", MODELS_TO_TEST)
     @require_torch_multi_accelerator
     def test_sft_trainer_transformers_mp_gc_device_map(
         self, model_name, packing, gradient_checkpointing_kwargs, device_map
@@ -294,7 +301,9 @@ def test_sft_trainer_transformers_mp_gc_device_map(
 
         release_memory(model, trainer)
 
-    @parameterized.expand(list(itertools.product(MODELS_TO_TEST, PACKING_OPTIONS, GRADIENT_CHECKPOINTING_KWARGS)))
+    @pytest.mark.parametrize("gradient_checkpointing_kwargs", GRADIENT_CHECKPOINTING_KWARGS)
+    @pytest.mark.parametrize("packing", PACKING_OPTIONS)
+    @pytest.mark.parametrize("model_name", MODELS_TO_TEST)
     @require_peft
     @require_bitsandbytes
     def test_sft_trainer_transformers_mp_gc_peft_qlora(self, model_name, packing, gradient_checkpointing_kwargs):
@@ -335,7 +344,8 @@ def test_sft_trainer_transformers_mp_gc_peft_qlora(self, model_name, packing, gr
 
         release_memory(model, trainer)
 
-    @parameterized.expand(list(itertools.product(MODELS_TO_TEST, PACKING_OPTIONS)))
+    @pytest.mark.parametrize("packing", PACKING_OPTIONS)
+    @pytest.mark.parametrize("model_name", MODELS_TO_TEST)
     @require_peft
     @require_bitsandbytes
     def test_sft_trainer_with_chat_format_qlora(self, model_name, packing):
@@ -375,7 +385,8 @@ def test_sft_trainer_with_chat_format_qlora(self, model_name, packing):
 
         release_memory(model, trainer)
 
-    @parameterized.expand(list(itertools.product(MODELS_TO_TEST, PACKING_OPTIONS)))
+    @pytest.mark.parametrize("packing", PACKING_OPTIONS)
+    @pytest.mark.parametrize("model_name", MODELS_TO_TEST)
     @require_liger_kernel
     def test_sft_trainer_with_liger(self, model_name, packing):
         """
@@ -419,7 +430,8 @@ def cleanup_liger_patches(trainer):
         finally:
             cleanup_liger_patches(trainer)
 
-    @parameterized.expand(list(itertools.product(MODELS_TO_TEST, PACKING_OPTIONS)))
+    @pytest.mark.parametrize("packing", PACKING_OPTIONS)
+    @pytest.mark.parametrize("model_name", MODELS_TO_TEST)
     @require_torch_accelerator
     def test_train_offloading(self, model_name, packing):
         """Test that activation offloading works with SFTTrainer."""
diff --git a/tests/test_bco_trainer.py b/tests/test_bco_trainer.py
index 7b7f0414438..79febde2884 100644
--- a/tests/test_bco_trainer.py
+++ b/tests/test_bco_trainer.py
@@ -18,7 +18,6 @@
 import torch
 from accelerate import Accelerator
 from datasets import load_dataset
-from parameterized import parameterized
 from transformers import AutoModel, AutoModelForCausalLM, AutoTokenizer
 from transformers.utils import is_peft_available
 
@@ -33,15 +32,16 @@
 
 
 class TestBCOTrainer(TrlTestCase):
-    @parameterized.expand(
+    @pytest.mark.parametrize(
+        "config_name",
         [
-            ("standard_preference",),
-            ("standard_implicit_prompt_preference",),
-            ("standard_unpaired_preference",),
-            ("conversational_preference",),
-            ("conversational_implicit_prompt_preference",),
-            ("conversational_unpaired_preference",),
-        ]
+            "standard_preference",
+            "standard_implicit_prompt_preference",
+            "standard_unpaired_preference",
+            "conversational_preference",
+            "conversational_implicit_prompt_preference",
+            "conversational_unpaired_preference",
+        ],
     )
     @require_sklearn
     def test_train(self, config_name):
diff --git a/tests/test_cpo_trainer.py b/tests/test_cpo_trainer.py
index 56792f608dc..19833a414ff 100644
--- a/tests/test_cpo_trainer.py
+++ b/tests/test_cpo_trainer.py
@@ -11,11 +11,9 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
-
+import pytest
 import torch
 from datasets import load_dataset
-from parameterized import parameterized
 from transformers import AutoModelForCausalLM, AutoModelForSeq2SeqLM, AutoTokenizer
 
 from trl import CPOConfig, CPOTrainer
@@ -37,7 +35,8 @@ def setup_method(self):
         self.t5_tokenizer = AutoTokenizer.from_pretrained(model_id)
         self.t5_tokenizer.chat_template = SIMPLE_CHAT_TEMPLATE
 
-    @parameterized.expand(
+    @pytest.mark.parametrize(
+        "name, loss_type, config_name",
         [
             ("qwen", "sigmoid", "standard_preference"),
             ("t5", "hinge", "standard_implicit_prompt_preference"),
@@ -46,7 +45,7 @@ def setup_method(self):
             ("qwen", "simpo", "standard_preference"),
             ("t5", "simpo", "standard_implicit_prompt_preference"),
             ("qwen", "hinge", "conversational_preference"),
-        ]
+        ],
     )
     def test_cpo_trainer(self, name, loss_type, config_name):
         training_args = CPOConfig(
@@ -93,13 +92,14 @@ def test_cpo_trainer(self, name, loss_type, config_name):
             if param.sum() != 0:  # ignore 0 biases
                 assert not torch.equal(param, new_param)
 
-    @parameterized.expand(
+    @pytest.mark.parametrize(
+        "config_name",
         [
-            ("standard_preference",),
-            ("standard_implicit_prompt_preference",),
-            ("conversational_preference",),
-            ("conversational_implicit_prompt_preference",),
-        ]
+            "standard_preference",
+            "standard_implicit_prompt_preference",
+            "conversational_preference",
+            "conversational_implicit_prompt_preference",
+        ],
     )
     @require_peft
     def test_cpo_trainer_with_lora(self, config_name):
diff --git a/tests/test_data_utils.py b/tests/test_data_utils.py
index eff62650bb4..72a4d3e993f 100644
--- a/tests/test_data_utils.py
+++ b/tests/test_data_utils.py
@@ -13,12 +13,11 @@
 # limitations under the License.
 
 import copy
-import itertools
 import textwrap
 from time import strftime
 
+import pytest
 from datasets import Dataset, DatasetDict
-from parameterized import parameterized
 from transformers import AutoProcessor, AutoTokenizer
 
 from trl.data_utils import (
@@ -247,11 +246,11 @@ class TestIsConversational(TrlTestCase):
         {"prompt": "The sky is", "completion": " blue.", "label": True},
     ]
 
-    @parameterized.expand(itertools.product(conversational_examples))
+    @pytest.mark.parametrize("example", conversational_examples)
     def test_conversational(self, example):
         assert is_conversational(example)
 
-    @parameterized.expand(itertools.product(non_conversational_examples))
+    @pytest.mark.parametrize("example", non_conversational_examples)
     def test_non_conversational(self, example):
         assert not is_conversational(example)
 
@@ -345,7 +344,8 @@ class TestApplyChatTemplate(TrlTestCase):
         {"prompt": "The sky is", "completion": " blue.", "label": True},  # Unpaired preference
     ]
 
-    @parameterized.expand(itertools.product(tokenizers, conversational_examples))
+    @pytest.mark.parametrize("example", conversational_examples)
+    @pytest.mark.parametrize("tokenizer_id", tokenizers)
     def test_apply_chat_template(self, tokenizer_id, example):
         tokenizer = AutoTokenizer.from_pretrained(tokenizer_id)
         result = apply_chat_template(example, tokenizer)
@@ -371,7 +371,8 @@ def test_apply_chat_template(self, tokenizer_id, example):
             assert result["label"] == example["label"]
 
     # both conversational and non-conversational examples
-    @parameterized.expand(itertools.product(tokenizers, conversational_examples + non_conversational_examples))
+    @pytest.mark.parametrize("example", conversational_examples + non_conversational_examples)
+    @pytest.mark.parametrize("tokenizer_id", tokenizers)
     def test_maybe_apply_chat_template(self, tokenizer_id, example):
         tokenizer = AutoTokenizer.from_pretrained(tokenizer_id)
         result = maybe_apply_chat_template(example, tokenizer)
diff --git a/tests/test_dpo_trainer.py b/tests/test_dpo_trainer.py
index e33eba91c3d..7fdc6091960 100644
--- a/tests/test_dpo_trainer.py
+++ b/tests/test_dpo_trainer.py
@@ -20,7 +20,6 @@
 import pytest
 import torch
 from datasets import Dataset, features, load_dataset
-from parameterized import parameterized
 from transformers import (
     AutoModelForCausalLM,
     AutoModelForImageTextToText,
@@ -192,22 +191,23 @@ def test_train(self):
             if param.sum() != 0:  # ignore 0 biases
                 assert not torch.allclose(param, new_param, rtol=1e-12, atol=1e-12)
 
-    @parameterized.expand(
+    @pytest.mark.parametrize(
+        "loss_type",
         [
-            ("sigmoid",),
-            ("hinge",),
-            ("ipo",),
-            ("exo_pair",),
-            ("nca_pair",),
-            ("robust",),
-            ("bco_pair",),
-            ("sppo_hard",),
-            ("aot",),
-            ("aot_pair",),
-            ("discopop",),
-            ("apo_zero",),
-            ("apo_down",),
-        ]
+            "sigmoid",
+            "hinge",
+            "ipo",
+            "exo_pair",
+            "nca_pair",
+            "robust",
+            "bco_pair",
+            "sppo_hard",
+            "aot",
+            "aot_pair",
+            "discopop",
+            "apo_zero",
+            "apo_down",
+        ],
     )
     def test_train_loss_types(self, loss_type):
         model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
@@ -345,7 +345,7 @@ def test_wrong_loss_weights_length(self):
                 loss_weights=[1.0, 0.5, 0.1],  # Wrong length
             )
 
-    @parameterized.expand([(None,), (0.5,)])
+    @pytest.mark.parametrize("rpo_alpha", [None, 0.5])
     def test_dpo_trainer_without_providing_ref_model(self, rpo_alpha):
         training_args = DPOConfig(
             output_dir=self.tmp_dir,
@@ -692,7 +692,8 @@ def test_dpo_lora_bf16_autocast_llama(self):
         # save peft adapter
         trainer.save_model()
 
-    @parameterized.expand(
+    @pytest.mark.parametrize(
+        "loss_type, pre_compute, gen_during_eval",
         [
             ("sigmoid", False, False),
             ("sigmoid", False, True),
@@ -718,7 +719,7 @@ def test_dpo_lora_bf16_autocast_llama(self):
             ("robust", False, True),
             ("robust", True, False),
             ("robust", True, True),
-        ]
+        ],
     )
     @require_bitsandbytes
     @require_peft
@@ -1297,7 +1298,8 @@ def test_train_with_length_desensitization(self):
             if param.sum() != 0:  # ignore 0 biases
                 assert not torch.allclose(param, new_param, rtol=1e-12, atol=1e-12)
 
-    @parameterized.expand(
+    @pytest.mark.parametrize(
+        "beta, loss_type",
         [
             (0.1, "sigmoid"),
             (0.1, "apo_zero"),
@@ -1309,7 +1311,7 @@ def test_train_with_length_desensitization(self):
             (0.5, "apo_down"),
             (0.5, "sppo_hard"),
             (0.5, "nca_pair"),
-        ]
+        ],
     )
     @require_liger_kernel
     @pytest.mark.skipif(not (sys.version_info >= (3, 10)), reason="Liger kernel is not supported on Python 3.9")
@@ -1416,14 +1418,15 @@ def test_train_with_iterable_dataset(self):
 
 @require_vision
 class TestDPOVisionTrainer(TrlTestCase):
-    @parameterized.expand(
+    @pytest.mark.parametrize(
+        "model_id",
         [
-            # ("trl-internal-testing/tiny-Idefics2ForConditionalGeneration",),  device issue from transformers, see https://github.com/huggingface/transformers/pull/39975
-            # ("trl-internal-testing/tiny-PaliGemmaForConditionalGeneration",),
-            ("trl-internal-testing/tiny-LlavaForConditionalGeneration",),
-            ("trl-internal-testing/tiny-LlavaNextForConditionalGeneration",),
-            ("trl-internal-testing/tiny-Gemma3ForConditionalGeneration",),
-        ]
+            # "trl-internal-testing/tiny-Idefics2ForConditionalGeneration",  # disabled: device issue in transformers, see https://github.com/huggingface/transformers/pull/39975
+            # "trl-internal-testing/tiny-PaliGemmaForConditionalGeneration",
+            "trl-internal-testing/tiny-LlavaForConditionalGeneration",
+            "trl-internal-testing/tiny-LlavaNextForConditionalGeneration",
+            "trl-internal-testing/tiny-Gemma3ForConditionalGeneration",
+        ],
     )
     def test_vdpo_trainer(self, model_id):
         # fmt: off
@@ -1509,7 +1512,8 @@ def test_vdpo_trainer(self, model_id):
 
 
 class TestDPOConfig(TrlTestCase):
-    @parameterized.expand([(f_div_type, as_str) for f_div_type in list(FDivergenceType) for as_str in [False, True]])
+    @pytest.mark.parametrize("as_string", [False, True])
+    @pytest.mark.parametrize("f_divergence_type", list(FDivergenceType))
     def test_f_divergence_type(self, f_divergence_type, as_string: bool):
         training_args = DPOConfig(
             output_dir=self.tmp_dir,
diff --git a/tests/test_grpo_trainer.py b/tests/test_grpo_trainer.py
index a89c5f9bac0..d27a4b1d415 100644
--- a/tests/test_grpo_trainer.py
+++ b/tests/test_grpo_trainer.py
@@ -17,7 +17,6 @@
 import pytest
 import torch
 from datasets import load_dataset
-from parameterized import parameterized
 from transformers import (
     AutoModelForCausalLM,
     AutoModelForImageTextToText,
@@ -124,7 +123,7 @@ def test_init_minimal(self):
             train_dataset=dataset,
         )
 
-    @parameterized.expand([("standard_prompt_only",), ("conversational_prompt_only",)])
+    @pytest.mark.parametrize("config_name", ["standard_prompt_only", "conversational_prompt_only"])
     def test_training(self, config_name):
         dataset = load_dataset("trl-internal-testing/zen", config_name, split="train")
 
@@ -154,7 +153,7 @@ def test_training(self, config_name):
             new_param = trainer.model.get_parameter(n)
             assert not torch.equal(param, new_param), f"Parameter {n} has not changed."
 
-    @parameterized.expand([("bnpo",), ("dr_grpo",), ("dapo",)])
+    @pytest.mark.parametrize("loss_type", ["bnpo", "dr_grpo", "dapo"])
     def test_training_loss_types(self, loss_type):
         dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train")
 
@@ -907,7 +906,7 @@ def test_training_vllm_with_additional_generation_kwargs(self):
             new_param = trainer.model.get_parameter(n)
             assert not torch.equal(param, new_param), f"Parameter {n} has not changed."
 
-    @parameterized.expand([(False,), ("group",), ("batch",), (True,), ("none",)])
+    @pytest.mark.parametrize("scale_rewards", [False, "group", "batch", True, "none"])
     def test_training_scale_rewards(self, scale_rewards):
         dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train")
 
@@ -1251,14 +1250,15 @@ def test_prepare_input_called_with_correct_data(self):
             for i in range(8, 16):
                 assert mock_prepare.call_args_list[i].args[1] == expected_second_generation_batch
 
-    @parameterized.expand(
+    @pytest.mark.parametrize(
+        "model_id",
         [
-            ("trl-internal-testing/tiny-Gemma3ForConditionalGeneration",),
-            ("trl-internal-testing/tiny-LlavaNextForConditionalGeneration",),
-            ("trl-internal-testing/tiny-Qwen2_5_VLForConditionalGeneration",),
-            ("trl-internal-testing/tiny-Qwen2VLForConditionalGeneration",),
-            # ("trl-internal-testing/tiny-SmolVLMForConditionalGeneration",), seems not to support bf16 properly
-        ]
+            "trl-internal-testing/tiny-Gemma3ForConditionalGeneration",
+            "trl-internal-testing/tiny-LlavaNextForConditionalGeneration",
+            "trl-internal-testing/tiny-Qwen2_5_VLForConditionalGeneration",
+            "trl-internal-testing/tiny-Qwen2VLForConditionalGeneration",
+            # "trl-internal-testing/tiny-SmolVLMForConditionalGeneration",  # disabled: seems not to support bf16 properly
+        ],
     )
     @require_vision
     def test_training_vlm(self, model_id):
@@ -1471,11 +1471,12 @@ def reward_func(completions, **kwargs):
             new_param = trainer.model.get_parameter(n)
             assert not torch.equal(param, new_param), f"Parameter {n} has not changed."
 
-    @parameterized.expand(
+    @pytest.mark.parametrize(
+        "model_id",
         [
-            ("trl-internal-testing/tiny-Qwen2_5_VLForConditionalGeneration",),
-            ("trl-internal-testing/tiny-Gemma3ForConditionalGeneration",),
-        ]
+            "trl-internal-testing/tiny-Qwen2_5_VLForConditionalGeneration",
+            "trl-internal-testing/tiny-Gemma3ForConditionalGeneration",
+        ],
     )
     @require_vision
     @require_vllm
diff --git a/tests/test_kto_trainer.py b/tests/test_kto_trainer.py
index e2c325149f2..eee6366bf15 100644
--- a/tests/test_kto_trainer.py
+++ b/tests/test_kto_trainer.py
@@ -16,7 +16,6 @@
 import pytest
 import torch
 from datasets import load_dataset
-from parameterized import parameterized
 from transformers import AutoModelForCausalLM, AutoModelForSeq2SeqLM, AutoTokenizer
 from transformers.testing_utils import require_liger_kernel
 
@@ -40,7 +39,8 @@ def setup_method(self):
         self.t5_ref_model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
         self.t5_tokenizer = AutoTokenizer.from_pretrained(model_id)
 
-    @parameterized.expand(
+    @pytest.mark.parametrize(
+        "name, config_name, loss_type, pre_compute, eval_dataset",
         [
             ("qwen", "standard_preference", "kto", True, True),
             # ("t5", "standard_implicit_prompt_preference", "kto", True, False), # KTO broken for enc-dec
@@ -50,7 +50,7 @@ def setup_method(self):
             # ("t5", "conversational_unpaired_preference", "apo_zero_unpaired", True, False),
             ("qwen", "standard_unpaired_preference", "apo_zero_unpaired", False, True),
             # ("t5", "conversational_unpaired_preference", "apo_zero_unpaired", False, False),
-        ]
+        ],
     )
     def test_kto_trainer(self, name, config_name, loss_type, pre_compute, eval_dataset):
         training_args = KTOConfig(
diff --git a/tests/test_modeling_value_head.py b/tests/test_modeling_value_head.py
index fd9d4ff0b3f..a8a462504d7 100644
--- a/tests/test_modeling_value_head.py
+++ b/tests/test_modeling_value_head.py
@@ -18,7 +18,6 @@
 import torch
 import transformers
 from packaging import version
-from parameterized import parameterized
 from transformers import AutoModelForCausalLM, AutoModelForSeq2SeqLM, GenerationConfig
 
 from trl import AutoModelForCausalLMWithValueHead, AutoModelForSeq2SeqLMWithValueHead, create_reference_model
@@ -304,7 +303,7 @@ def test_dropout_kwargs(self):
             # Check if v head of the model has the same dropout as the config
             assert model.v_head.dropout.p == 0.5
 
-    @parameterized.expand(ALL_CAUSAL_LM_MODELS)
+    @pytest.mark.parametrize("model_name", ALL_CAUSAL_LM_MODELS)
     def test_generate(self, model_name):
         r"""
         Test if `generate` works for every model
@@ -436,7 +435,7 @@ def test_dropout_kwargs(self):
             # Check if v head of the model has the same dropout as the config
             assert model.v_head.dropout.p == 0.5
 
-    @parameterized.expand(ALL_SEQ2SEQ_MODELS)
+    @pytest.mark.parametrize("model_name", ALL_SEQ2SEQ_MODELS)
     def test_generate(self, model_name):
         r"""
         Test if `generate` works for every model
diff --git a/tests/test_nash_md_trainer.py b/tests/test_nash_md_trainer.py
index d6026e73443..936db5bbdf5 100644
--- a/tests/test_nash_md_trainer.py
+++ b/tests/test_nash_md_trainer.py
@@ -11,10 +11,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
-
+import pytest
 from datasets import load_dataset
-from parameterized import parameterized
 from transformers import AutoModelForCausalLM, AutoModelForSequenceClassification, AutoTokenizer
 from transformers.utils import is_peft_available
 
@@ -36,7 +34,7 @@ def setup_method(self):
         self.tokenizer = AutoTokenizer.from_pretrained(self.model_id)
         self.tokenizer.pad_token = self.tokenizer.eos_token
 
-    @parameterized.expand([("standard_prompt_only",), ("conversational_prompt_only",)])
+    @pytest.mark.parametrize("config_name", ["standard_prompt_only", "conversational_prompt_only"])
     def test_nash_md_trainer_training(self, config_name):
         training_args = NashMDConfig(
             output_dir=self.tmp_dir,
@@ -184,7 +182,7 @@ def test_training_pre_pefted_model_implicit_ref_with_reward_model(self):
 
         assert "train_loss" in trainer.state.log_history[-1]
 
-    @parameterized.expand([("standard_prompt_only",), ("conversational_prompt_only",)])
+    @pytest.mark.parametrize("config_name", ["standard_prompt_only", "conversational_prompt_only"])
     @require_llm_blender
     def test_nash_md_trainer_judge_training(self, config_name):
         training_args = NashMDConfig(
diff --git a/tests/test_online_dpo_trainer.py b/tests/test_online_dpo_trainer.py
index f8706770371..b742bec8d1a 100644
--- a/tests/test_online_dpo_trainer.py
+++ b/tests/test_online_dpo_trainer.py
@@ -16,7 +16,6 @@
 import transformers
 from datasets import Dataset, features, load_dataset
 from packaging.version import Version
-from parameterized import parameterized
 from transformers import AutoModelForCausalLM, AutoModelForSequenceClassification, AutoTokenizer
 from transformers.testing_utils import require_torch_accelerator
 from transformers.utils import is_peft_available, is_vision_available
@@ -55,7 +54,7 @@ def setup_method(self):
         self.reward_tokenizer = AutoTokenizer.from_pretrained(self.reward_model_id)
         self.reward_tokenizer.pad_token = self.reward_tokenizer.eos_token
 
-    @parameterized.expand([("standard_prompt_only",), ("conversational_prompt_only",)])
+    @pytest.mark.parametrize("config_name", ["standard_prompt_only", "conversational_prompt_only"])
     def test_training(self, config_name):
         training_args = OnlineDPOConfig(
             output_dir=self.tmp_dir,
@@ -244,7 +243,7 @@ def test_training_with_peft_model_and_peft_config(self):
         # Check if training loss is available
         assert "train_loss" in trainer.state.log_history[-1]
 
-    @parameterized.expand([("standard_prompt_only",), ("conversational_prompt_only",)])
+    @pytest.mark.parametrize("config_name", ["standard_prompt_only", "conversational_prompt_only"])
     @require_llm_blender
     def test_training_with_judge(self, config_name):
         training_args = OnlineDPOConfig(
@@ -270,7 +269,7 @@ def test_training_with_judge(self, config_name):
         # Check if training loss is available
         assert "train_loss" in trainer.state.log_history[-1]
 
-    @parameterized.expand([("standard_prompt_only",), ("conversational_prompt_only",)])
+    @pytest.mark.parametrize("config_name", ["standard_prompt_only", "conversational_prompt_only"])
     @require_torch_accelerator
     @require_vllm
     @pytest.mark.slow
@@ -425,7 +424,7 @@ def test_generation_config_setup(self):
         assert trainer.generation_config.max_new_tokens == 64
         assert not trainer.generation_config.do_sample  # From generation_kwargs
 
-    @parameterized.expand([("standard_prompt_only",), ("conversational_prompt_only",)])
+    @pytest.mark.parametrize("config_name", ["standard_prompt_only", "conversational_prompt_only"])
     @require_torch_accelerator
     def test_training_with_transformers_paged(self, config_name):
         if Version(transformers.__version__) < Version("4.57.0"):
@@ -455,7 +454,7 @@ def test_training_with_transformers_paged(self, config_name):
         # Check if training loss is available
         assert "train_loss" in trainer.state.log_history[-1]
 
-    @parameterized.expand([("standard_prompt_only",), ("conversational_prompt_only",)])
+    @pytest.mark.parametrize("config_name", ["standard_prompt_only", "conversational_prompt_only"])
     def test_training_with_reward_funcs(self, config_name):
         def simple_reward_func(prompts, completions, completion_ids, **kwargs):
             return [0.5 for _ in prompts]
@@ -490,11 +489,12 @@ def simple_reward_func(prompts, completions, completion_ids, **kwargs):
 
 @require_vision
 class TestOnlineDPOVisionTrainer(TrlTestCase):
-    @parameterized.expand(
+    @pytest.mark.parametrize(
+        "model_id",
         [
-            ("trl-internal-testing/tiny-Idefics2ForConditionalGeneration",),
-            ("trl-internal-testing/tiny-LlavaForConditionalGeneration",),
-        ]
+            "trl-internal-testing/tiny-Idefics2ForConditionalGeneration",
+            "trl-internal-testing/tiny-LlavaForConditionalGeneration",
+        ],
     )
     def test_online_dpo_vlm_trainer(self, model_id):
         dataset_dict = {
diff --git a/tests/test_orpo_trainer.py b/tests/test_orpo_trainer.py
index dedfc4c36c9..f882cf756f8 100644
--- a/tests/test_orpo_trainer.py
+++ b/tests/test_orpo_trainer.py
@@ -11,11 +11,9 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
-
+import pytest
 import torch
 from datasets import load_dataset
-from parameterized import parameterized
 from transformers import AutoModelForCausalLM, AutoModelForSeq2SeqLM, AutoTokenizer
 
 from trl import ORPOConfig, ORPOTrainer
@@ -37,13 +35,14 @@ def setup_method(self):
         self.t5_tokenizer = AutoTokenizer.from_pretrained(model_id)
         self.t5_tokenizer.chat_template = SIMPLE_CHAT_TEMPLATE
 
-    @parameterized.expand(
+    @pytest.mark.parametrize(
+        "name, config_name",
         [
             ("qwen", "standard_preference"),
             ("t5", "standard_implicit_prompt_preference"),
             ("qwen", "conversational_preference"),
             ("t5", "conversational_implicit_prompt_preference"),
-        ]
+        ],
     )
     def test_orpo_trainer(self, name, config_name):
         training_args = ORPOConfig(
@@ -88,13 +87,14 @@ def test_orpo_trainer(self, name, config_name):
             if param.sum() != 0:  # ignore 0 biases
                 assert not torch.equal(param, new_param)
 
-    @parameterized.expand(
+    @pytest.mark.parametrize(
+        "config_name",
         [
-            ("standard_preference",),
-            ("standard_implicit_prompt_preference",),
-            ("conversational_preference",),
-            ("conversational_implicit_prompt_preference",),
-        ]
+            "standard_preference",
+            "standard_implicit_prompt_preference",
+            "conversational_preference",
+            "conversational_implicit_prompt_preference",
+        ],
     )
     @require_peft
     def test_orpo_trainer_with_lora(self, config_name):
diff --git a/tests/test_prm_trainer.py b/tests/test_prm_trainer.py
index 16876c6df62..963df7b30f7 100644
--- a/tests/test_prm_trainer.py
+++ b/tests/test_prm_trainer.py
@@ -14,9 +14,9 @@
 
 from unittest.mock import MagicMock
 
+import pytest
 import torch
 from datasets import Dataset, load_dataset
-from parameterized import parameterized
 from transformers import AutoModelForTokenClassification, AutoTokenizer, PreTrainedTokenizerBase
 from transformers.utils import is_peft_available
 
@@ -209,7 +209,7 @@ def setup_method(self):
         self.model = AutoModelForTokenClassification.from_pretrained(model_id)
         self.tokenizer = AutoTokenizer.from_pretrained(model_id)
 
-    @parameterized.expand([True, False])
+    @pytest.mark.parametrize("train_on_last_step_only", [True, False])
     def test_train_full(self, train_on_last_step_only):
         dummy_dataset = load_dataset("trl-internal-testing/zen", "standard_stepwise_supervision", split="train")
         training_args = PRMConfig(
diff --git a/tests/test_reward_trainer.py b/tests/test_reward_trainer.py
index ab6d6656e99..e12ce18355d 100644
--- a/tests/test_reward_trainer.py
+++ b/tests/test_reward_trainer.py
@@ -17,7 +17,6 @@
 import pytest
 import torch
 from datasets import load_dataset
-from parameterized import parameterized
 from transformers import AutoModelForSequenceClassification, AutoTokenizer
 from transformers.utils import is_peft_available
 
@@ -108,12 +107,13 @@ def test_collate_with_margin(self):
 
 
 class TestRewardTrainer(TrlTestCase):
-    @parameterized.expand(
+    @pytest.mark.parametrize(
+        "model_id",
         [
-            ("trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5",),
-            ("trl-internal-testing/tiny-Qwen3MoeForSequenceClassification",),
-            ("trl-internal-testing/tiny-LlamaForSequenceClassification-3.2",),
-        ]
+            "trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5",
+            "trl-internal-testing/tiny-Qwen3MoeForSequenceClassification",
+            "trl-internal-testing/tiny-LlamaForSequenceClassification-3.2",
+        ],
     )
     def test_train(self, model_id):
         # Get the dataset
@@ -137,13 +137,14 @@ def test_train(self, model_id):
             new_param = trainer.model.get_parameter(n)
             assert not torch.allclose(param, new_param), f"Parameter {n} has not changed"
 
-    @parameterized.expand(
+    @pytest.mark.parametrize(
+        "config_name",
         [
-            ("standard_preference",),
-            ("conversational_preference",),
-            ("standard_implicit_prompt_preference",),
-            ("conversational_implicit_prompt_preference",),
-        ]
+            "standard_preference",
+            "conversational_preference",
+            "standard_implicit_prompt_preference",
+            "conversational_implicit_prompt_preference",
+        ],
     )
     def test_train_dataset_types(self, config_name):
         # Get the dataset
diff --git a/tests/test_rloo_trainer.py b/tests/test_rloo_trainer.py
index c02f199e1c3..476fcfb0e72 100644
--- a/tests/test_rloo_trainer.py
+++ b/tests/test_rloo_trainer.py
@@ -17,7 +17,6 @@
 import pytest
 import torch
 from datasets import load_dataset
-from parameterized import parameterized
 from transformers import (
     AutoModelForCausalLM,
     AutoModelForImageTextToText,
@@ -45,7 +44,7 @@ def test_init_minimal(self):
             train_dataset=dataset,
         )
 
-    @parameterized.expand([("standard_prompt_only",), ("conversational_prompt_only",)])
+    @pytest.mark.parametrize("config_name", ["standard_prompt_only", "conversational_prompt_only"])
     def test_training(self, config_name):
         dataset = load_dataset("trl-internal-testing/zen", config_name, split="train")
 
@@ -1075,14 +1074,15 @@ def test_prepare_input_called_with_correct_data(self):
             for i in range(8, 16):
                 assert mock_prepare.call_args_list[i].args[1] == expected_second_generation_batch
 
-    @parameterized.expand(
+    @pytest.mark.parametrize(
+        "model_id",
         [
-            ("trl-internal-testing/tiny-Gemma3ForConditionalGeneration",),
-            ("trl-internal-testing/tiny-LlavaNextForConditionalGeneration",),
-            ("trl-internal-testing/tiny-Qwen2_5_VLForConditionalGeneration",),
-            ("trl-internal-testing/tiny-Qwen2VLForConditionalGeneration",),
-            # ("trl-internal-testing/tiny-SmolVLMForConditionalGeneration",), seems not to support bf16 properly
-        ]
+            "trl-internal-testing/tiny-Gemma3ForConditionalGeneration",
+            "trl-internal-testing/tiny-LlavaNextForConditionalGeneration",
+            "trl-internal-testing/tiny-Qwen2_5_VLForConditionalGeneration",
+            "trl-internal-testing/tiny-Qwen2VLForConditionalGeneration",
+            # "trl-internal-testing/tiny-SmolVLMForConditionalGeneration", seems not to support bf16 properly
+        ],
     )
     @require_vision
     def test_training_vlm(self, model_id):
@@ -1212,11 +1212,12 @@ def reward_func(completions, **kwargs):
             elif "base_layer" not in n:  # We expect the peft params to be different (except for the base layer)
                 assert not torch.allclose(param, new_param), f"Parameter {n} has not changed."
 
-    @parameterized.expand(
+    @pytest.mark.parametrize(
+        "model_id",
         [
-            ("trl-internal-testing/tiny-Qwen2_5_VLForConditionalGeneration",),
-            ("trl-internal-testing/tiny-Gemma3ForConditionalGeneration",),
-        ]
+            "trl-internal-testing/tiny-Qwen2_5_VLForConditionalGeneration",
+            "trl-internal-testing/tiny-Gemma3ForConditionalGeneration",
+        ],
     )
     @require_vision
     @require_vllm
diff --git a/tests/test_sft_trainer.py b/tests/test_sft_trainer.py
index 87e47b911d2..21ff5386190 100644
--- a/tests/test_sft_trainer.py
+++ b/tests/test_sft_trainer.py
@@ -20,7 +20,6 @@
 import transformers
 from datasets import load_dataset
 from packaging.version import parse as parse_version
-from parameterized import parameterized
 from transformers import AutoModelForCausalLM, AutoTokenizer
 from transformers.testing_utils import require_flash_attn, require_liger_kernel
 from transformers.utils import is_peft_available
@@ -250,12 +249,13 @@ def test_multiple_examples(self):
 
 
 class TestSFTTrainer(TrlTestCase):
-    @parameterized.expand(
+    @pytest.mark.parametrize(
+        "model_id",
         [
-            ("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",),
-            ("trl-internal-testing/tiny-Qwen3MoeForCausalLM",),
-            ("trl-internal-testing/tiny-GptOssForCausalLM",),
-        ]
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+            "trl-internal-testing/tiny-Qwen3MoeForCausalLM",
+            "trl-internal-testing/tiny-GptOssForCausalLM",
+        ],
     )
     def test_train(self, model_id):
         # Get the dataset
@@ -497,12 +497,13 @@ def test_train_dense_with_peft_config_lora(self):
             elif "base_layer" not in n:  # We expect the peft parameters to be different (except for the base layer)
                 assert not torch.allclose(param, new_param), f"Parameter {n} has not changed"
 
-    @parameterized.expand(
+    @pytest.mark.parametrize(
+        "peft_type",
         [
-            ("prompt_tuning",),
-            ("prefix_tuning",),
-            ("prompt_encoder",),
-        ]
+            "prompt_tuning",
+            "prefix_tuning",
+            "prompt_encoder",
+        ],
     )
     @require_peft
     def test_train_with_peft_config_prompt_tuning(self, peft_type):
@@ -881,7 +882,7 @@ def test_train_padding_free(self):
             new_param = trainer.model.get_parameter(n)
             assert not torch.allclose(param, new_param), f"Parameter {n} has not changed"
 
-    @parameterized.expand([("bfd",), ("wrapped",)])
+    @pytest.mark.parametrize("packing_strategy", ["bfd", "wrapped"])
     @ignore_warnings(message="You are using packing, but the attention implementation is not.*", category=UserWarning)
     @ignore_warnings(message="Padding-free training is enabled, but the attention.*", category=UserWarning)
     def test_train_packing(self, packing_strategy):
@@ -1321,17 +1322,18 @@ def test_tag_added_peft(self):
         for tag in ["sft", "trl"]:
             assert tag in trainer.model.model_tags
 
-    @parameterized.expand(
+    @pytest.mark.parametrize(
+        "model_id",
         [
-            ("trl-internal-testing/tiny-Gemma3ForConditionalGeneration",),
-            # ("trl-internal-testing/tiny-Idefics2ForConditionalGeneration",),  device issue from transformers, see https://github.com/huggingface/transformers/pull/39975
-            # ("trl-internal-testing/tiny-Idefics3ForConditionalGeneration",),  device issue from transformers, see https://github.com/huggingface/transformers/pull/39975
-            ("trl-internal-testing/tiny-LlavaForConditionalGeneration",),
-            ("trl-internal-testing/tiny-LlavaNextForConditionalGeneration",),
-            ("trl-internal-testing/tiny-Qwen2VLForConditionalGeneration",),
-            ("trl-internal-testing/tiny-Qwen2_5_VLForConditionalGeneration",),
-            # ("trl-internal-testing/tiny-SmolVLMForConditionalGeneration",),  device issue from transformers, see https://github.com/huggingface/transformers/pull/39975
-        ]
+            "trl-internal-testing/tiny-Gemma3ForConditionalGeneration",
+            # "trl-internal-testing/tiny-Idefics2ForConditionalGeneration",  device issue from transformers, see https://github.com/huggingface/transformers/pull/39975
+            # "trl-internal-testing/tiny-Idefics3ForConditionalGeneration",  device issue from transformers, see https://github.com/huggingface/transformers/pull/39975
+            "trl-internal-testing/tiny-LlavaForConditionalGeneration",
+            "trl-internal-testing/tiny-LlavaNextForConditionalGeneration",
+            "trl-internal-testing/tiny-Qwen2VLForConditionalGeneration",
+            "trl-internal-testing/tiny-Qwen2_5_VLForConditionalGeneration",
+            # "trl-internal-testing/tiny-SmolVLMForConditionalGeneration",  device issue from transformers, see https://github.com/huggingface/transformers/pull/39975
+        ],
     )
     @require_vision
     def test_train_vlm(self, model_id):
diff --git a/tests/test_trainers_args.py b/tests/test_trainers_args.py
index b76110d5f17..2005b54337c 100644
--- a/tests/test_trainers_args.py
+++ b/tests/test_trainers_args.py
@@ -11,10 +11,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
-
+import pytest
 from datasets import load_dataset
-from parameterized import parameterized
 from transformers import AutoModelForCausalLM, AutoModelForSequenceClassification, AutoTokenizer
 
 from trl import (
@@ -246,7 +244,7 @@ def test_kto(self):
         assert trainer.args.ref_model_init_kwargs == {"trust_remote_code": True}
         assert trainer.args.dataset_num_proc == 4
 
-    @parameterized.expand([(False,), (True,)])
+    @pytest.mark.parametrize("mixtures_coef_list", [False, True])
     def test_nash_md(self, mixtures_coef_list):
         model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
         tokenizer = AutoTokenizer.from_pretrained(model_id)
@@ -268,7 +266,7 @@ def test_nash_md(self, mixtures_coef_list):
         )
         assert trainer.args.mixture_coef == (0.5 if not mixtures_coef_list else [0.5, 0.6])
 
-    @parameterized.expand([(False,), (True,)])
+    @pytest.mark.parametrize("beta_list", [False, True])
     def test_online_dpo(self, beta_list):
         model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
         tokenizer = AutoTokenizer.from_pretrained(model_id)
@@ -372,7 +370,7 @@ def test_sft(self):
         assert trainer.args.dataset_kwargs["append_concat_token"]
         assert trainer.args.eval_packing
 
-    @parameterized.expand([(False,), (True,)])
+    @pytest.mark.parametrize("alpha_list", [False, True])
     def test_xpo(self, alpha_list):
         model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
         tokenizer = AutoTokenizer.from_pretrained(model_id)
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 60d9b9dcefb..b9590ebaddd 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -20,7 +20,6 @@
 import pytest
 import torch
 from datasets import load_dataset
-from parameterized import parameterized
 from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
 from transformers.utils import is_peft_available
 
@@ -629,14 +628,9 @@ def test_sampler_with_mini_repeat_count_and_batch_size_3(self):
 
 
 class TestEntropyFromLogits(TrlTestCase):
-    @parameterized.expand(
-        [
-            (dtype, chunk_size, shape)
-            for dtype in (torch.float64, torch.float32, torch.float16, torch.bfloat16)
-            for chunk_size in (1, 16)
-            for shape in [(768,), (32, 768), (8, 16, 768), (2, 4, 8, 768)]
-        ]
-    )
+    @pytest.mark.parametrize("shape", [(768,), (32, 768), (8, 16, 768), (2, 4, 8, 768)])
+    @pytest.mark.parametrize("chunk_size", [1, 16])
+    @pytest.mark.parametrize("dtype", [torch.float64, torch.float32, torch.float16, torch.bfloat16])
     def test_entropy_from_logits_2_dims(self, dtype, chunk_size, shape):
         logits = torch.randn(*shape, dtype=dtype)
         if dtype in (torch.float64, torch.float32):
@@ -803,7 +797,7 @@ def test_print_messages_with_tools(self, mock_stdout):
 
 
 class TestSelectiveLogSoftmax(TrlTestCase):
-    @parameterized.expand([(torch.float64,), (torch.float32,), (torch.float16,), (torch.bfloat16,)])
+    @pytest.mark.parametrize("dtype", [torch.float64, torch.float32, torch.float16, torch.bfloat16])
     def test_selective_log_softmax(self, dtype):
         """Test selective_log_softmax with logits of different dtypes"""
         vocab_size = 1024
diff --git a/tests/test_xpo_trainer.py b/tests/test_xpo_trainer.py
index 4d41471187c..0c6f3ab02bf 100644
--- a/tests/test_xpo_trainer.py
+++ b/tests/test_xpo_trainer.py
@@ -11,10 +11,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
-
+import pytest
 from datasets import load_dataset
-from parameterized import parameterized
 from transformers import AutoModelForCausalLM, AutoModelForSequenceClassification, AutoTokenizer
 from transformers.utils import is_peft_available
 
@@ -36,7 +34,7 @@ def setup_method(self):
         self.tokenizer = AutoTokenizer.from_pretrained(self.model_id)
         self.tokenizer.pad_token = self.tokenizer.eos_token
 
-    @parameterized.expand([("standard_prompt_only",), ("conversational_prompt_only",)])
+    @pytest.mark.parametrize("config_name", ["standard_prompt_only", "conversational_prompt_only"])
     def test_xpo_trainer_training(self, config_name):
         training_args = XPOConfig(
             output_dir=self.tmp_dir,
@@ -182,7 +180,7 @@ def test_training_pre_pefted_model_implicit_ref(self):
 
         assert "train_loss" in trainer.state.log_history[-1]
 
-    @parameterized.expand([("standard_prompt_only",), ("conversational_prompt_only",)])
+    @pytest.mark.parametrize("config_name", ["standard_prompt_only", "conversational_prompt_only"])
     @require_llm_blender
     def test_xpo_trainer_judge_training(self, config_name):
         training_args = XPOConfig(