From 8126245bf35a1f34576de8308d9a702a9287e236 Mon Sep 17 00:00:00 2001
From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com>
Date: Thu, 23 Oct 2025 14:49:17 +0200
Subject: [PATCH 01/18] Use explicit tiny-Qwen2ForCausalLM-2.5 model_id param in SFT tests

---
 tests/test_sft_trainer.py | 288 ++++++++++++++++++++++++++++----------
 1 file changed, 211 insertions(+), 77 deletions(-)

diff --git a/tests/test_sft_trainer.py b/tests/test_sft_trainer.py
index 4416d9d5c9a..b4d4cd9866a 100644
--- a/tests/test_sft_trainer.py
+++ b/tests/test_sft_trainer.py
@@ -311,9 +311,15 @@ def test_train_gpt_oss(self):
             new_param = trainer.model.get_parameter(n)
             assert not torch.allclose(param, new_param), f"Parameter {n} has not changed"
 
-    def test_train_model(self):
+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
+    def test_train_model(self, model_id):
         # Instantiate the model
-        model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
+        model = AutoModelForCausalLM.from_pretrained(model_id)
 
         # Get the dataset
         dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling", split="train")
@@ -336,7 +342,13 @@ def test_train_model(self):
             new_param = trainer.model.get_parameter(n)
             assert not torch.allclose(param, new_param), f"Parameter {n} has not changed"
 
-    def test_train_dft_loss(self):
+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
+    def test_train_dft_loss(self, model_id):
         # Get the dataset
         dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling")
 
@@ -352,7 +364,7 @@ def test_train_dft_loss(self):
             eval_steps=3,
         )
         trainer = SFTTrainer(
-            model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+            model=model_id,
             args=training_args,
             train_dataset=dataset["train"],
             eval_dataset=dataset["test"],
@@ -400,7 +412,13 @@ def test_train_moe_model_with_aux_loss(self):
             new_param = trainer.model.get_parameter(n)
             assert not torch.allclose(param, new_param), f"Parameter {n} has not changed"
 
-    def test_train_with_formatting_func(self):
+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
+    def test_train_with_formatting_func(self, model_id):
         # Dummy formatting function
         def formatting_prompts_func(example):
             chosen, rejected = example["chosen"], example["rejected"]
@@ -412,7 +430,7 @@ def formatting_prompts_func(example):
         # Initialize the trainer
         training_args = SFTConfig(output_dir=self.tmp_dir, report_to="none")
         trainer = SFTTrainer(
-            model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+            model=model_id,
             args=training_args,
             train_dataset=dataset,
             formatting_func=formatting_prompts_func,
@@ -432,7 +450,13 @@ def formatting_prompts_func(example):
             new_param = trainer.model.get_parameter(n)
             assert not torch.allclose(param, new_param), f"Parameter {n} has not changed"
 
-    def test_train_model_dtype(self):
+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
+    def test_train_model_dtype(self, model_id):
         # Get the dataset
         dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling", split="train")
 
@@ -443,9 +467,7 @@ def test_train_model_dtype(self):
             learning_rate=0.1,
             report_to="none",
         )
-        trainer = SFTTrainer(
-            model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", args=training_args, train_dataset=dataset
-        )
+        trainer = SFTTrainer(model=model_id, args=training_args, 
train_dataset=dataset) # Save the initial parameters to compare them later previous_trainable_params = {n: param.clone() for n, param in trainer.model.named_parameters()} @@ -467,10 +489,15 @@ def test_train_model_dtype(self): assert new_param.dtype == torch.float16 assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_peft - def test_train_dense_with_peft_config_lora(self): + def test_train_dense_with_peft_config_lora(self, model_id): # Get the base model parameter names - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" model = AutoModelForCausalLM.from_pretrained(model_id) base_param_names = [f"base_model.model.{n}" for n, _ in model.named_parameters()] @@ -504,6 +531,12 @@ def test_train_dense_with_peft_config_lora(self): elif "base_layer" not in n: # We expect the peft parameters to be different (except for the base layer) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @pytest.mark.parametrize( "peft_type", [ @@ -513,9 +546,8 @@ def test_train_dense_with_peft_config_lora(self): ], ) @require_peft - def test_train_with_peft_config_prompt_tuning(self, peft_type): + def test_train_with_peft_config_prompt_tuning(self, peft_type, model_id): # Get the base model parameter names - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" model = AutoModelForCausalLM.from_pretrained(model_id) base_param_names = [f"base_model.{n}" for n, _ in model.named_parameters()] @@ -528,7 +560,7 @@ def test_train_with_peft_config_prompt_tuning(self, peft_type): peft_config = PromptTuningConfig( task_type=TaskType.CAUSAL_LM, num_virtual_tokens=4, - tokenizer_name_or_path="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + tokenizer_name_or_path=model_id, ) elif peft_type == "prefix_tuning": peft_config = PrefixTuningConfig( @@ -602,10 +634,15 @@ def test_train_moe_with_peft_config(self): elif "base_layer" not in n: # We expect the peft parameters to be different (except for the base layer) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_peft - def test_train_peft_model(self): + def test_train_peft_model(self, model_id): # Get the base model - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" model = AutoModelForCausalLM.from_pretrained(model_id) # Get the base model parameter names @@ -639,10 +676,15 @@ def test_train_peft_model(self): elif "base_layer" not in n: # We expect the peft parameters to be different (except for the base layer) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_peft - def test_train_dense_with_peft_config_and_gradient_checkpointing(self): + def test_train_dense_with_peft_config_and_gradient_checkpointing(self, model_id): # Get the base model parameter names - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" model = AutoModelForCausalLM.from_pretrained(model_id) base_param_names = [f"base_model.model.{n}" for n, _ in model.named_parameters()] @@ -713,10 +755,15 @@ def test_train_moe_with_peft_config_and_gradient_checkpointing(self): elif "base_layer" not in n: # We expect the peft 
parameters to be different (except for the base layer) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_peft - def test_train_with_peft_model_and_gradient_checkpointing(self): + def test_train_with_peft_model_and_gradient_checkpointing(self, model_id): # Get the base model parameter names - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" model = AutoModelForCausalLM.from_pretrained(model_id) base_param_names = [f"base_model.model.{n}" for n, _ in model.named_parameters()] model = get_peft_model(model, LoraConfig()) @@ -749,16 +796,20 @@ def test_train_with_peft_model_and_gradient_checkpointing(self): elif "base_layer" not in n: # We expect the peft parameters to be different (except for the base layer) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_liger_kernel - def test_train_with_liger(self): + def test_train_with_liger(self, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling", split="train") # Initialize the trainer training_args = SFTConfig(output_dir=self.tmp_dir, use_liger_kernel=True, report_to="none") - trainer = SFTTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", args=training_args, train_dataset=dataset - ) + trainer = SFTTrainer(model=model_id, args=training_args, train_dataset=dataset) # Save the initial parameters to compare them later previous_trainable_params = {n: param.clone() for n, param in trainer.model.named_parameters()} @@ -774,7 +825,13 @@ def test_train_with_liger(self): new_param = trainer.model.get_parameter(n) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" - def test_train_with_non_chatml_conversational_data(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_train_with_non_chatml_conversational_data(self, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/zen", "conversational_language_modeling", split="train") @@ -786,9 +843,7 @@ def rename_fields(example: list[dict]): # Initialize the trainer training_args = SFTConfig(output_dir=self.tmp_dir, report_to="none") - trainer = SFTTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", args=training_args, train_dataset=dataset - ) + trainer = SFTTrainer(model=model_id, args=training_args, train_dataset=dataset) # Save the initial parameters to compare them later previous_trainable_params = {n: param.clone() for n, param in trainer.model.named_parameters()} @@ -804,9 +859,14 @@ def rename_fields(example: list[dict]): new_param = trainer.model.get_parameter(n) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" - def test_train_with_pretokenized_data(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_train_with_pretokenized_data(self, model_id): # Get the dataset - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" tokenizer = AutoTokenizer.from_pretrained(model_id) dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling", split="train") @@ -834,15 +894,19 @@ def tokenize_example(example): new_param = trainer.model.get_parameter(n) assert not torch.allclose(param, new_param), 
f"Parameter {n} has not changed" - def test_train_with_iterable_dataset(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_train_with_iterable_dataset(self, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling", split="train", streaming=True) # Initialize the trainer training_args = SFTConfig(output_dir=self.tmp_dir, max_steps=3, report_to="none") - trainer = SFTTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", args=training_args, train_dataset=dataset - ) + trainer = SFTTrainer(model=model_id, args=training_args, train_dataset=dataset) # Save the initial parameters to compare them later previous_trainable_params = {n: param.clone() for n, param in trainer.model.named_parameters()} @@ -858,8 +922,14 @@ def test_train_with_iterable_dataset(self): new_param = trainer.model.get_parameter(n) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_flash_attn - def test_train_padding_free(self): + def test_train_padding_free(self, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling", split="train") @@ -871,9 +941,7 @@ def test_train_padding_free(self): bf16=True, # flash_attention_2 only supports bf16 and fp16 report_to="none", ) - trainer = SFTTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", args=training_args, train_dataset=dataset - ) + trainer = SFTTrainer(model=model_id, args=training_args, train_dataset=dataset) # Save the initial parameters to compare them later previous_trainable_params = {n: param.clone() for n, param in trainer.model.named_parameters()} @@ -889,10 +957,16 @@ def test_train_padding_free(self): new_param = trainer.model.get_parameter(n) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @pytest.mark.parametrize("packing_strategy", ["bfd", "wrapped"]) @ignore_warnings(message="You are using packing, but the attention implementation is not.*", category=UserWarning) @ignore_warnings(message="Padding-free training is enabled, but the attention.*", category=UserWarning) - def test_train_packing(self, packing_strategy): + def test_train_packing(self, packing_strategy, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling", split="train") @@ -900,9 +974,7 @@ def test_train_packing(self, packing_strategy): training_args = SFTConfig( output_dir=self.tmp_dir, packing=True, packing_strategy=packing_strategy, max_length=10, report_to="none" ) - trainer = SFTTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", args=training_args, train_dataset=dataset - ) + trainer = SFTTrainer(model=model_id, args=training_args, train_dataset=dataset) # Save the initial parameters to compare them later previous_trainable_params = {n: param.clone() for n, param in trainer.model.named_parameters()} @@ -918,9 +990,15 @@ def test_train_packing(self, packing_strategy): new_param = trainer.model.get_parameter(n) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @ignore_warnings(message="You are using packing, but the 
attention implementation is not.*", category=UserWarning) @ignore_warnings(message="Padding-free training is enabled, but the attention.*", category=UserWarning) - def test_eval_packing(self): + def test_eval_packing(self, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling") @@ -932,7 +1010,7 @@ def test_eval_packing(self): report_to="none", ) trainer = SFTTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, args=training_args, train_dataset=dataset["train"], eval_dataset=dataset["test"], @@ -952,9 +1030,15 @@ def test_eval_packing(self): assert len(trainer.train_dataset["input_ids"]) == 3 # w/ this dataset, we end up with 46 seqs assert len(trainer.eval_dataset["input_ids"]) == 1 # w/ this dataset, we end up with 6 seqs + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @ignore_warnings(message="You are using packing, but the attention implementation is not.*", category=UserWarning) @ignore_warnings(message="Padding-free training is enabled, but the attention.*", category=UserWarning) - def test_only_train_packing(self): + def test_only_train_packing(self, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling") @@ -967,7 +1051,7 @@ def test_only_train_packing(self): report_to="none", ) trainer = SFTTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, args=training_args, train_dataset=dataset["train"], eval_dataset=dataset["test"], @@ -987,23 +1071,27 @@ def test_only_train_packing(self): assert len(trainer.train_dataset["input_ids"]) == 3 # w/ this dataset, we end up with 46 seqs assert len(trainer.eval_dataset["input_ids"]) == 2 # w/ this dataset, we end up with 6 seqs - def test_train_with_chat_template_kwargs(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_train_with_chat_template_kwargs(self, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling", split="train") # Initialize the trainer training_args = SFTConfig(output_dir=self.tmp_dir, report_to="none") - tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5") + tokenizer = AutoTokenizer.from_pretrained(model_id) # The following template is a simplified version of the Qwen chat template, where an additional argument # `role_capital` is used to control the capitalization of roles. tokenizer.chat_template = '{%- if messages[0]["role"] == "system" -%} {{ "<|im_start|>" + ("SYSTEM" if role_capital else "system") + "\\n" + messages[0]["content"] + "<|im_end|>\\n" }}{%- else -%} {{ "<|im_start|>" + ("SYSTEM" if role_capital else "system") + "\\nYou are Qwen, created by Alibaba Cloud. 
You are a helpful assistant.<|im_end|>\\n" }}{%- endif -%}{%- for message in messages -%} {%- if (message.role == "user") or (message.role == "system" and not loop.first) or (message.role == "assistant" and not message.tool_calls) -%} {{ "<|im_start|>" + (message.role.upper() if role_capital else message.role) + "\\n" + message.content + "<|im_end|>\\n" }} {%- elif message.role == "assistant" -%} {{ "<|im_start|>" + ("ASSISTANT" if role_capital else "assistant") }} {%- if message.content -%} {{ "\\n" + message.content }} {%- endif -%} {{ "<|im_end|>\\n" }} {%- elif message.role == "tool" -%} {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != "tool") -%} {{ "<|im_start|>" + ("USER" if role_capital else "user") }} {%- endif -%} {{ "\\n\\n" + message.content + "\\n" }} {%- if loop.last or (messages[loop.index0 + 1].role != "tool") -%} {{ "<|im_end|>\\n" }} {%- endif -%} {%- endif -%}{%- endfor -%}{%- if add_generation_prompt -%} {{ "<|im_start|>" + ("ASSISTANT" if role_capital else "assistant") + "\\n" }}{%- endif -%}' dataset.add_column("chat_template_kwargs", [{"role_capital": bool(i % 2)} for i in range(len(dataset))]) - trainer = SFTTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", args=training_args, train_dataset=dataset - ) + trainer = SFTTrainer(model=model_id, args=training_args, train_dataset=dataset) # Save the initial parameters to compare them later previous_trainable_params = {n: param.clone() for n, param in trainer.model.named_parameters()} @@ -1215,15 +1303,19 @@ def test_train_with_set_chat_template_from_path(self): original_template_content = f.read() assert template_content == original_template_content, "Chat template content does not match the original" - def test_train_toolcall_data(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_train_toolcall_data(self, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/toolcall", split="train") # Initialize the trainer training_args = SFTConfig(output_dir=self.tmp_dir, report_to="none") - trainer = SFTTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", args=training_args, train_dataset=dataset - ) + trainer = SFTTrainer(model=model_id, args=training_args, train_dataset=dataset) # Save the initial parameters to compare them later previous_trainable_params = {n: param.clone() for n, param in trainer.model.named_parameters()} @@ -1239,14 +1331,20 @@ def test_train_toolcall_data(self): new_param = trainer.model.get_parameter(n) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" - def test_train_with_eval(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_train_with_eval(self, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling") # Initialize the trainer training_args = SFTConfig(output_dir=self.tmp_dir, eval_strategy="steps", eval_steps=3, report_to="none") trainer = SFTTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, args=training_args, train_dataset=dataset["train"], eval_dataset=dataset["test"], @@ -1258,14 +1356,20 @@ def test_train_with_eval(self): # Check that the eval loss is not None assert trainer.state.log_history[0]["eval_loss"] is not None - def test_train_with_multiple_eval_dataset(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) 
+ def test_train_with_multiple_eval_dataset(self, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling") # Initialize the trainer training_args = SFTConfig(output_dir=self.tmp_dir, eval_strategy="steps", eval_steps=3, report_to="none") trainer = SFTTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, args=training_args, train_dataset=dataset["train"], eval_dataset={"data1": dataset["test"], "data2": dataset["test"]}, @@ -1277,15 +1381,19 @@ def test_train_with_multiple_eval_dataset(self): assert trainer.state.log_history[-3]["eval_data1_loss"] is not None assert trainer.state.log_history[-2]["eval_data2_loss"] is not None - def test_train_with_gradient_checkpointing(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_train_with_gradient_checkpointing(self, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling", split="train") # Initialize the trainer training_args = SFTConfig(output_dir=self.tmp_dir, gradient_checkpointing=True, report_to="none") - trainer = SFTTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", args=training_args, train_dataset=dataset - ) + trainer = SFTTrainer(model=model_id, args=training_args, train_dataset=dataset) # Save the initial parameters to compare them later previous_trainable_params = {n: param.clone() for n, param in trainer.model.named_parameters()} @@ -1301,27 +1409,36 @@ def test_train_with_gradient_checkpointing(self): new_param = trainer.model.get_parameter(n) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" - def test_tag_added(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_tag_added(self, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling", split="train") # Initialize the trainer - trainer = SFTTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", - train_dataset=dataset, - ) + trainer = SFTTrainer(model=model_id, train_dataset=dataset) for tag in ["sft", "trl"]: assert tag in trainer.model.model_tags + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_peft - def test_tag_added_peft(self): + def test_tag_added_peft(self, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling", split="train") # Initialize the trainer trainer = SFTTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, train_dataset=dataset, peft_config=LoraConfig(), ) @@ -1549,14 +1666,20 @@ def test_train_vlm_text_only_data(self): else: assert not torch.allclose(param, new_param, rtol=1e-12, atol=1e-12), f"Param {n} is not updated" + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_peft - def test_prompt_tuning(self): + def test_prompt_tuning(self, model_id): """Test that SFT works with Prompt Tuning.""" dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling", split="train") training_args = SFTConfig(output_dir=self.tmp_dir, report_to="none") trainer = SFTTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, args=training_args, train_dataset=dataset, peft_config=PromptEncoderConfig(task_type=TaskType.CAUSAL_LM, 
num_virtual_tokens=8), @@ -1581,9 +1704,15 @@ def test_prompt_tuning(self): else: raise ValueError(f"Unexpected parameter {n} in model: {trainer.model}") + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_peft @require_bitsandbytes - def test_peft_model_with_quantization(self): + def test_peft_model_with_quantization(self, model_id): """SFTTrainer should not freeze layers of existing PeftModel. This test simulates a realistic QLoRA scenario where a quantized base model is first converted to a PeftModel, @@ -1591,7 +1720,6 @@ def test_peft_model_with_quantization(self): including the LoRA adapters, making training impossible. """ # Get the base model - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" model = AutoModelForCausalLM.from_pretrained(model_id) # Simulate a realistic QLoRA setup by mocking quantization attributes @@ -1675,10 +1803,16 @@ def test_peft_model_with_quantization(self): "All original LoRA parameters should remain trainable after SFTTrainer initialization" ) + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_peft - def test_prompt_tuning_peft_model(self): + def test_prompt_tuning_peft_model(self, model_id): """Test that SFT works with Prompt Tuning and a pre-converted PeftModel""" - model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5") + model = AutoModelForCausalLM.from_pretrained(model_id) model = get_peft_model(model, PromptEncoderConfig(task_type=TaskType.CAUSAL_LM, num_virtual_tokens=8)) dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling", split="train") From bc1211006a79708cb9b8b2764d96448ca0c2a28a Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Thu, 23 Oct 2025 16:11:24 +0200 Subject: [PATCH 02/18] Use fixture instead --- tests/test_sft_trainer.py | 171 ++------------------------------------ 1 file changed, 9 insertions(+), 162 deletions(-) diff --git a/tests/test_sft_trainer.py b/tests/test_sft_trainer.py index b4d4cd9866a..5c89b1ea745 100644 --- a/tests/test_sft_trainer.py +++ b/tests/test_sft_trainer.py @@ -256,6 +256,15 @@ def test_multiple_examples(self): class TestSFTTrainer(TrlTestCase): + @pytest.fixture( + scope="class", + params=[ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def model_id(self, request): + return request.param + @pytest.mark.parametrize( "model_id", [ @@ -311,12 +320,6 @@ def test_train_gpt_oss(self): new_param = trainer.model.get_parameter(n) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" - @pytest.mark.parametrize( - "model_id", - [ - "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", - ], - ) def test_train_model(self, model_id): # Instantiate the model model = AutoModelForCausalLM.from_pretrained(model_id) @@ -342,12 +345,6 @@ def test_train_model(self, model_id): new_param = trainer.model.get_parameter(n) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" - @pytest.mark.parametrize( - "model_id", - [ - "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", - ], - ) def test_train_dft_loss(self, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling") @@ -412,12 +409,6 @@ def test_train_moe_model_with_aux_loss(self): new_param = trainer.model.get_parameter(n) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" - 
@pytest.mark.parametrize( - "model_id", - [ - "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", - ], - ) def test_train_with_formatting_func(self, model_id): # Dummy formatting function def formatting_prompts_func(example): @@ -450,12 +441,6 @@ def formatting_prompts_func(example): new_param = trainer.model.get_parameter(n) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" - @pytest.mark.parametrize( - "model_id", - [ - "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", - ], - ) def test_train_model_dtype(self, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling", split="train") @@ -489,12 +474,6 @@ def test_train_model_dtype(self, model_id): assert new_param.dtype == torch.float16 assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" - @pytest.mark.parametrize( - "model_id", - [ - "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", - ], - ) @require_peft def test_train_dense_with_peft_config_lora(self, model_id): # Get the base model parameter names @@ -531,12 +510,6 @@ def test_train_dense_with_peft_config_lora(self, model_id): elif "base_layer" not in n: # We expect the peft parameters to be different (except for the base layer) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" - @pytest.mark.parametrize( - "model_id", - [ - "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", - ], - ) @pytest.mark.parametrize( "peft_type", [ @@ -634,12 +607,6 @@ def test_train_moe_with_peft_config(self): elif "base_layer" not in n: # We expect the peft parameters to be different (except for the base layer) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" - @pytest.mark.parametrize( - "model_id", - [ - "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", - ], - ) @require_peft def test_train_peft_model(self, model_id): # Get the base model @@ -676,12 +643,6 @@ def test_train_peft_model(self, model_id): elif "base_layer" not in n: # We expect the peft parameters to be different (except for the base layer) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" - @pytest.mark.parametrize( - "model_id", - [ - "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", - ], - ) @require_peft def test_train_dense_with_peft_config_and_gradient_checkpointing(self, model_id): # Get the base model parameter names @@ -755,12 +716,6 @@ def test_train_moe_with_peft_config_and_gradient_checkpointing(self): elif "base_layer" not in n: # We expect the peft parameters to be different (except for the base layer) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" - @pytest.mark.parametrize( - "model_id", - [ - "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", - ], - ) @require_peft def test_train_with_peft_model_and_gradient_checkpointing(self, model_id): # Get the base model parameter names @@ -796,12 +751,6 @@ def test_train_with_peft_model_and_gradient_checkpointing(self, model_id): elif "base_layer" not in n: # We expect the peft parameters to be different (except for the base layer) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" - @pytest.mark.parametrize( - "model_id", - [ - "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", - ], - ) @require_liger_kernel def test_train_with_liger(self, model_id): # Get the dataset @@ -825,12 +774,6 @@ def test_train_with_liger(self, model_id): new_param = trainer.model.get_parameter(n) assert not torch.allclose(param, new_param), f"Parameter {n} has not 
changed" - @pytest.mark.parametrize( - "model_id", - [ - "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", - ], - ) def test_train_with_non_chatml_conversational_data(self, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/zen", "conversational_language_modeling", split="train") @@ -859,12 +802,6 @@ def rename_fields(example: list[dict]): new_param = trainer.model.get_parameter(n) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" - @pytest.mark.parametrize( - "model_id", - [ - "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", - ], - ) def test_train_with_pretokenized_data(self, model_id): # Get the dataset tokenizer = AutoTokenizer.from_pretrained(model_id) @@ -894,12 +831,6 @@ def tokenize_example(example): new_param = trainer.model.get_parameter(n) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" - @pytest.mark.parametrize( - "model_id", - [ - "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", - ], - ) def test_train_with_iterable_dataset(self, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling", split="train", streaming=True) @@ -922,12 +853,6 @@ def test_train_with_iterable_dataset(self, model_id): new_param = trainer.model.get_parameter(n) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" - @pytest.mark.parametrize( - "model_id", - [ - "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", - ], - ) @require_flash_attn def test_train_padding_free(self, model_id): # Get the dataset @@ -957,12 +882,6 @@ def test_train_padding_free(self, model_id): new_param = trainer.model.get_parameter(n) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" - @pytest.mark.parametrize( - "model_id", - [ - "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", - ], - ) @pytest.mark.parametrize("packing_strategy", ["bfd", "wrapped"]) @ignore_warnings(message="You are using packing, but the attention implementation is not.*", category=UserWarning) @ignore_warnings(message="Padding-free training is enabled, but the attention.*", category=UserWarning) @@ -990,12 +909,6 @@ def test_train_packing(self, packing_strategy, model_id): new_param = trainer.model.get_parameter(n) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" - @pytest.mark.parametrize( - "model_id", - [ - "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", - ], - ) @ignore_warnings(message="You are using packing, but the attention implementation is not.*", category=UserWarning) @ignore_warnings(message="Padding-free training is enabled, but the attention.*", category=UserWarning) def test_eval_packing(self, model_id): @@ -1030,12 +943,6 @@ def test_eval_packing(self, model_id): assert len(trainer.train_dataset["input_ids"]) == 3 # w/ this dataset, we end up with 46 seqs assert len(trainer.eval_dataset["input_ids"]) == 1 # w/ this dataset, we end up with 6 seqs - @pytest.mark.parametrize( - "model_id", - [ - "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", - ], - ) @ignore_warnings(message="You are using packing, but the attention implementation is not.*", category=UserWarning) @ignore_warnings(message="Padding-free training is enabled, but the attention.*", category=UserWarning) def test_only_train_packing(self, model_id): @@ -1071,12 +978,6 @@ def test_only_train_packing(self, model_id): assert len(trainer.train_dataset["input_ids"]) == 3 # w/ this dataset, we end up with 46 seqs assert len(trainer.eval_dataset["input_ids"]) == 2 # w/ this 
dataset, we end up with 6 seqs - @pytest.mark.parametrize( - "model_id", - [ - "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", - ], - ) def test_train_with_chat_template_kwargs(self, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling", split="train") @@ -1303,12 +1204,6 @@ def test_train_with_set_chat_template_from_path(self): original_template_content = f.read() assert template_content == original_template_content, "Chat template content does not match the original" - @pytest.mark.parametrize( - "model_id", - [ - "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", - ], - ) def test_train_toolcall_data(self, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/toolcall", split="train") @@ -1331,12 +1226,6 @@ def test_train_toolcall_data(self, model_id): new_param = trainer.model.get_parameter(n) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" - @pytest.mark.parametrize( - "model_id", - [ - "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", - ], - ) def test_train_with_eval(self, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling") @@ -1356,12 +1245,6 @@ def test_train_with_eval(self, model_id): # Check that the eval loss is not None assert trainer.state.log_history[0]["eval_loss"] is not None - @pytest.mark.parametrize( - "model_id", - [ - "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", - ], - ) def test_train_with_multiple_eval_dataset(self, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling") @@ -1381,12 +1264,6 @@ def test_train_with_multiple_eval_dataset(self, model_id): assert trainer.state.log_history[-3]["eval_data1_loss"] is not None assert trainer.state.log_history[-2]["eval_data2_loss"] is not None - @pytest.mark.parametrize( - "model_id", - [ - "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", - ], - ) def test_train_with_gradient_checkpointing(self, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling", split="train") @@ -1409,12 +1286,6 @@ def test_train_with_gradient_checkpointing(self, model_id): new_param = trainer.model.get_parameter(n) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" - @pytest.mark.parametrize( - "model_id", - [ - "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", - ], - ) def test_tag_added(self, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling", split="train") @@ -1425,12 +1296,6 @@ def test_tag_added(self, model_id): for tag in ["sft", "trl"]: assert tag in trainer.model.model_tags - @pytest.mark.parametrize( - "model_id", - [ - "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", - ], - ) @require_peft def test_tag_added_peft(self, model_id): # Get the dataset @@ -1666,12 +1531,6 @@ def test_train_vlm_text_only_data(self): else: assert not torch.allclose(param, new_param, rtol=1e-12, atol=1e-12), f"Param {n} is not updated" - @pytest.mark.parametrize( - "model_id", - [ - "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", - ], - ) @require_peft def test_prompt_tuning(self, model_id): """Test that SFT works with Prompt Tuning.""" @@ -1704,12 +1563,6 @@ def test_prompt_tuning(self, model_id): else: raise ValueError(f"Unexpected parameter {n} in model: {trainer.model}") - @pytest.mark.parametrize( - "model_id", - [ - "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", - ], - ) @require_peft 
@require_bitsandbytes def test_peft_model_with_quantization(self, model_id): @@ -1803,12 +1656,6 @@ def test_peft_model_with_quantization(self, model_id): "All original LoRA parameters should remain trainable after SFTTrainer initialization" ) - @pytest.mark.parametrize( - "model_id", - [ - "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", - ], - ) @require_peft def test_prompt_tuning_peft_model(self, model_id): """Test that SFT works with Prompt Tuning and a pre-converted PeftModel""" From ba30f5a700f117221246f70d541dada0fa8ab4c7 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Thu, 23 Oct 2025 17:19:20 +0200 Subject: [PATCH 03/18] Use model_id fixture in GRPO tests --- tests/test_grpo_trainer.py | 141 ++++++++++++++++++++----------------- 1 file changed, 75 insertions(+), 66 deletions(-) diff --git a/tests/test_grpo_trainer.py b/tests/test_grpo_trainer.py index 50efa9e45cd..689410d13f9 100644 --- a/tests/test_grpo_trainer.py +++ b/tests/test_grpo_trainer.py @@ -113,17 +113,26 @@ def test_compute_entropy_all_masked(self): class TestGRPOTrainer(TrlTestCase): - def test_init_minimal(self): + @pytest.fixture( + scope="class", + params=[ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def model_id(self, request): + return request.param + + def test_init_minimal(self, model_id): # Test that GRPOTrainer can be instantiated with only model, reward_model and train_dataset dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", train_dataset=dataset, ) @pytest.mark.parametrize("config_name", ["standard_prompt_only", "conversational_prompt_only"]) - def test_training(self, config_name): + def test_training(self, config_name, model_id): dataset = load_dataset("trl-internal-testing/zen", config_name, split="train") training_args = GRPOConfig( @@ -135,7 +144,7 @@ def test_training(self, config_name): report_to="none", ) trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -153,7 +162,7 @@ def test_training(self, config_name): assert not torch.equal(param, new_param), f"Parameter {n} has not changed." @pytest.mark.parametrize("loss_type", ["bnpo", "dr_grpo", "dapo"]) - def test_training_loss_types(self, loss_type): + def test_training_loss_types(self, loss_type, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") training_args = GRPOConfig( @@ -167,7 +176,7 @@ def test_training_loss_types(self, loss_type): report_to="none", ) trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -184,7 +193,7 @@ def test_training_loss_types(self, loss_type): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." 
- def test_training_with_eval(self): + def test_training_with_eval(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only") training_args = GRPOConfig( @@ -198,7 +207,7 @@ def test_training_with_eval(self): report_to="none", ) trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset["train"], @@ -207,7 +216,7 @@ def test_training_with_eval(self): trainer.train() - def test_training_multiple_iterations(self): + def test_training_multiple_iterations(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") training_args = GRPOConfig( @@ -220,7 +229,7 @@ def test_training_multiple_iterations(self): report_to="none", ) trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -238,8 +247,8 @@ def test_training_multiple_iterations(self): assert not torch.equal(param, new_param), f"Parameter {n} has not changed." @require_peft - def test_training_peft(self): - model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5") + def test_training_peft(self, model_id): + model = AutoModelForCausalLM.from_pretrained(model_id) base_param_names = [f"base_model.model.{n}" for n, _ in model.named_parameters()] dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -274,12 +283,12 @@ def test_training_peft(self): assert not torch.allclose(param, new_param), f"Parameter {n} has not changed." @require_peft - def test_training_peft_with_gradient_checkpointing(self): + def test_training_peft_with_gradient_checkpointing(self, model_id): """Test that training works with PEFT and gradient checkpointing enabled.""" dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") model = AutoModelForCausalLM.from_pretrained( - "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model_id, dtype=torch.float32, # Use float32 for testing to avoid precision issues ) @@ -322,7 +331,7 @@ def test_training_peft_with_gradient_checkpointing(self): else: # Base model parameters should not change assert torch.equal(param, new_param), f"Base parameter {n} has changed." - def test_training_different_reward_model(self): + def test_training_different_reward_model(self, model_id): # Use a reward model different from the model: different chat template, tokenization, etc. dataset = load_dataset("trl-internal-testing/zen", "conversational_prompt_only", split="train") reward_model_id = "trl-internal-testing/tiny-LlamaForSequenceClassification-3.2" @@ -343,7 +352,7 @@ def test_training_different_reward_model(self): report_to="none", ) trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs=reward_model, args=training_args, train_dataset=dataset, @@ -361,7 +370,7 @@ def test_training_different_reward_model(self): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." 
- def test_training_reward_func_standard(self): + def test_training_reward_func_standard(self, model_id): # Test if trainer can handle reward function with standard format dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -378,7 +387,7 @@ def reward_func(completions, **kwargs): report_to="none", ) trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs=reward_func, args=training_args, train_dataset=dataset, @@ -395,7 +404,7 @@ def reward_func(completions, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." - def test_training_reward_func_conversational(self): + def test_training_reward_func_conversational(self, model_id): # Test if trainer can handle reward function with conversational format dataset = load_dataset("trl-internal-testing/zen", "conversational_prompt_only", split="train") @@ -413,7 +422,7 @@ def reward_func(completions, **kwargs): report_to="none", ) trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs=reward_func, args=training_args, train_dataset=dataset, @@ -430,7 +439,7 @@ def reward_func(completions, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." - def test_training_multiple_reward_funcs(self): + def test_training_multiple_reward_funcs(self, model_id): # Test that GRPOTrainer can be instantiated with multiple reward functions dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -451,7 +460,7 @@ def reward_func2(completions, **kwargs): report_to="none", ) trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs=[reward_func1, reward_func2], args=training_args, train_dataset=dataset, @@ -468,7 +477,7 @@ def reward_func2(completions, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." - def test_training_multiple_reward_funcs_with_None_output(self): + def test_training_multiple_reward_funcs_with_None_output(self, model_id): """Test that a valid math reward function is processed correctly while the code reward function returns None.""" dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -490,7 +499,7 @@ def non_applicable_reward_func(completions, **kwargs): ) trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs=[ applicable_reward_func, non_applicable_reward_func, @@ -512,7 +521,7 @@ def non_applicable_reward_func(completions, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." 
- def test_training_multiple_reward_funcs_with_weights(self): + def test_training_multiple_reward_funcs_with_weights(self, model_id): """Test that GRPOTrainer can handle multiple reward functions with weights.""" dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -534,7 +543,7 @@ def reward_func2(completions, **kwargs): reward_weights=[0.7, 0.3], # weight of reward_func1 and reward_func2 respectively ) trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs=[reward_func1, reward_func2], args=training_args, train_dataset=dataset, @@ -556,7 +565,7 @@ def reward_func2(completions, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." - def test_training_multiple_mixed_reward_funcs(self): + def test_training_multiple_mixed_reward_funcs(self, model_id): # Test if the trainer can handle a mix of reward functions and reward models dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -573,7 +582,7 @@ def reward_func(completions, **kwargs): report_to="none", ) trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs=[reward_func, "trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5"], args=training_args, train_dataset=dataset, @@ -590,7 +599,7 @@ def reward_func(completions, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." - def test_training_reward_func_additional_column(self): + def test_training_reward_func_additional_column(self, model_id): # Test if trainer can handle reward function that rely on additional columns in the dataset dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -611,7 +620,7 @@ def reward_func(completions, some_values, **kwargs): report_to="none", ) trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs=reward_func, args=training_args, train_dataset=dataset, @@ -628,7 +637,7 @@ def reward_func(completions, some_values, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." - def test_training_with_sync_ref_model(self): + def test_training_with_sync_ref_model(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") training_args = GRPOConfig( @@ -642,7 +651,7 @@ def test_training_with_sync_ref_model(self): report_to="none", ) trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -659,7 +668,7 @@ def test_training_with_sync_ref_model(self): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." 
- def test_training_beta_non_zero(self): + def test_training_beta_non_zero(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") training_args = GRPOConfig( output_dir=self.tmp_dir, @@ -671,7 +680,7 @@ def test_training_beta_non_zero(self): report_to="none", ) trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -688,7 +697,7 @@ def test_training_beta_non_zero(self): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." - def test_training_with_entropy_filter(self): + def test_training_with_entropy_filter(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") training_args = GRPOConfig( output_dir=self.tmp_dir, @@ -700,7 +709,7 @@ def test_training_with_entropy_filter(self): top_entropy_quantile=0.2, ) trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -832,7 +841,7 @@ def test_training_vllm_importance_sampling_correction(self): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." - def test_training_with_additional_generation_kwargs(self): + def test_training_with_additional_generation_kwargs(self, model_id): """Test that training works with additional generation kwargs.""" dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -850,7 +859,7 @@ def test_training_with_additional_generation_kwargs(self): ) trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -906,7 +915,7 @@ def test_training_vllm_with_additional_generation_kwargs(self): assert not torch.equal(param, new_param), f"Parameter {n} has not changed." @pytest.mark.parametrize("scale_rewards", [False, "group", "batch", True, "none"]) - def test_training_scale_rewards(self, scale_rewards): + def test_training_scale_rewards(self, scale_rewards, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") training_args = GRPOConfig( @@ -919,7 +928,7 @@ def test_training_scale_rewards(self, scale_rewards): report_to="none", ) trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -937,7 +946,7 @@ def test_training_scale_rewards(self, scale_rewards): assert not torch.equal(param, new_param), f"Parameter {n} has not changed." 
@patch("transformers.generation.utils.GenerationMixin.generate") - def test_training_with_mask_truncated_completions(self, mock_generate): + def test_training_with_mask_truncated_completions(self, mock_generate, model_id): """Test that training works with mask_truncated_completions=True parameter.""" # We mock the generate method because the model's random weights make it extremely unlikely to produce a @@ -969,7 +978,7 @@ def fake_generate(input_ids, **kwargs): report_to="none", ) trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -986,7 +995,7 @@ def fake_generate(input_ids, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." - def test_training_with_mask_truncated_completions_all_masked(self): + def test_training_with_mask_truncated_completions_all_masked(self, model_id): """ Test that when all generated completions are truncated (i.e., none contain an EOS token), and mask_truncated_completions=True, the model receives no effective learning signal and therefore does not update @@ -1007,7 +1016,7 @@ def test_training_with_mask_truncated_completions_all_masked(self): report_to="none", ) trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -1024,7 +1033,7 @@ def test_training_with_mask_truncated_completions_all_masked(self): new_param = trainer.model.get_parameter(n) assert torch.equal(param, new_param), f"Parameter {n} has changed." - def test_warning_raised_all_rewards_none(self, caplog): + def test_warning_raised_all_rewards_none(self, model_id, caplog): """Test that a proper warning is raised when all rewards are None.""" dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -1041,7 +1050,7 @@ def always_none_reward_func(completions, **kwargs): report_to="none", ) trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs=always_none_reward_func, args=training_args, train_dataset=dataset, @@ -1053,7 +1062,7 @@ def always_none_reward_func(completions, **kwargs): expected_warning = "All reward functions returned None for the following kwargs:" assert expected_warning in caplog.text - def test_training_num_generations_larger_than_batch_size(self): + def test_training_num_generations_larger_than_batch_size(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") training_args = GRPOConfig( @@ -1066,7 +1075,7 @@ def test_training_num_generations_larger_than_batch_size(self): report_to="none", ) trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -1083,7 +1092,7 @@ def test_training_num_generations_larger_than_batch_size(self): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." 
- def test_training_delta_clipping(self): + def test_training_delta_clipping(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") training_args = GRPOConfig( @@ -1096,7 +1105,7 @@ def test_training_delta_clipping(self): report_to="none", ) trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -1113,7 +1122,7 @@ def test_training_delta_clipping(self): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." - def test_training_multiple_dataloader_workers(self): + def test_training_multiple_dataloader_workers(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") training_args = GRPOConfig( @@ -1126,7 +1135,7 @@ def test_training_multiple_dataloader_workers(self): report_to="none", ) trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -1143,7 +1152,7 @@ def test_training_multiple_dataloader_workers(self): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." - def test_training_with_generation_kwargs(self): + def test_training_with_generation_kwargs(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") training_args = GRPOConfig( @@ -1156,7 +1165,7 @@ def test_training_with_generation_kwargs(self): report_to="none", ) trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -1173,7 +1182,7 @@ def test_training_with_generation_kwargs(self): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." - def test_training_with_reward_func_accessing_trainer_state(self): + def test_training_with_reward_func_accessing_trainer_state(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") def reward_func(completions, **kwargs): @@ -1191,14 +1200,14 @@ def reward_func(completions, **kwargs): report_to="none", ) trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs=reward_func, args=training_args, train_dataset=dataset, ) trainer.train() - def test_prepare_input_called_with_correct_data(self): + def test_prepare_input_called_with_correct_data(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") training_args = GRPOConfig( output_dir=self.tmp_dir, @@ -1214,7 +1223,7 @@ def test_prepare_input_called_with_correct_data(self): report_to="none", ) trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -1552,7 +1561,7 @@ def reward_func(completions, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." 
- def test_training_sequence_importance_sampling(self): + def test_training_sequence_importance_sampling(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") training_args = GRPOConfig( @@ -1566,7 +1575,7 @@ def test_training_sequence_importance_sampling(self): report_to="none", ) trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -1583,7 +1592,7 @@ def test_training_sequence_importance_sampling(self): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." - def test_mismatched_reward_processing_classes_length(self): + def test_mismatched_reward_processing_classes_length(self, model_id): """Test that mismatched length between reward_funcs and reward_processing_classes raises error.""" dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -1602,14 +1611,14 @@ def test_mismatched_reward_processing_classes_length(self): with pytest.raises(ValueError, match="must match"): GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs=reward_models, reward_processing_classes=single_processing_class, # only one, but need two args=training_args, train_dataset=dataset, ) - def test_correct_reward_processing_classes_list(self): + def test_correct_reward_processing_classes_list(self, model_id): """Test that correct list of reward_processing_classes works properly.""" dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -1631,7 +1640,7 @@ def test_correct_reward_processing_classes_list(self): correct_processing_classes = [processing_class1, processing_class2] trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs=reward_models, reward_processing_classes=correct_processing_classes, args=training_args, @@ -1640,7 +1649,7 @@ def test_correct_reward_processing_classes_list(self): assert len(trainer.reward_processing_classes) == len(reward_models) - def test_single_reward_model_with_single_processing_class(self): + def test_single_reward_model_with_single_processing_class(self, model_id): """Test that single reward model with single processing class works.""" dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -1655,7 +1664,7 @@ def test_single_reward_model_with_single_processing_class(self): training_args = GRPOConfig(output_dir=self.tmp_dir, report_to="none") trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs=reward_model, reward_processing_classes=single_processing_class, # single object for single reward model args=training_args, From 919e8f0fc1afc5f0ba4b9b127c2fd4e5b6f16c7c Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Thu, 23 Oct 2025 17:19:57 +0200 Subject: [PATCH 04/18] Use model_id fixture in DPO tests --- tests/test_dpo_trainer.py | 47 +++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 24 deletions(-) diff --git a/tests/test_dpo_trainer.py b/tests/test_dpo_trainer.py index fa7038167d4..17ac7b224b8 100644 --- a/tests/test_dpo_trainer.py +++ b/tests/test_dpo_trainer.py @@ -153,6 +153,15 @@ def test_tokenize_row_with_truncation_and_special_tokens(self): 
class TestDPOTrainer(TrlTestCase): + @pytest.fixture( + scope="class", + params=[ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def model_id(self, request): + return request.param + def setup_method(self): self.model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" self.model = AutoModelForCausalLM.from_pretrained(self.model_id) @@ -160,8 +169,7 @@ def setup_method(self): self.tokenizer = AutoTokenizer.from_pretrained(self.model_id) self.tokenizer.pad_token = self.tokenizer.eos_token - def test_train(self): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + def test_train(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_preference", split="train") tokenizer = AutoTokenizer.from_pretrained(model_id) training_args = DPOConfig( @@ -207,8 +215,7 @@ def test_train(self): "apo_down", ], ) - def test_train_loss_types(self, loss_type): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + def test_train_loss_types(self, loss_type, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_preference", split="train") tokenizer = AutoTokenizer.from_pretrained(model_id) @@ -301,12 +308,11 @@ def test_dpo_trainer_with_weighting(self): if param.sum() != 0: # ignore 0 biases assert not torch.allclose(param, new_param, rtol=1e-12, atol=1e-12) - def test_train_with_multiple_loss_types(self): + def test_train_with_multiple_loss_types(self, model_id): """ Tests multi-loss combinations, loss type inference, and weight configuration. MPO combines DPO (sigmoid), BCO (bco_pair), and SFT (sft) losses. """ - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" dataset = load_dataset("trl-internal-testing/zen", "standard_preference", split="train") tokenizer = AutoTokenizer.from_pretrained(model_id) @@ -637,12 +643,11 @@ def test_dpo_lora_save(self): @require_peft @require_torch_gpu_if_bnb_not_multi_backend_enabled - def test_dpo_lora_bf16_autocast_llama(self): + def test_dpo_lora_bf16_autocast_llama(self, model_id): # Note this test only works on compute capability > 7 GPU devices from peft import LoraConfig from transformers import BitsAndBytesConfig - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" tokenizer = AutoTokenizer.from_pretrained(model_id) lora_config = LoraConfig( @@ -778,10 +783,9 @@ def test_dpo_lora_bf16_autocast(self, loss_type, pre_compute, gen_during_eval): trainer.save_model() @require_peft - def test_dpo_lora_tags(self): + def test_dpo_lora_tags(self, model_id): from peft import LoraConfig - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" tokenizer = AutoTokenizer.from_pretrained(model_id) lora_config = LoraConfig( @@ -824,8 +828,7 @@ def test_dpo_lora_tags(self): assert tag in trainer.model.model_tags @require_peft - def test_dpo_tags(self): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + def test_dpo_tags(self, model_id): tokenizer = AutoTokenizer.from_pretrained(model_id) # lora model @@ -995,8 +998,7 @@ def test_dpo_trainer_dtype(self): train_dataset=dummy_dataset["train"], ) - def test_dpo_loss_alpha_div_f(self): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + def test_dpo_loss_alpha_div_f(self, model_id): tokenizer = AutoTokenizer.from_pretrained(model_id) # lora model @@ -1036,8 +1038,7 @@ def test_dpo_loss_alpha_div_f(self): ) assert torch.isfinite(losses).cpu().numpy().all() - def test_dpo_loss_js_div_f(self): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + def test_dpo_loss_js_div_f(self, model_id): tokenizer = 
AutoTokenizer.from_pretrained(model_id) # lora model @@ -1230,10 +1231,10 @@ def test_padding_free(self): if param.sum() != 0: # ignore 0 biases assert not torch.allclose(param, new_param, rtol=1e-12, atol=1e-12) - def test_compute_metrics(self): - model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5") - ref_model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5") - tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5") + def test_compute_metrics(self, model_id): + model = AutoModelForCausalLM.from_pretrained(model_id) + ref_model = AutoModelForCausalLM.from_pretrained(model_id) + tokenizer = AutoTokenizer.from_pretrained(model_id) tokenizer.pad_token = tokenizer.eos_token dummy_dataset = load_dataset("trl-internal-testing/zen", "standard_preference") @@ -1265,8 +1266,7 @@ def dummy_compute_metrics(*args, **kwargs): assert trainer.state.log_history[-2]["eval_test"] == 0.0 - def test_train_with_length_desensitization(self): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + def test_train_with_length_desensitization(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_preference", split="train") tokenizer = AutoTokenizer.from_pretrained(model_id) @@ -1379,8 +1379,7 @@ def test_dpo_trainer_with_liger(self, beta, loss_type): assert output is not None assert "loss" not in output.keys() - def test_train_with_iterable_dataset(self): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + def test_train_with_iterable_dataset(self, model_id): dataset = load_dataset( "trl-internal-testing/zen", "standard_preference", From f599123873b767436f0a8899a708d143604fe24b Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Thu, 23 Oct 2025 17:24:28 +0200 Subject: [PATCH 05/18] Use model_id fixture in ORPO tests --- tests/test_orpo_trainer.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/tests/test_orpo_trainer.py b/tests/test_orpo_trainer.py index f882cf756f8..48159662be5 100644 --- a/tests/test_orpo_trainer.py +++ b/tests/test_orpo_trainer.py @@ -23,6 +23,15 @@ class TestORPOTrainer(TrlTestCase): + @pytest.fixture( + scope="class", + params=[ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def model_id(self, request): + return request.param + def setup_method(self): self.model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" self.model = AutoModelForCausalLM.from_pretrained(self.model_id) @@ -144,9 +153,9 @@ def test_orpo_trainer_with_lora(self, config_name): if param.sum() != 0: # ignore 0 biases assert not torch.equal(param, new_param) - def test_compute_metrics(self): - model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5") - tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5") + def test_compute_metrics(self, model_id): + model = AutoModelForCausalLM.from_pretrained(model_id) + tokenizer = AutoTokenizer.from_pretrained(model_id) tokenizer.pad_token = tokenizer.eos_token dummy_dataset = load_dataset("trl-internal-testing/zen", "standard_preference") From c922f28a748a3551784cd4262f591e7e82f74d92 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Thu, 23 Oct 2025 17:28:10 +0200 Subject: [PATCH 06/18] Use model_id fixture in KTO tests --- tests/test_kto_trainer.py | 17 +++++++++++++---- 1 
file changed, 13 insertions(+), 4 deletions(-)
diff --git a/tests/test_kto_trainer.py b/tests/test_kto_trainer.py
index e551c0073cd..f58a02ef8d4 100644
--- a/tests/test_kto_trainer.py
+++ b/tests/test_kto_trainer.py
@@ -25,6 +25,15 @@ class TestKTOTrainer(TrlTestCase):
+    @pytest.fixture(
+        scope="class",
+        params=[
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
+    def model_id(self, request):
+        return request.param
+
     def setup_method(self):
         self.model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
         self.model = AutoModelForCausalLM.from_pretrained(self.model_id)
@@ -390,10 +399,10 @@ def test_kto_trainer_with_liger(self):
         if param.sum() != 0:
             assert not torch.equal(param, new_param)
-    def test_compute_metrics(self):
-        model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
-        ref_model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
-        tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
+    def test_compute_metrics(self, model_id):
+        model = AutoModelForCausalLM.from_pretrained(model_id)
+        ref_model = AutoModelForCausalLM.from_pretrained(model_id)
+        tokenizer = AutoTokenizer.from_pretrained(model_id)
         tokenizer.pad_token = tokenizer.eos_token
         dummy_dataset = load_dataset("trl-internal-testing/zen", "standard_unpaired_preference")

From 2e50260aab3505c339b2220f7f2f3f1ad9e198d7 Mon Sep 17 00:00:00 2001
From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com>
Date: Thu, 23 Oct 2025 17:30:22 +0200
Subject: [PATCH 07/18] Use model_id fixture in activation_offloading tests

---
 tests/test_activation_offloading.py | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/tests/test_activation_offloading.py b/tests/test_activation_offloading.py
index 12364c23d94..15cef523de8 100644
--- a/tests/test_activation_offloading.py
+++ b/tests/test_activation_offloading.py
@@ -11,8 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
-
+import pytest
 import torch
 from torch import nn
 from transformers import AutoModelForCausalLM
@@ -29,11 +28,19 @@ class TestActivationOffloading(TrlTestCase):
+    @pytest.fixture(
+        scope="class",
+        params=[
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
+    def model_id(self, request):
+        return request.param
+
     @require_torch_accelerator
     @require_peft
-    def test_offloading_with_peft_models(self) -> None:
+    def test_offloading_with_peft_models(self, model_id) -> None:
         """Test that activation offloading works with PEFT models."""
-        model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
         model = AutoModelForCausalLM.from_pretrained(model_id).to(torch_device)
         peft_config = LoraConfig(
             lora_alpha=16,
@@ -77,8 +84,7 @@ def test_offloading_with_peft_models(self) -> None:
         )
     @require_torch_accelerator
-    def test_noop_manager_with_offloading(self):
-        model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
+    def test_noop_manager_with_offloading(self, model_id):
         model = AutoModelForCausalLM.from_pretrained(model_id).to(torch_device)
         inp = torch.randint(0, 100, (2, 10), device=torch_device)
@@ -124,9 +130,8 @@ def test_min_offload_size(self):
     # that the logic handles both offloaded and non-offloaded tensors
     @require_torch_accelerator
-    def test_real_hf_model(self):
+    def test_real_hf_model(self, model_id):
         """Test with an actual HuggingFace model"""
-        model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
         model = AutoModelForCausalLM.from_pretrained(model_id).to(torch_device)
         # Create small input

From 29d25f91be163fc087711901ab332f9b96f45fb1 Mon Sep 17 00:00:00 2001
From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com>
Date: Thu, 23 Oct 2025 17:34:08 +0200
Subject: [PATCH 08/18] Refactor model_id in callbacks tests

---
 tests/test_callbacks.py | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/tests/test_callbacks.py b/tests/test_callbacks.py
index 811bcf79f37..986f95883a2 100644
--- a/tests/test_callbacks.py
+++ b/tests/test_callbacks.py
@@ -66,9 +66,10 @@ def __init__(self, model, ref_model, args, train_dataset, eval_dataset, processi
 class TestWinRateCallback(TrlTestCase):
     def setup_method(self):
-        self.model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
-        self.ref_model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
-        self.tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
+        model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
+        self.model = AutoModelForCausalLM.from_pretrained(model_id)
+        self.ref_model = AutoModelForCausalLM.from_pretrained(model_id)
+        self.tokenizer = AutoTokenizer.from_pretrained(model_id)
         self.tokenizer.pad_token = self.tokenizer.eos_token
         dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only")
         dataset["train"] = dataset["train"].select(range(8))
@@ -224,8 +225,9 @@ def test_lora(self):
 class TestLogCompletionsCallback(TrlTestCase):
     def setup_method(self):
-        self.model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
-        self.tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
+        model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
+        self.model = AutoModelForCausalLM.from_pretrained(model_id)
+        self.tokenizer = AutoTokenizer.from_pretrained(model_id)
         self.tokenizer.pad_token = self.tokenizer.eos_token
         dataset =
load_dataset("trl-internal-testing/zen", "standard_prompt_only") dataset["train"] = dataset["train"].select(range(8)) @@ -318,8 +320,9 @@ def test_basic_comet(self): @require_mergekit class TestMergeModelCallback(TrlTestCase): def setup_method(self): - self.model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5") - self.tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5") + model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + self.model = AutoModelForCausalLM.from_pretrained(model_id) + self.tokenizer = AutoTokenizer.from_pretrained(model_id) self.dataset = load_dataset("trl-internal-testing/zen", "standard_preference", split="train") def test_callback(self): @@ -374,8 +377,9 @@ def test_every_checkpoint(self): class TestBEMACallback(TrlTestCase): def setup_method(self): - self.model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5") - self.tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5") + model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + self.model = AutoModelForCausalLM.from_pretrained(model_id) + self.tokenizer = AutoTokenizer.from_pretrained(model_id) self.tokenizer.pad_token = self.tokenizer.eos_token dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling") From 6c3f5225be06adf5a0a08247100370802adaf75c Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Thu, 23 Oct 2025 17:37:12 +0200 Subject: [PATCH 09/18] Refactor model_id in dataset_formatting tests --- tests/test_dataset_formatting.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/test_dataset_formatting.py b/tests/test_dataset_formatting.py index a9817d8455f..33dfe0e3e01 100644 --- a/tests/test_dataset_formatting.py +++ b/tests/test_dataset_formatting.py @@ -121,8 +121,9 @@ def test_get_formatting_func_from_dataset_with_unknown_format(self): @pytest.mark.filterwarnings("ignore::FutureWarning") class TestSetupChatFormat(TrlTestCase): def setup_method(self): - self.tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5") - self.model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5") + model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + self.tokenizer = AutoTokenizer.from_pretrained(model_id) + self.model = AutoModelForCausalLM.from_pretrained(model_id) # remove built-in chat_template to simulate a model having no chat_template self.tokenizer.chat_template = None From 88dffd21f0045791796e3c9db05570b01176c5b7 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Thu, 23 Oct 2025 17:39:40 +0200 Subject: [PATCH 10/18] Use model_id fixture in Online DPO tests --- tests/test_online_dpo_trainer.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/tests/test_online_dpo_trainer.py b/tests/test_online_dpo_trainer.py index b5c9a1d9ee1..ec190d02afc 100644 --- a/tests/test_online_dpo_trainer.py +++ b/tests/test_online_dpo_trainer.py @@ -42,6 +42,15 @@ class TestOnlineDPOTrainer(TrlTestCase): + @pytest.fixture( + scope="class", + params=[ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def model_id(self, request): + return request.param + def setup_method(self): self.model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" self.model = 
AutoModelForCausalLM.from_pretrained(self.model_id) @@ -80,7 +89,7 @@ def test_training(self, config_name): # Check if training loss is available assert "train_loss" in trainer.state.log_history[-1] - def test_training_model_str(self): + def test_training_model_str(self, model_id): training_args = OnlineDPOConfig( output_dir=self.tmp_dir, per_device_train_batch_size=2, @@ -92,7 +101,7 @@ def test_training_model_str(self): dummy_dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only") trainer = OnlineDPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs=self.reward_model, args=training_args, train_dataset=dummy_dataset["train"], From e08bdb27b339776f027147d03fb1516ca7972759 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Thu, 23 Oct 2025 18:00:04 +0200 Subject: [PATCH 11/18] Use model_id fixture in RLOO tests --- tests/test_rloo_trainer.py | 129 ++++++++++++++++++++----------------- 1 file changed, 69 insertions(+), 60 deletions(-) diff --git a/tests/test_rloo_trainer.py b/tests/test_rloo_trainer.py index 476fcfb0e72..ea874d77959 100644 --- a/tests/test_rloo_trainer.py +++ b/tests/test_rloo_trainer.py @@ -35,17 +35,26 @@ class TestRLOOTrainer(TrlTestCase): - def test_init_minimal(self): + @pytest.fixture( + scope="class", + params=[ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def model_id(self, request): + return request.param + + def test_init_minimal(self, model_id): # Test that RLOOTrainer can be instantiated with only model, reward_model and train_dataset dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") RLOOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", train_dataset=dataset, ) @pytest.mark.parametrize("config_name", ["standard_prompt_only", "conversational_prompt_only"]) - def test_training(self, config_name): + def test_training(self, config_name, model_id): dataset = load_dataset("trl-internal-testing/zen", config_name, split="train") training_args = RLOOConfig( @@ -57,7 +66,7 @@ def test_training(self, config_name): report_to="none", ) trainer = RLOOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -74,7 +83,7 @@ def test_training(self, config_name): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." 
- def test_training_with_eval(self): + def test_training_with_eval(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only") training_args = RLOOConfig( @@ -88,7 +97,7 @@ def test_training_with_eval(self): report_to="none", ) trainer = RLOOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset["train"], @@ -97,7 +106,7 @@ def test_training_with_eval(self): trainer.train() - def test_training_multiple_iterations(self): + def test_training_multiple_iterations(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") training_args = RLOOConfig( @@ -110,7 +119,7 @@ def test_training_multiple_iterations(self): report_to="none", ) trainer = RLOOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -128,8 +137,8 @@ def test_training_multiple_iterations(self): assert not torch.equal(param, new_param), f"Parameter {n} has not changed." @require_peft - def test_training_peft(self): - model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5") + def test_training_peft(self, model_id): + model = AutoModelForCausalLM.from_pretrained(model_id) base_param_names = [f"base_model.model.{n}" for n, _ in model.named_parameters()] dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -164,12 +173,12 @@ def test_training_peft(self): assert not torch.allclose(param, new_param), f"Parameter {n} has not changed." @require_peft - def test_training_peft_with_gradient_checkpointing(self): + def test_training_peft_with_gradient_checkpointing(self, model_id): """Test that training works with PEFT and gradient checkpointing enabled.""" dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") model = AutoModelForCausalLM.from_pretrained( - "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model_id, dtype=torch.float32, # Use float32 for testing to avoid precision issues ) @@ -212,7 +221,7 @@ def test_training_peft_with_gradient_checkpointing(self): else: # Base model parameters should not change assert torch.equal(param, new_param), f"Base parameter {n} has changed." - def test_training_different_reward_model(self): + def test_training_different_reward_model(self, model_id): # Use a reward model different from the model: different chat template, tokenization, etc. dataset = load_dataset("trl-internal-testing/zen", "conversational_prompt_only", split="train") reward_model_id = "trl-internal-testing/tiny-LlamaForSequenceClassification-3.2" @@ -233,7 +242,7 @@ def test_training_different_reward_model(self): report_to="none", ) trainer = RLOOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs=reward_model, args=training_args, train_dataset=dataset, @@ -251,7 +260,7 @@ def test_training_different_reward_model(self): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." 
- def test_training_reward_func_standard(self): + def test_training_reward_func_standard(self, model_id): # Test if trainer can handle reward function with standard format dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -268,7 +277,7 @@ def reward_func(completions, **kwargs): report_to="none", ) trainer = RLOOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs=reward_func, args=training_args, train_dataset=dataset, @@ -285,7 +294,7 @@ def reward_func(completions, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." - def test_training_reward_func_conversational(self): + def test_training_reward_func_conversational(self, model_id): # Test if trainer can handle reward function with conversational format dataset = load_dataset("trl-internal-testing/zen", "conversational_prompt_only", split="train") @@ -303,7 +312,7 @@ def reward_func(completions, **kwargs): report_to="none", ) trainer = RLOOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs=reward_func, args=training_args, train_dataset=dataset, @@ -320,7 +329,7 @@ def reward_func(completions, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." - def test_training_multiple_reward_funcs(self): + def test_training_multiple_reward_funcs(self, model_id): # Test that RLOOTrainer can be instantiated with multiple reward functions dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -341,7 +350,7 @@ def reward_func2(completions, **kwargs): report_to="none", ) trainer = RLOOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs=[reward_func1, reward_func2], args=training_args, train_dataset=dataset, @@ -358,7 +367,7 @@ def reward_func2(completions, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." - def test_training_multiple_reward_funcs_with_None_output(self): + def test_training_multiple_reward_funcs_with_None_output(self, model_id): """Test that a valid math reward function is processed correctly while the code reward function returns None.""" dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -380,7 +389,7 @@ def non_applicable_reward_func(completions, **kwargs): ) trainer = RLOOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs=[ applicable_reward_func, non_applicable_reward_func, @@ -402,7 +411,7 @@ def non_applicable_reward_func(completions, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." 
- def test_training_multiple_reward_funcs_with_weights(self): + def test_training_multiple_reward_funcs_with_weights(self, model_id): """Test that RLOOTrainer can handle multiple reward functions with weights.""" dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -424,7 +433,7 @@ def reward_func2(completions, **kwargs): reward_weights=[0.7, 0.3], # weight of reward_func1 and reward_func2 respectively ) trainer = RLOOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs=[reward_func1, reward_func2], args=training_args, train_dataset=dataset, @@ -446,7 +455,7 @@ def reward_func2(completions, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." - def test_training_multiple_mixed_reward_funcs(self): + def test_training_multiple_mixed_reward_funcs(self, model_id): # Test if the trainer can handle a mix of reward functions and reward models dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -463,7 +472,7 @@ def reward_func(completions, **kwargs): report_to="none", ) trainer = RLOOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs=[reward_func, "trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5"], args=training_args, train_dataset=dataset, @@ -480,7 +489,7 @@ def reward_func(completions, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." - def test_training_reward_func_additional_column(self): + def test_training_reward_func_additional_column(self, model_id): # Test if trainer can handle reward function that rely on additional columns in the dataset dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -501,7 +510,7 @@ def reward_func(completions, some_values, **kwargs): report_to="none", ) trainer = RLOOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs=reward_func, args=training_args, train_dataset=dataset, @@ -518,7 +527,7 @@ def reward_func(completions, some_values, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." - def test_training_with_sync_ref_model(self): + def test_training_with_sync_ref_model(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") training_args = RLOOConfig( @@ -532,7 +541,7 @@ def test_training_with_sync_ref_model(self): report_to="none", ) trainer = RLOOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -549,7 +558,7 @@ def test_training_with_sync_ref_model(self): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." 
- def test_training_beta_zero(self): + def test_training_beta_zero(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") training_args = RLOOConfig( output_dir=self.tmp_dir, @@ -561,7 +570,7 @@ def test_training_beta_zero(self): report_to="none", ) trainer = RLOOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -658,7 +667,7 @@ def test_training_vllm_guided_decoding(self): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." - def test_training_with_additional_generation_kwargs(self): + def test_training_with_additional_generation_kwargs(self, model_id): """Test that training works with additional generation kwargs.""" dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -676,7 +685,7 @@ def test_training_with_additional_generation_kwargs(self): ) trainer = RLOOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -731,7 +740,7 @@ def test_training_vllm_with_additional_generation_kwargs(self): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." - def test_training_with_normalized_advantages(self): + def test_training_with_normalized_advantages(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") training_args = RLOOConfig( @@ -744,7 +753,7 @@ def test_training_with_normalized_advantages(self): report_to="none", ) trainer = RLOOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -761,7 +770,7 @@ def test_training_with_normalized_advantages(self): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." - def test_training_with_clipped_rewards(self): + def test_training_with_clipped_rewards(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") training_args = RLOOConfig( @@ -774,7 +783,7 @@ def test_training_with_clipped_rewards(self): report_to="none", ) trainer = RLOOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -792,7 +801,7 @@ def test_training_with_clipped_rewards(self): assert not torch.equal(param, new_param), f"Parameter {n} has not changed." 
@patch("transformers.generation.utils.GenerationMixin.generate") - def test_training_with_mask_truncated_completions(self, mock_generate): + def test_training_with_mask_truncated_completions(self, mock_generate, model_id): """Test that training works with mask_truncated_completions=True parameter.""" # We mock the generate method because the model's random weights make it extremely unlikely to produce a @@ -824,7 +833,7 @@ def fake_generate(input_ids, **kwargs): report_to="none", ) trainer = RLOOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -841,7 +850,7 @@ def fake_generate(input_ids, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." - def test_training_with_mask_truncated_completions_all_masked(self): + def test_training_with_mask_truncated_completions_all_masked(self, model_id): """ Test that when all generated completions are truncated (i.e., none contain an EOS token), and mask_truncated_completions=True, the model receives no effective learning signal and therefore does not update @@ -862,7 +871,7 @@ def test_training_with_mask_truncated_completions_all_masked(self): report_to="none", ) trainer = RLOOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -879,7 +888,7 @@ def test_training_with_mask_truncated_completions_all_masked(self): new_param = trainer.model.get_parameter(n) assert torch.equal(param, new_param), f"Parameter {n} has changed." - def test_warning_raised_all_rewards_none(self, caplog): + def test_warning_raised_all_rewards_none(self, model_id, caplog): """Test that a proper warning is raised when all rewards are None.""" dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -896,7 +905,7 @@ def always_none_reward_func(completions, **kwargs): report_to="none", ) trainer = RLOOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs=always_none_reward_func, args=training_args, train_dataset=dataset, @@ -908,7 +917,7 @@ def always_none_reward_func(completions, **kwargs): expected_warning = "All reward functions returned None for the following kwargs:" assert expected_warning in caplog.text - def test_training_num_generations_larger_than_batch_size(self): + def test_training_num_generations_larger_than_batch_size(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") training_args = RLOOConfig( @@ -921,7 +930,7 @@ def test_training_num_generations_larger_than_batch_size(self): report_to="none", ) trainer = RLOOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -938,7 +947,7 @@ def test_training_num_generations_larger_than_batch_size(self): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." 
- def test_training_multiple_dataloader_workers(self): + def test_training_multiple_dataloader_workers(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") training_args = RLOOConfig( @@ -951,7 +960,7 @@ def test_training_multiple_dataloader_workers(self): report_to="none", ) trainer = RLOOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -968,7 +977,7 @@ def test_training_multiple_dataloader_workers(self): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." - def test_training_with_generation_kwargs(self): + def test_training_with_generation_kwargs(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") training_args = RLOOConfig( @@ -981,7 +990,7 @@ def test_training_with_generation_kwargs(self): report_to="none", ) trainer = RLOOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -998,7 +1007,7 @@ def test_training_with_generation_kwargs(self): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." - def test_training_with_reward_func_accessing_trainer_state(self): + def test_training_with_reward_func_accessing_trainer_state(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") def reward_func(completions, **kwargs): @@ -1016,14 +1025,14 @@ def reward_func(completions, **kwargs): report_to="none", ) trainer = RLOOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs=reward_func, args=training_args, train_dataset=dataset, ) trainer.train() - def test_prepare_input_called_with_correct_data(self): + def test_prepare_input_called_with_correct_data(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") training_args = RLOOConfig( output_dir=self.tmp_dir, @@ -1039,7 +1048,7 @@ def test_prepare_input_called_with_correct_data(self): report_to="none", ) trainer = RLOOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -1291,7 +1300,7 @@ def reward_func(completions, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." 
- def test_mismatched_reward_processing_classes_length(self): + def test_mismatched_reward_processing_classes_length(self, model_id): """Test that mismatched length between reward_funcs and reward_processing_classes raises error.""" dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -1310,14 +1319,14 @@ def test_mismatched_reward_processing_classes_length(self): with pytest.raises(ValueError, match="must match"): RLOOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs=reward_models, reward_processing_classes=single_processing_class, # only one, but need two args=training_args, train_dataset=dataset, ) - def test_correct_reward_processing_classes_list(self): + def test_correct_reward_processing_classes_list(self, model_id): """Test that correct list of reward_processing_classes works properly.""" dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -1339,7 +1348,7 @@ def test_correct_reward_processing_classes_list(self): correct_processing_classes = [processing_class1, processing_class2] trainer = RLOOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs=reward_models, reward_processing_classes=correct_processing_classes, args=training_args, @@ -1348,7 +1357,7 @@ def test_correct_reward_processing_classes_list(self): assert len(trainer.reward_processing_classes) == len(reward_models) - def test_single_reward_model_with_single_processing_class(self): + def test_single_reward_model_with_single_processing_class(self, model_id): """Test that single reward model with single processing class works.""" dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -1363,7 +1372,7 @@ def test_single_reward_model_with_single_processing_class(self): training_args = RLOOConfig(output_dir=self.tmp_dir, report_to="none") trainer = RLOOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs=reward_model, reward_processing_classes=single_processing_class, # single object for single reward model args=training_args, From f11a09cfeb922551b41ac4fb3a7caf063805de6d Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Thu, 23 Oct 2025 18:07:49 +0200 Subject: [PATCH 12/18] Use model_id fixture in BCO tests --- tests/experimental/test_bco_trainer.py | 42 ++++++++++++-------------- 1 file changed, 20 insertions(+), 22 deletions(-) diff --git a/tests/experimental/test_bco_trainer.py b/tests/experimental/test_bco_trainer.py index 9e70fbac075..4738361ccd8 100644 --- a/tests/experimental/test_bco_trainer.py +++ b/tests/experimental/test_bco_trainer.py @@ -33,6 +33,15 @@ @pytest.mark.low_priority class TestBCOTrainer(TrlTestCase): + @pytest.fixture( + scope="class", + params=[ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def model_id(self, request): + return request.param + @pytest.mark.parametrize( "config_name", [ @@ -45,8 +54,7 @@ class TestBCOTrainer(TrlTestCase): ], ) @require_sklearn - def test_train(self, config_name): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + def test_train(self, config_name, model_id): model = AutoModelForCausalLM.from_pretrained(model_id) ref_model = AutoModelForCausalLM.from_pretrained(model_id) tokenizer = AutoTokenizer.from_pretrained(model_id) @@ -81,8 +89,7 @@ def test_train(self, config_name): assert not torch.equal(param.cpu(), new_param.cpu()) 
@require_sklearn - def test_train_with_precompute(self): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + def test_train_with_precompute(self, model_id): model = AutoModelForCausalLM.from_pretrained(model_id) ref_model = AutoModelForCausalLM.from_pretrained(model_id) tokenizer = AutoTokenizer.from_pretrained(model_id) @@ -118,8 +125,7 @@ def test_train_with_precompute(self): assert not torch.equal(param.cpu(), new_param.cpu()) @require_sklearn - def test_train_eval(self): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + def test_train_eval(self, model_id): model = AutoModelForCausalLM.from_pretrained(model_id) ref_model = AutoModelForCausalLM.from_pretrained(model_id) tokenizer = AutoTokenizer.from_pretrained(model_id) @@ -146,8 +152,7 @@ def test_train_eval(self): trainer.train() @require_sklearn - def test_init_with_ref_model_is_model(self): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + def test_init_with_ref_model_is_model(self, model_id): model = AutoModelForCausalLM.from_pretrained(model_id) tokenizer = AutoTokenizer.from_pretrained(model_id) @@ -169,8 +174,7 @@ def test_init_with_ref_model_is_model(self): ) @require_sklearn - def test_tokenize_and_process_tokens(self): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + def test_tokenize_and_process_tokens(self, model_id): model = AutoModelForCausalLM.from_pretrained(model_id) ref_model = AutoModelForCausalLM.from_pretrained(model_id) tokenizer = AutoTokenizer.from_pretrained(model_id) @@ -225,8 +229,7 @@ def test_tokenize_and_process_tokens(self): assert processed_dataset["completion_labels"][0] == [-100, -100, -100, -100, 27261, 13, 151645] @require_sklearn - def test_train_without_providing_ref_model(self): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + def test_train_without_providing_ref_model(self, model_id): model = AutoModelForCausalLM.from_pretrained(model_id) tokenizer = AutoTokenizer.from_pretrained(model_id) @@ -259,8 +262,7 @@ def test_train_without_providing_ref_model(self): assert not torch.equal(param.cpu(), new_param.cpu()) @require_sklearn - def test_train_udm(self): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + def test_train_udm(self, model_id): model = AutoModelForCausalLM.from_pretrained(model_id) tokenizer = AutoTokenizer.from_pretrained(model_id) @@ -309,8 +311,7 @@ def embed_prompt(input_ids, attention_mask, model): @require_sklearn @require_peft - def test_train_without_providing_ref_model_with_lora(self): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + def test_train_without_providing_ref_model_with_lora(self, model_id): model = AutoModelForCausalLM.from_pretrained(model_id) lora_config = LoraConfig(r=16, lora_alpha=32, lora_dropout=0.05, task_type="CAUSAL_LM") tokenizer = AutoTokenizer.from_pretrained(model_id) @@ -347,8 +348,7 @@ def test_train_without_providing_ref_model_with_lora(self): @require_sklearn @require_no_wandb - def test_generate_during_eval_no_wandb(self): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + def test_generate_during_eval_no_wandb(self, model_id): model = AutoModelForCausalLM.from_pretrained(model_id) tokenizer = AutoTokenizer.from_pretrained(model_id) @@ -378,8 +378,7 @@ def test_generate_during_eval_no_wandb(self): @require_sklearn @require_peft - def test_lora_train_and_save(self): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + def test_lora_train_and_save(self, model_id): model = AutoModelForCausalLM.from_pretrained(model_id) 
lora_config = LoraConfig(r=16, lora_alpha=32, lora_dropout=0.05, task_type="CAUSAL_LM") tokenizer = AutoTokenizer.from_pretrained(model_id) @@ -410,8 +409,7 @@ def test_lora_train_and_save(self): AutoModelForCausalLM.from_pretrained(self.tmp_dir) @require_sklearn - def test_compute_metrics(self): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + def test_compute_metrics(self, model_id): model = AutoModelForCausalLM.from_pretrained(model_id) ref_model = AutoModelForCausalLM.from_pretrained(model_id) tokenizer = AutoTokenizer.from_pretrained(model_id) From 9827e99106cbc1841e774a466782a17f3fce77af Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Thu, 23 Oct 2025 18:13:16 +0200 Subject: [PATCH 13/18] Use model_id fixture in trainers args tests --- tests/test_trainers_args.py | 39 ++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/tests/test_trainers_args.py b/tests/test_trainers_args.py index 2005b54337c..384c746da5d 100644 --- a/tests/test_trainers_args.py +++ b/tests/test_trainers_args.py @@ -43,9 +43,17 @@ class TestTrainerArg(TrlTestCase): + @pytest.fixture( + scope="class", + params=[ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def model_id(self, request): + return request.param + @require_sklearn - def test_bco(self): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + def test_bco(self, model_id): tokenizer = AutoTokenizer.from_pretrained(model_id) dataset = load_dataset("trl-internal-testing/zen", "standard_unpaired_preference", split="train") training_args = BCOConfig( @@ -91,8 +99,7 @@ def test_bco(self): assert trainer.args.min_density_ratio == 0.2 assert trainer.args.max_density_ratio == 20.0 - def test_cpo(self): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + def test_cpo(self, model_id): tokenizer = AutoTokenizer.from_pretrained(model_id) dataset = load_dataset("trl-internal-testing/zen", "standard_preference", split="train") training_args = CPOConfig( @@ -132,8 +139,7 @@ def test_cpo(self): assert trainer.args.model_init_kwargs == {"trust_remote_code": True} assert trainer.args.dataset_num_proc == 4 - def test_dpo(self): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + def test_dpo(self, model_id): tokenizer = AutoTokenizer.from_pretrained(model_id) dataset = load_dataset("trl-internal-testing/zen", "standard_preference", split="train") training_args = DPOConfig( @@ -199,8 +205,7 @@ def test_dpo(self): assert trainer.args.rpo_alpha == 0.5 assert trainer.args.discopop_tau == 0.1 - def test_kto(self): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + def test_kto(self, model_id): tokenizer = AutoTokenizer.from_pretrained(model_id) dataset = load_dataset("trl-internal-testing/zen", "standard_unpaired_preference", split="train") training_args = KTOConfig( @@ -245,8 +250,7 @@ def test_kto(self): assert trainer.args.dataset_num_proc == 4 @pytest.mark.parametrize("mixtures_coef_list", [False, True]) - def test_nash_md(self, mixtures_coef_list): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + def test_nash_md(self, mixtures_coef_list, model_id): tokenizer = AutoTokenizer.from_pretrained(model_id) model = AutoModelForCausalLM.from_pretrained(model_id) ref_model = AutoModelForCausalLM.from_pretrained(model_id) @@ -267,8 +271,7 @@ def test_nash_md(self, mixtures_coef_list): assert trainer.args.mixture_coef == (0.5 if not mixtures_coef_list else [0.5, 0.6]) 
@pytest.mark.parametrize("beta_list", [False, True]) - def test_online_dpo(self, beta_list): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + def test_online_dpo(self, beta_list, model_id): tokenizer = AutoTokenizer.from_pretrained(model_id) model = AutoModelForCausalLM.from_pretrained(model_id) ref_model = AutoModelForCausalLM.from_pretrained(model_id) @@ -297,8 +300,7 @@ def test_online_dpo(self, beta_list): assert trainer.args.beta == (0.6 if not beta_list else [0.6, 0.7]) assert trainer.args.loss_type == "hinge" - def test_orpo(self): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + def test_orpo(self, model_id): tokenizer = AutoTokenizer.from_pretrained(model_id) dataset = load_dataset("trl-internal-testing/zen", "standard_preference", split="train") training_args = ORPOConfig( @@ -324,8 +326,7 @@ def test_orpo(self): assert not trainer.args.disable_dropout assert trainer.args.label_pad_token_id == -99 - def test_reward(self): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + def test_reward(self, model_id): tokenizer = AutoTokenizer.from_pretrained(model_id) model = AutoModelForCausalLM.from_pretrained(model_id) dataset = load_dataset("trl-internal-testing/zen", "standard_preference", split="train") @@ -345,8 +346,7 @@ def test_reward(self): assert trainer.args.dataset_num_proc == 4 assert trainer.args.center_rewards_coefficient == 0.1 - def test_sft(self): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + def test_sft(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling", split="train") training_args = SFTConfig( self.tmp_dir, @@ -371,8 +371,7 @@ def test_sft(self): assert trainer.args.eval_packing @pytest.mark.parametrize("alpha_list", [False, True]) - def test_xpo(self, alpha_list): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + def test_xpo(self, alpha_list, model_id): tokenizer = AutoTokenizer.from_pretrained(model_id) model = AutoModelForCausalLM.from_pretrained(model_id) ref_model = AutoModelForCausalLM.from_pretrained(model_id) From 224e38836e8f1018853bb73a6157fc9b52242c75 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Thu, 23 Oct 2025 18:17:39 +0200 Subject: [PATCH 14/18] Use model_id fixture in CLI tests --- tests/test_cli.py | 37 +++++++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/tests/test_cli.py b/tests/test_cli.py index 48087f5054c..c3f463add4a 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -30,17 +30,26 @@ "to fail on Python <3.10.", # let's say it's a known issue, but not expected to be fixed, because too niche ) class TestCLI(TrlTestCase): - def test_dpo(self): + @pytest.fixture( + scope="class", + params=[ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def model_id(self, request): + return request.param + + def test_dpo(self, model_id): from trl.cli import main - command = f"trl dpo --output_dir {self.tmp_dir} --model_name_or_path trl-internal-testing/tiny-Qwen2ForCausalLM-2.5 --dataset_name trl-internal-testing/zen --dataset_config standard_preference --report_to none" + command = f"trl dpo --output_dir {self.tmp_dir} --model_name_or_path {model_id} --dataset_name trl-internal-testing/zen --dataset_config standard_preference --report_to none" with patch("sys.argv", command.split(" ")): main() - def test_dpo_multiple_loss_types(self): + def test_dpo_multiple_loss_types(self, model_id): from trl.cli import 
main - command = f"trl dpo --output_dir {self.tmp_dir} --model_name_or_path trl-internal-testing/tiny-Qwen2ForCausalLM-2.5 --dataset_name trl-internal-testing/zen --dataset_config standard_preference --report_to none --loss_type sigmoid bco_pair --loss_weights 1.0 0.5" + command = f"trl dpo --output_dir {self.tmp_dir} --model_name_or_path {model_id} --dataset_name trl-internal-testing/zen --dataset_config standard_preference --report_to none --loss_type sigmoid bco_pair --loss_weights 1.0 0.5" with patch("sys.argv", command.split(" ")): main() @@ -53,17 +62,17 @@ def test_env(self, mock_stdout): main() assert "TRL version: " in mock_stdout.getvalue().strip() - def test_grpo(self): + def test_grpo(self, model_id): from trl.cli import main - command = f"trl grpo --output_dir {self.tmp_dir} --model_name_or_path trl-internal-testing/tiny-Qwen2ForCausalLM-2.5 --reward_model_name_or_path trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5 --dataset_name trl-internal-testing/zen --dataset_config standard_prompt_only --num_generations 4 --max_completion_length 32 --report_to none" + command = f"trl grpo --output_dir {self.tmp_dir} --model_name_or_path {model_id} --reward_model_name_or_path trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5 --dataset_name trl-internal-testing/zen --dataset_config standard_prompt_only --num_generations 4 --max_completion_length 32 --report_to none" with patch("sys.argv", command.split(" ")): main() - def test_kto(self): + def test_kto(self, model_id): from trl.cli import main - command = f"trl kto --output_dir {self.tmp_dir} --model_name_or_path trl-internal-testing/tiny-Qwen2ForCausalLM-2.5 --dataset_name trl-internal-testing/zen --dataset_config standard_unpaired_preference --report_to none" + command = f"trl kto --output_dir {self.tmp_dir} --model_name_or_path {model_id} --dataset_name trl-internal-testing/zen --dataset_config standard_unpaired_preference --report_to none" with patch("sys.argv", command.split(" ")): main() @@ -74,21 +83,21 @@ def test_reward(self): with patch("sys.argv", command.split(" ")): main() - def test_rloo(self): + def test_rloo(self, model_id): from trl.cli import main - command = f"trl rloo --output_dir {self.tmp_dir} --model_name_or_path trl-internal-testing/tiny-Qwen2ForCausalLM-2.5 --reward_model_name_or_path trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5 --dataset_name trl-internal-testing/zen --dataset_config standard_prompt_only --num_generations 2 --max_completion_length 32 --report_to none" + command = f"trl rloo --output_dir {self.tmp_dir} --model_name_or_path {model_id} --reward_model_name_or_path trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5 --dataset_name trl-internal-testing/zen --dataset_config standard_prompt_only --num_generations 2 --max_completion_length 32 --report_to none" with patch("sys.argv", command.split(" ")): main() - def test_sft(self): + def test_sft(self, model_id): from trl.cli import main - command = f"trl sft --output_dir {self.tmp_dir} --model_name_or_path trl-internal-testing/tiny-Qwen2ForCausalLM-2.5 --dataset_name trl-internal-testing/zen --dataset_config standard_language_modeling --report_to none" + command = f"trl sft --output_dir {self.tmp_dir} --model_name_or_path {model_id} --dataset_name trl-internal-testing/zen --dataset_config standard_language_modeling --report_to none" with patch("sys.argv", command.split(" ")): main() - def test_sft_config_file(self): + def test_sft_config_file(self, model_id): from trl.cli import main output_dir = 
os.path.join(self.tmp_dir, "output") @@ -96,7 +105,7 @@ def test_sft_config_file(self): # Create a temporary config file config_path = os.path.join(self.tmp_dir, "config.yaml") config_content = { - "model_name_or_path": "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + "model_name_or_path": model_id, "dataset_name": "trl-internal-testing/zen", "dataset_config": "standard_language_modeling", "report_to": "none", From 9ab06fdaef79d1cd3ef041460134c12e55d95269 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Thu, 30 Oct 2025 11:09:14 +0100 Subject: [PATCH 15/18] Revert "Use fixture instead" This reverts commit bc1211006a79708cb9b8b2764d96448ca0c2a28a. --- tests/test_sft_trainer.py | 171 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 162 insertions(+), 9 deletions(-) diff --git a/tests/test_sft_trainer.py b/tests/test_sft_trainer.py index cdbba837797..b7e64dd37e1 100644 --- a/tests/test_sft_trainer.py +++ b/tests/test_sft_trainer.py @@ -256,15 +256,6 @@ def test_multiple_examples(self): class TestSFTTrainer(TrlTestCase): - @pytest.fixture( - scope="class", - params=[ - "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", - ], - ) - def model_id(self, request): - return request.param - @pytest.mark.parametrize( "model_id", [ @@ -320,6 +311,12 @@ def test_train_gpt_oss(self): new_param = trainer.model.get_parameter(n) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_train_model(self, model_id): # Instantiate the model model = AutoModelForCausalLM.from_pretrained(model_id) @@ -345,6 +342,12 @@ def test_train_model(self, model_id): new_param = trainer.model.get_parameter(n) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_train_dft_loss(self, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling") @@ -409,6 +412,12 @@ def test_train_moe_model_with_aux_loss(self): new_param = trainer.model.get_parameter(n) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_train_with_formatting_func(self, model_id): # Dummy formatting function def formatting_prompts_func(example): @@ -441,6 +450,12 @@ def formatting_prompts_func(example): new_param = trainer.model.get_parameter(n) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_train_model_dtype(self, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling", split="train") @@ -474,6 +489,12 @@ def test_train_model_dtype(self, model_id): assert new_param.dtype == torch.float16 assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_peft def test_train_dense_with_peft_config_lora(self, model_id): # Get the base model parameter names @@ -510,6 +531,12 @@ def test_train_dense_with_peft_config_lora(self, model_id): elif "base_layer" not in n: # We expect the peft parameters to be different 
(except for the base layer) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @pytest.mark.parametrize( "peft_type", [ @@ -607,6 +634,12 @@ def test_train_moe_with_peft_config(self): elif "base_layer" not in n: # We expect the peft parameters to be different (except for the base layer) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_peft def test_train_peft_model(self, model_id): # Get the base model @@ -643,6 +676,12 @@ def test_train_peft_model(self, model_id): elif "base_layer" not in n: # We expect the peft parameters to be different (except for the base layer) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_peft def test_train_dense_with_peft_config_and_gradient_checkpointing(self, model_id): # Get the base model parameter names @@ -716,6 +755,12 @@ def test_train_moe_with_peft_config_and_gradient_checkpointing(self): elif "base_layer" not in n: # We expect the peft parameters to be different (except for the base layer) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_peft def test_train_with_peft_model_and_gradient_checkpointing(self, model_id): # Get the base model parameter names @@ -751,6 +796,12 @@ def test_train_with_peft_model_and_gradient_checkpointing(self, model_id): elif "base_layer" not in n: # We expect the peft parameters to be different (except for the base layer) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_liger_kernel def test_train_with_liger(self, model_id): # Get the dataset @@ -774,6 +825,12 @@ def test_train_with_liger(self, model_id): new_param = trainer.model.get_parameter(n) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_train_with_non_chatml_conversational_data(self, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/zen", "conversational_language_modeling", split="train") @@ -802,6 +859,12 @@ def rename_fields(example: list[dict]): new_param = trainer.model.get_parameter(n) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_train_with_pretokenized_data(self, model_id): # Get the dataset tokenizer = AutoTokenizer.from_pretrained(model_id) @@ -831,6 +894,12 @@ def tokenize_example(example): new_param = trainer.model.get_parameter(n) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_train_with_iterable_dataset(self, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling", split="train", streaming=True) @@ -853,6 +922,12 @@ def 
test_train_with_iterable_dataset(self, model_id): new_param = trainer.model.get_parameter(n) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_flash_attn def test_train_padding_free(self, model_id): # Get the dataset @@ -882,6 +957,12 @@ def test_train_padding_free(self, model_id): new_param = trainer.model.get_parameter(n) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @pytest.mark.parametrize("packing_strategy", ["bfd", "wrapped"]) @ignore_warnings(message="You are using packing, but the attention implementation is not.*", category=UserWarning) @ignore_warnings(message="Padding-free training is enabled, but the attention.*", category=UserWarning) @@ -909,6 +990,12 @@ def test_train_packing(self, packing_strategy, model_id): new_param = trainer.model.get_parameter(n) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @ignore_warnings(message="You are using packing, but the attention implementation is not.*", category=UserWarning) @ignore_warnings(message="Padding-free training is enabled, but the attention.*", category=UserWarning) def test_eval_packing(self, model_id): @@ -943,6 +1030,12 @@ def test_eval_packing(self, model_id): assert len(trainer.train_dataset["input_ids"]) == 3 # w/ this dataset, we end up with 46 seqs assert len(trainer.eval_dataset["input_ids"]) == 1 # w/ this dataset, we end up with 6 seqs + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @ignore_warnings(message="You are using packing, but the attention implementation is not.*", category=UserWarning) @ignore_warnings(message="Padding-free training is enabled, but the attention.*", category=UserWarning) def test_only_train_packing(self, model_id): @@ -978,6 +1071,12 @@ def test_only_train_packing(self, model_id): assert len(trainer.train_dataset["input_ids"]) == 3 # w/ this dataset, we end up with 46 seqs assert len(trainer.eval_dataset["input_ids"]) == 2 # w/ this dataset, we end up with 6 seqs + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_train_with_chat_template_kwargs(self, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling", split="train") @@ -1204,6 +1303,12 @@ def test_train_with_set_chat_template_from_path(self): original_template_content = f.read() assert template_content == original_template_content, "Chat template content does not match the original" + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_train_toolcall_data(self, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/toolcall", split="train") @@ -1226,6 +1331,12 @@ def test_train_toolcall_data(self, model_id): new_param = trainer.model.get_parameter(n) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_train_with_eval(self, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling") 
@@ -1245,6 +1356,12 @@ def test_train_with_eval(self, model_id): # Check that the eval loss is not None assert trainer.state.log_history[0]["eval_loss"] is not None + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_train_with_multiple_eval_dataset(self, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling") @@ -1264,6 +1381,12 @@ def test_train_with_multiple_eval_dataset(self, model_id): assert trainer.state.log_history[-3]["eval_data1_loss"] is not None assert trainer.state.log_history[-2]["eval_data2_loss"] is not None + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_train_with_gradient_checkpointing(self, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling", split="train") @@ -1286,6 +1409,12 @@ def test_train_with_gradient_checkpointing(self, model_id): new_param = trainer.model.get_parameter(n) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_tag_added(self, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling", split="train") @@ -1296,6 +1425,12 @@ def test_tag_added(self, model_id): for tag in ["sft", "trl"]: assert tag in trainer.model.model_tags + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_peft def test_tag_added_peft(self, model_id): # Get the dataset @@ -1550,6 +1685,12 @@ def test_train_vlm_text_only_data(self, model_id): else: assert not torch.allclose(param, new_param, rtol=1e-12, atol=1e-12), f"Param {n} is not updated" + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_peft def test_prompt_tuning(self, model_id): """Test that SFT works with Prompt Tuning.""" @@ -1582,6 +1723,12 @@ def test_prompt_tuning(self, model_id): else: raise ValueError(f"Unexpected parameter {n} in model: {trainer.model}") + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_peft @require_bitsandbytes def test_peft_model_with_quantization(self, model_id): @@ -1675,6 +1822,12 @@ def test_peft_model_with_quantization(self, model_id): "All original LoRA parameters should remain trainable after SFTTrainer initialization" ) + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_peft def test_prompt_tuning_peft_model(self, model_id): """Test that SFT works with Prompt Tuning and a pre-converted PeftModel""" From 23395efb79dce63aab8b1df3d363f07fd19659a2 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Thu, 30 Oct 2025 11:14:21 +0100 Subject: [PATCH 16/18] Use explicit tiny-Qwen2ForCausalLM-2.5 model_id param in experimental trainer args tests --- tests/experimental/test_trainers_args.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tests/experimental/test_trainers_args.py b/tests/experimental/test_trainers_args.py index bd86bb61b5d..c04b291bae6 100644 --- a/tests/experimental/test_trainers_args.py +++ b/tests/experimental/test_trainers_args.py @@ -12,6 +12,7 @@ # See the License for the specific language governing 
permissions and # limitations under the License. +import pytest from datasets import load_dataset from transformers import AutoTokenizer @@ -21,9 +22,14 @@ class TestTrainerArg(TrlTestCase): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_sklearn - def test_bco(self): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + def test_bco(self, model_id): tokenizer = AutoTokenizer.from_pretrained(model_id) dataset = load_dataset("trl-internal-testing/zen", "standard_unpaired_preference", split="train") training_args = BCOConfig( From 09347968dcf1ed9c917e4ad51b812a981f6165d3 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Thu, 30 Oct 2025 11:37:35 +0100 Subject: [PATCH 17/18] Replace model_id fixture with parameter --- tests/experimental/test_bco_trainer.py | 69 +++++++- tests/slow/test_grpo_slow.py | 12 +- tests/test_activation_offloading.py | 21 ++- tests/test_cli.py | 45 +++++- tests/test_dpo_trainer.py | 75 +++++++-- tests/test_grpo_trainer.py | 211 ++++++++++++++++++++++++- tests/test_kto_trainer.py | 15 +- tests/test_online_dpo_trainer.py | 15 +- tests/test_orpo_trainer.py | 15 +- tests/test_rloo_trainer.py | 183 ++++++++++++++++++++- tests/test_trainers_args.py | 57 ++++++- 11 files changed, 640 insertions(+), 78 deletions(-) diff --git a/tests/experimental/test_bco_trainer.py b/tests/experimental/test_bco_trainer.py index 4738361ccd8..ecbfdbb569e 100644 --- a/tests/experimental/test_bco_trainer.py +++ b/tests/experimental/test_bco_trainer.py @@ -33,15 +33,12 @@ @pytest.mark.low_priority class TestBCOTrainer(TrlTestCase): - @pytest.fixture( - scope="class", - params=[ + @pytest.mark.parametrize( + "model_id", + [ "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", ], ) - def model_id(self, request): - return request.param - @pytest.mark.parametrize( "config_name", [ @@ -88,6 +85,12 @@ def test_train(self, config_name, model_id): if param.sum() != 0: # ignore 0 biases assert not torch.equal(param.cpu(), new_param.cpu()) + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_sklearn def test_train_with_precompute(self, model_id): model = AutoModelForCausalLM.from_pretrained(model_id) @@ -124,6 +127,12 @@ def test_train_with_precompute(self, model_id): if param.sum() != 0: # ignore 0 biases assert not torch.equal(param.cpu(), new_param.cpu()) + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_sklearn def test_train_eval(self, model_id): model = AutoModelForCausalLM.from_pretrained(model_id) @@ -151,6 +160,12 @@ def test_train_eval(self, model_id): trainer.train() + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_sklearn def test_init_with_ref_model_is_model(self, model_id): model = AutoModelForCausalLM.from_pretrained(model_id) @@ -173,6 +188,12 @@ def test_init_with_ref_model_is_model(self, model_id): train_dataset=dataset, ) + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_sklearn def test_tokenize_and_process_tokens(self, model_id): model = AutoModelForCausalLM.from_pretrained(model_id) @@ -228,6 +249,12 @@ def test_tokenize_and_process_tokens(self, model_id): assert processed_dataset["completion_attention_mask"][0] == [1, 1, 1, 1, 1, 1, 1] assert processed_dataset["completion_labels"][0] == 
[-100, -100, -100, -100, 27261, 13, 151645] + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_sklearn def test_train_without_providing_ref_model(self, model_id): model = AutoModelForCausalLM.from_pretrained(model_id) @@ -261,6 +288,12 @@ def test_train_without_providing_ref_model(self, model_id): if param.sum() != 0: # ignore 0 biases assert not torch.equal(param.cpu(), new_param.cpu()) + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_sklearn def test_train_udm(self, model_id): model = AutoModelForCausalLM.from_pretrained(model_id) @@ -309,6 +342,12 @@ def embed_prompt(input_ids, attention_mask, model): if param.sum() != 0: # ignore 0 biases assert not torch.equal(param.cpu(), new_param.cpu()) + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_sklearn @require_peft def test_train_without_providing_ref_model_with_lora(self, model_id): @@ -346,6 +385,12 @@ def test_train_without_providing_ref_model_with_lora(self, model_id): if param.sum() != 0: # ignore 0 biases assert not torch.equal(param.cpu(), new_param.cpu()) + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_sklearn @require_no_wandb def test_generate_during_eval_no_wandb(self, model_id): @@ -376,6 +421,12 @@ def test_generate_during_eval_no_wandb(self, model_id): eval_dataset=dataset["test"], ) + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_sklearn @require_peft def test_lora_train_and_save(self, model_id): @@ -408,6 +459,12 @@ def test_lora_train_and_save(self, model_id): # assert that the model is loaded without giving OSError AutoModelForCausalLM.from_pretrained(self.tmp_dir) + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_sklearn def test_compute_metrics(self, model_id): model = AutoModelForCausalLM.from_pretrained(model_id) diff --git a/tests/slow/test_grpo_slow.py b/tests/slow/test_grpo_slow.py index bf63984d645..4f14f1646e8 100644 --- a/tests/slow/test_grpo_slow.py +++ b/tests/slow/test_grpo_slow.py @@ -341,10 +341,16 @@ def reward_func(prompts, completions, **kwargs): release_memory(model, trainer) + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_vllm @require_bitsandbytes @require_peft - def test_vlm_processor_vllm_colocate_mode(self): + def test_vlm_processor_vllm_colocate_mode(self, model_id): """ Test that VLM processors work with vLLM in colocate mode. 
@@ -423,9 +429,7 @@ def dummy_reward_func(completions, **kwargs): try: # Load model with quantization for memory efficiency model = AutoModelForCausalLM.from_pretrained( - "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", - quantization_config=quantization_config, - dtype=torch.bfloat16, + model_id, quantization_config=quantization_config, dtype=torch.bfloat16 ) trainer = GRPOTrainer( diff --git a/tests/test_activation_offloading.py b/tests/test_activation_offloading.py index 15cef523de8..6e8d8e2e937 100644 --- a/tests/test_activation_offloading.py +++ b/tests/test_activation_offloading.py @@ -28,15 +28,12 @@ class TestActivationOffloading(TrlTestCase): - @pytest.fixture( - scope="class", - params=[ + @pytest.mark.parametrize( + "model_id", + [ "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", ], ) - def model_id(self, request): - return request.param - @require_torch_accelerator @require_peft def test_offloading_with_peft_models(self, model_id) -> None: @@ -83,6 +80,12 @@ def test_offloading_with_peft_models(self, model_id) -> None: f"Gradient mismatch for {name_orig}" ) + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_torch_accelerator def test_noop_manager_with_offloading(self, model_id): model = AutoModelForCausalLM.from_pretrained(model_id).to(torch_device) @@ -129,6 +132,12 @@ def test_min_offload_size(self): # The test passes if no errors occur, as we're mainly testing # that the logic handles both offloaded and non-offloaded tensors + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_torch_accelerator def test_real_hf_model(self, model_id): """Test with an actual HuggingFace model""" diff --git a/tests/test_cli.py b/tests/test_cli.py index c3f463add4a..3c96e579e51 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -30,15 +30,12 @@ "to fail on Python <3.10.", # let's say it's a known issue, but not expected to be fixed, because too niche ) class TestCLI(TrlTestCase): - @pytest.fixture( - scope="class", - params=[ + @pytest.mark.parametrize( + "model_id", + [ "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", ], ) - def model_id(self, request): - return request.param - def test_dpo(self, model_id): from trl.cli import main @@ -46,6 +43,12 @@ def test_dpo(self, model_id): with patch("sys.argv", command.split(" ")): main() + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_dpo_multiple_loss_types(self, model_id): from trl.cli import main @@ -62,6 +65,12 @@ def test_env(self, mock_stdout): main() assert "TRL version: " in mock_stdout.getvalue().strip() + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_grpo(self, model_id): from trl.cli import main @@ -69,6 +78,12 @@ def test_grpo(self, model_id): with patch("sys.argv", command.split(" ")): main() + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_kto(self, model_id): from trl.cli import main @@ -83,6 +98,12 @@ def test_reward(self): with patch("sys.argv", command.split(" ")): main() + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_rloo(self, model_id): from trl.cli import main @@ -90,6 +111,12 @@ def test_rloo(self, model_id): with patch("sys.argv", command.split(" ")): main() + @pytest.mark.parametrize( + "model_id", + [ + 
"trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_sft(self, model_id): from trl.cli import main @@ -97,6 +124,12 @@ def test_sft(self, model_id): with patch("sys.argv", command.split(" ")): main() + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_sft_config_file(self, model_id): from trl.cli import main diff --git a/tests/test_dpo_trainer.py b/tests/test_dpo_trainer.py index 17ac7b224b8..223fb70bf4f 100644 --- a/tests/test_dpo_trainer.py +++ b/tests/test_dpo_trainer.py @@ -153,15 +153,6 @@ def test_tokenize_row_with_truncation_and_special_tokens(self): class TestDPOTrainer(TrlTestCase): - @pytest.fixture( - scope="class", - params=[ - "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", - ], - ) - def model_id(self, request): - return request.param - def setup_method(self): self.model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" self.model = AutoModelForCausalLM.from_pretrained(self.model_id) @@ -169,6 +160,12 @@ def setup_method(self): self.tokenizer = AutoTokenizer.from_pretrained(self.model_id) self.tokenizer.pad_token = self.tokenizer.eos_token + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_train(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_preference", split="train") tokenizer = AutoTokenizer.from_pretrained(model_id) @@ -197,6 +194,12 @@ def test_train(self, model_id): if param.sum() != 0: # ignore 0 biases assert not torch.allclose(param, new_param, rtol=1e-12, atol=1e-12) + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @pytest.mark.parametrize( "loss_type", [ @@ -308,6 +311,12 @@ def test_dpo_trainer_with_weighting(self): if param.sum() != 0: # ignore 0 biases assert not torch.allclose(param, new_param, rtol=1e-12, atol=1e-12) + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_train_with_multiple_loss_types(self, model_id): """ Tests multi-loss combinations, loss type inference, and weight configuration. 
MPO combines DPO (sigmoid), BCO @@ -641,6 +650,12 @@ def test_dpo_lora_save(self): except OSError: pytest.fail("Loading the saved peft adapter failed") + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_peft @require_torch_gpu_if_bnb_not_multi_backend_enabled def test_dpo_lora_bf16_autocast_llama(self, model_id): @@ -782,6 +797,12 @@ def test_dpo_lora_bf16_autocast(self, loss_type, pre_compute, gen_during_eval): # save peft adapter trainer.save_model() + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_peft def test_dpo_lora_tags(self, model_id): from peft import LoraConfig @@ -827,6 +848,12 @@ def test_dpo_lora_tags(self, model_id): for tag in ["dpo", "trl"]: assert tag in trainer.model.model_tags + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_peft def test_dpo_tags(self, model_id): tokenizer = AutoTokenizer.from_pretrained(model_id) @@ -998,6 +1025,12 @@ def test_dpo_trainer_dtype(self): train_dataset=dummy_dataset["train"], ) + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_dpo_loss_alpha_div_f(self, model_id): tokenizer = AutoTokenizer.from_pretrained(model_id) @@ -1038,6 +1071,12 @@ def test_dpo_loss_alpha_div_f(self, model_id): ) assert torch.isfinite(losses).cpu().numpy().all() + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_dpo_loss_js_div_f(self, model_id): tokenizer = AutoTokenizer.from_pretrained(model_id) @@ -1231,6 +1270,12 @@ def test_padding_free(self): if param.sum() != 0: # ignore 0 biases assert not torch.allclose(param, new_param, rtol=1e-12, atol=1e-12) + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_compute_metrics(self, model_id): model = AutoModelForCausalLM.from_pretrained(model_id) ref_model = AutoModelForCausalLM.from_pretrained(model_id) @@ -1266,6 +1311,12 @@ def dummy_compute_metrics(*args, **kwargs): assert trainer.state.log_history[-2]["eval_test"] == 0.0 + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_train_with_length_desensitization(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_preference", split="train") tokenizer = AutoTokenizer.from_pretrained(model_id) @@ -1379,6 +1430,12 @@ def test_dpo_trainer_with_liger(self, beta, loss_type): assert output is not None assert "loss" not in output.keys() + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_train_with_iterable_dataset(self, model_id): dataset = load_dataset( "trl-internal-testing/zen", diff --git a/tests/test_grpo_trainer.py b/tests/test_grpo_trainer.py index 88a2dac7b83..01e5d646872 100644 --- a/tests/test_grpo_trainer.py +++ b/tests/test_grpo_trainer.py @@ -108,15 +108,12 @@ def test_compute_entropy_all_masked(self): class TestGRPOTrainer(TrlTestCase): - @pytest.fixture( - scope="class", - params=[ + @pytest.mark.parametrize( + "model_id", + [ "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", ], ) - def model_id(self, request): - return request.param - def test_init_minimal(self, model_id): # Test that GRPOTrainer can be instantiated with only model, reward_model and train_dataset dataset = load_dataset("trl-internal-testing/zen", 
"standard_prompt_only", split="train") @@ -126,6 +123,12 @@ def test_init_minimal(self, model_id): train_dataset=dataset, ) + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @pytest.mark.parametrize("config_name", ["standard_prompt_only", "conversational_prompt_only"]) def test_training(self, config_name, model_id): dataset = load_dataset("trl-internal-testing/zen", config_name, split="train") @@ -156,6 +159,12 @@ def test_training(self, config_name, model_id): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @pytest.mark.parametrize("loss_type", ["bnpo", "dr_grpo", "dapo"]) def test_training_loss_types(self, loss_type, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -188,6 +197,12 @@ def test_training_loss_types(self, loss_type, model_id): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_training_with_eval(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only") @@ -211,6 +226,12 @@ def test_training_with_eval(self, model_id): trainer.train() + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_training_multiple_iterations(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -241,6 +262,12 @@ def test_training_multiple_iterations(self, model_id): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_peft def test_training_peft(self, model_id): model = AutoModelForCausalLM.from_pretrained(model_id) @@ -277,6 +304,12 @@ def test_training_peft(self, model_id): elif "base_layer" not in n: # We expect the peft params to be different (except for the base layer) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed." + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_peft def test_training_peft_with_gradient_checkpointing(self, model_id): """Test that training works with PEFT and gradient checkpointing enabled.""" @@ -326,6 +359,12 @@ def test_training_peft_with_gradient_checkpointing(self, model_id): else: # Base model parameters should not change assert torch.equal(param, new_param), f"Base parameter {n} has changed." + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_training_different_reward_model(self, model_id): # Use a reward model different from the model: different chat template, tokenization, etc. dataset = load_dataset("trl-internal-testing/zen", "conversational_prompt_only", split="train") @@ -365,6 +404,12 @@ def test_training_different_reward_model(self, model_id): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." 
+ @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_training_reward_func_standard(self, model_id): # Test if trainer can handle reward function with standard format dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -399,6 +444,12 @@ def reward_func(completions, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_training_reward_func_conversational(self, model_id): # Test if trainer can handle reward function with conversational format dataset = load_dataset("trl-internal-testing/zen", "conversational_prompt_only", split="train") @@ -434,6 +485,12 @@ def reward_func(completions, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_training_multiple_reward_funcs(self, model_id): # Test that GRPOTrainer can be instantiated with multiple reward functions dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -472,6 +529,12 @@ def reward_func2(completions, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_training_multiple_reward_funcs_with_None_output(self, model_id): """Test that a valid math reward function is processed correctly while the code reward function returns None.""" dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -516,6 +579,12 @@ def non_applicable_reward_func(completions, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_training_multiple_reward_funcs_with_weights(self, model_id): """Test that GRPOTrainer can handle multiple reward functions with weights.""" dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -560,6 +629,12 @@ def reward_func2(completions, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_training_multiple_mixed_reward_funcs(self, model_id): # Test if the trainer can handle a mix of reward functions and reward models dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -594,6 +669,12 @@ def reward_func(completions, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." 
+ @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_training_reward_func_additional_column(self, model_id): # Test if trainer can handle reward function that rely on additional columns in the dataset dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -632,6 +713,12 @@ def reward_func(completions, some_values, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_training_with_sync_ref_model(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -663,6 +750,12 @@ def test_training_with_sync_ref_model(self, model_id): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_training_beta_non_zero(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") training_args = GRPOConfig( @@ -692,6 +785,12 @@ def test_training_beta_non_zero(self, model_id): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_training_with_entropy_filter(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") training_args = GRPOConfig( @@ -836,6 +935,12 @@ def test_training_vllm_importance_sampling_correction(self): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_training_with_additional_generation_kwargs(self, model_id): """Test that training works with additional generation kwargs.""" dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -909,6 +1014,12 @@ def test_training_vllm_with_additional_generation_kwargs(self): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @pytest.mark.parametrize("scale_rewards", [False, "group", "batch", True, "none"]) def test_training_scale_rewards(self, scale_rewards, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -940,6 +1051,12 @@ def test_training_scale_rewards(self, scale_rewards, model_id): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @patch("transformers.generation.utils.GenerationMixin.generate") def test_training_with_mask_truncated_completions(self, mock_generate, model_id): """Test that training works with mask_truncated_completions=True parameter.""" @@ -990,6 +1107,12 @@ def fake_generate(input_ids, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." 
+ @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_training_with_mask_truncated_completions_all_masked(self, model_id): """ Test that when all generated completions are truncated (i.e., none contain an EOS token), and @@ -1028,6 +1151,12 @@ def test_training_with_mask_truncated_completions_all_masked(self, model_id): new_param = trainer.model.get_parameter(n) assert torch.equal(param, new_param), f"Parameter {n} has changed." + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_warning_raised_all_rewards_none(self, model_id, caplog): """Test that a proper warning is raised when all rewards are None.""" dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -1057,6 +1186,12 @@ def always_none_reward_func(completions, **kwargs): expected_warning = "All reward functions returned None for the following kwargs:" assert expected_warning in caplog.text + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_training_num_generations_larger_than_batch_size(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -1087,6 +1222,12 @@ def test_training_num_generations_larger_than_batch_size(self, model_id): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_training_delta_clipping(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -1117,6 +1258,12 @@ def test_training_delta_clipping(self, model_id): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_training_multiple_dataloader_workers(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -1147,6 +1294,12 @@ def test_training_multiple_dataloader_workers(self, model_id): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_training_with_generation_kwargs(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -1177,6 +1330,12 @@ def test_training_with_generation_kwargs(self, model_id): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." 
+ @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_training_with_reward_func_accessing_trainer_state(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -1202,6 +1361,12 @@ def reward_func(completions, **kwargs): ) trainer.train() + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_prepare_input_called_with_correct_data(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") training_args = GRPOConfig( @@ -1584,6 +1749,12 @@ def reward_func(completions, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_training_sequence_importance_sampling(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -1645,6 +1816,12 @@ def test_training_with_chat_template_kwargs(self): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_mismatched_reward_processing_classes_length(self, model_id): """Test that mismatched length between reward_funcs and reward_processing_classes raises error.""" dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -1671,6 +1848,12 @@ def test_mismatched_reward_processing_classes_length(self, model_id): train_dataset=dataset, ) + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_correct_reward_processing_classes_list(self, model_id): """Test that correct list of reward_processing_classes works properly.""" dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -1702,6 +1885,12 @@ def test_correct_reward_processing_classes_list(self, model_id): assert len(trainer.reward_processing_classes) == len(reward_models) + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_single_reward_model_with_single_processing_class(self, model_id): """Test that single reward model with single processing class works.""" dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -1729,7 +1918,13 @@ def test_single_reward_model_with_single_processing_class(self, model_id): class TestGSPOTokenTrainer(TrlTestCase): - def test_training(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_training(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") training_args = GRPOConfig( @@ -1743,7 +1938,7 @@ def test_training(self): report_to="none", ) trainer = GSPOTokenTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, diff --git a/tests/test_kto_trainer.py b/tests/test_kto_trainer.py index f58a02ef8d4..7bc6278765e 100644 --- a/tests/test_kto_trainer.py +++ b/tests/test_kto_trainer.py @@ -25,15 +25,6 @@ class TestKTOTrainer(TrlTestCase): - 
@pytest.fixture( - scope="class", - params=[ - "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", - ], - ) - def model_id(self, request): - return request.param - def setup_method(self): self.model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" self.model = AutoModelForCausalLM.from_pretrained(self.model_id) @@ -399,6 +390,12 @@ def test_kto_trainer_with_liger(self): if param.sum() != 0: assert not torch.equal(param, new_param) + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_compute_metrics(self, model_id): model = AutoModelForCausalLM.from_pretrained(model_id) ref_model = AutoModelForCausalLM.from_pretrained(model_id) diff --git a/tests/test_online_dpo_trainer.py b/tests/test_online_dpo_trainer.py index d2ea8690d05..a34aab347c7 100644 --- a/tests/test_online_dpo_trainer.py +++ b/tests/test_online_dpo_trainer.py @@ -42,15 +42,6 @@ class TestOnlineDPOTrainer(TrlTestCase): - @pytest.fixture( - scope="class", - params=[ - "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", - ], - ) - def model_id(self, request): - return request.param - def setup_method(self): self.model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" self.model = AutoModelForCausalLM.from_pretrained(self.model_id) @@ -89,6 +80,12 @@ def test_training(self, config_name): # Check if training loss is available assert "train_loss" in trainer.state.log_history[-1] + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_training_model_str(self, model_id): training_args = OnlineDPOConfig( output_dir=self.tmp_dir, diff --git a/tests/test_orpo_trainer.py b/tests/test_orpo_trainer.py index 48159662be5..6d3d4549c23 100644 --- a/tests/test_orpo_trainer.py +++ b/tests/test_orpo_trainer.py @@ -23,15 +23,6 @@ class TestORPOTrainer(TrlTestCase): - @pytest.fixture( - scope="class", - params=[ - "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", - ], - ) - def model_id(self, request): - return request.param - def setup_method(self): self.model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" self.model = AutoModelForCausalLM.from_pretrained(self.model_id) @@ -153,6 +144,12 @@ def test_orpo_trainer_with_lora(self, config_name): if param.sum() != 0: # ignore 0 biases assert not torch.equal(param, new_param) + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_compute_metrics(self, model_id): model = AutoModelForCausalLM.from_pretrained(model_id) tokenizer = AutoTokenizer.from_pretrained(model_id) diff --git a/tests/test_rloo_trainer.py b/tests/test_rloo_trainer.py index f146c2905c6..42492ab8546 100644 --- a/tests/test_rloo_trainer.py +++ b/tests/test_rloo_trainer.py @@ -35,15 +35,12 @@ class TestRLOOTrainer(TrlTestCase): - @pytest.fixture( - scope="class", - params=[ + @pytest.mark.parametrize( + "model_id", + [ "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", ], ) - def model_id(self, request): - return request.param - def test_init_minimal(self, model_id): # Test that RLOOTrainer can be instantiated with only model, reward_model and train_dataset dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -53,6 +50,12 @@ def test_init_minimal(self, model_id): train_dataset=dataset, ) + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @pytest.mark.parametrize("config_name", ["standard_prompt_only", "conversational_prompt_only"]) def test_training(self, 
config_name, model_id): dataset = load_dataset("trl-internal-testing/zen", config_name, split="train") @@ -83,6 +86,12 @@ def test_training(self, config_name, model_id): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_training_with_eval(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only") @@ -106,6 +115,12 @@ def test_training_with_eval(self, model_id): trainer.train() + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_training_multiple_iterations(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -136,6 +151,12 @@ def test_training_multiple_iterations(self, model_id): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_peft def test_training_peft(self, model_id): model = AutoModelForCausalLM.from_pretrained(model_id) @@ -172,6 +193,12 @@ def test_training_peft(self, model_id): elif "base_layer" not in n: # We expect the peft params to be different (except for the base layer) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed." + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_peft def test_training_peft_with_gradient_checkpointing(self, model_id): """Test that training works with PEFT and gradient checkpointing enabled.""" @@ -221,6 +248,12 @@ def test_training_peft_with_gradient_checkpointing(self, model_id): else: # Base model parameters should not change assert torch.equal(param, new_param), f"Base parameter {n} has changed." + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_training_different_reward_model(self, model_id): # Use a reward model different from the model: different chat template, tokenization, etc. dataset = load_dataset("trl-internal-testing/zen", "conversational_prompt_only", split="train") @@ -260,6 +293,12 @@ def test_training_different_reward_model(self, model_id): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_training_reward_func_standard(self, model_id): # Test if trainer can handle reward function with standard format dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -294,6 +333,12 @@ def reward_func(completions, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_training_reward_func_conversational(self, model_id): # Test if trainer can handle reward function with conversational format dataset = load_dataset("trl-internal-testing/zen", "conversational_prompt_only", split="train") @@ -329,6 +374,12 @@ def reward_func(completions, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." 
+ @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_training_multiple_reward_funcs(self, model_id): # Test that RLOOTrainer can be instantiated with multiple reward functions dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -367,6 +418,12 @@ def reward_func2(completions, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_training_multiple_reward_funcs_with_None_output(self, model_id): """Test that a valid math reward function is processed correctly while the code reward function returns None.""" dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -411,6 +468,12 @@ def non_applicable_reward_func(completions, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_training_multiple_reward_funcs_with_weights(self, model_id): """Test that RLOOTrainer can handle multiple reward functions with weights.""" dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -455,6 +518,12 @@ def reward_func2(completions, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_training_multiple_mixed_reward_funcs(self, model_id): # Test if the trainer can handle a mix of reward functions and reward models dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -489,6 +558,12 @@ def reward_func(completions, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_training_reward_func_additional_column(self, model_id): # Test if trainer can handle reward function that rely on additional columns in the dataset dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -527,6 +602,12 @@ def reward_func(completions, some_values, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_training_with_sync_ref_model(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -558,6 +639,12 @@ def test_training_with_sync_ref_model(self, model_id): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." 
+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
     def test_training_beta_zero(self, model_id):
         dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train")
         training_args = RLOOConfig(
@@ -667,6 +754,12 @@ def test_training_vllm_guided_decoding(self):
         new_param = trainer.model.get_parameter(n)
         assert not torch.equal(param, new_param), f"Parameter {n} has not changed."
 
+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
     def test_training_with_additional_generation_kwargs(self, model_id):
         """Test that training works with additional generation kwargs."""
         dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train")
@@ -740,6 +833,12 @@ def test_training_vllm_with_additional_generation_kwargs(self):
         new_param = trainer.model.get_parameter(n)
         assert not torch.equal(param, new_param), f"Parameter {n} has not changed."
 
+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
     def test_training_with_normalized_advantages(self, model_id):
         dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train")
 
@@ -770,6 +869,12 @@ def test_training_with_normalized_advantages(self, model_id):
         new_param = trainer.model.get_parameter(n)
         assert not torch.equal(param, new_param), f"Parameter {n} has not changed."
 
+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
     def test_training_with_clipped_rewards(self, model_id):
         dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train")
 
@@ -800,6 +905,12 @@ def test_training_with_clipped_rewards(self, model_id):
         new_param = trainer.model.get_parameter(n)
         assert not torch.equal(param, new_param), f"Parameter {n} has not changed."
 
+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
     @patch("transformers.generation.utils.GenerationMixin.generate")
     def test_training_with_mask_truncated_completions(self, mock_generate, model_id):
         """Test that training works with mask_truncated_completions=True parameter."""
@@ -850,6 +961,12 @@ def fake_generate(input_ids, **kwargs):
         new_param = trainer.model.get_parameter(n)
         assert not torch.equal(param, new_param), f"Parameter {n} has not changed."
 
+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
     def test_training_with_mask_truncated_completions_all_masked(self, model_id):
         """
         Test that when all generated completions are truncated (i.e., none contain an EOS token), and
@@ -888,6 +1005,12 @@ def test_training_with_mask_truncated_completions_all_masked(self, model_id):
         new_param = trainer.model.get_parameter(n)
         assert torch.equal(param, new_param), f"Parameter {n} has changed."
+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
     def test_warning_raised_all_rewards_none(self, model_id, caplog):
         """Test that a proper warning is raised when all rewards are None."""
         dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train")
@@ -917,6 +1040,12 @@ def always_none_reward_func(completions, **kwargs):
         expected_warning = "All reward functions returned None for the following kwargs:"
         assert expected_warning in caplog.text
 
+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
     def test_training_num_generations_larger_than_batch_size(self, model_id):
         dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train")
 
@@ -947,6 +1076,12 @@ def test_training_num_generations_larger_than_batch_size(self, model_id):
         new_param = trainer.model.get_parameter(n)
         assert not torch.equal(param, new_param), f"Parameter {n} has not changed."
 
+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
     def test_training_multiple_dataloader_workers(self, model_id):
         dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train")
 
@@ -977,6 +1112,12 @@ def test_training_multiple_dataloader_workers(self, model_id):
         new_param = trainer.model.get_parameter(n)
         assert not torch.equal(param, new_param), f"Parameter {n} has not changed."
 
+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
     def test_training_with_generation_kwargs(self, model_id):
         dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train")
 
@@ -1007,6 +1148,12 @@ def test_training_with_generation_kwargs(self, model_id):
         new_param = trainer.model.get_parameter(n)
         assert not torch.equal(param, new_param), f"Parameter {n} has not changed."
 
+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
     def test_training_with_reward_func_accessing_trainer_state(self, model_id):
         dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train")
 
@@ -1032,6 +1179,12 @@ def reward_func(completions, **kwargs):
         )
         trainer.train()
 
+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
     def test_prepare_input_called_with_correct_data(self, model_id):
         dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train")
         training_args = RLOOConfig(
@@ -1347,6 +1500,12 @@ def test_training_with_chat_template_kwargs(self):
         new_param = trainer.model.get_parameter(n)
         assert not torch.equal(param, new_param), f"Parameter {n} has not changed."
+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
     def test_mismatched_reward_processing_classes_length(self, model_id):
         """Test that mismatched length between reward_funcs and reward_processing_classes raises error."""
         dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train")
@@ -1373,6 +1532,12 @@ def test_mismatched_reward_processing_classes_length(self, model_id):
                 train_dataset=dataset,
             )
 
+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
     def test_correct_reward_processing_classes_list(self, model_id):
         """Test that correct list of reward_processing_classes works properly."""
         dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train")
@@ -1404,6 +1569,12 @@ def test_correct_reward_processing_classes_list(self, model_id):
 
         assert len(trainer.reward_processing_classes) == len(reward_models)
 
+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
     def test_single_reward_model_with_single_processing_class(self, model_id):
         """Test that single reward model with single processing class works."""
         dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train")
diff --git a/tests/test_trainers_args.py b/tests/test_trainers_args.py
index de35fb5d07e..edbcaf9c04c 100644
--- a/tests/test_trainers_args.py
+++ b/tests/test_trainers_args.py
@@ -41,15 +41,12 @@ class TestTrainerArg(TrlTestCase):
-    @pytest.fixture(
-        scope="class",
-        params=[
+    @pytest.mark.parametrize(
+        "model_id",
+        [
             "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
         ],
     )
-    def model_id(self, request):
-        return request.param
-
     def test_cpo(self, model_id):
         tokenizer = AutoTokenizer.from_pretrained(model_id)
         dataset = load_dataset("trl-internal-testing/zen", "standard_preference", split="train")
@@ -90,6 +87,12 @@ def test_cpo(self, model_id):
         assert trainer.args.model_init_kwargs == {"trust_remote_code": True}
         assert trainer.args.dataset_num_proc == 4
 
+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
     def test_dpo(self, model_id):
         tokenizer = AutoTokenizer.from_pretrained(model_id)
         dataset = load_dataset("trl-internal-testing/zen", "standard_preference", split="train")
@@ -156,6 +159,12 @@ def test_dpo(self, model_id):
         assert trainer.args.rpo_alpha == 0.5
         assert trainer.args.discopop_tau == 0.1
 
+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
     def test_kto(self, model_id):
         tokenizer = AutoTokenizer.from_pretrained(model_id)
         dataset = load_dataset("trl-internal-testing/zen", "standard_unpaired_preference", split="train")
@@ -200,6 +209,12 @@ def test_kto(self, model_id):
         assert trainer.args.ref_model_init_kwargs == {"trust_remote_code": True}
         assert trainer.args.dataset_num_proc == 4
 
+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
     @pytest.mark.parametrize("mixtures_coef_list", [False, True])
     def test_nash_md(self, mixtures_coef_list, model_id):
         tokenizer = AutoTokenizer.from_pretrained(model_id)
@@ -221,6 +236,12 @@ def test_nash_md(self, mixtures_coef_list, model_id):
         )
         assert trainer.args.mixture_coef == (0.5 if not mixtures_coef_list else [0.5, 0.6])
 
+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
     @pytest.mark.parametrize("beta_list", [False, True])
     def test_online_dpo(self, beta_list, model_id):
         tokenizer = AutoTokenizer.from_pretrained(model_id)
@@ -251,6 +272,12 @@ def test_online_dpo(self, beta_list, model_id):
         assert trainer.args.beta == (0.6 if not beta_list else [0.6, 0.7])
         assert trainer.args.loss_type == "hinge"
 
+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
     def test_orpo(self, model_id):
         tokenizer = AutoTokenizer.from_pretrained(model_id)
         dataset = load_dataset("trl-internal-testing/zen", "standard_preference", split="train")
@@ -277,6 +304,12 @@ def test_orpo(self, model_id):
         assert not trainer.args.disable_dropout
         assert trainer.args.label_pad_token_id == -99
 
+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
     def test_reward(self, model_id):
         tokenizer = AutoTokenizer.from_pretrained(model_id)
         model = AutoModelForCausalLM.from_pretrained(model_id)
@@ -297,6 +330,12 @@ def test_reward(self, model_id):
         assert trainer.args.dataset_num_proc == 4
         assert trainer.args.center_rewards_coefficient == 0.1
 
+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
     def test_sft(self, model_id):
         dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling", split="train")
         training_args = SFTConfig(
@@ -321,6 +360,12 @@ def test_sft(self, model_id):
         assert trainer.args.dataset_kwargs["append_concat_token"]
         assert trainer.args.eval_packing
 
+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
     @pytest.mark.parametrize("alpha_list", [False, True])
     def test_xpo(self, alpha_list, model_id):
         tokenizer = AutoTokenizer.from_pretrained(model_id)

From f6f5f827b0f0e1d8ae5ec9193a71d80b3a614302 Mon Sep 17 00:00:00 2001
From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com>
Date: Thu, 30 Oct 2025 11:42:03 +0100
Subject: [PATCH 18/18] Use model_id param in experimental GRPO with replay buffer tests

---
 .../test_grpo_with_replay_buffer_trainer.py | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/tests/experimental/test_grpo_with_replay_buffer_trainer.py b/tests/experimental/test_grpo_with_replay_buffer_trainer.py
index cad66f8034c..a1696a48267 100644
--- a/tests/experimental/test_grpo_with_replay_buffer_trainer.py
+++ b/tests/experimental/test_grpo_with_replay_buffer_trainer.py
@@ -83,11 +83,12 @@ def test_sample(self):
 @pytest.mark.low_priority
 class TestUpdateWithReplayBuffer:
     def setup_method(self):
+        model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
         config = GRPOWithReplayBufferConfig(
             replay_buffer_size=5,
         )
         self.trainer = GRPOWithReplayBufferTrainer(
-            model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+            model=model_id,
             reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5",
             args=config,
             train_dataset=None,
@@ -238,7 +239,13 @@ def test_update_with_inputs_different_seq_len(self):
 
 @pytest.mark.low_priority
 class TestGRPOWithReplayBufferTrainer(TrlTestCase):
-    def test_training_with_replay_buffer(self):
+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
+    def test_training_with_replay_buffer(self, model_id):
         dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train")
 
         # Guarantee that some rewards have 0 std
@@ -258,7 +265,13 @@ def custom_reward_func(completions, **kwargs):
             report_to="none",
         )
         trainer = GRPOTrainer(
-            model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+            model=model_id,
             reward_funcs=[custom_reward_func],
             args=training_args,
             train_dataset=dataset,