Changes from all commits (21 commits):
8126245  Use explicit tiny-Qwen2ForCausalLM-2.5 model_id param in SFT tests (albertvillanova, Oct 23, 2025)
bc12110  Use fixture instead (albertvillanova, Oct 23, 2025)
ba30f5a  Use model_id fixture in GRPO tests (albertvillanova, Oct 23, 2025)
919e8f0  Use model_id fixture in DPO tests (albertvillanova, Oct 23, 2025)
f599123  Use model_id fixture in ORPO tests (albertvillanova, Oct 23, 2025)
c922f28  Use model_id fixture in KTO tests (albertvillanova, Oct 23, 2025)
2e50260  Use model_id fixture in activation_offloading tests (albertvillanova, Oct 23, 2025)
29d25f9  Refactor model_id in callbacks tests (albertvillanova, Oct 23, 2025)
6c3f522  Refactor model_id in dataset_formatting tests (albertvillanova, Oct 23, 2025)
88dffd2  Use model_id fixture in Online DPO tests (albertvillanova, Oct 23, 2025)
e08bdb2  Use model_id fixture in RLOO tests (albertvillanova, Oct 23, 2025)
f11a09c  Use model_id fixture in BCO tests (albertvillanova, Oct 23, 2025)
9827e99  Use model_id fixture in trainers args tests (albertvillanova, Oct 23, 2025)
224e388  Use model_id fixture in CLI tests (albertvillanova, Oct 23, 2025)
2fee2fb  Merge branch 'main' into refactor-ci-explicit-model-id-tiny-qwen2-for… (qgallouedec, Oct 28, 2025)
8085674  Merge remote-tracking branch 'upstream/main' into refactor-ci-explici… (albertvillanova, Oct 30, 2025)
9ab06fd  Revert "Use fixture instead" (albertvillanova, Oct 30, 2025)
23395ef  Use explicit tiny-Qwen2ForCausalLM-2.5 model_id param in experimental… (albertvillanova, Oct 30, 2025)
0934796  Replace model_id fixture with parameter (albertvillanova, Oct 30, 2025)
f6f5f82  Use model_id param in experimental GRPO with replay buffer tests (albertvillanova, Oct 30, 2025)
57b6d09  Merge remote-tracking branch 'upstream/main' into refactor-ci-explici… (albertvillanova, Nov 4, 2025)
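The commit trail above amounts to one recurring refactor: the hardcoded model_id local (briefly a shared fixture, later reverted) becomes an explicit @pytest.mark.parametrize value on each test, so the tiny model id is visible in the test signature and in the generated test id. A minimal before/after sketch of the pattern, assuming a hypothetical test that only loads a tokenizer (the class and test names below are illustrative, not taken from the PR):

import pytest
from transformers import AutoTokenizer


class TestBefore:
    # Before: the model id is hardcoded inside each test body.
    def test_tokenizer_loads(self):
        model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        assert tokenizer is not None


class TestAfter:
    # After: the model id is an explicit parametrize value, so it appears in the
    # generated test id and the list can grow without touching the test body.
    @pytest.mark.parametrize(
        "model_id",
        [
            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
        ],
    )
    def test_tokenizer_loads(self, model_id):
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        assert tokenizer is not None

With a single entry in the list the behaviour is unchanged; the parametrization mainly buys an explicit contract and a natural extension point for additional tiny models.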
99 changes: 77 additions & 22 deletions tests/experimental/test_bco_trainer.py
@@ -33,6 +33,12 @@

 @pytest.mark.low_priority
 class TestBCOTrainer(TrlTestCase):
+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
     @pytest.mark.parametrize(
         "config_name",
         [
@@ -45,8 +51,7 @@ class TestBCOTrainer(TrlTestCase):
         ],
     )
     @require_sklearn
-    def test_train(self, config_name):
-        model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
+    def test_train(self, config_name, model_id):
         model = AutoModelForCausalLM.from_pretrained(model_id)
         ref_model = AutoModelForCausalLM.from_pretrained(model_id)
         tokenizer = AutoTokenizer.from_pretrained(model_id)
@@ -80,9 +85,14 @@ def test_train(self, config_name):
             if param.sum() != 0: # ignore 0 biases
                 assert not torch.equal(param.cpu(), new_param.cpu())

+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
     @require_sklearn
-    def test_train_with_precompute(self):
-        model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
+    def test_train_with_precompute(self, model_id):
         model = AutoModelForCausalLM.from_pretrained(model_id)
         ref_model = AutoModelForCausalLM.from_pretrained(model_id)
         tokenizer = AutoTokenizer.from_pretrained(model_id)
@@ -117,9 +127,14 @@ def test_train_with_precompute(self):
             if param.sum() != 0: # ignore 0 biases
                 assert not torch.equal(param.cpu(), new_param.cpu())

+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
     @require_sklearn
-    def test_train_eval(self):
-        model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
+    def test_train_eval(self, model_id):
         model = AutoModelForCausalLM.from_pretrained(model_id)
         ref_model = AutoModelForCausalLM.from_pretrained(model_id)
         tokenizer = AutoTokenizer.from_pretrained(model_id)
@@ -145,9 +160,14 @@ def test_train_eval(self):

         trainer.train()

+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
     @require_sklearn
-    def test_init_with_ref_model_is_model(self):
-        model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
+    def test_init_with_ref_model_is_model(self, model_id):
         model = AutoModelForCausalLM.from_pretrained(model_id)
         tokenizer = AutoTokenizer.from_pretrained(model_id)

@@ -168,9 +188,14 @@ def test_init_with_ref_model_is_model(self):
                 train_dataset=dataset,
             )

+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
     @require_sklearn
-    def test_tokenize_and_process_tokens(self):
-        model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
+    def test_tokenize_and_process_tokens(self, model_id):
         model = AutoModelForCausalLM.from_pretrained(model_id)
         ref_model = AutoModelForCausalLM.from_pretrained(model_id)
         tokenizer = AutoTokenizer.from_pretrained(model_id)
@@ -224,9 +249,14 @@ def test_tokenize_and_process_tokens(self):
         assert processed_dataset["completion_attention_mask"][0] == [1, 1, 1, 1, 1, 1, 1]
         assert processed_dataset["completion_labels"][0] == [-100, -100, -100, -100, 27261, 13, 151645]

+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
     @require_sklearn
-    def test_train_without_providing_ref_model(self):
-        model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
+    def test_train_without_providing_ref_model(self, model_id):
         model = AutoModelForCausalLM.from_pretrained(model_id)
         tokenizer = AutoTokenizer.from_pretrained(model_id)

@@ -258,9 +288,14 @@ def test_train_without_providing_ref_model(self):
             if param.sum() != 0: # ignore 0 biases
                 assert not torch.equal(param.cpu(), new_param.cpu())

+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
     @require_sklearn
-    def test_train_udm(self):
-        model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
+    def test_train_udm(self, model_id):
         model = AutoModelForCausalLM.from_pretrained(model_id)
         tokenizer = AutoTokenizer.from_pretrained(model_id)

@@ -307,10 +342,15 @@ def embed_prompt(input_ids, attention_mask, model):
             if param.sum() != 0: # ignore 0 biases
                 assert not torch.equal(param.cpu(), new_param.cpu())

+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
     @require_sklearn
     @require_peft
-    def test_train_without_providing_ref_model_with_lora(self):
-        model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
+    def test_train_without_providing_ref_model_with_lora(self, model_id):
         model = AutoModelForCausalLM.from_pretrained(model_id)
         lora_config = LoraConfig(r=16, lora_alpha=32, lora_dropout=0.05, task_type="CAUSAL_LM")
         tokenizer = AutoTokenizer.from_pretrained(model_id)
@@ -345,10 +385,15 @@ def test_train_without_providing_ref_model_with_lora(self):
             if param.sum() != 0: # ignore 0 biases
                 assert not torch.equal(param.cpu(), new_param.cpu())

+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
     @require_sklearn
     @require_no_wandb
-    def test_generate_during_eval_no_wandb(self):
-        model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
+    def test_generate_during_eval_no_wandb(self, model_id):
         model = AutoModelForCausalLM.from_pretrained(model_id)
         tokenizer = AutoTokenizer.from_pretrained(model_id)

@@ -376,10 +421,15 @@ def test_generate_during_eval_no_wandb(self):
                 eval_dataset=dataset["test"],
             )

+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
     @require_sklearn
     @require_peft
-    def test_lora_train_and_save(self):
-        model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
+    def test_lora_train_and_save(self, model_id):
         model = AutoModelForCausalLM.from_pretrained(model_id)
         lora_config = LoraConfig(r=16, lora_alpha=32, lora_dropout=0.05, task_type="CAUSAL_LM")
         tokenizer = AutoTokenizer.from_pretrained(model_id)
@@ -409,9 +459,14 @@ def test_lora_train_and_save(self):
         # assert that the model is loaded without giving OSError
         AutoModelForCausalLM.from_pretrained(self.tmp_dir)

+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
     @require_sklearn
-    def test_compute_metrics(self):
-        model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
+    def test_compute_metrics(self, model_id):
         model = AutoModelForCausalLM.from_pretrained(model_id)
         ref_model = AutoModelForCausalLM.from_pretrained(model_id)
         tokenizer = AutoTokenizer.from_pretrained(model_id)
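A practical side effect of the decorator, sketched below under the same single-entry assumption: the model id becomes part of the pytest node id, so individual combinations can be selected with -k, and adding a second id to the list would re-run every BCO test against both models. The test name and the commented-out second entry are hypothetical, not part of this PR.

import pytest

# The generated node ids embed each parameter value, so a single model can be
# selected with, e.g., `pytest -k "tiny-Qwen2ForCausalLM"`.
@pytest.mark.parametrize(
    "model_id",
    [
        "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
        # "trl-internal-testing/tiny-SomeOtherModel",  # hypothetical future entry
    ],
)
def test_model_id_is_tiny(model_id):
    assert model_id.startswith("trl-internal-testing/")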
13 changes: 10 additions & 3 deletions tests/experimental/test_grpo_with_replay_buffer_trainer.py
@@ -96,11 +96,12 @@ def test_sample(self):
 @pytest.mark.low_priority
 class TestUpdateWithReplayBuffer:
     def setup_method(self):
+        model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
         config = GRPOWithReplayBufferConfig(
             replay_buffer_size=5,
         )
         self.trainer = GRPOWithReplayBufferTrainer(
-            model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+            model=model_id,
             reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5",
             args=config,
             train_dataset=None,
@@ -251,7 +252,13 @@ def test_update_with_inputs_different_seq_len(self):

 @pytest.mark.low_priority
 class TestGRPOWithReplayBufferTrainer(TrlTestCase):
-    def test_training_with_replay_buffer(self):
+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
+    def test_training_with_replay_buffer(self, model_id):
         dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train")

         # Guarantee that some rewards have 0 std
@@ -271,7 +278,7 @@ def custom_reward_func(completions, **kwargs):
             report_to="none",
         )
         trainer = GRPOWithReplayBufferTrainer(
-            model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+            model=model_id,
             reward_funcs=[custom_reward_func],
             args=training_args,
             train_dataset=dataset,
10 changes: 8 additions & 2 deletions tests/experimental/test_trainers_args.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+import pytest
 from datasets import load_dataset
 from transformers import AutoTokenizer

@@ -21,9 +22,14 @@


 class TestTrainerArg(TrlTestCase):
+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
     @require_sklearn
-    def test_bco(self):
-        model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
+    def test_bco(self, model_id):
         tokenizer = AutoTokenizer.from_pretrained(model_id)
         dataset = load_dataset("trl-internal-testing/zen", "standard_unpaired_preference", split="train")
         training_args = BCOConfig(
30 changes: 22 additions & 8 deletions tests/test_activation_offloading.py
@@ -11,8 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
-
+import pytest
 import torch
 from torch import nn
 from transformers import AutoModelForCausalLM
@@ -29,11 +28,16 @@


 class TestActivationOffloading(TrlTestCase):
+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
     @require_torch_accelerator
     @require_peft
-    def test_offloading_with_peft_models(self) -> None:
+    def test_offloading_with_peft_models(self, model_id) -> None:
         """Test that activation offloading works with PEFT models."""
-        model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
         model = AutoModelForCausalLM.from_pretrained(model_id).to(torch_device)
         peft_config = LoraConfig(
             lora_alpha=16,
@@ -76,9 +80,14 @@ def test_offloading_with_peft_models(self) -> None:
                 f"Gradient mismatch for {name_orig}"
             )

+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
     @require_torch_accelerator
-    def test_noop_manager_with_offloading(self):
-        model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
+    def test_noop_manager_with_offloading(self, model_id):
         model = AutoModelForCausalLM.from_pretrained(model_id).to(torch_device)
         inp = torch.randint(0, 100, (2, 10), device=torch_device)

@@ -123,10 +132,15 @@ def test_min_offload_size(self):
         # The test passes if no errors occur, as we're mainly testing
         # that the logic handles both offloaded and non-offloaded tensors

+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
     @require_torch_accelerator
-    def test_real_hf_model(self):
+    def test_real_hf_model(self, model_id):
         """Test with an actual HuggingFace model"""
-        model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
         model = AutoModelForCausalLM.from_pretrained(model_id).to(torch_device)

         # Create small input
22 changes: 13 additions & 9 deletions tests/test_callbacks.py
@@ -66,9 +66,10 @@ def __init__(self, model, ref_model, args, train_dataset, eval_dataset, processi

 class TestWinRateCallback(TrlTestCase):
     def setup_method(self):
-        self.model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
-        self.ref_model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
-        self.tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
+        model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
+        self.model = AutoModelForCausalLM.from_pretrained(model_id)
+        self.ref_model = AutoModelForCausalLM.from_pretrained(model_id)
+        self.tokenizer = AutoTokenizer.from_pretrained(model_id)
         self.tokenizer.pad_token = self.tokenizer.eos_token
         dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only")
         dataset["train"] = dataset["train"].select(range(8))
@@ -224,8 +225,9 @@ def test_lora(self):

 class TestLogCompletionsCallback(TrlTestCase):
     def setup_method(self):
-        self.model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
-        self.tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
+        model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
+        self.model = AutoModelForCausalLM.from_pretrained(model_id)
+        self.tokenizer = AutoTokenizer.from_pretrained(model_id)
         self.tokenizer.pad_token = self.tokenizer.eos_token
         dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only")
         dataset["train"] = dataset["train"].select(range(8))
@@ -318,8 +320,9 @@ def test_basic_comet(self):
 @require_mergekit
 class TestMergeModelCallback(TrlTestCase):
     def setup_method(self):
-        self.model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
-        self.tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
+        model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
+        self.model = AutoModelForCausalLM.from_pretrained(model_id)
+        self.tokenizer = AutoTokenizer.from_pretrained(model_id)
         self.dataset = load_dataset("trl-internal-testing/zen", "standard_preference", split="train")

     def test_callback(self):
@@ -374,8 +377,9 @@ def test_every_checkpoint(self):

 class TestBEMACallback(TrlTestCase):
     def setup_method(self):
-        self.model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
-        self.tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
+        model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
+        self.model = AutoModelForCausalLM.from_pretrained(model_id)
+        self.tokenizer = AutoTokenizer.from_pretrained(model_id)
         self.tokenizer.pad_token = self.tokenizer.eos_token
         dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling")

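The callbacks file is the one place that keeps a plain local variable instead of a parametrize decorator, presumably because these ids live in setup_method, which pytest calls without access to parametrize arguments; deduplicating the repeated string is the available improvement there. A minimal illustration of that constraint follows; the class and test names are hypothetical, not taken from the PR.

import pytest


class TestSetupMethodExample:
    def setup_method(self):
        # setup_method receives no parametrize values, so the shared id stays local.
        self.model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"

    @pytest.mark.parametrize("suffix", ["2.5"])
    def test_param_reaches_test_but_not_setup(self, suffix):
        # The parametrized value is available here, not in setup_method above.
        assert self.model_id.endswith(suffix)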