From 8126245bf35a1f34576de8308d9a702a9287e236 Mon Sep 17 00:00:00 2001
From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com>
Date: Thu, 23 Oct 2025 14:49:17 +0200
Subject: [PATCH 01/18] Use explicit tiny-Qwen2ForCausalLM-2.5 model_id param in SFT tests

---
 tests/test_sft_trainer.py | 288 ++++++++++++++++++++++++++++----------
 1 file changed, 211 insertions(+), 77 deletions(-)

diff --git a/tests/test_sft_trainer.py b/tests/test_sft_trainer.py
index 4416d9d5c9a..b4d4cd9866a 100644
--- a/tests/test_sft_trainer.py
+++ b/tests/test_sft_trainer.py
@@ -311,9 +311,15 @@ def test_train_gpt_oss(self):
             new_param = trainer.model.get_parameter(n)
             assert not torch.allclose(param, new_param), f"Parameter {n} has not changed"
 
-    def test_train_model(self):
+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
+    def test_train_model(self, model_id):
         # Instantiate the model
-        model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
+        model = AutoModelForCausalLM.from_pretrained(model_id)
 
         # Get the dataset
         dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling", split="train")
@@ -336,7 +342,13 @@ def test_train_model(self):
             new_param = trainer.model.get_parameter(n)
             assert not torch.allclose(param, new_param), f"Parameter {n} has not changed"
 
-    def test_train_dft_loss(self):
+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
+    def test_train_dft_loss(self, model_id):
         # Get the dataset
         dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling")
 
@@ -352,7 +364,7 @@ def test_train_dft_loss(self):
             eval_steps=3,
         )
         trainer = SFTTrainer(
-            model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+            model=model_id,
             args=training_args,
             train_dataset=dataset["train"],
             eval_dataset=dataset["test"],
@@ -400,7 +412,13 @@ def test_train_moe_model_with_aux_loss(self):
             new_param = trainer.model.get_parameter(n)
             assert not torch.allclose(param, new_param), f"Parameter {n} has not changed"
 
-    def test_train_with_formatting_func(self):
+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
+    def test_train_with_formatting_func(self, model_id):
         # Dummy formatting function
         def formatting_prompts_func(example):
             chosen, rejected = example["chosen"], example["rejected"]
@@ -412,7 +430,7 @@ def formatting_prompts_func(example):
         # Initialize the trainer
         training_args = SFTConfig(output_dir=self.tmp_dir, report_to="none")
         trainer = SFTTrainer(
-            model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+            model=model_id,
             args=training_args,
             train_dataset=dataset,
             formatting_func=formatting_prompts_func,
@@ -432,7 +450,13 @@ def formatting_prompts_func(example):
             new_param = trainer.model.get_parameter(n)
             assert not torch.allclose(param, new_param), f"Parameter {n} has not changed"
 
-    def test_train_model_dtype(self):
+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
+    def test_train_model_dtype(self, model_id):
         # Get the dataset
         dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling", split="train")
 
@@ -443,9 +467,7 @@ def test_train_model_dtype(self):
             learning_rate=0.1,
             report_to="none",
         )
-        trainer = SFTTrainer(
-            model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", args=training_args, train_dataset=dataset
-        )
+        trainer = SFTTrainer(model=model_id, args=training_args, 
train_dataset=dataset) # Save the initial parameters to compare them later previous_trainable_params = {n: param.clone() for n, param in trainer.model.named_parameters()} @@ -467,10 +489,15 @@ def test_train_model_dtype(self): assert new_param.dtype == torch.float16 assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_peft - def test_train_dense_with_peft_config_lora(self): + def test_train_dense_with_peft_config_lora(self, model_id): # Get the base model parameter names - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" model = AutoModelForCausalLM.from_pretrained(model_id) base_param_names = [f"base_model.model.{n}" for n, _ in model.named_parameters()] @@ -504,6 +531,12 @@ def test_train_dense_with_peft_config_lora(self): elif "base_layer" not in n: # We expect the peft parameters to be different (except for the base layer) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @pytest.mark.parametrize( "peft_type", [ @@ -513,9 +546,8 @@ def test_train_dense_with_peft_config_lora(self): ], ) @require_peft - def test_train_with_peft_config_prompt_tuning(self, peft_type): + def test_train_with_peft_config_prompt_tuning(self, peft_type, model_id): # Get the base model parameter names - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" model = AutoModelForCausalLM.from_pretrained(model_id) base_param_names = [f"base_model.{n}" for n, _ in model.named_parameters()] @@ -528,7 +560,7 @@ def test_train_with_peft_config_prompt_tuning(self, peft_type): peft_config = PromptTuningConfig( task_type=TaskType.CAUSAL_LM, num_virtual_tokens=4, - tokenizer_name_or_path="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + tokenizer_name_or_path=model_id, ) elif peft_type == "prefix_tuning": peft_config = PrefixTuningConfig( @@ -602,10 +634,15 @@ def test_train_moe_with_peft_config(self): elif "base_layer" not in n: # We expect the peft parameters to be different (except for the base layer) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_peft - def test_train_peft_model(self): + def test_train_peft_model(self, model_id): # Get the base model - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" model = AutoModelForCausalLM.from_pretrained(model_id) # Get the base model parameter names @@ -639,10 +676,15 @@ def test_train_peft_model(self): elif "base_layer" not in n: # We expect the peft parameters to be different (except for the base layer) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_peft - def test_train_dense_with_peft_config_and_gradient_checkpointing(self): + def test_train_dense_with_peft_config_and_gradient_checkpointing(self, model_id): # Get the base model parameter names - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" model = AutoModelForCausalLM.from_pretrained(model_id) base_param_names = [f"base_model.model.{n}" for n, _ in model.named_parameters()] @@ -713,10 +755,15 @@ def test_train_moe_with_peft_config_and_gradient_checkpointing(self): elif "base_layer" not in n: # We expect the peft 
parameters to be different (except for the base layer) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_peft - def test_train_with_peft_model_and_gradient_checkpointing(self): + def test_train_with_peft_model_and_gradient_checkpointing(self, model_id): # Get the base model parameter names - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" model = AutoModelForCausalLM.from_pretrained(model_id) base_param_names = [f"base_model.model.{n}" for n, _ in model.named_parameters()] model = get_peft_model(model, LoraConfig()) @@ -749,16 +796,20 @@ def test_train_with_peft_model_and_gradient_checkpointing(self): elif "base_layer" not in n: # We expect the peft parameters to be different (except for the base layer) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_liger_kernel - def test_train_with_liger(self): + def test_train_with_liger(self, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling", split="train") # Initialize the trainer training_args = SFTConfig(output_dir=self.tmp_dir, use_liger_kernel=True, report_to="none") - trainer = SFTTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", args=training_args, train_dataset=dataset - ) + trainer = SFTTrainer(model=model_id, args=training_args, train_dataset=dataset) # Save the initial parameters to compare them later previous_trainable_params = {n: param.clone() for n, param in trainer.model.named_parameters()} @@ -774,7 +825,13 @@ def test_train_with_liger(self): new_param = trainer.model.get_parameter(n) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" - def test_train_with_non_chatml_conversational_data(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_train_with_non_chatml_conversational_data(self, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/zen", "conversational_language_modeling", split="train") @@ -786,9 +843,7 @@ def rename_fields(example: list[dict]): # Initialize the trainer training_args = SFTConfig(output_dir=self.tmp_dir, report_to="none") - trainer = SFTTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", args=training_args, train_dataset=dataset - ) + trainer = SFTTrainer(model=model_id, args=training_args, train_dataset=dataset) # Save the initial parameters to compare them later previous_trainable_params = {n: param.clone() for n, param in trainer.model.named_parameters()} @@ -804,9 +859,14 @@ def rename_fields(example: list[dict]): new_param = trainer.model.get_parameter(n) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" - def test_train_with_pretokenized_data(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_train_with_pretokenized_data(self, model_id): # Get the dataset - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" tokenizer = AutoTokenizer.from_pretrained(model_id) dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling", split="train") @@ -834,15 +894,19 @@ def tokenize_example(example): new_param = trainer.model.get_parameter(n) assert not torch.allclose(param, new_param), 
f"Parameter {n} has not changed" - def test_train_with_iterable_dataset(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_train_with_iterable_dataset(self, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling", split="train", streaming=True) # Initialize the trainer training_args = SFTConfig(output_dir=self.tmp_dir, max_steps=3, report_to="none") - trainer = SFTTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", args=training_args, train_dataset=dataset - ) + trainer = SFTTrainer(model=model_id, args=training_args, train_dataset=dataset) # Save the initial parameters to compare them later previous_trainable_params = {n: param.clone() for n, param in trainer.model.named_parameters()} @@ -858,8 +922,14 @@ def test_train_with_iterable_dataset(self): new_param = trainer.model.get_parameter(n) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_flash_attn - def test_train_padding_free(self): + def test_train_padding_free(self, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling", split="train") @@ -871,9 +941,7 @@ def test_train_padding_free(self): bf16=True, # flash_attention_2 only supports bf16 and fp16 report_to="none", ) - trainer = SFTTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", args=training_args, train_dataset=dataset - ) + trainer = SFTTrainer(model=model_id, args=training_args, train_dataset=dataset) # Save the initial parameters to compare them later previous_trainable_params = {n: param.clone() for n, param in trainer.model.named_parameters()} @@ -889,10 +957,16 @@ def test_train_padding_free(self): new_param = trainer.model.get_parameter(n) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @pytest.mark.parametrize("packing_strategy", ["bfd", "wrapped"]) @ignore_warnings(message="You are using packing, but the attention implementation is not.*", category=UserWarning) @ignore_warnings(message="Padding-free training is enabled, but the attention.*", category=UserWarning) - def test_train_packing(self, packing_strategy): + def test_train_packing(self, packing_strategy, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling", split="train") @@ -900,9 +974,7 @@ def test_train_packing(self, packing_strategy): training_args = SFTConfig( output_dir=self.tmp_dir, packing=True, packing_strategy=packing_strategy, max_length=10, report_to="none" ) - trainer = SFTTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", args=training_args, train_dataset=dataset - ) + trainer = SFTTrainer(model=model_id, args=training_args, train_dataset=dataset) # Save the initial parameters to compare them later previous_trainable_params = {n: param.clone() for n, param in trainer.model.named_parameters()} @@ -918,9 +990,15 @@ def test_train_packing(self, packing_strategy): new_param = trainer.model.get_parameter(n) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @ignore_warnings(message="You are using packing, but the 
attention implementation is not.*", category=UserWarning) @ignore_warnings(message="Padding-free training is enabled, but the attention.*", category=UserWarning) - def test_eval_packing(self): + def test_eval_packing(self, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling") @@ -932,7 +1010,7 @@ def test_eval_packing(self): report_to="none", ) trainer = SFTTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, args=training_args, train_dataset=dataset["train"], eval_dataset=dataset["test"], @@ -952,9 +1030,15 @@ def test_eval_packing(self): assert len(trainer.train_dataset["input_ids"]) == 3 # w/ this dataset, we end up with 46 seqs assert len(trainer.eval_dataset["input_ids"]) == 1 # w/ this dataset, we end up with 6 seqs + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @ignore_warnings(message="You are using packing, but the attention implementation is not.*", category=UserWarning) @ignore_warnings(message="Padding-free training is enabled, but the attention.*", category=UserWarning) - def test_only_train_packing(self): + def test_only_train_packing(self, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling") @@ -967,7 +1051,7 @@ def test_only_train_packing(self): report_to="none", ) trainer = SFTTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, args=training_args, train_dataset=dataset["train"], eval_dataset=dataset["test"], @@ -987,23 +1071,27 @@ def test_only_train_packing(self): assert len(trainer.train_dataset["input_ids"]) == 3 # w/ this dataset, we end up with 46 seqs assert len(trainer.eval_dataset["input_ids"]) == 2 # w/ this dataset, we end up with 6 seqs - def test_train_with_chat_template_kwargs(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_train_with_chat_template_kwargs(self, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling", split="train") # Initialize the trainer training_args = SFTConfig(output_dir=self.tmp_dir, report_to="none") - tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5") + tokenizer = AutoTokenizer.from_pretrained(model_id) # The following template is a simplified version of the Qwen chat template, where an additional argument # `role_capital` is used to control the capitalization of roles. tokenizer.chat_template = '{%- if messages[0]["role"] == "system" -%} {{ "<|im_start|>" + ("SYSTEM" if role_capital else "system") + "\\n" + messages[0]["content"] + "<|im_end|>\\n" }}{%- else -%} {{ "<|im_start|>" + ("SYSTEM" if role_capital else "system") + "\\nYou are Qwen, created by Alibaba Cloud. 
You are a helpful assistant.<|im_end|>\\n" }}{%- endif -%}{%- for message in messages -%} {%- if (message.role == "user") or (message.role == "system" and not loop.first) or (message.role == "assistant" and not message.tool_calls) -%} {{ "<|im_start|>" + (message.role.upper() if role_capital else message.role) + "\\n" + message.content + "<|im_end|>\\n" }} {%- elif message.role == "assistant" -%} {{ "<|im_start|>" + ("ASSISTANT" if role_capital else "assistant") }} {%- if message.content -%} {{ "\\n" + message.content }} {%- endif -%} {{ "<|im_end|>\\n" }} {%- elif message.role == "tool" -%} {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != "tool") -%} {{ "<|im_start|>" + ("USER" if role_capital else "user") }} {%- endif -%} {{ "\\n\\n" + message.content + "\\n" }} {%- if loop.last or (messages[loop.index0 + 1].role != "tool") -%} {{ "<|im_end|>\\n" }} {%- endif -%} {%- endif -%}{%- endfor -%}{%- if add_generation_prompt -%} {{ "<|im_start|>" + ("ASSISTANT" if role_capital else "assistant") + "\\n" }}{%- endif -%}' dataset.add_column("chat_template_kwargs", [{"role_capital": bool(i % 2)} for i in range(len(dataset))]) - trainer = SFTTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", args=training_args, train_dataset=dataset - ) + trainer = SFTTrainer(model=model_id, args=training_args, train_dataset=dataset) # Save the initial parameters to compare them later previous_trainable_params = {n: param.clone() for n, param in trainer.model.named_parameters()} @@ -1215,15 +1303,19 @@ def test_train_with_set_chat_template_from_path(self): original_template_content = f.read() assert template_content == original_template_content, "Chat template content does not match the original" - def test_train_toolcall_data(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_train_toolcall_data(self, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/toolcall", split="train") # Initialize the trainer training_args = SFTConfig(output_dir=self.tmp_dir, report_to="none") - trainer = SFTTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", args=training_args, train_dataset=dataset - ) + trainer = SFTTrainer(model=model_id, args=training_args, train_dataset=dataset) # Save the initial parameters to compare them later previous_trainable_params = {n: param.clone() for n, param in trainer.model.named_parameters()} @@ -1239,14 +1331,20 @@ def test_train_toolcall_data(self): new_param = trainer.model.get_parameter(n) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" - def test_train_with_eval(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_train_with_eval(self, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling") # Initialize the trainer training_args = SFTConfig(output_dir=self.tmp_dir, eval_strategy="steps", eval_steps=3, report_to="none") trainer = SFTTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, args=training_args, train_dataset=dataset["train"], eval_dataset=dataset["test"], @@ -1258,14 +1356,20 @@ def test_train_with_eval(self): # Check that the eval loss is not None assert trainer.state.log_history[0]["eval_loss"] is not None - def test_train_with_multiple_eval_dataset(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) 
+ def test_train_with_multiple_eval_dataset(self, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling") # Initialize the trainer training_args = SFTConfig(output_dir=self.tmp_dir, eval_strategy="steps", eval_steps=3, report_to="none") trainer = SFTTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, args=training_args, train_dataset=dataset["train"], eval_dataset={"data1": dataset["test"], "data2": dataset["test"]}, @@ -1277,15 +1381,19 @@ def test_train_with_multiple_eval_dataset(self): assert trainer.state.log_history[-3]["eval_data1_loss"] is not None assert trainer.state.log_history[-2]["eval_data2_loss"] is not None - def test_train_with_gradient_checkpointing(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_train_with_gradient_checkpointing(self, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling", split="train") # Initialize the trainer training_args = SFTConfig(output_dir=self.tmp_dir, gradient_checkpointing=True, report_to="none") - trainer = SFTTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", args=training_args, train_dataset=dataset - ) + trainer = SFTTrainer(model=model_id, args=training_args, train_dataset=dataset) # Save the initial parameters to compare them later previous_trainable_params = {n: param.clone() for n, param in trainer.model.named_parameters()} @@ -1301,27 +1409,36 @@ def test_train_with_gradient_checkpointing(self): new_param = trainer.model.get_parameter(n) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" - def test_tag_added(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_tag_added(self, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling", split="train") # Initialize the trainer - trainer = SFTTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", - train_dataset=dataset, - ) + trainer = SFTTrainer(model=model_id, train_dataset=dataset) for tag in ["sft", "trl"]: assert tag in trainer.model.model_tags + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_peft - def test_tag_added_peft(self): + def test_tag_added_peft(self, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling", split="train") # Initialize the trainer trainer = SFTTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, train_dataset=dataset, peft_config=LoraConfig(), ) @@ -1549,14 +1666,20 @@ def test_train_vlm_text_only_data(self): else: assert not torch.allclose(param, new_param, rtol=1e-12, atol=1e-12), f"Param {n} is not updated" + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_peft - def test_prompt_tuning(self): + def test_prompt_tuning(self, model_id): """Test that SFT works with Prompt Tuning.""" dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling", split="train") training_args = SFTConfig(output_dir=self.tmp_dir, report_to="none") trainer = SFTTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, args=training_args, train_dataset=dataset, peft_config=PromptEncoderConfig(task_type=TaskType.CAUSAL_LM, 
num_virtual_tokens=8), @@ -1581,9 +1704,15 @@ def test_prompt_tuning(self): else: raise ValueError(f"Unexpected parameter {n} in model: {trainer.model}") + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_peft @require_bitsandbytes - def test_peft_model_with_quantization(self): + def test_peft_model_with_quantization(self, model_id): """SFTTrainer should not freeze layers of existing PeftModel. This test simulates a realistic QLoRA scenario where a quantized base model is first converted to a PeftModel, @@ -1591,7 +1720,6 @@ def test_peft_model_with_quantization(self): including the LoRA adapters, making training impossible. """ # Get the base model - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" model = AutoModelForCausalLM.from_pretrained(model_id) # Simulate a realistic QLoRA setup by mocking quantization attributes @@ -1675,10 +1803,16 @@ def test_peft_model_with_quantization(self): "All original LoRA parameters should remain trainable after SFTTrainer initialization" ) + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_peft - def test_prompt_tuning_peft_model(self): + def test_prompt_tuning_peft_model(self, model_id): """Test that SFT works with Prompt Tuning and a pre-converted PeftModel""" - model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5") + model = AutoModelForCausalLM.from_pretrained(model_id) model = get_peft_model(model, PromptEncoderConfig(task_type=TaskType.CAUSAL_LM, num_virtual_tokens=8)) dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling", split="train") From bc1211006a79708cb9b8b2764d96448ca0c2a28a Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Thu, 23 Oct 2025 16:11:24 +0200 Subject: [PATCH 02/18] Use fixture instead --- tests/test_sft_trainer.py | 171 ++------------------------------------ 1 file changed, 9 insertions(+), 162 deletions(-) diff --git a/tests/test_sft_trainer.py b/tests/test_sft_trainer.py index b4d4cd9866a..5c89b1ea745 100644 --- a/tests/test_sft_trainer.py +++ b/tests/test_sft_trainer.py @@ -256,6 +256,15 @@ def test_multiple_examples(self): class TestSFTTrainer(TrlTestCase): + @pytest.fixture( + scope="class", + params=[ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def model_id(self, request): + return request.param + @pytest.mark.parametrize( "model_id", [ @@ -311,12 +320,6 @@ def test_train_gpt_oss(self): new_param = trainer.model.get_parameter(n) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" - @pytest.mark.parametrize( - "model_id", - [ - "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", - ], - ) def test_train_model(self, model_id): # Instantiate the model model = AutoModelForCausalLM.from_pretrained(model_id) @@ -342,12 +345,6 @@ def test_train_model(self, model_id): new_param = trainer.model.get_parameter(n) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" - @pytest.mark.parametrize( - "model_id", - [ - "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", - ], - ) def test_train_dft_loss(self, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling") @@ -412,12 +409,6 @@ def test_train_moe_model_with_aux_loss(self): new_param = trainer.model.get_parameter(n) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" - 
@pytest.mark.parametrize( - "model_id", - [ - "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", - ], - ) def test_train_with_formatting_func(self, model_id): # Dummy formatting function def formatting_prompts_func(example): @@ -450,12 +441,6 @@ def formatting_prompts_func(example): new_param = trainer.model.get_parameter(n) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" - @pytest.mark.parametrize( - "model_id", - [ - "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", - ], - ) def test_train_model_dtype(self, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling", split="train") @@ -489,12 +474,6 @@ def test_train_model_dtype(self, model_id): assert new_param.dtype == torch.float16 assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" - @pytest.mark.parametrize( - "model_id", - [ - "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", - ], - ) @require_peft def test_train_dense_with_peft_config_lora(self, model_id): # Get the base model parameter names @@ -531,12 +510,6 @@ def test_train_dense_with_peft_config_lora(self, model_id): elif "base_layer" not in n: # We expect the peft parameters to be different (except for the base layer) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" - @pytest.mark.parametrize( - "model_id", - [ - "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", - ], - ) @pytest.mark.parametrize( "peft_type", [ @@ -634,12 +607,6 @@ def test_train_moe_with_peft_config(self): elif "base_layer" not in n: # We expect the peft parameters to be different (except for the base layer) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" - @pytest.mark.parametrize( - "model_id", - [ - "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", - ], - ) @require_peft def test_train_peft_model(self, model_id): # Get the base model @@ -676,12 +643,6 @@ def test_train_peft_model(self, model_id): elif "base_layer" not in n: # We expect the peft parameters to be different (except for the base layer) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" - @pytest.mark.parametrize( - "model_id", - [ - "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", - ], - ) @require_peft def test_train_dense_with_peft_config_and_gradient_checkpointing(self, model_id): # Get the base model parameter names @@ -755,12 +716,6 @@ def test_train_moe_with_peft_config_and_gradient_checkpointing(self): elif "base_layer" not in n: # We expect the peft parameters to be different (except for the base layer) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" - @pytest.mark.parametrize( - "model_id", - [ - "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", - ], - ) @require_peft def test_train_with_peft_model_and_gradient_checkpointing(self, model_id): # Get the base model parameter names @@ -796,12 +751,6 @@ def test_train_with_peft_model_and_gradient_checkpointing(self, model_id): elif "base_layer" not in n: # We expect the peft parameters to be different (except for the base layer) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" - @pytest.mark.parametrize( - "model_id", - [ - "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", - ], - ) @require_liger_kernel def test_train_with_liger(self, model_id): # Get the dataset @@ -825,12 +774,6 @@ def test_train_with_liger(self, model_id): new_param = trainer.model.get_parameter(n) assert not torch.allclose(param, new_param), f"Parameter {n} has not 
changed" - @pytest.mark.parametrize( - "model_id", - [ - "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", - ], - ) def test_train_with_non_chatml_conversational_data(self, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/zen", "conversational_language_modeling", split="train") @@ -859,12 +802,6 @@ def rename_fields(example: list[dict]): new_param = trainer.model.get_parameter(n) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" - @pytest.mark.parametrize( - "model_id", - [ - "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", - ], - ) def test_train_with_pretokenized_data(self, model_id): # Get the dataset tokenizer = AutoTokenizer.from_pretrained(model_id) @@ -894,12 +831,6 @@ def tokenize_example(example): new_param = trainer.model.get_parameter(n) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" - @pytest.mark.parametrize( - "model_id", - [ - "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", - ], - ) def test_train_with_iterable_dataset(self, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling", split="train", streaming=True) @@ -922,12 +853,6 @@ def test_train_with_iterable_dataset(self, model_id): new_param = trainer.model.get_parameter(n) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" - @pytest.mark.parametrize( - "model_id", - [ - "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", - ], - ) @require_flash_attn def test_train_padding_free(self, model_id): # Get the dataset @@ -957,12 +882,6 @@ def test_train_padding_free(self, model_id): new_param = trainer.model.get_parameter(n) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" - @pytest.mark.parametrize( - "model_id", - [ - "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", - ], - ) @pytest.mark.parametrize("packing_strategy", ["bfd", "wrapped"]) @ignore_warnings(message="You are using packing, but the attention implementation is not.*", category=UserWarning) @ignore_warnings(message="Padding-free training is enabled, but the attention.*", category=UserWarning) @@ -990,12 +909,6 @@ def test_train_packing(self, packing_strategy, model_id): new_param = trainer.model.get_parameter(n) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" - @pytest.mark.parametrize( - "model_id", - [ - "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", - ], - ) @ignore_warnings(message="You are using packing, but the attention implementation is not.*", category=UserWarning) @ignore_warnings(message="Padding-free training is enabled, but the attention.*", category=UserWarning) def test_eval_packing(self, model_id): @@ -1030,12 +943,6 @@ def test_eval_packing(self, model_id): assert len(trainer.train_dataset["input_ids"]) == 3 # w/ this dataset, we end up with 46 seqs assert len(trainer.eval_dataset["input_ids"]) == 1 # w/ this dataset, we end up with 6 seqs - @pytest.mark.parametrize( - "model_id", - [ - "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", - ], - ) @ignore_warnings(message="You are using packing, but the attention implementation is not.*", category=UserWarning) @ignore_warnings(message="Padding-free training is enabled, but the attention.*", category=UserWarning) def test_only_train_packing(self, model_id): @@ -1071,12 +978,6 @@ def test_only_train_packing(self, model_id): assert len(trainer.train_dataset["input_ids"]) == 3 # w/ this dataset, we end up with 46 seqs assert len(trainer.eval_dataset["input_ids"]) == 2 # w/ this 
dataset, we end up with 6 seqs - @pytest.mark.parametrize( - "model_id", - [ - "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", - ], - ) def test_train_with_chat_template_kwargs(self, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling", split="train") @@ -1303,12 +1204,6 @@ def test_train_with_set_chat_template_from_path(self): original_template_content = f.read() assert template_content == original_template_content, "Chat template content does not match the original" - @pytest.mark.parametrize( - "model_id", - [ - "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", - ], - ) def test_train_toolcall_data(self, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/toolcall", split="train") @@ -1331,12 +1226,6 @@ def test_train_toolcall_data(self, model_id): new_param = trainer.model.get_parameter(n) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" - @pytest.mark.parametrize( - "model_id", - [ - "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", - ], - ) def test_train_with_eval(self, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling") @@ -1356,12 +1245,6 @@ def test_train_with_eval(self, model_id): # Check that the eval loss is not None assert trainer.state.log_history[0]["eval_loss"] is not None - @pytest.mark.parametrize( - "model_id", - [ - "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", - ], - ) def test_train_with_multiple_eval_dataset(self, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling") @@ -1381,12 +1264,6 @@ def test_train_with_multiple_eval_dataset(self, model_id): assert trainer.state.log_history[-3]["eval_data1_loss"] is not None assert trainer.state.log_history[-2]["eval_data2_loss"] is not None - @pytest.mark.parametrize( - "model_id", - [ - "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", - ], - ) def test_train_with_gradient_checkpointing(self, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling", split="train") @@ -1409,12 +1286,6 @@ def test_train_with_gradient_checkpointing(self, model_id): new_param = trainer.model.get_parameter(n) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" - @pytest.mark.parametrize( - "model_id", - [ - "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", - ], - ) def test_tag_added(self, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling", split="train") @@ -1425,12 +1296,6 @@ def test_tag_added(self, model_id): for tag in ["sft", "trl"]: assert tag in trainer.model.model_tags - @pytest.mark.parametrize( - "model_id", - [ - "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", - ], - ) @require_peft def test_tag_added_peft(self, model_id): # Get the dataset @@ -1666,12 +1531,6 @@ def test_train_vlm_text_only_data(self): else: assert not torch.allclose(param, new_param, rtol=1e-12, atol=1e-12), f"Param {n} is not updated" - @pytest.mark.parametrize( - "model_id", - [ - "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", - ], - ) @require_peft def test_prompt_tuning(self, model_id): """Test that SFT works with Prompt Tuning.""" @@ -1704,12 +1563,6 @@ def test_prompt_tuning(self, model_id): else: raise ValueError(f"Unexpected parameter {n} in model: {trainer.model}") - @pytest.mark.parametrize( - "model_id", - [ - "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", - ], - ) @require_peft 
@require_bitsandbytes def test_peft_model_with_quantization(self, model_id): @@ -1803,12 +1656,6 @@ def test_peft_model_with_quantization(self, model_id): "All original LoRA parameters should remain trainable after SFTTrainer initialization" ) - @pytest.mark.parametrize( - "model_id", - [ - "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", - ], - ) @require_peft def test_prompt_tuning_peft_model(self, model_id): """Test that SFT works with Prompt Tuning and a pre-converted PeftModel""" From ba30f5a700f117221246f70d541dada0fa8ab4c7 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Thu, 23 Oct 2025 17:19:20 +0200 Subject: [PATCH 03/18] Use model_id fixture in GRPO tests --- tests/test_grpo_trainer.py | 141 ++++++++++++++++++++----------------- 1 file changed, 75 insertions(+), 66 deletions(-) diff --git a/tests/test_grpo_trainer.py b/tests/test_grpo_trainer.py index 50efa9e45cd..689410d13f9 100644 --- a/tests/test_grpo_trainer.py +++ b/tests/test_grpo_trainer.py @@ -113,17 +113,26 @@ def test_compute_entropy_all_masked(self): class TestGRPOTrainer(TrlTestCase): - def test_init_minimal(self): + @pytest.fixture( + scope="class", + params=[ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def model_id(self, request): + return request.param + + def test_init_minimal(self, model_id): # Test that GRPOTrainer can be instantiated with only model, reward_model and train_dataset dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", train_dataset=dataset, ) @pytest.mark.parametrize("config_name", ["standard_prompt_only", "conversational_prompt_only"]) - def test_training(self, config_name): + def test_training(self, config_name, model_id): dataset = load_dataset("trl-internal-testing/zen", config_name, split="train") training_args = GRPOConfig( @@ -135,7 +144,7 @@ def test_training(self, config_name): report_to="none", ) trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -153,7 +162,7 @@ def test_training(self, config_name): assert not torch.equal(param, new_param), f"Parameter {n} has not changed." @pytest.mark.parametrize("loss_type", ["bnpo", "dr_grpo", "dapo"]) - def test_training_loss_types(self, loss_type): + def test_training_loss_types(self, loss_type, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") training_args = GRPOConfig( @@ -167,7 +176,7 @@ def test_training_loss_types(self, loss_type): report_to="none", ) trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -184,7 +193,7 @@ def test_training_loss_types(self, loss_type): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." 
- def test_training_with_eval(self): + def test_training_with_eval(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only") training_args = GRPOConfig( @@ -198,7 +207,7 @@ def test_training_with_eval(self): report_to="none", ) trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset["train"], @@ -207,7 +216,7 @@ def test_training_with_eval(self): trainer.train() - def test_training_multiple_iterations(self): + def test_training_multiple_iterations(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") training_args = GRPOConfig( @@ -220,7 +229,7 @@ def test_training_multiple_iterations(self): report_to="none", ) trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -238,8 +247,8 @@ def test_training_multiple_iterations(self): assert not torch.equal(param, new_param), f"Parameter {n} has not changed." @require_peft - def test_training_peft(self): - model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5") + def test_training_peft(self, model_id): + model = AutoModelForCausalLM.from_pretrained(model_id) base_param_names = [f"base_model.model.{n}" for n, _ in model.named_parameters()] dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -274,12 +283,12 @@ def test_training_peft(self): assert not torch.allclose(param, new_param), f"Parameter {n} has not changed." @require_peft - def test_training_peft_with_gradient_checkpointing(self): + def test_training_peft_with_gradient_checkpointing(self, model_id): """Test that training works with PEFT and gradient checkpointing enabled.""" dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") model = AutoModelForCausalLM.from_pretrained( - "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model_id, dtype=torch.float32, # Use float32 for testing to avoid precision issues ) @@ -322,7 +331,7 @@ def test_training_peft_with_gradient_checkpointing(self): else: # Base model parameters should not change assert torch.equal(param, new_param), f"Base parameter {n} has changed." - def test_training_different_reward_model(self): + def test_training_different_reward_model(self, model_id): # Use a reward model different from the model: different chat template, tokenization, etc. dataset = load_dataset("trl-internal-testing/zen", "conversational_prompt_only", split="train") reward_model_id = "trl-internal-testing/tiny-LlamaForSequenceClassification-3.2" @@ -343,7 +352,7 @@ def test_training_different_reward_model(self): report_to="none", ) trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs=reward_model, args=training_args, train_dataset=dataset, @@ -361,7 +370,7 @@ def test_training_different_reward_model(self): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." 
- def test_training_reward_func_standard(self): + def test_training_reward_func_standard(self, model_id): # Test if trainer can handle reward function with standard format dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -378,7 +387,7 @@ def reward_func(completions, **kwargs): report_to="none", ) trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs=reward_func, args=training_args, train_dataset=dataset, @@ -395,7 +404,7 @@ def reward_func(completions, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." - def test_training_reward_func_conversational(self): + def test_training_reward_func_conversational(self, model_id): # Test if trainer can handle reward function with conversational format dataset = load_dataset("trl-internal-testing/zen", "conversational_prompt_only", split="train") @@ -413,7 +422,7 @@ def reward_func(completions, **kwargs): report_to="none", ) trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs=reward_func, args=training_args, train_dataset=dataset, @@ -430,7 +439,7 @@ def reward_func(completions, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." - def test_training_multiple_reward_funcs(self): + def test_training_multiple_reward_funcs(self, model_id): # Test that GRPOTrainer can be instantiated with multiple reward functions dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -451,7 +460,7 @@ def reward_func2(completions, **kwargs): report_to="none", ) trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs=[reward_func1, reward_func2], args=training_args, train_dataset=dataset, @@ -468,7 +477,7 @@ def reward_func2(completions, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." - def test_training_multiple_reward_funcs_with_None_output(self): + def test_training_multiple_reward_funcs_with_None_output(self, model_id): """Test that a valid math reward function is processed correctly while the code reward function returns None.""" dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -490,7 +499,7 @@ def non_applicable_reward_func(completions, **kwargs): ) trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs=[ applicable_reward_func, non_applicable_reward_func, @@ -512,7 +521,7 @@ def non_applicable_reward_func(completions, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." 
- def test_training_multiple_reward_funcs_with_weights(self): + def test_training_multiple_reward_funcs_with_weights(self, model_id): """Test that GRPOTrainer can handle multiple reward functions with weights.""" dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -534,7 +543,7 @@ def reward_func2(completions, **kwargs): reward_weights=[0.7, 0.3], # weight of reward_func1 and reward_func2 respectively ) trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs=[reward_func1, reward_func2], args=training_args, train_dataset=dataset, @@ -556,7 +565,7 @@ def reward_func2(completions, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." - def test_training_multiple_mixed_reward_funcs(self): + def test_training_multiple_mixed_reward_funcs(self, model_id): # Test if the trainer can handle a mix of reward functions and reward models dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -573,7 +582,7 @@ def reward_func(completions, **kwargs): report_to="none", ) trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs=[reward_func, "trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5"], args=training_args, train_dataset=dataset, @@ -590,7 +599,7 @@ def reward_func(completions, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." - def test_training_reward_func_additional_column(self): + def test_training_reward_func_additional_column(self, model_id): # Test if trainer can handle reward function that rely on additional columns in the dataset dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -611,7 +620,7 @@ def reward_func(completions, some_values, **kwargs): report_to="none", ) trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs=reward_func, args=training_args, train_dataset=dataset, @@ -628,7 +637,7 @@ def reward_func(completions, some_values, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." - def test_training_with_sync_ref_model(self): + def test_training_with_sync_ref_model(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") training_args = GRPOConfig( @@ -642,7 +651,7 @@ def test_training_with_sync_ref_model(self): report_to="none", ) trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -659,7 +668,7 @@ def test_training_with_sync_ref_model(self): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." 
- def test_training_beta_non_zero(self): + def test_training_beta_non_zero(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") training_args = GRPOConfig( output_dir=self.tmp_dir, @@ -671,7 +680,7 @@ def test_training_beta_non_zero(self): report_to="none", ) trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -688,7 +697,7 @@ def test_training_beta_non_zero(self): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." - def test_training_with_entropy_filter(self): + def test_training_with_entropy_filter(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") training_args = GRPOConfig( output_dir=self.tmp_dir, @@ -700,7 +709,7 @@ def test_training_with_entropy_filter(self): top_entropy_quantile=0.2, ) trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -832,7 +841,7 @@ def test_training_vllm_importance_sampling_correction(self): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." - def test_training_with_additional_generation_kwargs(self): + def test_training_with_additional_generation_kwargs(self, model_id): """Test that training works with additional generation kwargs.""" dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -850,7 +859,7 @@ def test_training_with_additional_generation_kwargs(self): ) trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -906,7 +915,7 @@ def test_training_vllm_with_additional_generation_kwargs(self): assert not torch.equal(param, new_param), f"Parameter {n} has not changed." @pytest.mark.parametrize("scale_rewards", [False, "group", "batch", True, "none"]) - def test_training_scale_rewards(self, scale_rewards): + def test_training_scale_rewards(self, scale_rewards, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") training_args = GRPOConfig( @@ -919,7 +928,7 @@ def test_training_scale_rewards(self, scale_rewards): report_to="none", ) trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -937,7 +946,7 @@ def test_training_scale_rewards(self, scale_rewards): assert not torch.equal(param, new_param), f"Parameter {n} has not changed." 
@patch("transformers.generation.utils.GenerationMixin.generate") - def test_training_with_mask_truncated_completions(self, mock_generate): + def test_training_with_mask_truncated_completions(self, mock_generate, model_id): """Test that training works with mask_truncated_completions=True parameter.""" # We mock the generate method because the model's random weights make it extremely unlikely to produce a @@ -969,7 +978,7 @@ def fake_generate(input_ids, **kwargs): report_to="none", ) trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -986,7 +995,7 @@ def fake_generate(input_ids, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." - def test_training_with_mask_truncated_completions_all_masked(self): + def test_training_with_mask_truncated_completions_all_masked(self, model_id): """ Test that when all generated completions are truncated (i.e., none contain an EOS token), and mask_truncated_completions=True, the model receives no effective learning signal and therefore does not update @@ -1007,7 +1016,7 @@ def test_training_with_mask_truncated_completions_all_masked(self): report_to="none", ) trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -1024,7 +1033,7 @@ def test_training_with_mask_truncated_completions_all_masked(self): new_param = trainer.model.get_parameter(n) assert torch.equal(param, new_param), f"Parameter {n} has changed." - def test_warning_raised_all_rewards_none(self, caplog): + def test_warning_raised_all_rewards_none(self, model_id, caplog): """Test that a proper warning is raised when all rewards are None.""" dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -1041,7 +1050,7 @@ def always_none_reward_func(completions, **kwargs): report_to="none", ) trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs=always_none_reward_func, args=training_args, train_dataset=dataset, @@ -1053,7 +1062,7 @@ def always_none_reward_func(completions, **kwargs): expected_warning = "All reward functions returned None for the following kwargs:" assert expected_warning in caplog.text - def test_training_num_generations_larger_than_batch_size(self): + def test_training_num_generations_larger_than_batch_size(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") training_args = GRPOConfig( @@ -1066,7 +1075,7 @@ def test_training_num_generations_larger_than_batch_size(self): report_to="none", ) trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -1083,7 +1092,7 @@ def test_training_num_generations_larger_than_batch_size(self): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." 
- def test_training_delta_clipping(self): + def test_training_delta_clipping(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") training_args = GRPOConfig( @@ -1096,7 +1105,7 @@ def test_training_delta_clipping(self): report_to="none", ) trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -1113,7 +1122,7 @@ def test_training_delta_clipping(self): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." - def test_training_multiple_dataloader_workers(self): + def test_training_multiple_dataloader_workers(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") training_args = GRPOConfig( @@ -1126,7 +1135,7 @@ def test_training_multiple_dataloader_workers(self): report_to="none", ) trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -1143,7 +1152,7 @@ def test_training_multiple_dataloader_workers(self): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." - def test_training_with_generation_kwargs(self): + def test_training_with_generation_kwargs(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") training_args = GRPOConfig( @@ -1156,7 +1165,7 @@ def test_training_with_generation_kwargs(self): report_to="none", ) trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -1173,7 +1182,7 @@ def test_training_with_generation_kwargs(self): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." - def test_training_with_reward_func_accessing_trainer_state(self): + def test_training_with_reward_func_accessing_trainer_state(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") def reward_func(completions, **kwargs): @@ -1191,14 +1200,14 @@ def reward_func(completions, **kwargs): report_to="none", ) trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs=reward_func, args=training_args, train_dataset=dataset, ) trainer.train() - def test_prepare_input_called_with_correct_data(self): + def test_prepare_input_called_with_correct_data(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") training_args = GRPOConfig( output_dir=self.tmp_dir, @@ -1214,7 +1223,7 @@ def test_prepare_input_called_with_correct_data(self): report_to="none", ) trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -1552,7 +1561,7 @@ def reward_func(completions, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." 
- def test_training_sequence_importance_sampling(self): + def test_training_sequence_importance_sampling(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") training_args = GRPOConfig( @@ -1566,7 +1575,7 @@ def test_training_sequence_importance_sampling(self): report_to="none", ) trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -1583,7 +1592,7 @@ def test_training_sequence_importance_sampling(self): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." - def test_mismatched_reward_processing_classes_length(self): + def test_mismatched_reward_processing_classes_length(self, model_id): """Test that mismatched length between reward_funcs and reward_processing_classes raises error.""" dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -1602,14 +1611,14 @@ def test_mismatched_reward_processing_classes_length(self): with pytest.raises(ValueError, match="must match"): GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs=reward_models, reward_processing_classes=single_processing_class, # only one, but need two args=training_args, train_dataset=dataset, ) - def test_correct_reward_processing_classes_list(self): + def test_correct_reward_processing_classes_list(self, model_id): """Test that correct list of reward_processing_classes works properly.""" dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -1631,7 +1640,7 @@ def test_correct_reward_processing_classes_list(self): correct_processing_classes = [processing_class1, processing_class2] trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs=reward_models, reward_processing_classes=correct_processing_classes, args=training_args, @@ -1640,7 +1649,7 @@ def test_correct_reward_processing_classes_list(self): assert len(trainer.reward_processing_classes) == len(reward_models) - def test_single_reward_model_with_single_processing_class(self): + def test_single_reward_model_with_single_processing_class(self, model_id): """Test that single reward model with single processing class works.""" dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -1655,7 +1664,7 @@ def test_single_reward_model_with_single_processing_class(self): training_args = GRPOConfig(output_dir=self.tmp_dir, report_to="none") trainer = GRPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs=reward_model, reward_processing_classes=single_processing_class, # single object for single reward model args=training_args, From 919e8f0fc1afc5f0ba4b9b127c2fd4e5b6f16c7c Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Thu, 23 Oct 2025 17:19:57 +0200 Subject: [PATCH 04/18] Use model_id fixture in DPO tests --- tests/test_dpo_trainer.py | 47 +++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 24 deletions(-) diff --git a/tests/test_dpo_trainer.py b/tests/test_dpo_trainer.py index fa7038167d4..17ac7b224b8 100644 --- a/tests/test_dpo_trainer.py +++ b/tests/test_dpo_trainer.py @@ -153,6 +153,15 @@ def test_tokenize_row_with_truncation_and_special_tokens(self): 
class TestDPOTrainer(TrlTestCase): + @pytest.fixture( + scope="class", + params=[ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def model_id(self, request): + return request.param + def setup_method(self): self.model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" self.model = AutoModelForCausalLM.from_pretrained(self.model_id) @@ -160,8 +169,7 @@ def setup_method(self): self.tokenizer = AutoTokenizer.from_pretrained(self.model_id) self.tokenizer.pad_token = self.tokenizer.eos_token - def test_train(self): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + def test_train(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_preference", split="train") tokenizer = AutoTokenizer.from_pretrained(model_id) training_args = DPOConfig( @@ -207,8 +215,7 @@ def test_train(self): "apo_down", ], ) - def test_train_loss_types(self, loss_type): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + def test_train_loss_types(self, loss_type, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_preference", split="train") tokenizer = AutoTokenizer.from_pretrained(model_id) @@ -301,12 +308,11 @@ def test_dpo_trainer_with_weighting(self): if param.sum() != 0: # ignore 0 biases assert not torch.allclose(param, new_param, rtol=1e-12, atol=1e-12) - def test_train_with_multiple_loss_types(self): + def test_train_with_multiple_loss_types(self, model_id): """ Tests multi-loss combinations, loss type inference, and weight configuration. MPO combines DPO (sigmoid), BCO (bco_pair), and SFT (sft) losses. """ - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" dataset = load_dataset("trl-internal-testing/zen", "standard_preference", split="train") tokenizer = AutoTokenizer.from_pretrained(model_id) @@ -637,12 +643,11 @@ def test_dpo_lora_save(self): @require_peft @require_torch_gpu_if_bnb_not_multi_backend_enabled - def test_dpo_lora_bf16_autocast_llama(self): + def test_dpo_lora_bf16_autocast_llama(self, model_id): # Note this test only works on compute capability > 7 GPU devices from peft import LoraConfig from transformers import BitsAndBytesConfig - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" tokenizer = AutoTokenizer.from_pretrained(model_id) lora_config = LoraConfig( @@ -778,10 +783,9 @@ def test_dpo_lora_bf16_autocast(self, loss_type, pre_compute, gen_during_eval): trainer.save_model() @require_peft - def test_dpo_lora_tags(self): + def test_dpo_lora_tags(self, model_id): from peft import LoraConfig - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" tokenizer = AutoTokenizer.from_pretrained(model_id) lora_config = LoraConfig( @@ -824,8 +828,7 @@ def test_dpo_lora_tags(self): assert tag in trainer.model.model_tags @require_peft - def test_dpo_tags(self): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + def test_dpo_tags(self, model_id): tokenizer = AutoTokenizer.from_pretrained(model_id) # lora model @@ -995,8 +998,7 @@ def test_dpo_trainer_dtype(self): train_dataset=dummy_dataset["train"], ) - def test_dpo_loss_alpha_div_f(self): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + def test_dpo_loss_alpha_div_f(self, model_id): tokenizer = AutoTokenizer.from_pretrained(model_id) # lora model @@ -1036,8 +1038,7 @@ def test_dpo_loss_alpha_div_f(self): ) assert torch.isfinite(losses).cpu().numpy().all() - def test_dpo_loss_js_div_f(self): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + def test_dpo_loss_js_div_f(self, model_id): tokenizer = 
AutoTokenizer.from_pretrained(model_id) # lora model @@ -1230,10 +1231,10 @@ def test_padding_free(self): if param.sum() != 0: # ignore 0 biases assert not torch.allclose(param, new_param, rtol=1e-12, atol=1e-12) - def test_compute_metrics(self): - model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5") - ref_model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5") - tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5") + def test_compute_metrics(self, model_id): + model = AutoModelForCausalLM.from_pretrained(model_id) + ref_model = AutoModelForCausalLM.from_pretrained(model_id) + tokenizer = AutoTokenizer.from_pretrained(model_id) tokenizer.pad_token = tokenizer.eos_token dummy_dataset = load_dataset("trl-internal-testing/zen", "standard_preference") @@ -1265,8 +1266,7 @@ def dummy_compute_metrics(*args, **kwargs): assert trainer.state.log_history[-2]["eval_test"] == 0.0 - def test_train_with_length_desensitization(self): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + def test_train_with_length_desensitization(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_preference", split="train") tokenizer = AutoTokenizer.from_pretrained(model_id) @@ -1379,8 +1379,7 @@ def test_dpo_trainer_with_liger(self, beta, loss_type): assert output is not None assert "loss" not in output.keys() - def test_train_with_iterable_dataset(self): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + def test_train_with_iterable_dataset(self, model_id): dataset = load_dataset( "trl-internal-testing/zen", "standard_preference", From f599123873b767436f0a8899a708d143604fe24b Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Thu, 23 Oct 2025 17:24:28 +0200 Subject: [PATCH 05/18] Use model_id fixture in ORPO tests --- tests/test_orpo_trainer.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/tests/test_orpo_trainer.py b/tests/test_orpo_trainer.py index f882cf756f8..48159662be5 100644 --- a/tests/test_orpo_trainer.py +++ b/tests/test_orpo_trainer.py @@ -23,6 +23,15 @@ class TestORPOTrainer(TrlTestCase): + @pytest.fixture( + scope="class", + params=[ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def model_id(self, request): + return request.param + def setup_method(self): self.model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" self.model = AutoModelForCausalLM.from_pretrained(self.model_id) @@ -144,9 +153,9 @@ def test_orpo_trainer_with_lora(self, config_name): if param.sum() != 0: # ignore 0 biases assert not torch.equal(param, new_param) - def test_compute_metrics(self): - model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5") - tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5") + def test_compute_metrics(self, model_id): + model = AutoModelForCausalLM.from_pretrained(model_id) + tokenizer = AutoTokenizer.from_pretrained(model_id) tokenizer.pad_token = tokenizer.eos_token dummy_dataset = load_dataset("trl-internal-testing/zen", "standard_preference") From c922f28a748a3551784cd4262f591e7e82f74d92 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Thu, 23 Oct 2025 17:28:10 +0200 Subject: [PATCH 06/18] Use model_id fixture in KTO tests --- tests/test_kto_trainer.py | 17 +++++++++++++---- 1 
file changed, 13 insertions(+), 4 deletions(-)
diff --git a/tests/test_kto_trainer.py b/tests/test_kto_trainer.py
index e551c0073cd..f58a02ef8d4 100644
--- a/tests/test_kto_trainer.py
+++ b/tests/test_kto_trainer.py
@@ -25,6 +25,15 @@ class TestKTOTrainer(TrlTestCase):
+    @pytest.fixture(
+        scope="class",
+        params=[
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
+    def model_id(self, request):
+        return request.param
+
     def setup_method(self):
         self.model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
         self.model = AutoModelForCausalLM.from_pretrained(self.model_id)
@@ -390,10 +399,10 @@ def test_kto_trainer_with_liger(self):
         if param.sum() != 0:
             assert not torch.equal(param, new_param)
-    def test_compute_metrics(self):
-        model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
-        ref_model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
-        tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
+    def test_compute_metrics(self, model_id):
+        model = AutoModelForCausalLM.from_pretrained(model_id)
+        ref_model = AutoModelForCausalLM.from_pretrained(model_id)
+        tokenizer = AutoTokenizer.from_pretrained(model_id)
         tokenizer.pad_token = tokenizer.eos_token
         dummy_dataset = load_dataset("trl-internal-testing/zen", "standard_unpaired_preference")

From 2e50260aab3505c339b2220f7f2f3f1ad9e198d7 Mon Sep 17 00:00:00 2001
From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com>
Date: Thu, 23 Oct 2025 17:30:22 +0200
Subject: [PATCH 07/18] Use model_id fixture in activation_offloading tests

---
 tests/test_activation_offloading.py | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/tests/test_activation_offloading.py b/tests/test_activation_offloading.py
index 12364c23d94..15cef523de8 100644
--- a/tests/test_activation_offloading.py
+++ b/tests/test_activation_offloading.py
@@ -11,8 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
-
+import pytest
 import torch
 from torch import nn
 from transformers import AutoModelForCausalLM
@@ -29,11 +28,19 @@ class TestActivationOffloading(TrlTestCase):
+    @pytest.fixture(
+        scope="class",
+        params=[
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
+    def model_id(self, request):
+        return request.param
+
     @require_torch_accelerator
     @require_peft
-    def test_offloading_with_peft_models(self) -> None:
+    def test_offloading_with_peft_models(self, model_id) -> None:
         """Test that activation offloading works with PEFT models."""
-        model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
         model = AutoModelForCausalLM.from_pretrained(model_id).to(torch_device)
         peft_config = LoraConfig(
             lora_alpha=16,
@@ -77,8 +84,7 @@ def test_offloading_with_peft_models(self) -> None:
         )
     @require_torch_accelerator
-    def test_noop_manager_with_offloading(self):
-        model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
+    def test_noop_manager_with_offloading(self, model_id):
         model = AutoModelForCausalLM.from_pretrained(model_id).to(torch_device)
         inp = torch.randint(0, 100, (2, 10), device=torch_device)
@@ -124,9 +130,8 @@ def test_min_offload_size(self):
     # that the logic handles both offloaded and non-offloaded tensors
     @require_torch_accelerator
-    def test_real_hf_model(self):
+    def test_real_hf_model(self, model_id):
         """Test with an actual HuggingFace model"""
-        model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
         model = AutoModelForCausalLM.from_pretrained(model_id).to(torch_device)
         # Create small input

From 29d25f91be163fc087711901ab332f9b96f45fb1 Mon Sep 17 00:00:00 2001
From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com>
Date: Thu, 23 Oct 2025 17:34:08 +0200
Subject: [PATCH 08/18] Refactor model_id in callbacks tests

---
 tests/test_callbacks.py | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/tests/test_callbacks.py b/tests/test_callbacks.py
index 811bcf79f37..986f95883a2 100644
--- a/tests/test_callbacks.py
+++ b/tests/test_callbacks.py
@@ -66,9 +66,10 @@ def __init__(self, model, ref_model, args, train_dataset, eval_dataset, processi
 class TestWinRateCallback(TrlTestCase):
     def setup_method(self):
-        self.model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
-        self.ref_model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
-        self.tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
+        model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
+        self.model = AutoModelForCausalLM.from_pretrained(model_id)
+        self.ref_model = AutoModelForCausalLM.from_pretrained(model_id)
+        self.tokenizer = AutoTokenizer.from_pretrained(model_id)
         self.tokenizer.pad_token = self.tokenizer.eos_token
         dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only")
         dataset["train"] = dataset["train"].select(range(8))
@@ -224,8 +225,9 @@ def test_lora(self):
 class TestLogCompletionsCallback(TrlTestCase):
     def setup_method(self):
-        self.model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
-        self.tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
+        model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
+        self.model = AutoModelForCausalLM.from_pretrained(model_id)
+        self.tokenizer = AutoTokenizer.from_pretrained(model_id)
         self.tokenizer.pad_token = self.tokenizer.eos_token
         dataset =
load_dataset("trl-internal-testing/zen", "standard_prompt_only") dataset["train"] = dataset["train"].select(range(8)) @@ -318,8 +320,9 @@ def test_basic_comet(self): @require_mergekit class TestMergeModelCallback(TrlTestCase): def setup_method(self): - self.model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5") - self.tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5") + model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + self.model = AutoModelForCausalLM.from_pretrained(model_id) + self.tokenizer = AutoTokenizer.from_pretrained(model_id) self.dataset = load_dataset("trl-internal-testing/zen", "standard_preference", split="train") def test_callback(self): @@ -374,8 +377,9 @@ def test_every_checkpoint(self): class TestBEMACallback(TrlTestCase): def setup_method(self): - self.model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5") - self.tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5") + model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + self.model = AutoModelForCausalLM.from_pretrained(model_id) + self.tokenizer = AutoTokenizer.from_pretrained(model_id) self.tokenizer.pad_token = self.tokenizer.eos_token dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling") From 6c3f5225be06adf5a0a08247100370802adaf75c Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Thu, 23 Oct 2025 17:37:12 +0200 Subject: [PATCH 09/18] Refactor model_id in dataset_formatting tests --- tests/test_dataset_formatting.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/test_dataset_formatting.py b/tests/test_dataset_formatting.py index a9817d8455f..33dfe0e3e01 100644 --- a/tests/test_dataset_formatting.py +++ b/tests/test_dataset_formatting.py @@ -121,8 +121,9 @@ def test_get_formatting_func_from_dataset_with_unknown_format(self): @pytest.mark.filterwarnings("ignore::FutureWarning") class TestSetupChatFormat(TrlTestCase): def setup_method(self): - self.tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5") - self.model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5") + model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + self.tokenizer = AutoTokenizer.from_pretrained(model_id) + self.model = AutoModelForCausalLM.from_pretrained(model_id) # remove built-in chat_template to simulate a model having no chat_template self.tokenizer.chat_template = None From 88dffd21f0045791796e3c9db05570b01176c5b7 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Thu, 23 Oct 2025 17:39:40 +0200 Subject: [PATCH 10/18] Use model_id fixture in Online DPO tests --- tests/test_online_dpo_trainer.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/tests/test_online_dpo_trainer.py b/tests/test_online_dpo_trainer.py index b5c9a1d9ee1..ec190d02afc 100644 --- a/tests/test_online_dpo_trainer.py +++ b/tests/test_online_dpo_trainer.py @@ -42,6 +42,15 @@ class TestOnlineDPOTrainer(TrlTestCase): + @pytest.fixture( + scope="class", + params=[ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def model_id(self, request): + return request.param + def setup_method(self): self.model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" self.model = 
AutoModelForCausalLM.from_pretrained(self.model_id) @@ -80,7 +89,7 @@ def test_training(self, config_name): # Check if training loss is available assert "train_loss" in trainer.state.log_history[-1] - def test_training_model_str(self): + def test_training_model_str(self, model_id): training_args = OnlineDPOConfig( output_dir=self.tmp_dir, per_device_train_batch_size=2, @@ -92,7 +101,7 @@ def test_training_model_str(self): dummy_dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only") trainer = OnlineDPOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs=self.reward_model, args=training_args, train_dataset=dummy_dataset["train"], From e08bdb27b339776f027147d03fb1516ca7972759 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Thu, 23 Oct 2025 18:00:04 +0200 Subject: [PATCH 11/18] Use model_id fixture in RLOO tests --- tests/test_rloo_trainer.py | 129 ++++++++++++++++++++----------------- 1 file changed, 69 insertions(+), 60 deletions(-) diff --git a/tests/test_rloo_trainer.py b/tests/test_rloo_trainer.py index 476fcfb0e72..ea874d77959 100644 --- a/tests/test_rloo_trainer.py +++ b/tests/test_rloo_trainer.py @@ -35,17 +35,26 @@ class TestRLOOTrainer(TrlTestCase): - def test_init_minimal(self): + @pytest.fixture( + scope="class", + params=[ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def model_id(self, request): + return request.param + + def test_init_minimal(self, model_id): # Test that RLOOTrainer can be instantiated with only model, reward_model and train_dataset dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") RLOOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", train_dataset=dataset, ) @pytest.mark.parametrize("config_name", ["standard_prompt_only", "conversational_prompt_only"]) - def test_training(self, config_name): + def test_training(self, config_name, model_id): dataset = load_dataset("trl-internal-testing/zen", config_name, split="train") training_args = RLOOConfig( @@ -57,7 +66,7 @@ def test_training(self, config_name): report_to="none", ) trainer = RLOOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -74,7 +83,7 @@ def test_training(self, config_name): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." 
- def test_training_with_eval(self): + def test_training_with_eval(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only") training_args = RLOOConfig( @@ -88,7 +97,7 @@ def test_training_with_eval(self): report_to="none", ) trainer = RLOOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset["train"], @@ -97,7 +106,7 @@ def test_training_with_eval(self): trainer.train() - def test_training_multiple_iterations(self): + def test_training_multiple_iterations(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") training_args = RLOOConfig( @@ -110,7 +119,7 @@ def test_training_multiple_iterations(self): report_to="none", ) trainer = RLOOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -128,8 +137,8 @@ def test_training_multiple_iterations(self): assert not torch.equal(param, new_param), f"Parameter {n} has not changed." @require_peft - def test_training_peft(self): - model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5") + def test_training_peft(self, model_id): + model = AutoModelForCausalLM.from_pretrained(model_id) base_param_names = [f"base_model.model.{n}" for n, _ in model.named_parameters()] dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -164,12 +173,12 @@ def test_training_peft(self): assert not torch.allclose(param, new_param), f"Parameter {n} has not changed." @require_peft - def test_training_peft_with_gradient_checkpointing(self): + def test_training_peft_with_gradient_checkpointing(self, model_id): """Test that training works with PEFT and gradient checkpointing enabled.""" dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") model = AutoModelForCausalLM.from_pretrained( - "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model_id, dtype=torch.float32, # Use float32 for testing to avoid precision issues ) @@ -212,7 +221,7 @@ def test_training_peft_with_gradient_checkpointing(self): else: # Base model parameters should not change assert torch.equal(param, new_param), f"Base parameter {n} has changed." - def test_training_different_reward_model(self): + def test_training_different_reward_model(self, model_id): # Use a reward model different from the model: different chat template, tokenization, etc. dataset = load_dataset("trl-internal-testing/zen", "conversational_prompt_only", split="train") reward_model_id = "trl-internal-testing/tiny-LlamaForSequenceClassification-3.2" @@ -233,7 +242,7 @@ def test_training_different_reward_model(self): report_to="none", ) trainer = RLOOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs=reward_model, args=training_args, train_dataset=dataset, @@ -251,7 +260,7 @@ def test_training_different_reward_model(self): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." 
- def test_training_reward_func_standard(self): + def test_training_reward_func_standard(self, model_id): # Test if trainer can handle reward function with standard format dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -268,7 +277,7 @@ def reward_func(completions, **kwargs): report_to="none", ) trainer = RLOOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs=reward_func, args=training_args, train_dataset=dataset, @@ -285,7 +294,7 @@ def reward_func(completions, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." - def test_training_reward_func_conversational(self): + def test_training_reward_func_conversational(self, model_id): # Test if trainer can handle reward function with conversational format dataset = load_dataset("trl-internal-testing/zen", "conversational_prompt_only", split="train") @@ -303,7 +312,7 @@ def reward_func(completions, **kwargs): report_to="none", ) trainer = RLOOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs=reward_func, args=training_args, train_dataset=dataset, @@ -320,7 +329,7 @@ def reward_func(completions, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." - def test_training_multiple_reward_funcs(self): + def test_training_multiple_reward_funcs(self, model_id): # Test that RLOOTrainer can be instantiated with multiple reward functions dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -341,7 +350,7 @@ def reward_func2(completions, **kwargs): report_to="none", ) trainer = RLOOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs=[reward_func1, reward_func2], args=training_args, train_dataset=dataset, @@ -358,7 +367,7 @@ def reward_func2(completions, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." - def test_training_multiple_reward_funcs_with_None_output(self): + def test_training_multiple_reward_funcs_with_None_output(self, model_id): """Test that a valid math reward function is processed correctly while the code reward function returns None.""" dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -380,7 +389,7 @@ def non_applicable_reward_func(completions, **kwargs): ) trainer = RLOOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs=[ applicable_reward_func, non_applicable_reward_func, @@ -402,7 +411,7 @@ def non_applicable_reward_func(completions, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." 
- def test_training_multiple_reward_funcs_with_weights(self): + def test_training_multiple_reward_funcs_with_weights(self, model_id): """Test that RLOOTrainer can handle multiple reward functions with weights.""" dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -424,7 +433,7 @@ def reward_func2(completions, **kwargs): reward_weights=[0.7, 0.3], # weight of reward_func1 and reward_func2 respectively ) trainer = RLOOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs=[reward_func1, reward_func2], args=training_args, train_dataset=dataset, @@ -446,7 +455,7 @@ def reward_func2(completions, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." - def test_training_multiple_mixed_reward_funcs(self): + def test_training_multiple_mixed_reward_funcs(self, model_id): # Test if the trainer can handle a mix of reward functions and reward models dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -463,7 +472,7 @@ def reward_func(completions, **kwargs): report_to="none", ) trainer = RLOOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs=[reward_func, "trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5"], args=training_args, train_dataset=dataset, @@ -480,7 +489,7 @@ def reward_func(completions, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." - def test_training_reward_func_additional_column(self): + def test_training_reward_func_additional_column(self, model_id): # Test if trainer can handle reward function that rely on additional columns in the dataset dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -501,7 +510,7 @@ def reward_func(completions, some_values, **kwargs): report_to="none", ) trainer = RLOOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs=reward_func, args=training_args, train_dataset=dataset, @@ -518,7 +527,7 @@ def reward_func(completions, some_values, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." - def test_training_with_sync_ref_model(self): + def test_training_with_sync_ref_model(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") training_args = RLOOConfig( @@ -532,7 +541,7 @@ def test_training_with_sync_ref_model(self): report_to="none", ) trainer = RLOOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -549,7 +558,7 @@ def test_training_with_sync_ref_model(self): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." 
- def test_training_beta_zero(self): + def test_training_beta_zero(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") training_args = RLOOConfig( output_dir=self.tmp_dir, @@ -561,7 +570,7 @@ def test_training_beta_zero(self): report_to="none", ) trainer = RLOOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -658,7 +667,7 @@ def test_training_vllm_guided_decoding(self): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." - def test_training_with_additional_generation_kwargs(self): + def test_training_with_additional_generation_kwargs(self, model_id): """Test that training works with additional generation kwargs.""" dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -676,7 +685,7 @@ def test_training_with_additional_generation_kwargs(self): ) trainer = RLOOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -731,7 +740,7 @@ def test_training_vllm_with_additional_generation_kwargs(self): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." - def test_training_with_normalized_advantages(self): + def test_training_with_normalized_advantages(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") training_args = RLOOConfig( @@ -744,7 +753,7 @@ def test_training_with_normalized_advantages(self): report_to="none", ) trainer = RLOOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -761,7 +770,7 @@ def test_training_with_normalized_advantages(self): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." - def test_training_with_clipped_rewards(self): + def test_training_with_clipped_rewards(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") training_args = RLOOConfig( @@ -774,7 +783,7 @@ def test_training_with_clipped_rewards(self): report_to="none", ) trainer = RLOOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -792,7 +801,7 @@ def test_training_with_clipped_rewards(self): assert not torch.equal(param, new_param), f"Parameter {n} has not changed." 
@patch("transformers.generation.utils.GenerationMixin.generate") - def test_training_with_mask_truncated_completions(self, mock_generate): + def test_training_with_mask_truncated_completions(self, mock_generate, model_id): """Test that training works with mask_truncated_completions=True parameter.""" # We mock the generate method because the model's random weights make it extremely unlikely to produce a @@ -824,7 +833,7 @@ def fake_generate(input_ids, **kwargs): report_to="none", ) trainer = RLOOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -841,7 +850,7 @@ def fake_generate(input_ids, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." - def test_training_with_mask_truncated_completions_all_masked(self): + def test_training_with_mask_truncated_completions_all_masked(self, model_id): """ Test that when all generated completions are truncated (i.e., none contain an EOS token), and mask_truncated_completions=True, the model receives no effective learning signal and therefore does not update @@ -862,7 +871,7 @@ def test_training_with_mask_truncated_completions_all_masked(self): report_to="none", ) trainer = RLOOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -879,7 +888,7 @@ def test_training_with_mask_truncated_completions_all_masked(self): new_param = trainer.model.get_parameter(n) assert torch.equal(param, new_param), f"Parameter {n} has changed." - def test_warning_raised_all_rewards_none(self, caplog): + def test_warning_raised_all_rewards_none(self, model_id, caplog): """Test that a proper warning is raised when all rewards are None.""" dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -896,7 +905,7 @@ def always_none_reward_func(completions, **kwargs): report_to="none", ) trainer = RLOOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs=always_none_reward_func, args=training_args, train_dataset=dataset, @@ -908,7 +917,7 @@ def always_none_reward_func(completions, **kwargs): expected_warning = "All reward functions returned None for the following kwargs:" assert expected_warning in caplog.text - def test_training_num_generations_larger_than_batch_size(self): + def test_training_num_generations_larger_than_batch_size(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") training_args = RLOOConfig( @@ -921,7 +930,7 @@ def test_training_num_generations_larger_than_batch_size(self): report_to="none", ) trainer = RLOOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -938,7 +947,7 @@ def test_training_num_generations_larger_than_batch_size(self): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." 
- def test_training_multiple_dataloader_workers(self): + def test_training_multiple_dataloader_workers(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") training_args = RLOOConfig( @@ -951,7 +960,7 @@ def test_training_multiple_dataloader_workers(self): report_to="none", ) trainer = RLOOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -968,7 +977,7 @@ def test_training_multiple_dataloader_workers(self): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." - def test_training_with_generation_kwargs(self): + def test_training_with_generation_kwargs(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") training_args = RLOOConfig( @@ -981,7 +990,7 @@ def test_training_with_generation_kwargs(self): report_to="none", ) trainer = RLOOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -998,7 +1007,7 @@ def test_training_with_generation_kwargs(self): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." - def test_training_with_reward_func_accessing_trainer_state(self): + def test_training_with_reward_func_accessing_trainer_state(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") def reward_func(completions, **kwargs): @@ -1016,14 +1025,14 @@ def reward_func(completions, **kwargs): report_to="none", ) trainer = RLOOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs=reward_func, args=training_args, train_dataset=dataset, ) trainer.train() - def test_prepare_input_called_with_correct_data(self): + def test_prepare_input_called_with_correct_data(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") training_args = RLOOConfig( output_dir=self.tmp_dir, @@ -1039,7 +1048,7 @@ def test_prepare_input_called_with_correct_data(self): report_to="none", ) trainer = RLOOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, @@ -1291,7 +1300,7 @@ def reward_func(completions, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." 
- def test_mismatched_reward_processing_classes_length(self): + def test_mismatched_reward_processing_classes_length(self, model_id): """Test that mismatched length between reward_funcs and reward_processing_classes raises error.""" dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -1310,14 +1319,14 @@ def test_mismatched_reward_processing_classes_length(self): with pytest.raises(ValueError, match="must match"): RLOOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs=reward_models, reward_processing_classes=single_processing_class, # only one, but need two args=training_args, train_dataset=dataset, ) - def test_correct_reward_processing_classes_list(self): + def test_correct_reward_processing_classes_list(self, model_id): """Test that correct list of reward_processing_classes works properly.""" dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -1339,7 +1348,7 @@ def test_correct_reward_processing_classes_list(self): correct_processing_classes = [processing_class1, processing_class2] trainer = RLOOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs=reward_models, reward_processing_classes=correct_processing_classes, args=training_args, @@ -1348,7 +1357,7 @@ def test_correct_reward_processing_classes_list(self): assert len(trainer.reward_processing_classes) == len(reward_models) - def test_single_reward_model_with_single_processing_class(self): + def test_single_reward_model_with_single_processing_class(self, model_id): """Test that single reward model with single processing class works.""" dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -1363,7 +1372,7 @@ def test_single_reward_model_with_single_processing_class(self): training_args = RLOOConfig(output_dir=self.tmp_dir, report_to="none") trainer = RLOOTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs=reward_model, reward_processing_classes=single_processing_class, # single object for single reward model args=training_args, From f11a09cfeb922551b41ac4fb3a7caf063805de6d Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Thu, 23 Oct 2025 18:07:49 +0200 Subject: [PATCH 12/18] Use model_id fixture in BCO tests --- tests/experimental/test_bco_trainer.py | 42 ++++++++++++-------------- 1 file changed, 20 insertions(+), 22 deletions(-) diff --git a/tests/experimental/test_bco_trainer.py b/tests/experimental/test_bco_trainer.py index 9e70fbac075..4738361ccd8 100644 --- a/tests/experimental/test_bco_trainer.py +++ b/tests/experimental/test_bco_trainer.py @@ -33,6 +33,15 @@ @pytest.mark.low_priority class TestBCOTrainer(TrlTestCase): + @pytest.fixture( + scope="class", + params=[ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def model_id(self, request): + return request.param + @pytest.mark.parametrize( "config_name", [ @@ -45,8 +54,7 @@ class TestBCOTrainer(TrlTestCase): ], ) @require_sklearn - def test_train(self, config_name): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + def test_train(self, config_name, model_id): model = AutoModelForCausalLM.from_pretrained(model_id) ref_model = AutoModelForCausalLM.from_pretrained(model_id) tokenizer = AutoTokenizer.from_pretrained(model_id) @@ -81,8 +89,7 @@ def test_train(self, config_name): assert not torch.equal(param.cpu(), new_param.cpu()) 
@require_sklearn - def test_train_with_precompute(self): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + def test_train_with_precompute(self, model_id): model = AutoModelForCausalLM.from_pretrained(model_id) ref_model = AutoModelForCausalLM.from_pretrained(model_id) tokenizer = AutoTokenizer.from_pretrained(model_id) @@ -118,8 +125,7 @@ def test_train_with_precompute(self): assert not torch.equal(param.cpu(), new_param.cpu()) @require_sklearn - def test_train_eval(self): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + def test_train_eval(self, model_id): model = AutoModelForCausalLM.from_pretrained(model_id) ref_model = AutoModelForCausalLM.from_pretrained(model_id) tokenizer = AutoTokenizer.from_pretrained(model_id) @@ -146,8 +152,7 @@ def test_train_eval(self): trainer.train() @require_sklearn - def test_init_with_ref_model_is_model(self): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + def test_init_with_ref_model_is_model(self, model_id): model = AutoModelForCausalLM.from_pretrained(model_id) tokenizer = AutoTokenizer.from_pretrained(model_id) @@ -169,8 +174,7 @@ def test_init_with_ref_model_is_model(self): ) @require_sklearn - def test_tokenize_and_process_tokens(self): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + def test_tokenize_and_process_tokens(self, model_id): model = AutoModelForCausalLM.from_pretrained(model_id) ref_model = AutoModelForCausalLM.from_pretrained(model_id) tokenizer = AutoTokenizer.from_pretrained(model_id) @@ -225,8 +229,7 @@ def test_tokenize_and_process_tokens(self): assert processed_dataset["completion_labels"][0] == [-100, -100, -100, -100, 27261, 13, 151645] @require_sklearn - def test_train_without_providing_ref_model(self): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + def test_train_without_providing_ref_model(self, model_id): model = AutoModelForCausalLM.from_pretrained(model_id) tokenizer = AutoTokenizer.from_pretrained(model_id) @@ -259,8 +262,7 @@ def test_train_without_providing_ref_model(self): assert not torch.equal(param.cpu(), new_param.cpu()) @require_sklearn - def test_train_udm(self): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + def test_train_udm(self, model_id): model = AutoModelForCausalLM.from_pretrained(model_id) tokenizer = AutoTokenizer.from_pretrained(model_id) @@ -309,8 +311,7 @@ def embed_prompt(input_ids, attention_mask, model): @require_sklearn @require_peft - def test_train_without_providing_ref_model_with_lora(self): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + def test_train_without_providing_ref_model_with_lora(self, model_id): model = AutoModelForCausalLM.from_pretrained(model_id) lora_config = LoraConfig(r=16, lora_alpha=32, lora_dropout=0.05, task_type="CAUSAL_LM") tokenizer = AutoTokenizer.from_pretrained(model_id) @@ -347,8 +348,7 @@ def test_train_without_providing_ref_model_with_lora(self): @require_sklearn @require_no_wandb - def test_generate_during_eval_no_wandb(self): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + def test_generate_during_eval_no_wandb(self, model_id): model = AutoModelForCausalLM.from_pretrained(model_id) tokenizer = AutoTokenizer.from_pretrained(model_id) @@ -378,8 +378,7 @@ def test_generate_during_eval_no_wandb(self): @require_sklearn @require_peft - def test_lora_train_and_save(self): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + def test_lora_train_and_save(self, model_id): model = AutoModelForCausalLM.from_pretrained(model_id) 
lora_config = LoraConfig(r=16, lora_alpha=32, lora_dropout=0.05, task_type="CAUSAL_LM") tokenizer = AutoTokenizer.from_pretrained(model_id) @@ -410,8 +409,7 @@ def test_lora_train_and_save(self): AutoModelForCausalLM.from_pretrained(self.tmp_dir) @require_sklearn - def test_compute_metrics(self): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + def test_compute_metrics(self, model_id): model = AutoModelForCausalLM.from_pretrained(model_id) ref_model = AutoModelForCausalLM.from_pretrained(model_id) tokenizer = AutoTokenizer.from_pretrained(model_id) From 9827e99106cbc1841e774a466782a17f3fce77af Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Thu, 23 Oct 2025 18:13:16 +0200 Subject: [PATCH 13/18] Use model_id fixture in trainers args tests --- tests/test_trainers_args.py | 39 ++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/tests/test_trainers_args.py b/tests/test_trainers_args.py index 2005b54337c..384c746da5d 100644 --- a/tests/test_trainers_args.py +++ b/tests/test_trainers_args.py @@ -43,9 +43,17 @@ class TestTrainerArg(TrlTestCase): + @pytest.fixture( + scope="class", + params=[ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def model_id(self, request): + return request.param + @require_sklearn - def test_bco(self): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + def test_bco(self, model_id): tokenizer = AutoTokenizer.from_pretrained(model_id) dataset = load_dataset("trl-internal-testing/zen", "standard_unpaired_preference", split="train") training_args = BCOConfig( @@ -91,8 +99,7 @@ def test_bco(self): assert trainer.args.min_density_ratio == 0.2 assert trainer.args.max_density_ratio == 20.0 - def test_cpo(self): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + def test_cpo(self, model_id): tokenizer = AutoTokenizer.from_pretrained(model_id) dataset = load_dataset("trl-internal-testing/zen", "standard_preference", split="train") training_args = CPOConfig( @@ -132,8 +139,7 @@ def test_cpo(self): assert trainer.args.model_init_kwargs == {"trust_remote_code": True} assert trainer.args.dataset_num_proc == 4 - def test_dpo(self): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + def test_dpo(self, model_id): tokenizer = AutoTokenizer.from_pretrained(model_id) dataset = load_dataset("trl-internal-testing/zen", "standard_preference", split="train") training_args = DPOConfig( @@ -199,8 +205,7 @@ def test_dpo(self): assert trainer.args.rpo_alpha == 0.5 assert trainer.args.discopop_tau == 0.1 - def test_kto(self): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + def test_kto(self, model_id): tokenizer = AutoTokenizer.from_pretrained(model_id) dataset = load_dataset("trl-internal-testing/zen", "standard_unpaired_preference", split="train") training_args = KTOConfig( @@ -245,8 +250,7 @@ def test_kto(self): assert trainer.args.dataset_num_proc == 4 @pytest.mark.parametrize("mixtures_coef_list", [False, True]) - def test_nash_md(self, mixtures_coef_list): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + def test_nash_md(self, mixtures_coef_list, model_id): tokenizer = AutoTokenizer.from_pretrained(model_id) model = AutoModelForCausalLM.from_pretrained(model_id) ref_model = AutoModelForCausalLM.from_pretrained(model_id) @@ -267,8 +271,7 @@ def test_nash_md(self, mixtures_coef_list): assert trainer.args.mixture_coef == (0.5 if not mixtures_coef_list else [0.5, 0.6]) 
@pytest.mark.parametrize("beta_list", [False, True]) - def test_online_dpo(self, beta_list): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + def test_online_dpo(self, beta_list, model_id): tokenizer = AutoTokenizer.from_pretrained(model_id) model = AutoModelForCausalLM.from_pretrained(model_id) ref_model = AutoModelForCausalLM.from_pretrained(model_id) @@ -297,8 +300,7 @@ def test_online_dpo(self, beta_list): assert trainer.args.beta == (0.6 if not beta_list else [0.6, 0.7]) assert trainer.args.loss_type == "hinge" - def test_orpo(self): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + def test_orpo(self, model_id): tokenizer = AutoTokenizer.from_pretrained(model_id) dataset = load_dataset("trl-internal-testing/zen", "standard_preference", split="train") training_args = ORPOConfig( @@ -324,8 +326,7 @@ def test_orpo(self): assert not trainer.args.disable_dropout assert trainer.args.label_pad_token_id == -99 - def test_reward(self): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + def test_reward(self, model_id): tokenizer = AutoTokenizer.from_pretrained(model_id) model = AutoModelForCausalLM.from_pretrained(model_id) dataset = load_dataset("trl-internal-testing/zen", "standard_preference", split="train") @@ -345,8 +346,7 @@ def test_reward(self): assert trainer.args.dataset_num_proc == 4 assert trainer.args.center_rewards_coefficient == 0.1 - def test_sft(self): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + def test_sft(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling", split="train") training_args = SFTConfig( self.tmp_dir, @@ -371,8 +371,7 @@ def test_sft(self): assert trainer.args.eval_packing @pytest.mark.parametrize("alpha_list", [False, True]) - def test_xpo(self, alpha_list): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + def test_xpo(self, alpha_list, model_id): tokenizer = AutoTokenizer.from_pretrained(model_id) model = AutoModelForCausalLM.from_pretrained(model_id) ref_model = AutoModelForCausalLM.from_pretrained(model_id) From 224e38836e8f1018853bb73a6157fc9b52242c75 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Thu, 23 Oct 2025 18:17:39 +0200 Subject: [PATCH 14/18] Use model_id fixture in CLI tests --- tests/test_cli.py | 37 +++++++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/tests/test_cli.py b/tests/test_cli.py index 48087f5054c..c3f463add4a 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -30,17 +30,26 @@ "to fail on Python <3.10.", # let's say it's a known issue, but not expected to be fixed, because too niche ) class TestCLI(TrlTestCase): - def test_dpo(self): + @pytest.fixture( + scope="class", + params=[ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def model_id(self, request): + return request.param + + def test_dpo(self, model_id): from trl.cli import main - command = f"trl dpo --output_dir {self.tmp_dir} --model_name_or_path trl-internal-testing/tiny-Qwen2ForCausalLM-2.5 --dataset_name trl-internal-testing/zen --dataset_config standard_preference --report_to none" + command = f"trl dpo --output_dir {self.tmp_dir} --model_name_or_path {model_id} --dataset_name trl-internal-testing/zen --dataset_config standard_preference --report_to none" with patch("sys.argv", command.split(" ")): main() - def test_dpo_multiple_loss_types(self): + def test_dpo_multiple_loss_types(self, model_id): from trl.cli import 
main - command = f"trl dpo --output_dir {self.tmp_dir} --model_name_or_path trl-internal-testing/tiny-Qwen2ForCausalLM-2.5 --dataset_name trl-internal-testing/zen --dataset_config standard_preference --report_to none --loss_type sigmoid bco_pair --loss_weights 1.0 0.5" + command = f"trl dpo --output_dir {self.tmp_dir} --model_name_or_path {model_id} --dataset_name trl-internal-testing/zen --dataset_config standard_preference --report_to none --loss_type sigmoid bco_pair --loss_weights 1.0 0.5" with patch("sys.argv", command.split(" ")): main() @@ -53,17 +62,17 @@ def test_env(self, mock_stdout): main() assert "TRL version: " in mock_stdout.getvalue().strip() - def test_grpo(self): + def test_grpo(self, model_id): from trl.cli import main - command = f"trl grpo --output_dir {self.tmp_dir} --model_name_or_path trl-internal-testing/tiny-Qwen2ForCausalLM-2.5 --reward_model_name_or_path trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5 --dataset_name trl-internal-testing/zen --dataset_config standard_prompt_only --num_generations 4 --max_completion_length 32 --report_to none" + command = f"trl grpo --output_dir {self.tmp_dir} --model_name_or_path {model_id} --reward_model_name_or_path trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5 --dataset_name trl-internal-testing/zen --dataset_config standard_prompt_only --num_generations 4 --max_completion_length 32 --report_to none" with patch("sys.argv", command.split(" ")): main() - def test_kto(self): + def test_kto(self, model_id): from trl.cli import main - command = f"trl kto --output_dir {self.tmp_dir} --model_name_or_path trl-internal-testing/tiny-Qwen2ForCausalLM-2.5 --dataset_name trl-internal-testing/zen --dataset_config standard_unpaired_preference --report_to none" + command = f"trl kto --output_dir {self.tmp_dir} --model_name_or_path {model_id} --dataset_name trl-internal-testing/zen --dataset_config standard_unpaired_preference --report_to none" with patch("sys.argv", command.split(" ")): main() @@ -74,21 +83,21 @@ def test_reward(self): with patch("sys.argv", command.split(" ")): main() - def test_rloo(self): + def test_rloo(self, model_id): from trl.cli import main - command = f"trl rloo --output_dir {self.tmp_dir} --model_name_or_path trl-internal-testing/tiny-Qwen2ForCausalLM-2.5 --reward_model_name_or_path trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5 --dataset_name trl-internal-testing/zen --dataset_config standard_prompt_only --num_generations 2 --max_completion_length 32 --report_to none" + command = f"trl rloo --output_dir {self.tmp_dir} --model_name_or_path {model_id} --reward_model_name_or_path trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5 --dataset_name trl-internal-testing/zen --dataset_config standard_prompt_only --num_generations 2 --max_completion_length 32 --report_to none" with patch("sys.argv", command.split(" ")): main() - def test_sft(self): + def test_sft(self, model_id): from trl.cli import main - command = f"trl sft --output_dir {self.tmp_dir} --model_name_or_path trl-internal-testing/tiny-Qwen2ForCausalLM-2.5 --dataset_name trl-internal-testing/zen --dataset_config standard_language_modeling --report_to none" + command = f"trl sft --output_dir {self.tmp_dir} --model_name_or_path {model_id} --dataset_name trl-internal-testing/zen --dataset_config standard_language_modeling --report_to none" with patch("sys.argv", command.split(" ")): main() - def test_sft_config_file(self): + def test_sft_config_file(self, model_id): from trl.cli import main output_dir = 
os.path.join(self.tmp_dir, "output") @@ -96,7 +105,7 @@ def test_sft_config_file(self): # Create a temporary config file config_path = os.path.join(self.tmp_dir, "config.yaml") config_content = { - "model_name_or_path": "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + "model_name_or_path": model_id, "dataset_name": "trl-internal-testing/zen", "dataset_config": "standard_language_modeling", "report_to": "none", From 9ab06fdaef79d1cd3ef041460134c12e55d95269 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Thu, 30 Oct 2025 11:09:14 +0100 Subject: [PATCH 15/18] Revert "Use fixture instead" This reverts commit bc1211006a79708cb9b8b2764d96448ca0c2a28a. --- tests/test_sft_trainer.py | 171 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 162 insertions(+), 9 deletions(-) diff --git a/tests/test_sft_trainer.py b/tests/test_sft_trainer.py index cdbba837797..b7e64dd37e1 100644 --- a/tests/test_sft_trainer.py +++ b/tests/test_sft_trainer.py @@ -256,15 +256,6 @@ def test_multiple_examples(self): class TestSFTTrainer(TrlTestCase): - @pytest.fixture( - scope="class", - params=[ - "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", - ], - ) - def model_id(self, request): - return request.param - @pytest.mark.parametrize( "model_id", [ @@ -320,6 +311,12 @@ def test_train_gpt_oss(self): new_param = trainer.model.get_parameter(n) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_train_model(self, model_id): # Instantiate the model model = AutoModelForCausalLM.from_pretrained(model_id) @@ -345,6 +342,12 @@ def test_train_model(self, model_id): new_param = trainer.model.get_parameter(n) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_train_dft_loss(self, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling") @@ -409,6 +412,12 @@ def test_train_moe_model_with_aux_loss(self): new_param = trainer.model.get_parameter(n) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_train_with_formatting_func(self, model_id): # Dummy formatting function def formatting_prompts_func(example): @@ -441,6 +450,12 @@ def formatting_prompts_func(example): new_param = trainer.model.get_parameter(n) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_train_model_dtype(self, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling", split="train") @@ -474,6 +489,12 @@ def test_train_model_dtype(self, model_id): assert new_param.dtype == torch.float16 assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_peft def test_train_dense_with_peft_config_lora(self, model_id): # Get the base model parameter names @@ -510,6 +531,12 @@ def test_train_dense_with_peft_config_lora(self, model_id): elif "base_layer" not in n: # We expect the peft parameters to be different 
(except for the base layer) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @pytest.mark.parametrize( "peft_type", [ @@ -607,6 +634,12 @@ def test_train_moe_with_peft_config(self): elif "base_layer" not in n: # We expect the peft parameters to be different (except for the base layer) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_peft def test_train_peft_model(self, model_id): # Get the base model @@ -643,6 +676,12 @@ def test_train_peft_model(self, model_id): elif "base_layer" not in n: # We expect the peft parameters to be different (except for the base layer) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_peft def test_train_dense_with_peft_config_and_gradient_checkpointing(self, model_id): # Get the base model parameter names @@ -716,6 +755,12 @@ def test_train_moe_with_peft_config_and_gradient_checkpointing(self): elif "base_layer" not in n: # We expect the peft parameters to be different (except for the base layer) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_peft def test_train_with_peft_model_and_gradient_checkpointing(self, model_id): # Get the base model parameter names @@ -751,6 +796,12 @@ def test_train_with_peft_model_and_gradient_checkpointing(self, model_id): elif "base_layer" not in n: # We expect the peft parameters to be different (except for the base layer) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_liger_kernel def test_train_with_liger(self, model_id): # Get the dataset @@ -774,6 +825,12 @@ def test_train_with_liger(self, model_id): new_param = trainer.model.get_parameter(n) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_train_with_non_chatml_conversational_data(self, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/zen", "conversational_language_modeling", split="train") @@ -802,6 +859,12 @@ def rename_fields(example: list[dict]): new_param = trainer.model.get_parameter(n) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_train_with_pretokenized_data(self, model_id): # Get the dataset tokenizer = AutoTokenizer.from_pretrained(model_id) @@ -831,6 +894,12 @@ def tokenize_example(example): new_param = trainer.model.get_parameter(n) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_train_with_iterable_dataset(self, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling", split="train", streaming=True) @@ -853,6 +922,12 @@ def 
test_train_with_iterable_dataset(self, model_id): new_param = trainer.model.get_parameter(n) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_flash_attn def test_train_padding_free(self, model_id): # Get the dataset @@ -882,6 +957,12 @@ def test_train_padding_free(self, model_id): new_param = trainer.model.get_parameter(n) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @pytest.mark.parametrize("packing_strategy", ["bfd", "wrapped"]) @ignore_warnings(message="You are using packing, but the attention implementation is not.*", category=UserWarning) @ignore_warnings(message="Padding-free training is enabled, but the attention.*", category=UserWarning) @@ -909,6 +990,12 @@ def test_train_packing(self, packing_strategy, model_id): new_param = trainer.model.get_parameter(n) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @ignore_warnings(message="You are using packing, but the attention implementation is not.*", category=UserWarning) @ignore_warnings(message="Padding-free training is enabled, but the attention.*", category=UserWarning) def test_eval_packing(self, model_id): @@ -943,6 +1030,12 @@ def test_eval_packing(self, model_id): assert len(trainer.train_dataset["input_ids"]) == 3 # w/ this dataset, we end up with 46 seqs assert len(trainer.eval_dataset["input_ids"]) == 1 # w/ this dataset, we end up with 6 seqs + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @ignore_warnings(message="You are using packing, but the attention implementation is not.*", category=UserWarning) @ignore_warnings(message="Padding-free training is enabled, but the attention.*", category=UserWarning) def test_only_train_packing(self, model_id): @@ -978,6 +1071,12 @@ def test_only_train_packing(self, model_id): assert len(trainer.train_dataset["input_ids"]) == 3 # w/ this dataset, we end up with 46 seqs assert len(trainer.eval_dataset["input_ids"]) == 2 # w/ this dataset, we end up with 6 seqs + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_train_with_chat_template_kwargs(self, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling", split="train") @@ -1204,6 +1303,12 @@ def test_train_with_set_chat_template_from_path(self): original_template_content = f.read() assert template_content == original_template_content, "Chat template content does not match the original" + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_train_toolcall_data(self, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/toolcall", split="train") @@ -1226,6 +1331,12 @@ def test_train_toolcall_data(self, model_id): new_param = trainer.model.get_parameter(n) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_train_with_eval(self, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling") 
@@ -1245,6 +1356,12 @@ def test_train_with_eval(self, model_id): # Check that the eval loss is not None assert trainer.state.log_history[0]["eval_loss"] is not None + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_train_with_multiple_eval_dataset(self, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling") @@ -1264,6 +1381,12 @@ def test_train_with_multiple_eval_dataset(self, model_id): assert trainer.state.log_history[-3]["eval_data1_loss"] is not None assert trainer.state.log_history[-2]["eval_data2_loss"] is not None + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_train_with_gradient_checkpointing(self, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling", split="train") @@ -1286,6 +1409,12 @@ def test_train_with_gradient_checkpointing(self, model_id): new_param = trainer.model.get_parameter(n) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed" + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_tag_added(self, model_id): # Get the dataset dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling", split="train") @@ -1296,6 +1425,12 @@ def test_tag_added(self, model_id): for tag in ["sft", "trl"]: assert tag in trainer.model.model_tags + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_peft def test_tag_added_peft(self, model_id): # Get the dataset @@ -1550,6 +1685,12 @@ def test_train_vlm_text_only_data(self, model_id): else: assert not torch.allclose(param, new_param, rtol=1e-12, atol=1e-12), f"Param {n} is not updated" + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_peft def test_prompt_tuning(self, model_id): """Test that SFT works with Prompt Tuning.""" @@ -1582,6 +1723,12 @@ def test_prompt_tuning(self, model_id): else: raise ValueError(f"Unexpected parameter {n} in model: {trainer.model}") + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_peft @require_bitsandbytes def test_peft_model_with_quantization(self, model_id): @@ -1675,6 +1822,12 @@ def test_peft_model_with_quantization(self, model_id): "All original LoRA parameters should remain trainable after SFTTrainer initialization" ) + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_peft def test_prompt_tuning_peft_model(self, model_id): """Test that SFT works with Prompt Tuning and a pre-converted PeftModel""" From 23395efb79dce63aab8b1df3d363f07fd19659a2 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Thu, 30 Oct 2025 11:14:21 +0100 Subject: [PATCH 16/18] Use explicit tiny-Qwen2ForCausalLM-2.5 model_id param in experimental trainer args tests --- tests/experimental/test_trainers_args.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tests/experimental/test_trainers_args.py b/tests/experimental/test_trainers_args.py index bd86bb61b5d..c04b291bae6 100644 --- a/tests/experimental/test_trainers_args.py +++ b/tests/experimental/test_trainers_args.py @@ -12,6 +12,7 @@ # See the License for the specific language governing 
permissions and # limitations under the License. +import pytest from datasets import load_dataset from transformers import AutoTokenizer @@ -21,9 +22,14 @@ class TestTrainerArg(TrlTestCase): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_sklearn - def test_bco(self): - model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" + def test_bco(self, model_id): tokenizer = AutoTokenizer.from_pretrained(model_id) dataset = load_dataset("trl-internal-testing/zen", "standard_unpaired_preference", split="train") training_args = BCOConfig( From 09347968dcf1ed9c917e4ad51b812a981f6165d3 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Thu, 30 Oct 2025 11:37:35 +0100 Subject: [PATCH 17/18] Replace model_id fixture with parameter --- tests/experimental/test_bco_trainer.py | 69 +++++++- tests/slow/test_grpo_slow.py | 12 +- tests/test_activation_offloading.py | 21 ++- tests/test_cli.py | 45 +++++- tests/test_dpo_trainer.py | 75 +++++++-- tests/test_grpo_trainer.py | 211 ++++++++++++++++++++++++- tests/test_kto_trainer.py | 15 +- tests/test_online_dpo_trainer.py | 15 +- tests/test_orpo_trainer.py | 15 +- tests/test_rloo_trainer.py | 183 ++++++++++++++++++++- tests/test_trainers_args.py | 57 ++++++- 11 files changed, 640 insertions(+), 78 deletions(-) diff --git a/tests/experimental/test_bco_trainer.py b/tests/experimental/test_bco_trainer.py index 4738361ccd8..ecbfdbb569e 100644 --- a/tests/experimental/test_bco_trainer.py +++ b/tests/experimental/test_bco_trainer.py @@ -33,15 +33,12 @@ @pytest.mark.low_priority class TestBCOTrainer(TrlTestCase): - @pytest.fixture( - scope="class", - params=[ + @pytest.mark.parametrize( + "model_id", + [ "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", ], ) - def model_id(self, request): - return request.param - @pytest.mark.parametrize( "config_name", [ @@ -88,6 +85,12 @@ def test_train(self, config_name, model_id): if param.sum() != 0: # ignore 0 biases assert not torch.equal(param.cpu(), new_param.cpu()) + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_sklearn def test_train_with_precompute(self, model_id): model = AutoModelForCausalLM.from_pretrained(model_id) @@ -124,6 +127,12 @@ def test_train_with_precompute(self, model_id): if param.sum() != 0: # ignore 0 biases assert not torch.equal(param.cpu(), new_param.cpu()) + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_sklearn def test_train_eval(self, model_id): model = AutoModelForCausalLM.from_pretrained(model_id) @@ -151,6 +160,12 @@ def test_train_eval(self, model_id): trainer.train() + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_sklearn def test_init_with_ref_model_is_model(self, model_id): model = AutoModelForCausalLM.from_pretrained(model_id) @@ -173,6 +188,12 @@ def test_init_with_ref_model_is_model(self, model_id): train_dataset=dataset, ) + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_sklearn def test_tokenize_and_process_tokens(self, model_id): model = AutoModelForCausalLM.from_pretrained(model_id) @@ -228,6 +249,12 @@ def test_tokenize_and_process_tokens(self, model_id): assert processed_dataset["completion_attention_mask"][0] == [1, 1, 1, 1, 1, 1, 1] assert processed_dataset["completion_labels"][0] == 
[-100, -100, -100, -100, 27261, 13, 151645] + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_sklearn def test_train_without_providing_ref_model(self, model_id): model = AutoModelForCausalLM.from_pretrained(model_id) @@ -261,6 +288,12 @@ def test_train_without_providing_ref_model(self, model_id): if param.sum() != 0: # ignore 0 biases assert not torch.equal(param.cpu(), new_param.cpu()) + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_sklearn def test_train_udm(self, model_id): model = AutoModelForCausalLM.from_pretrained(model_id) @@ -309,6 +342,12 @@ def embed_prompt(input_ids, attention_mask, model): if param.sum() != 0: # ignore 0 biases assert not torch.equal(param.cpu(), new_param.cpu()) + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_sklearn @require_peft def test_train_without_providing_ref_model_with_lora(self, model_id): @@ -346,6 +385,12 @@ def test_train_without_providing_ref_model_with_lora(self, model_id): if param.sum() != 0: # ignore 0 biases assert not torch.equal(param.cpu(), new_param.cpu()) + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_sklearn @require_no_wandb def test_generate_during_eval_no_wandb(self, model_id): @@ -376,6 +421,12 @@ def test_generate_during_eval_no_wandb(self, model_id): eval_dataset=dataset["test"], ) + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_sklearn @require_peft def test_lora_train_and_save(self, model_id): @@ -408,6 +459,12 @@ def test_lora_train_and_save(self, model_id): # assert that the model is loaded without giving OSError AutoModelForCausalLM.from_pretrained(self.tmp_dir) + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_sklearn def test_compute_metrics(self, model_id): model = AutoModelForCausalLM.from_pretrained(model_id) diff --git a/tests/slow/test_grpo_slow.py b/tests/slow/test_grpo_slow.py index bf63984d645..4f14f1646e8 100644 --- a/tests/slow/test_grpo_slow.py +++ b/tests/slow/test_grpo_slow.py @@ -341,10 +341,16 @@ def reward_func(prompts, completions, **kwargs): release_memory(model, trainer) + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_vllm @require_bitsandbytes @require_peft - def test_vlm_processor_vllm_colocate_mode(self): + def test_vlm_processor_vllm_colocate_mode(self, model_id): """ Test that VLM processors work with vLLM in colocate mode. 
@@ -423,9 +429,7 @@ def dummy_reward_func(completions, **kwargs): try: # Load model with quantization for memory efficiency model = AutoModelForCausalLM.from_pretrained( - "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", - quantization_config=quantization_config, - dtype=torch.bfloat16, + model_id, quantization_config=quantization_config, dtype=torch.bfloat16 ) trainer = GRPOTrainer( diff --git a/tests/test_activation_offloading.py b/tests/test_activation_offloading.py index 15cef523de8..6e8d8e2e937 100644 --- a/tests/test_activation_offloading.py +++ b/tests/test_activation_offloading.py @@ -28,15 +28,12 @@ class TestActivationOffloading(TrlTestCase): - @pytest.fixture( - scope="class", - params=[ + @pytest.mark.parametrize( + "model_id", + [ "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", ], ) - def model_id(self, request): - return request.param - @require_torch_accelerator @require_peft def test_offloading_with_peft_models(self, model_id) -> None: @@ -83,6 +80,12 @@ def test_offloading_with_peft_models(self, model_id) -> None: f"Gradient mismatch for {name_orig}" ) + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_torch_accelerator def test_noop_manager_with_offloading(self, model_id): model = AutoModelForCausalLM.from_pretrained(model_id).to(torch_device) @@ -129,6 +132,12 @@ def test_min_offload_size(self): # The test passes if no errors occur, as we're mainly testing # that the logic handles both offloaded and non-offloaded tensors + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_torch_accelerator def test_real_hf_model(self, model_id): """Test with an actual HuggingFace model""" diff --git a/tests/test_cli.py b/tests/test_cli.py index c3f463add4a..3c96e579e51 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -30,15 +30,12 @@ "to fail on Python <3.10.", # let's say it's a known issue, but not expected to be fixed, because too niche ) class TestCLI(TrlTestCase): - @pytest.fixture( - scope="class", - params=[ + @pytest.mark.parametrize( + "model_id", + [ "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", ], ) - def model_id(self, request): - return request.param - def test_dpo(self, model_id): from trl.cli import main @@ -46,6 +43,12 @@ def test_dpo(self, model_id): with patch("sys.argv", command.split(" ")): main() + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_dpo_multiple_loss_types(self, model_id): from trl.cli import main @@ -62,6 +65,12 @@ def test_env(self, mock_stdout): main() assert "TRL version: " in mock_stdout.getvalue().strip() + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_grpo(self, model_id): from trl.cli import main @@ -69,6 +78,12 @@ def test_grpo(self, model_id): with patch("sys.argv", command.split(" ")): main() + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_kto(self, model_id): from trl.cli import main @@ -83,6 +98,12 @@ def test_reward(self): with patch("sys.argv", command.split(" ")): main() + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_rloo(self, model_id): from trl.cli import main @@ -90,6 +111,12 @@ def test_rloo(self, model_id): with patch("sys.argv", command.split(" ")): main() + @pytest.mark.parametrize( + "model_id", + [ + 
"trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_sft(self, model_id): from trl.cli import main @@ -97,6 +124,12 @@ def test_sft(self, model_id): with patch("sys.argv", command.split(" ")): main() + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_sft_config_file(self, model_id): from trl.cli import main diff --git a/tests/test_dpo_trainer.py b/tests/test_dpo_trainer.py index 17ac7b224b8..223fb70bf4f 100644 --- a/tests/test_dpo_trainer.py +++ b/tests/test_dpo_trainer.py @@ -153,15 +153,6 @@ def test_tokenize_row_with_truncation_and_special_tokens(self): class TestDPOTrainer(TrlTestCase): - @pytest.fixture( - scope="class", - params=[ - "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", - ], - ) - def model_id(self, request): - return request.param - def setup_method(self): self.model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" self.model = AutoModelForCausalLM.from_pretrained(self.model_id) @@ -169,6 +160,12 @@ def setup_method(self): self.tokenizer = AutoTokenizer.from_pretrained(self.model_id) self.tokenizer.pad_token = self.tokenizer.eos_token + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_train(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_preference", split="train") tokenizer = AutoTokenizer.from_pretrained(model_id) @@ -197,6 +194,12 @@ def test_train(self, model_id): if param.sum() != 0: # ignore 0 biases assert not torch.allclose(param, new_param, rtol=1e-12, atol=1e-12) + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @pytest.mark.parametrize( "loss_type", [ @@ -308,6 +311,12 @@ def test_dpo_trainer_with_weighting(self): if param.sum() != 0: # ignore 0 biases assert not torch.allclose(param, new_param, rtol=1e-12, atol=1e-12) + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_train_with_multiple_loss_types(self, model_id): """ Tests multi-loss combinations, loss type inference, and weight configuration. 
MPO combines DPO (sigmoid), BCO @@ -641,6 +650,12 @@ def test_dpo_lora_save(self): except OSError: pytest.fail("Loading the saved peft adapter failed") + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_peft @require_torch_gpu_if_bnb_not_multi_backend_enabled def test_dpo_lora_bf16_autocast_llama(self, model_id): @@ -782,6 +797,12 @@ def test_dpo_lora_bf16_autocast(self, loss_type, pre_compute, gen_during_eval): # save peft adapter trainer.save_model() + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_peft def test_dpo_lora_tags(self, model_id): from peft import LoraConfig @@ -827,6 +848,12 @@ def test_dpo_lora_tags(self, model_id): for tag in ["dpo", "trl"]: assert tag in trainer.model.model_tags + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_peft def test_dpo_tags(self, model_id): tokenizer = AutoTokenizer.from_pretrained(model_id) @@ -998,6 +1025,12 @@ def test_dpo_trainer_dtype(self): train_dataset=dummy_dataset["train"], ) + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_dpo_loss_alpha_div_f(self, model_id): tokenizer = AutoTokenizer.from_pretrained(model_id) @@ -1038,6 +1071,12 @@ def test_dpo_loss_alpha_div_f(self, model_id): ) assert torch.isfinite(losses).cpu().numpy().all() + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_dpo_loss_js_div_f(self, model_id): tokenizer = AutoTokenizer.from_pretrained(model_id) @@ -1231,6 +1270,12 @@ def test_padding_free(self): if param.sum() != 0: # ignore 0 biases assert not torch.allclose(param, new_param, rtol=1e-12, atol=1e-12) + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_compute_metrics(self, model_id): model = AutoModelForCausalLM.from_pretrained(model_id) ref_model = AutoModelForCausalLM.from_pretrained(model_id) @@ -1266,6 +1311,12 @@ def dummy_compute_metrics(*args, **kwargs): assert trainer.state.log_history[-2]["eval_test"] == 0.0 + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_train_with_length_desensitization(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_preference", split="train") tokenizer = AutoTokenizer.from_pretrained(model_id) @@ -1379,6 +1430,12 @@ def test_dpo_trainer_with_liger(self, beta, loss_type): assert output is not None assert "loss" not in output.keys() + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_train_with_iterable_dataset(self, model_id): dataset = load_dataset( "trl-internal-testing/zen", diff --git a/tests/test_grpo_trainer.py b/tests/test_grpo_trainer.py index 88a2dac7b83..01e5d646872 100644 --- a/tests/test_grpo_trainer.py +++ b/tests/test_grpo_trainer.py @@ -108,15 +108,12 @@ def test_compute_entropy_all_masked(self): class TestGRPOTrainer(TrlTestCase): - @pytest.fixture( - scope="class", - params=[ + @pytest.mark.parametrize( + "model_id", + [ "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", ], ) - def model_id(self, request): - return request.param - def test_init_minimal(self, model_id): # Test that GRPOTrainer can be instantiated with only model, reward_model and train_dataset dataset = load_dataset("trl-internal-testing/zen", 
"standard_prompt_only", split="train") @@ -126,6 +123,12 @@ def test_init_minimal(self, model_id): train_dataset=dataset, ) + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @pytest.mark.parametrize("config_name", ["standard_prompt_only", "conversational_prompt_only"]) def test_training(self, config_name, model_id): dataset = load_dataset("trl-internal-testing/zen", config_name, split="train") @@ -156,6 +159,12 @@ def test_training(self, config_name, model_id): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @pytest.mark.parametrize("loss_type", ["bnpo", "dr_grpo", "dapo"]) def test_training_loss_types(self, loss_type, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -188,6 +197,12 @@ def test_training_loss_types(self, loss_type, model_id): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_training_with_eval(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only") @@ -211,6 +226,12 @@ def test_training_with_eval(self, model_id): trainer.train() + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_training_multiple_iterations(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -241,6 +262,12 @@ def test_training_multiple_iterations(self, model_id): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_peft def test_training_peft(self, model_id): model = AutoModelForCausalLM.from_pretrained(model_id) @@ -277,6 +304,12 @@ def test_training_peft(self, model_id): elif "base_layer" not in n: # We expect the peft params to be different (except for the base layer) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed." + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_peft def test_training_peft_with_gradient_checkpointing(self, model_id): """Test that training works with PEFT and gradient checkpointing enabled.""" @@ -326,6 +359,12 @@ def test_training_peft_with_gradient_checkpointing(self, model_id): else: # Base model parameters should not change assert torch.equal(param, new_param), f"Base parameter {n} has changed." + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_training_different_reward_model(self, model_id): # Use a reward model different from the model: different chat template, tokenization, etc. dataset = load_dataset("trl-internal-testing/zen", "conversational_prompt_only", split="train") @@ -365,6 +404,12 @@ def test_training_different_reward_model(self, model_id): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." 
+ @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_training_reward_func_standard(self, model_id): # Test if trainer can handle reward function with standard format dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -399,6 +444,12 @@ def reward_func(completions, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_training_reward_func_conversational(self, model_id): # Test if trainer can handle reward function with conversational format dataset = load_dataset("trl-internal-testing/zen", "conversational_prompt_only", split="train") @@ -434,6 +485,12 @@ def reward_func(completions, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_training_multiple_reward_funcs(self, model_id): # Test that GRPOTrainer can be instantiated with multiple reward functions dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -472,6 +529,12 @@ def reward_func2(completions, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_training_multiple_reward_funcs_with_None_output(self, model_id): """Test that a valid math reward function is processed correctly while the code reward function returns None.""" dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -516,6 +579,12 @@ def non_applicable_reward_func(completions, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_training_multiple_reward_funcs_with_weights(self, model_id): """Test that GRPOTrainer can handle multiple reward functions with weights.""" dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -560,6 +629,12 @@ def reward_func2(completions, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_training_multiple_mixed_reward_funcs(self, model_id): # Test if the trainer can handle a mix of reward functions and reward models dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -594,6 +669,12 @@ def reward_func(completions, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." 
+ @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_training_reward_func_additional_column(self, model_id): # Test if trainer can handle reward function that rely on additional columns in the dataset dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -632,6 +713,12 @@ def reward_func(completions, some_values, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_training_with_sync_ref_model(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -663,6 +750,12 @@ def test_training_with_sync_ref_model(self, model_id): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_training_beta_non_zero(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") training_args = GRPOConfig( @@ -692,6 +785,12 @@ def test_training_beta_non_zero(self, model_id): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_training_with_entropy_filter(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") training_args = GRPOConfig( @@ -836,6 +935,12 @@ def test_training_vllm_importance_sampling_correction(self): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_training_with_additional_generation_kwargs(self, model_id): """Test that training works with additional generation kwargs.""" dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -909,6 +1014,12 @@ def test_training_vllm_with_additional_generation_kwargs(self): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @pytest.mark.parametrize("scale_rewards", [False, "group", "batch", True, "none"]) def test_training_scale_rewards(self, scale_rewards, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -940,6 +1051,12 @@ def test_training_scale_rewards(self, scale_rewards, model_id): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @patch("transformers.generation.utils.GenerationMixin.generate") def test_training_with_mask_truncated_completions(self, mock_generate, model_id): """Test that training works with mask_truncated_completions=True parameter.""" @@ -990,6 +1107,12 @@ def fake_generate(input_ids, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." 
+ @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_training_with_mask_truncated_completions_all_masked(self, model_id): """ Test that when all generated completions are truncated (i.e., none contain an EOS token), and @@ -1028,6 +1151,12 @@ def test_training_with_mask_truncated_completions_all_masked(self, model_id): new_param = trainer.model.get_parameter(n) assert torch.equal(param, new_param), f"Parameter {n} has changed." + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_warning_raised_all_rewards_none(self, model_id, caplog): """Test that a proper warning is raised when all rewards are None.""" dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -1057,6 +1186,12 @@ def always_none_reward_func(completions, **kwargs): expected_warning = "All reward functions returned None for the following kwargs:" assert expected_warning in caplog.text + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_training_num_generations_larger_than_batch_size(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -1087,6 +1222,12 @@ def test_training_num_generations_larger_than_batch_size(self, model_id): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_training_delta_clipping(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -1117,6 +1258,12 @@ def test_training_delta_clipping(self, model_id): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_training_multiple_dataloader_workers(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -1147,6 +1294,12 @@ def test_training_multiple_dataloader_workers(self, model_id): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_training_with_generation_kwargs(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -1177,6 +1330,12 @@ def test_training_with_generation_kwargs(self, model_id): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." 
+ @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_training_with_reward_func_accessing_trainer_state(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -1202,6 +1361,12 @@ def reward_func(completions, **kwargs): ) trainer.train() + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_prepare_input_called_with_correct_data(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") training_args = GRPOConfig( @@ -1584,6 +1749,12 @@ def reward_func(completions, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_training_sequence_importance_sampling(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -1645,6 +1816,12 @@ def test_training_with_chat_template_kwargs(self): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_mismatched_reward_processing_classes_length(self, model_id): """Test that mismatched length between reward_funcs and reward_processing_classes raises error.""" dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -1671,6 +1848,12 @@ def test_mismatched_reward_processing_classes_length(self, model_id): train_dataset=dataset, ) + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_correct_reward_processing_classes_list(self, model_id): """Test that correct list of reward_processing_classes works properly.""" dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -1702,6 +1885,12 @@ def test_correct_reward_processing_classes_list(self, model_id): assert len(trainer.reward_processing_classes) == len(reward_models) + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_single_reward_model_with_single_processing_class(self, model_id): """Test that single reward model with single processing class works.""" dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -1729,7 +1918,13 @@ def test_single_reward_model_with_single_processing_class(self, model_id): class TestGSPOTokenTrainer(TrlTestCase): - def test_training(self): + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) + def test_training(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") training_args = GRPOConfig( @@ -1743,7 +1938,7 @@ def test_training(self): report_to="none", ) trainer = GSPOTokenTrainer( - model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + model=model_id, reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5", args=training_args, train_dataset=dataset, diff --git a/tests/test_kto_trainer.py b/tests/test_kto_trainer.py index f58a02ef8d4..7bc6278765e 100644 --- a/tests/test_kto_trainer.py +++ b/tests/test_kto_trainer.py @@ -25,15 +25,6 @@ class TestKTOTrainer(TrlTestCase): - 
@pytest.fixture( - scope="class", - params=[ - "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", - ], - ) - def model_id(self, request): - return request.param - def setup_method(self): self.model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" self.model = AutoModelForCausalLM.from_pretrained(self.model_id) @@ -399,6 +390,12 @@ def test_kto_trainer_with_liger(self): if param.sum() != 0: assert not torch.equal(param, new_param) + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_compute_metrics(self, model_id): model = AutoModelForCausalLM.from_pretrained(model_id) ref_model = AutoModelForCausalLM.from_pretrained(model_id) diff --git a/tests/test_online_dpo_trainer.py b/tests/test_online_dpo_trainer.py index d2ea8690d05..a34aab347c7 100644 --- a/tests/test_online_dpo_trainer.py +++ b/tests/test_online_dpo_trainer.py @@ -42,15 +42,6 @@ class TestOnlineDPOTrainer(TrlTestCase): - @pytest.fixture( - scope="class", - params=[ - "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", - ], - ) - def model_id(self, request): - return request.param - def setup_method(self): self.model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" self.model = AutoModelForCausalLM.from_pretrained(self.model_id) @@ -89,6 +80,12 @@ def test_training(self, config_name): # Check if training loss is available assert "train_loss" in trainer.state.log_history[-1] + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_training_model_str(self, model_id): training_args = OnlineDPOConfig( output_dir=self.tmp_dir, diff --git a/tests/test_orpo_trainer.py b/tests/test_orpo_trainer.py index 48159662be5..6d3d4549c23 100644 --- a/tests/test_orpo_trainer.py +++ b/tests/test_orpo_trainer.py @@ -23,15 +23,6 @@ class TestORPOTrainer(TrlTestCase): - @pytest.fixture( - scope="class", - params=[ - "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", - ], - ) - def model_id(self, request): - return request.param - def setup_method(self): self.model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" self.model = AutoModelForCausalLM.from_pretrained(self.model_id) @@ -153,6 +144,12 @@ def test_orpo_trainer_with_lora(self, config_name): if param.sum() != 0: # ignore 0 biases assert not torch.equal(param, new_param) + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_compute_metrics(self, model_id): model = AutoModelForCausalLM.from_pretrained(model_id) tokenizer = AutoTokenizer.from_pretrained(model_id) diff --git a/tests/test_rloo_trainer.py b/tests/test_rloo_trainer.py index f146c2905c6..42492ab8546 100644 --- a/tests/test_rloo_trainer.py +++ b/tests/test_rloo_trainer.py @@ -35,15 +35,12 @@ class TestRLOOTrainer(TrlTestCase): - @pytest.fixture( - scope="class", - params=[ + @pytest.mark.parametrize( + "model_id", + [ "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", ], ) - def model_id(self, request): - return request.param - def test_init_minimal(self, model_id): # Test that RLOOTrainer can be instantiated with only model, reward_model and train_dataset dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -53,6 +50,12 @@ def test_init_minimal(self, model_id): train_dataset=dataset, ) + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @pytest.mark.parametrize("config_name", ["standard_prompt_only", "conversational_prompt_only"]) def test_training(self, 
config_name, model_id): dataset = load_dataset("trl-internal-testing/zen", config_name, split="train") @@ -83,6 +86,12 @@ def test_training(self, config_name, model_id): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_training_with_eval(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only") @@ -106,6 +115,12 @@ def test_training_with_eval(self, model_id): trainer.train() + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_training_multiple_iterations(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -136,6 +151,12 @@ def test_training_multiple_iterations(self, model_id): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_peft def test_training_peft(self, model_id): model = AutoModelForCausalLM.from_pretrained(model_id) @@ -172,6 +193,12 @@ def test_training_peft(self, model_id): elif "base_layer" not in n: # We expect the peft params to be different (except for the base layer) assert not torch.allclose(param, new_param), f"Parameter {n} has not changed." + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) @require_peft def test_training_peft_with_gradient_checkpointing(self, model_id): """Test that training works with PEFT and gradient checkpointing enabled.""" @@ -221,6 +248,12 @@ def test_training_peft_with_gradient_checkpointing(self, model_id): else: # Base model parameters should not change assert torch.equal(param, new_param), f"Base parameter {n} has changed." + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_training_different_reward_model(self, model_id): # Use a reward model different from the model: different chat template, tokenization, etc. dataset = load_dataset("trl-internal-testing/zen", "conversational_prompt_only", split="train") @@ -260,6 +293,12 @@ def test_training_different_reward_model(self, model_id): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_training_reward_func_standard(self, model_id): # Test if trainer can handle reward function with standard format dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -294,6 +333,12 @@ def reward_func(completions, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_training_reward_func_conversational(self, model_id): # Test if trainer can handle reward function with conversational format dataset = load_dataset("trl-internal-testing/zen", "conversational_prompt_only", split="train") @@ -329,6 +374,12 @@ def reward_func(completions, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." 
+ @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_training_multiple_reward_funcs(self, model_id): # Test that RLOOTrainer can be instantiated with multiple reward functions dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -367,6 +418,12 @@ def reward_func2(completions, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_training_multiple_reward_funcs_with_None_output(self, model_id): """Test that a valid math reward function is processed correctly while the code reward function returns None.""" dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -411,6 +468,12 @@ def non_applicable_reward_func(completions, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_training_multiple_reward_funcs_with_weights(self, model_id): """Test that RLOOTrainer can handle multiple reward functions with weights.""" dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -455,6 +518,12 @@ def reward_func2(completions, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_training_multiple_mixed_reward_funcs(self, model_id): # Test if the trainer can handle a mix of reward functions and reward models dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -489,6 +558,12 @@ def reward_func(completions, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_training_reward_func_additional_column(self, model_id): # Test if trainer can handle reward function that rely on additional columns in the dataset dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -527,6 +602,12 @@ def reward_func(completions, some_values, **kwargs): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." + @pytest.mark.parametrize( + "model_id", + [ + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + ], + ) def test_training_with_sync_ref_model(self, model_id): dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train") @@ -558,6 +639,12 @@ def test_training_with_sync_ref_model(self, model_id): new_param = trainer.model.get_parameter(n) assert not torch.equal(param, new_param), f"Parameter {n} has not changed." 
+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
     def test_training_beta_zero(self, model_id):
         dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train")
         training_args = RLOOConfig(
@@ -667,6 +754,12 @@ def test_training_vllm_guided_decoding(self):
         new_param = trainer.model.get_parameter(n)
         assert not torch.equal(param, new_param), f"Parameter {n} has not changed."
 
+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
     def test_training_with_additional_generation_kwargs(self, model_id):
         """Test that training works with additional generation kwargs."""
         dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train")
@@ -740,6 +833,12 @@ def test_training_vllm_with_additional_generation_kwargs(self):
         new_param = trainer.model.get_parameter(n)
         assert not torch.equal(param, new_param), f"Parameter {n} has not changed."
 
+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
     def test_training_with_normalized_advantages(self, model_id):
         dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train")
 
@@ -770,6 +869,12 @@ def test_training_with_normalized_advantages(self, model_id):
         new_param = trainer.model.get_parameter(n)
         assert not torch.equal(param, new_param), f"Parameter {n} has not changed."
 
+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
     def test_training_with_clipped_rewards(self, model_id):
         dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train")
 
@@ -800,6 +905,12 @@ def test_training_with_clipped_rewards(self, model_id):
         new_param = trainer.model.get_parameter(n)
         assert not torch.equal(param, new_param), f"Parameter {n} has not changed."
 
+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
     @patch("transformers.generation.utils.GenerationMixin.generate")
     def test_training_with_mask_truncated_completions(self, mock_generate, model_id):
         """Test that training works with mask_truncated_completions=True parameter."""
@@ -850,6 +961,12 @@ def fake_generate(input_ids, **kwargs):
         new_param = trainer.model.get_parameter(n)
         assert not torch.equal(param, new_param), f"Parameter {n} has not changed."
 
+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
     def test_training_with_mask_truncated_completions_all_masked(self, model_id):
         """
         Test that when all generated completions are truncated (i.e., none contain an EOS token), and
@@ -888,6 +1005,12 @@ def test_training_with_mask_truncated_completions_all_masked(self, model_id):
         new_param = trainer.model.get_parameter(n)
         assert torch.equal(param, new_param), f"Parameter {n} has changed."
+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
     def test_warning_raised_all_rewards_none(self, model_id, caplog):
         """Test that a proper warning is raised when all rewards are None."""
         dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train")
@@ -917,6 +1040,12 @@ def always_none_reward_func(completions, **kwargs):
         expected_warning = "All reward functions returned None for the following kwargs:"
         assert expected_warning in caplog.text
 
+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
     def test_training_num_generations_larger_than_batch_size(self, model_id):
         dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train")
 
@@ -947,6 +1076,12 @@ def test_training_num_generations_larger_than_batch_size(self, model_id):
         new_param = trainer.model.get_parameter(n)
         assert not torch.equal(param, new_param), f"Parameter {n} has not changed."
 
+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
     def test_training_multiple_dataloader_workers(self, model_id):
         dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train")
 
@@ -977,6 +1112,12 @@ def test_training_multiple_dataloader_workers(self, model_id):
         new_param = trainer.model.get_parameter(n)
         assert not torch.equal(param, new_param), f"Parameter {n} has not changed."
 
+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
     def test_training_with_generation_kwargs(self, model_id):
         dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train")
 
@@ -1007,6 +1148,12 @@ def test_training_with_generation_kwargs(self, model_id):
         new_param = trainer.model.get_parameter(n)
         assert not torch.equal(param, new_param), f"Parameter {n} has not changed."
 
+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
     def test_training_with_reward_func_accessing_trainer_state(self, model_id):
         dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train")
 
@@ -1032,6 +1179,12 @@ def reward_func(completions, **kwargs):
         )
         trainer.train()
 
+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
     def test_prepare_input_called_with_correct_data(self, model_id):
         dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train")
         training_args = RLOOConfig(
@@ -1347,6 +1500,12 @@ def test_training_with_chat_template_kwargs(self):
         new_param = trainer.model.get_parameter(n)
         assert not torch.equal(param, new_param), f"Parameter {n} has not changed."
+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
     def test_mismatched_reward_processing_classes_length(self, model_id):
         """Test that mismatched length between reward_funcs and reward_processing_classes raises error."""
         dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train")
@@ -1373,6 +1532,12 @@ def test_mismatched_reward_processing_classes_length(self, model_id):
                 train_dataset=dataset,
             )
 
+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
     def test_correct_reward_processing_classes_list(self, model_id):
         """Test that correct list of reward_processing_classes works properly."""
         dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train")
@@ -1404,6 +1569,12 @@ def test_correct_reward_processing_classes_list(self, model_id):
 
         assert len(trainer.reward_processing_classes) == len(reward_models)
 
+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
     def test_single_reward_model_with_single_processing_class(self, model_id):
         """Test that single reward model with single processing class works."""
         dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train")
diff --git a/tests/test_trainers_args.py b/tests/test_trainers_args.py
index de35fb5d07e..edbcaf9c04c 100644
--- a/tests/test_trainers_args.py
+++ b/tests/test_trainers_args.py
@@ -41,15 +41,12 @@ class TestTrainerArg(TrlTestCase):
-    @pytest.fixture(
-        scope="class",
-        params=[
+    @pytest.mark.parametrize(
+        "model_id",
+        [
             "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
         ],
     )
-    def model_id(self, request):
-        return request.param
-
     def test_cpo(self, model_id):
         tokenizer = AutoTokenizer.from_pretrained(model_id)
         dataset = load_dataset("trl-internal-testing/zen", "standard_preference", split="train")
@@ -90,6 +87,12 @@ def test_cpo(self, model_id):
         assert trainer.args.model_init_kwargs == {"trust_remote_code": True}
         assert trainer.args.dataset_num_proc == 4
 
+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
     def test_dpo(self, model_id):
         tokenizer = AutoTokenizer.from_pretrained(model_id)
         dataset = load_dataset("trl-internal-testing/zen", "standard_preference", split="train")
@@ -156,6 +159,12 @@ def test_dpo(self, model_id):
         assert trainer.args.rpo_alpha == 0.5
         assert trainer.args.discopop_tau == 0.1
 
+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
     def test_kto(self, model_id):
         tokenizer = AutoTokenizer.from_pretrained(model_id)
         dataset = load_dataset("trl-internal-testing/zen", "standard_unpaired_preference", split="train")
@@ -200,6 +209,12 @@ def test_kto(self, model_id):
         assert trainer.args.ref_model_init_kwargs == {"trust_remote_code": True}
         assert trainer.args.dataset_num_proc == 4
 
+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
     @pytest.mark.parametrize("mixtures_coef_list", [False, True])
     def test_nash_md(self, mixtures_coef_list, model_id):
         tokenizer = AutoTokenizer.from_pretrained(model_id)
@@ -221,6 +236,12 @@ def test_nash_md(self, mixtures_coef_list, model_id):
         )
         assert trainer.args.mixture_coef == (0.5 if not mixtures_coef_list else [0.5, 0.6])
 
+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
     @pytest.mark.parametrize("beta_list", [False, True])
     def test_online_dpo(self, beta_list, model_id):
         tokenizer = AutoTokenizer.from_pretrained(model_id)
@@ -251,6 +272,12 @@ def test_online_dpo(self, beta_list, model_id):
         assert trainer.args.beta == (0.6 if not beta_list else [0.6, 0.7])
         assert trainer.args.loss_type == "hinge"
 
+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
     def test_orpo(self, model_id):
         tokenizer = AutoTokenizer.from_pretrained(model_id)
         dataset = load_dataset("trl-internal-testing/zen", "standard_preference", split="train")
@@ -277,6 +304,12 @@ def test_orpo(self, model_id):
         assert not trainer.args.disable_dropout
         assert trainer.args.label_pad_token_id == -99
 
+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
     def test_reward(self, model_id):
         tokenizer = AutoTokenizer.from_pretrained(model_id)
         model = AutoModelForCausalLM.from_pretrained(model_id)
@@ -297,6 +330,12 @@ def test_reward(self, model_id):
         assert trainer.args.dataset_num_proc == 4
         assert trainer.args.center_rewards_coefficient == 0.1
 
+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
     def test_sft(self, model_id):
         dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling", split="train")
         training_args = SFTConfig(
@@ -321,6 +360,12 @@ def test_sft(self, model_id):
         assert trainer.args.dataset_kwargs["append_concat_token"]
         assert trainer.args.eval_packing
 
+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
     @pytest.mark.parametrize("alpha_list", [False, True])
     def test_xpo(self, alpha_list, model_id):
         tokenizer = AutoTokenizer.from_pretrained(model_id)

From f6f5f827b0f0e1d8ae5ec9193a71d80b3a614302 Mon Sep 17 00:00:00 2001
From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com>
Date: Thu, 30 Oct 2025 11:42:03 +0100
Subject: [PATCH 18/18] Use model_id param in experimental GRPO with replay buffer tests

---
 .../test_grpo_with_replay_buffer_trainer.py | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/tests/experimental/test_grpo_with_replay_buffer_trainer.py b/tests/experimental/test_grpo_with_replay_buffer_trainer.py
index cad66f8034c..a1696a48267 100644
--- a/tests/experimental/test_grpo_with_replay_buffer_trainer.py
+++ b/tests/experimental/test_grpo_with_replay_buffer_trainer.py
@@ -83,11 +83,12 @@ def test_sample(self):
 @pytest.mark.low_priority
 class TestUpdateWithReplayBuffer:
     def setup_method(self):
+        model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
         config = GRPOWithReplayBufferConfig(
             replay_buffer_size=5,
         )
         self.trainer = GRPOWithReplayBufferTrainer(
-            model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+            model=model_id,
             reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5",
             args=config,
             train_dataset=None,
@@ -238,7 +239,13 @@ def test_update_with_inputs_different_seq_len(self):
 
 @pytest.mark.low_priority
 class TestGRPOWithReplayBufferTrainer(TrlTestCase):
-    def test_training_with_replay_buffer(self):
+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
+    def test_training_with_replay_buffer(self, model_id):
         dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train")
 
         # Guarantee that some rewards have 0 std
@@ -258,7 +265,13 @@ def custom_reward_func(completions, **kwargs):
             report_to="none",
         )
         trainer = GRPOTrainer(
-            model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+            model=model_id,
             reward_funcs=[custom_reward_func],
             args=training_args,
             train_dataset=dataset,