diff --git a/tests/test_grpo_trainer.py b/tests/test_grpo_trainer.py
index 50efa9e45cd..02a1d84266f 100644
--- a/tests/test_grpo_trainer.py
+++ b/tests/test_grpo_trainer.py
@@ -1305,8 +1305,14 @@ def reward_func(completions, **kwargs):
             new_param = trainer.model.get_parameter(n)
             assert not torch.equal(param, new_param), f"Parameter {n} has not changed."

+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2_5_VLForConditionalGeneration",
+        ],
+    )
     @require_vision
-    def test_training_vlm_beta_non_zero(self):
+    def test_training_vlm_beta_non_zero(self, model_id):
         dataset = load_dataset("trl-internal-testing/zen-image", "conversational_prompt_only", split="train")

         def reward_func(completions, **kwargs):
@@ -1323,7 +1329,7 @@ def reward_func(completions, **kwargs):
             report_to="none",
         )
         trainer = GRPOTrainer(
-            model="trl-internal-testing/tiny-Qwen2_5_VLForConditionalGeneration",
+            model=model_id,
             reward_funcs=reward_func,
             args=training_args,
             train_dataset=dataset,
@@ -1345,12 +1351,16 @@ def reward_func(completions, **kwargs):
             new_param = trainer.model.get_parameter(n)
             assert not torch.equal(param, new_param), f"Parameter {n} has not changed."

+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2_5_VLForConditionalGeneration",
+        ],
+    )
     @require_vision
     @require_peft
-    def test_training_vlm_peft(self):
-        model = AutoModelForImageTextToText.from_pretrained(
-            "trl-internal-testing/tiny-Qwen2_5_VLForConditionalGeneration"
-        )
+    def test_training_vlm_peft(self, model_id):
+        model = AutoModelForImageTextToText.from_pretrained(model_id)
         base_param_names = [f"base_model.model.{n}" for n, _ in model.named_parameters()]
         dataset = load_dataset("trl-internal-testing/zen-image", "conversational_prompt_only", split="train")

@@ -1388,8 +1398,14 @@ def reward_func(completions, **kwargs):
             elif "base_layer" not in n:  # We expect the peft params to be different (except for the base layer)
                 assert not torch.allclose(param, new_param), f"Parameter {n} has not changed."

+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2_5_VLForConditionalGeneration",
+        ],
+    )
     @require_vision
-    def test_training_vlm_and_importance_sampling(self):
+    def test_training_vlm_and_importance_sampling(self, model_id):
         dataset = load_dataset("trl-internal-testing/zen-image", "conversational_prompt_only", split="train")

         def reward_func(completions, **kwargs):
@@ -1406,7 +1422,7 @@ def reward_func(completions, **kwargs):
             report_to="none",
         )
         trainer = GRPOTrainer(
-            model="trl-internal-testing/tiny-Qwen2_5_VLForConditionalGeneration",
+            model=model_id,
             reward_funcs=reward_func,
             args=training_args,
             train_dataset=dataset,
@@ -1428,9 +1444,15 @@ def reward_func(completions, **kwargs):
             new_param = trainer.model.get_parameter(n)
             assert not torch.equal(param, new_param), f"Parameter {n} has not changed."

+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2_5_VLForConditionalGeneration",
+        ],
+    )
     @require_vision
     @require_liger_kernel
-    def test_training_vlm_and_liger(self):
+    def test_training_vlm_and_liger(self, model_id):
         dataset = load_dataset("trl-internal-testing/zen-image", "conversational_prompt_only", split="train")

         def reward_func(completions, **kwargs):
@@ -1448,7 +1470,7 @@ def reward_func(completions, **kwargs):
             report_to="none",
         )
         trainer = GRPOTrainer(
-            model="trl-internal-testing/tiny-Qwen2_5_VLForConditionalGeneration",
+            model=model_id,
             reward_funcs=reward_func,
             args=training_args,
             train_dataset=dataset,
@@ -1515,8 +1537,14 @@ def reward_func(completions, **kwargs):
             new_param = trainer.model.get_parameter(n)
             assert not torch.equal(param, new_param), f"Parameter {n} has not changed."

+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2_5_VLForConditionalGeneration",
+        ],
+    )
     @require_vision
-    def test_training_vlm_multi_image(self):
+    def test_training_vlm_multi_image(self, model_id):
         dataset = load_dataset("trl-internal-testing/zen-multi-image", "conversational_prompt_only", split="train")

         def reward_func(completions, **kwargs):
@@ -1533,7 +1561,7 @@ def reward_func(completions, **kwargs):
             report_to="none",
         )
         trainer = GRPOTrainer(
-            model="trl-internal-testing/tiny-Qwen2_5_VLForConditionalGeneration",
+            model=model_id,
             reward_funcs=reward_func,
             args=training_args,
             train_dataset=dataset,
diff --git a/tests/test_rloo_trainer.py b/tests/test_rloo_trainer.py
index 476fcfb0e72..a6cf327ac71 100644
--- a/tests/test_rloo_trainer.py
+++ b/tests/test_rloo_trainer.py
@@ -1129,8 +1129,14 @@ def reward_func(completions, **kwargs):
             new_param = trainer.model.get_parameter(n)
             assert not torch.equal(param, new_param), f"Parameter {n} has not changed."

+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2_5_VLForConditionalGeneration",
+        ],
+    )
     @require_vision
-    def test_training_vlm_beta_non_zero(self):
+    def test_training_vlm_beta_non_zero(self, model_id):
         dataset = load_dataset("trl-internal-testing/zen-image", "conversational_prompt_only", split="train")

         def reward_func(completions, **kwargs):
@@ -1147,7 +1153,7 @@ def reward_func(completions, **kwargs):
             report_to="none",
         )
         trainer = RLOOTrainer(
-            model="trl-internal-testing/tiny-Qwen2_5_VLForConditionalGeneration",
+            model=model_id,
             reward_funcs=reward_func,
             args=training_args,
             train_dataset=dataset,
@@ -1169,12 +1175,16 @@ def reward_func(completions, **kwargs):
             new_param = trainer.model.get_parameter(n)
             assert not torch.equal(param, new_param), f"Parameter {n} has not changed."

+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2_5_VLForConditionalGeneration",
+        ],
+    )
     @require_vision
     @require_peft
-    def test_training_vlm_peft(self):
-        model = AutoModelForImageTextToText.from_pretrained(
-            "trl-internal-testing/tiny-Qwen2_5_VLForConditionalGeneration"
-        )
+    def test_training_vlm_peft(self, model_id):
+        model = AutoModelForImageTextToText.from_pretrained(model_id)
         base_param_names = [f"base_model.model.{n}" for n, _ in model.named_parameters()]
         dataset = load_dataset("trl-internal-testing/zen-image", "conversational_prompt_only", split="train")

@@ -1257,8 +1267,14 @@ def reward_func(completions, **kwargs):
             new_param = trainer.model.get_parameter(n)
             assert not torch.equal(param, new_param), f"Parameter {n} has not changed."

+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2_5_VLForConditionalGeneration",
+        ],
+    )
     @require_vision
-    def test_training_vlm_multi_image(self):
+    def test_training_vlm_multi_image(self, model_id):
         dataset = load_dataset("trl-internal-testing/zen-multi-image", "conversational_prompt_only", split="train")

         def reward_func(completions, **kwargs):
@@ -1275,7 +1291,7 @@ def reward_func(completions, **kwargs):
             report_to="none",
         )
         trainer = RLOOTrainer(
-            model="trl-internal-testing/tiny-Qwen2_5_VLForConditionalGeneration",
+            model=model_id,
             reward_funcs=reward_func,
             args=training_args,
             train_dataset=dataset,
diff --git a/tests/test_sft_trainer.py b/tests/test_sft_trainer.py
index 4416d9d5c9a..ccd8c95137c 100644
--- a/tests/test_sft_trainer.py
+++ b/tests/test_sft_trainer.py
@@ -1381,13 +1381,19 @@ def test_train_vlm(self, model_id):
                 continue
             assert not torch.allclose(param, new_param, rtol=1e-12, atol=1e-12), f"Param {n} is not updated"

+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2_5_VLForConditionalGeneration",
+        ],
+    )
     @pytest.mark.xfail(
         parse_version(transformers.__version__) < parse_version("4.57.0"),
         reason="Mixing text-only and image+text examples is only supported in transformers >= 4.57.0",
         strict=False,
     )
     @require_vision
-    def test_train_vlm_multi_image(self):
+    def test_train_vlm_multi_image(self, model_id):
         # Get the dataset
         dataset = load_dataset(
             "trl-internal-testing/zen-multi-image", "conversational_prompt_completion", split="train"
         )
@@ -1400,7 +1406,7 @@ def test_train_vlm_multi_image(self):
             report_to="none",
         )
         trainer = SFTTrainer(
-            model="trl-internal-testing/tiny-Qwen2_5_VLForConditionalGeneration",
+            model=model_id,
             args=training_args,
             train_dataset=dataset,
         )
@@ -1419,8 +1425,14 @@ def test_train_vlm_multi_image(self):
             new_param = trainer.model.get_parameter(n)
             assert not torch.allclose(param, new_param, rtol=1e-12, atol=1e-12), f"Param {n} is not updated"

+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2_5_VLForConditionalGeneration",
+        ],
+    )
     @require_vision
-    def test_train_vlm_prompt_completion(self):
+    def test_train_vlm_prompt_completion(self, model_id):
         # Get the dataset
         dataset = load_dataset("trl-internal-testing/zen-image", "conversational_prompt_completion", split="train")

@@ -1431,7 +1443,7 @@ def test_train_vlm_prompt_completion(self):
             report_to="none",
         )
         trainer = SFTTrainer(
-            model="trl-internal-testing/tiny-Qwen2_5_VLForConditionalGeneration",
+            model=model_id,
             args=training_args,
             train_dataset=dataset,
         )
@@ -1519,15 +1531,21 @@ def test_train_vlm_gemma_3n(self):
                 continue
             assert not torch.allclose(param, new_param, rtol=1e-12, atol=1e-12), f"Param {n} is not updated"

+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2_5_VLForConditionalGeneration",
+        ],
+    )
     @require_vision
-    def test_train_vlm_text_only_data(self):
+    def test_train_vlm_text_only_data(self, model_id):
         # Get the dataset
         dataset = load_dataset("trl-internal-testing/zen", "conversational_language_modeling", split="train")

         # Initialize the trainer
         training_args = SFTConfig(output_dir=self.tmp_dir, report_to="none")
         trainer = SFTTrainer(
-            model="trl-internal-testing/tiny-Qwen2_5_VLForConditionalGeneration",
+            model=model_id,
             args=training_args,
             train_dataset=dataset,
         )
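The single pattern applied throughout this diff is pytest parametrization: each hard-coded tiny-VLM checkpoint string becomes a model_id argument supplied by @pytest.mark.parametrize, so further checkpoints can later be appended to the list and each entry runs as its own test case. Below is a minimal, self-contained sketch of that pattern; DummyTrainer and test_training_runs are hypothetical stand-ins, not part of TRL.

import pytest


class DummyTrainer:
    """Hypothetical stand-in for GRPOTrainer/RLOOTrainer/SFTTrainer."""

    def __init__(self, model: str):
        self.model = model

    def train(self) -> str:
        # Pretend training happened and report which checkpoint was used.
        return f"trained {self.model}"


# Each entry in the list becomes a separate test case; adding another tiny
# VLM checkpoint is then a one-line change to the list rather than a new test.
@pytest.mark.parametrize(
    "model_id",
    [
        "trl-internal-testing/tiny-Qwen2_5_VLForConditionalGeneration",
    ],
)
def test_training_runs(model_id):
    trainer = DummyTrainer(model=model_id)
    assert trainer.train() == f"trained {model_id}"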