Changes from all commits (21 commits):
8126245  Use explicit tiny-Qwen2ForCausalLM-2.5 model_id param in SFT tests (albertvillanova, Oct 23, 2025)
bc12110  Use fixture instead (albertvillanova, Oct 23, 2025)
ba30f5a  Use model_id fixture in GRPO tests (albertvillanova, Oct 23, 2025)
919e8f0  Use model_id fixture in DPO tests (albertvillanova, Oct 23, 2025)
f599123  Use model_id fixture in ORPO tests (albertvillanova, Oct 23, 2025)
c922f28  Use model_id fixture in KTO tests (albertvillanova, Oct 23, 2025)
2e50260  Use model_id fixture in activation_offloading tests (albertvillanova, Oct 23, 2025)
29d25f9  Refactor model_id in callbacks tests (albertvillanova, Oct 23, 2025)
6c3f522  Refactor model_id in dataset_formatting tests (albertvillanova, Oct 23, 2025)
88dffd2  Use model_id fixture in Online DPO tests (albertvillanova, Oct 23, 2025)
e08bdb2  Use model_id fixture in RLOO tests (albertvillanova, Oct 23, 2025)
f11a09c  Use model_id fixture in BCO tests (albertvillanova, Oct 23, 2025)
9827e99  Use model_id fixture in trainers args tests (albertvillanova, Oct 23, 2025)
224e388  Use model_id fixture in CLI tests (albertvillanova, Oct 23, 2025)
2fee2fb  Merge branch 'main' into refactor-ci-explicit-model-id-tiny-qwen2-for… (qgallouedec, Oct 28, 2025)
8085674  Merge remote-tracking branch 'upstream/main' into refactor-ci-explici… (albertvillanova, Oct 30, 2025)
9ab06fd  Revert "Use fixture instead" (albertvillanova, Oct 30, 2025)
23395ef  Use explicit tiny-Qwen2ForCausalLM-2.5 model_id param in experimental… (albertvillanova, Oct 30, 2025)
0934796  Replace model_id fixture with parameter (albertvillanova, Oct 30, 2025)
f6f5f82  Use model_id param in experimental GRPO with replay buffer tests (albertvillanova, Oct 30, 2025)
57b6d09  Merge remote-tracking branch 'upstream/main' into refactor-ci-explici… (albertvillanova, Nov 4, 2025)
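The commit trail above amounts to one recurring refactor: the hardcoded model_id local (briefly a shared fixture, later reverted) becomes an explicit @pytest.mark.parametrize value on each test, so the tiny model id is visible in the test signature and in the generated test id. A minimal before/after sketch of the pattern, assuming a hypothetical test that only loads a tokenizer (the class and test names below are illustrative, not taken from the PR):

import pytest
from transformers import AutoTokenizer


class TestBefore:
    # Before: the model id is hardcoded inside each test body.
    def test_tokenizer_loads(self):
        model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        assert tokenizer is not None


class TestAfter:
    # After: the model id is an explicit parametrize value, so it appears in the
    # generated test id and the list can grow without touching the test body.
    @pytest.mark.parametrize(
        "model_id",
        [
            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
        ],
    )
    def test_tokenizer_loads(self, model_id):
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        assert tokenizer is not None

With a single entry in the list the behaviour is unchanged; the parametrization mainly buys an explicit contract and a natural extension point for additional tiny models.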
99 changes: 77 additions & 22 deletions tests/experimental/test_bco_trainer.py
@@ -33,6 +33,12 @@

 @pytest.mark.low_priority
 class TestBCOTrainer(TrlTestCase):
+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
     @pytest.mark.parametrize(
         "config_name",
         [
@@ -45,8 +51,7 @@ class TestBCOTrainer(TrlTestCase):
         ],
     )
     @require_sklearn
-    def test_train(self, config_name):
-        model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
+    def test_train(self, config_name, model_id):
         model = AutoModelForCausalLM.from_pretrained(model_id)
         ref_model = AutoModelForCausalLM.from_pretrained(model_id)
         tokenizer = AutoTokenizer.from_pretrained(model_id)
@@ -80,9 +85,14 @@ def test_train(self, config_name):
             if param.sum() != 0: # ignore 0 biases
                 assert not torch.equal(param.cpu(), new_param.cpu())

+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
     @require_sklearn
-    def test_train_with_precompute(self):
-        model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
+    def test_train_with_precompute(self, model_id):
         model = AutoModelForCausalLM.from_pretrained(model_id)
         ref_model = AutoModelForCausalLM.from_pretrained(model_id)
         tokenizer = AutoTokenizer.from_pretrained(model_id)
@@ -117,9 +127,14 @@ def test_train_with_precompute(self):
             if param.sum() != 0: # ignore 0 biases
                 assert not torch.equal(param.cpu(), new_param.cpu())

+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
     @require_sklearn
-    def test_train_eval(self):
-        model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
+    def test_train_eval(self, model_id):
         model = AutoModelForCausalLM.from_pretrained(model_id)
         ref_model = AutoModelForCausalLM.from_pretrained(model_id)
         tokenizer = AutoTokenizer.from_pretrained(model_id)
@@ -145,9 +160,14 @@ def test_train_eval(self):

         trainer.train()

+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
     @require_sklearn
-    def test_init_with_ref_model_is_model(self):
-        model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
+    def test_init_with_ref_model_is_model(self, model_id):
         model = AutoModelForCausalLM.from_pretrained(model_id)
         tokenizer = AutoTokenizer.from_pretrained(model_id)

@@ -168,9 +188,14 @@ def test_init_with_ref_model_is_model(self):
                 train_dataset=dataset,
             )

+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
     @require_sklearn
-    def test_tokenize_and_process_tokens(self):
-        model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
+    def test_tokenize_and_process_tokens(self, model_id):
         model = AutoModelForCausalLM.from_pretrained(model_id)
         ref_model = AutoModelForCausalLM.from_pretrained(model_id)
         tokenizer = AutoTokenizer.from_pretrained(model_id)
@@ -224,9 +249,14 @@ def test_tokenize_and_process_tokens(self):
         assert processed_dataset["completion_attention_mask"][0] == [1, 1, 1, 1, 1, 1, 1]
         assert processed_dataset["completion_labels"][0] == [-100, -100, -100, -100, 27261, 13, 151645]

+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
     @require_sklearn
-    def test_train_without_providing_ref_model(self):
-        model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
+    def test_train_without_providing_ref_model(self, model_id):
         model = AutoModelForCausalLM.from_pretrained(model_id)
         tokenizer = AutoTokenizer.from_pretrained(model_id)

@@ -258,9 +288,14 @@ def test_train_without_providing_ref_model(self):
             if param.sum() != 0: # ignore 0 biases
                 assert not torch.equal(param.cpu(), new_param.cpu())

+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
     @require_sklearn
-    def test_train_udm(self):
-        model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
+    def test_train_udm(self, model_id):
         model = AutoModelForCausalLM.from_pretrained(model_id)
         tokenizer = AutoTokenizer.from_pretrained(model_id)

@@ -307,10 +342,15 @@ def embed_prompt(input_ids, attention_mask, model):
             if param.sum() != 0: # ignore 0 biases
                 assert not torch.equal(param.cpu(), new_param.cpu())

+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
     @require_sklearn
     @require_peft
-    def test_train_without_providing_ref_model_with_lora(self):
-        model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
+    def test_train_without_providing_ref_model_with_lora(self, model_id):
         model = AutoModelForCausalLM.from_pretrained(model_id)
         lora_config = LoraConfig(r=16, lora_alpha=32, lora_dropout=0.05, task_type="CAUSAL_LM")
         tokenizer = AutoTokenizer.from_pretrained(model_id)
@@ -345,10 +385,15 @@ def test_train_without_providing_ref_model_with_lora(self):
             if param.sum() != 0: # ignore 0 biases
                 assert not torch.equal(param.cpu(), new_param.cpu())

+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
     @require_sklearn
     @require_no_wandb
-    def test_generate_during_eval_no_wandb(self):
-        model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
+    def test_generate_during_eval_no_wandb(self, model_id):
         model = AutoModelForCausalLM.from_pretrained(model_id)
         tokenizer = AutoTokenizer.from_pretrained(model_id)

@@ -376,10 +421,15 @@ def test_generate_during_eval_no_wandb(self):
                 eval_dataset=dataset["test"],
             )

+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
     @require_sklearn
     @require_peft
-    def test_lora_train_and_save(self):
-        model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
+    def test_lora_train_and_save(self, model_id):
         model = AutoModelForCausalLM.from_pretrained(model_id)
         lora_config = LoraConfig(r=16, lora_alpha=32, lora_dropout=0.05, task_type="CAUSAL_LM")
         tokenizer = AutoTokenizer.from_pretrained(model_id)
@@ -409,9 +459,14 @@ def test_lora_train_and_save(self):
         # assert that the model is loaded without giving OSError
         AutoModelForCausalLM.from_pretrained(self.tmp_dir)

+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
     @require_sklearn
-    def test_compute_metrics(self):
-        model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
+    def test_compute_metrics(self, model_id):
         model = AutoModelForCausalLM.from_pretrained(model_id)
         ref_model = AutoModelForCausalLM.from_pretrained(model_id)
         tokenizer = AutoTokenizer.from_pretrained(model_id)
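A practical side effect of the decorator, sketched below under the same single-entry assumption: the model id becomes part of the pytest node id, so individual combinations can be selected with -k, and adding a second id to the list would re-run every BCO test against both models. The test name and the commented-out second entry are hypothetical, not part of this PR.

import pytest

# The generated node ids embed each parameter value, so a single model can be
# selected with, e.g., `pytest -k "tiny-Qwen2ForCausalLM"`.
@pytest.mark.parametrize(
    "model_id",
    [
        "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
        # "trl-internal-testing/tiny-SomeOtherModel",  # hypothetical future entry
    ],
)
def test_model_id_is_tiny(model_id):
    assert model_id.startswith("trl-internal-testing/")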
13 changes: 10 additions & 3 deletions tests/experimental/test_grpo_with_replay_buffer_trainer.py
@@ -96,11 +96,12 @@ def test_sample(self):
 @pytest.mark.low_priority
 class TestUpdateWithReplayBuffer:
     def setup_method(self):
+        model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
         config = GRPOWithReplayBufferConfig(
             replay_buffer_size=5,
         )
         self.trainer = GRPOWithReplayBufferTrainer(
-            model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+            model=model_id,
             reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5",
             args=config,
             train_dataset=None,
@@ -251,7 +252,13 @@ def test_update_with_inputs_different_seq_len(self):

 @pytest.mark.low_priority
 class TestGRPOWithReplayBufferTrainer(TrlTestCase):
-    def test_training_with_replay_buffer(self):
+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
+    def test_training_with_replay_buffer(self, model_id):
         dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train")

         # Guarantee that some rewards have 0 std
@@ -271,7 +278,7 @@ def custom_reward_func(completions, **kwargs):
             report_to="none",
         )
         trainer = GRPOWithReplayBufferTrainer(
-            model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+            model=model_id,
             reward_funcs=[custom_reward_func],
             args=training_args,
             train_dataset=dataset,
10 changes: 8 additions & 2 deletions tests/experimental/test_trainers_args.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+import pytest
 from datasets import load_dataset
 from transformers import AutoTokenizer

@@ -21,9 +22,14 @@


 class TestTrainerArg(TrlTestCase):
+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
     @require_sklearn
-    def test_bco(self):
-        model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
+    def test_bco(self, model_id):
         tokenizer = AutoTokenizer.from_pretrained(model_id)
         dataset = load_dataset("trl-internal-testing/zen", "standard_unpaired_preference", split="train")
         training_args = BCOConfig(
30 changes: 22 additions & 8 deletions tests/test_activation_offloading.py
@@ -11,8 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
-
+import pytest
 import torch
 from torch import nn
 from transformers import AutoModelForCausalLM
@@ -29,11 +28,16 @@


 class TestActivationOffloading(TrlTestCase):
+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
     @require_torch_accelerator
     @require_peft
-    def test_offloading_with_peft_models(self) -> None:
+    def test_offloading_with_peft_models(self, model_id) -> None:
         """Test that activation offloading works with PEFT models."""
-        model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
         model = AutoModelForCausalLM.from_pretrained(model_id).to(torch_device)
         peft_config = LoraConfig(
             lora_alpha=16,
@@ -76,9 +80,14 @@ def test_offloading_with_peft_models(self) -> None:
                 f"Gradient mismatch for {name_orig}"
             )

+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
     @require_torch_accelerator
-    def test_noop_manager_with_offloading(self):
-        model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
+    def test_noop_manager_with_offloading(self, model_id):
         model = AutoModelForCausalLM.from_pretrained(model_id).to(torch_device)
         inp = torch.randint(0, 100, (2, 10), device=torch_device)

@@ -123,10 +132,15 @@ def test_min_offload_size(self):
         # The test passes if no errors occur, as we're mainly testing
         # that the logic handles both offloaded and non-offloaded tensors

+    @pytest.mark.parametrize(
+        "model_id",
+        [
+            "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+        ],
+    )
     @require_torch_accelerator
-    def test_real_hf_model(self):
+    def test_real_hf_model(self, model_id):
         """Test with an actual HuggingFace model"""
-        model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
         model = AutoModelForCausalLM.from_pretrained(model_id).to(torch_device)

         # Create small input
22 changes: 13 additions & 9 deletions tests/test_callbacks.py
@@ -66,9 +66,10 @@ def __init__(self, model, ref_model, args, train_dataset, eval_dataset, processi

 class TestWinRateCallback(TrlTestCase):
     def setup_method(self):
-        self.model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
-        self.ref_model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
-        self.tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
+        model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
+        self.model = AutoModelForCausalLM.from_pretrained(model_id)
+        self.ref_model = AutoModelForCausalLM.from_pretrained(model_id)
+        self.tokenizer = AutoTokenizer.from_pretrained(model_id)
         self.tokenizer.pad_token = self.tokenizer.eos_token
         dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only")
         dataset["train"] = dataset["train"].select(range(8))
@@ -224,8 +225,9 @@ def test_lora(self):

 class TestLogCompletionsCallback(TrlTestCase):
     def setup_method(self):
-        self.model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
-        self.tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
+        model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
+        self.model = AutoModelForCausalLM.from_pretrained(model_id)
+        self.tokenizer = AutoTokenizer.from_pretrained(model_id)
         self.tokenizer.pad_token = self.tokenizer.eos_token
         dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only")
         dataset["train"] = dataset["train"].select(range(8))
@@ -318,8 +320,9 @@ def test_basic_comet(self):
 @require_mergekit
 class TestMergeModelCallback(TrlTestCase):
     def setup_method(self):
-        self.model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
-        self.tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
+        model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
+        self.model = AutoModelForCausalLM.from_pretrained(model_id)
+        self.tokenizer = AutoTokenizer.from_pretrained(model_id)
         self.dataset = load_dataset("trl-internal-testing/zen", "standard_preference", split="train")

     def test_callback(self):
@@ -374,8 +377,9 @@ def test_every_checkpoint(self):

 class TestBEMACallback(TrlTestCase):
     def setup_method(self):
-        self.model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
-        self.tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
+        model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
+        self.model = AutoModelForCausalLM.from_pretrained(model_id)
+        self.tokenizer = AutoTokenizer.from_pretrained(model_id)
         self.tokenizer.pad_token = self.tokenizer.eos_token
         dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling")

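The callbacks file is the one place that keeps a plain local variable instead of a parametrize decorator, presumably because these ids live in setup_method, which pytest calls without access to parametrize arguments; deduplicating the repeated string is the available improvement there. A minimal illustration of that constraint follows; the class and test names are hypothetical, not taken from the PR.

import pytest


class TestSetupMethodExample:
    def setup_method(self):
        # setup_method receives no parametrize values, so the shared id stays local.
        self.model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"

    @pytest.mark.parametrize("suffix", ["2.5"])
    def test_param_reaches_test_but_not_setup(self, suffix):
        # The parametrized value is available here, not in setup_method above.
        assert self.model_id.endswith(suffix)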