Commit 61bf96c

Move tests of BCO trainer args to tests/experimental (#4354)
Co-authored-by: Quentin Gallouédec <[email protected]>
1 parent b8f23ef commit 61bf96c

File tree: 2 files changed (+71, -51 lines)
New file (under tests/experimental, per the commit title): 70 additions & 0 deletions
@@ -0,0 +1,70 @@
+# Copyright 2020-2025 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from datasets import load_dataset
+from transformers import AutoTokenizer
+
+from trl.experimental.bco import BCOConfig, BCOTrainer
+
+from ..testing_utils import TrlTestCase, require_sklearn
+
+
+class TestTrainerArg(TrlTestCase):
+    @require_sklearn
+    def test_bco(self):
+        model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
+        tokenizer = AutoTokenizer.from_pretrained(model_id)
+        dataset = load_dataset("trl-internal-testing/zen", "standard_unpaired_preference", split="train")
+        training_args = BCOConfig(
+            self.tmp_dir,
+            max_length=256,
+            max_prompt_length=64,
+            max_completion_length=64,
+            beta=0.5,
+            label_pad_token_id=-99,
+            padding_value=-99,
+            truncation_mode="keep_start",
+            # generate_during_eval=True, # ignore this one, it requires wandb
+            is_encoder_decoder=True,
+            precompute_ref_log_probs=True,
+            model_init_kwargs={"trust_remote_code": True},
+            ref_model_init_kwargs={"trust_remote_code": True},
+            dataset_num_proc=4,
+            prompt_sample_size=512,
+            min_density_ratio=0.2,
+            max_density_ratio=20.0,
+        )
+        trainer = BCOTrainer(
+            model=model_id,
+            ref_model=model_id,
+            args=training_args,
+            train_dataset=dataset,
+            processing_class=tokenizer,
+        )
+        assert trainer.args.max_length == 256
+        assert trainer.args.max_prompt_length == 64
+        assert trainer.args.max_completion_length == 64
+        assert trainer.args.beta == 0.5
+        assert trainer.args.label_pad_token_id == -99
+        assert trainer.args.padding_value == -99
+        assert trainer.args.truncation_mode == "keep_start"
+        # self.assertEqual(trainer.args.generate_during_eval, True)
+        assert trainer.args.is_encoder_decoder
+        assert trainer.args.precompute_ref_log_probs
+        assert trainer.args.model_init_kwargs == {"trust_remote_code": True}
+        assert trainer.args.ref_model_init_kwargs == {"trust_remote_code": True}
+        assert trainer.args.dataset_num_proc == 4
+        assert trainer.args.prompt_sample_size == 512
+        assert trainer.args.min_density_ratio == 0.2
+        assert trainer.args.max_density_ratio == 20.0
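The relocated test exercises the standard TRL pattern in which every keyword passed to the config surfaces unchanged as an attribute of trainer.args. A minimal sketch of that round trip, not taken from the commit: it assumes a TRL install that ships trl.experimental.bco and that BCOConfig follows the usual TRL convention of subclassing transformers.TrainingArguments, with the output directory as the first positional argument ("my_output_dir" below is a hypothetical path).

from trl.experimental.bco import BCOConfig

# Hypothetical output directory; the kwargs mirror two of the values
# asserted in the test above.
config = BCOConfig("my_output_dir", beta=0.5, prompt_sample_size=512)

# BCO-specific options are plain attributes on the config object, which is
# exactly what the test reads back through trainer.args.
assert config.beta == 0.5
assert config.prompt_sample_size == 512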

tests/test_trainers_args.py: 1 addition & 51 deletions

@@ -16,8 +16,6 @@
 from transformers import AutoModelForCausalLM, AutoModelForSequenceClassification, AutoTokenizer

 from trl import (
-    BCOConfig,
-    BCOTrainer,
     CPOConfig,
     CPOTrainer,
     DPOConfig,
@@ -39,58 +37,10 @@
     XPOTrainer,
 )

-from .testing_utils import TrlTestCase, require_sklearn
+from .testing_utils import TrlTestCase


 class TestTrainerArg(TrlTestCase):
-    @require_sklearn
-    def test_bco(self):
-        model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
-        tokenizer = AutoTokenizer.from_pretrained(model_id)
-        dataset = load_dataset("trl-internal-testing/zen", "standard_unpaired_preference", split="train")
-        training_args = BCOConfig(
-            self.tmp_dir,
-            max_length=256,
-            max_prompt_length=64,
-            max_completion_length=64,
-            beta=0.5,
-            label_pad_token_id=-99,
-            padding_value=-99,
-            truncation_mode="keep_start",
-            # generate_during_eval=True, # ignore this one, it requires wandb
-            is_encoder_decoder=True,
-            precompute_ref_log_probs=True,
-            model_init_kwargs={"trust_remote_code": True},
-            ref_model_init_kwargs={"trust_remote_code": True},
-            dataset_num_proc=4,
-            prompt_sample_size=512,
-            min_density_ratio=0.2,
-            max_density_ratio=20.0,
-        )
-        trainer = BCOTrainer(
-            model=model_id,
-            ref_model=model_id,
-            args=training_args,
-            train_dataset=dataset,
-            processing_class=tokenizer,
-        )
-        assert trainer.args.max_length == 256
-        assert trainer.args.max_prompt_length == 64
-        assert trainer.args.max_completion_length == 64
-        assert trainer.args.beta == 0.5
-        assert trainer.args.label_pad_token_id == -99
-        assert trainer.args.padding_value == -99
-        assert trainer.args.truncation_mode == "keep_start"
-        # self.assertEqual(trainer.args.generate_during_eval, True)
-        assert trainer.args.is_encoder_decoder
-        assert trainer.args.precompute_ref_log_probs
-        assert trainer.args.model_init_kwargs == {"trust_remote_code": True}
-        assert trainer.args.ref_model_init_kwargs == {"trust_remote_code": True}
-        assert trainer.args.dataset_num_proc == 4
-        assert trainer.args.prompt_sample_size == 512
-        assert trainer.args.min_density_ratio == 0.2
-        assert trainer.args.max_density_ratio == 20.0
-

     def test_cpo(self):
         model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
         tokenizer = AutoTokenizer.from_pretrained(model_id)
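The crux of the move is the import path: this test file no longer pulls the BCO classes from the top-level trl namespace. A before/after sketch, with both import lines taken verbatim from the diffs above:

# Before this commit, in tests/test_trainers_args.py:
# from trl import BCOConfig, BCOTrainer

# After this commit, in the relocated test under tests/experimental:
from trl.experimental.bco import BCOConfig, BCOTrainer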
