Skip to content

Commit b9af152

Browse files
authored
[tokenizer] sanitize saved config (#21483)
* [tokenizer] sanitize saved config
* rm config["name_or_path"] test
1 parent 67d0748 commit b9af152

File tree

2 files changed

+4
-2
lines changed

2 files changed

+4
-2
lines changed

src/transformers/tokenization_utils_base.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2153,6 +2153,10 @@ def convert_added_tokens(obj: Union[AddedToken, Any], add_type_field=True):
21532153
if self._auto_class is not None:
21542154
custom_object_save(self, save_directory, config=tokenizer_config)
21552155

2156+
# remove private information
2157+
if "name_or_path" in tokenizer_config:
2158+
tokenizer_config.pop("name_or_path")
2159+
21562160
with open(tokenizer_config_file, "w", encoding="utf-8") as f:
21572161
out_str = json.dumps(tokenizer_config, indent=2, sort_keys=True, ensure_ascii=False) + "\n"
21582162
f.write(out_str)

tests/models/auto/test_tokenization_auto.py

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -230,8 +230,6 @@ def test_get_tokenizer_config(self):
230230

231231
# Check the class of the tokenizer was properly saved (note that it always saves the slow class).
232232
self.assertEqual(config["tokenizer_class"], "BertTokenizer")
233-
# Check other keys just to make sure the config was properly saved /reloaded.
234-
self.assertEqual(config["name_or_path"], SMALL_MODEL_IDENTIFIER)
235233

236234
def test_new_tokenizer_registration(self):
237235
try:

0 commit comments

Comments
 (0)