@@ -230,7 +230,7 @@ def _get_part_names(self):
     def _set_vocab_gpt2(self):
         dir_model = self.dir_model
         hparams = self.hparams
-        tokens: list[bytearray] = []
+        tokens: list[str] = []
         toktypes: list[int] = []

         from transformers import AutoTokenizer
@@ -243,8 +243,7 @@ def _set_vocab_gpt2(self):

         for i in range(vocab_size):
             if i not in reverse_vocab:
-                pad_token = f"[PAD{i}]".encode('utf-8')
-                tokens.append(bytearray(pad_token))
+                tokens.append(f"[PAD{i}]")
                 toktypes.append(gguf.TokenType.USER_DEFINED)
             elif reverse_vocab[i] in added_vocab:
                 tokens.append(reverse_vocab[i])
@@ -266,7 +265,7 @@ def _set_vocab_gpt2(self):
     def _set_vocab_qwen(self):
         dir_model = self.dir_model
         hparams = self.hparams
-        tokens: list[bytearray] = []
+        tokens: list[str] = []
         toktypes: list[int] = []

         from transformers import AutoTokenizer
@@ -291,8 +290,7 @@ def _set_vocab_qwen(self):

         for i in range(vocab_size):
             if i not in reverse_vocab:
-                pad_token = f"[PAD{i}]".encode("utf-8")
-                tokens.append(bytearray(pad_token))
+                tokens.append(f"[PAD{i}]")
                 toktypes.append(gguf.TokenType.USER_DEFINED)
             elif reverse_vocab[i] in added_vocab:
                 tokens.append(reverse_vocab[i])
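
For context, a minimal standalone sketch of the resulting vocab loop (the function name `build_gpt2_tokens` and the `CONTROL`/`NORMAL` branches are illustrative assumptions, not part of this diff). The gguf writer UTF-8-encodes `str` tokens itself when writing the token list, so the old explicit `encode()`/`bytearray()` round-trip for pad tokens added nothing:

```python
# Illustrative sketch only: build_gpt2_tokens and the CONTROL/NORMAL branches
# are assumptions for a self-contained example; the pad branch mirrors the diff.
import gguf

def build_gpt2_tokens(reverse_vocab: dict[int, str], added_vocab: set[str],
                      vocab_size: int) -> tuple[list[str], list[int]]:
    tokens: list[str] = []
    toktypes: list[int] = []
    for i in range(vocab_size):
        if i not in reverse_vocab:
            # A plain str suffices: GGUFWriter encodes token strings as
            # UTF-8 on write, so no manual encode()/bytearray() is needed.
            tokens.append(f"[PAD{i}]")
            toktypes.append(gguf.TokenType.USER_DEFINED)
        elif reverse_vocab[i] in added_vocab:
            tokens.append(reverse_vocab[i])
            toktypes.append(gguf.TokenType.CONTROL)  # assumed type for added tokens
        else:
            tokens.append(reverse_vocab[i])
            toktypes.append(gguf.TokenType.NORMAL)
    return tokens, toktypes
```

With `str` tokens throughout, `_set_vocab_gpt2` and `_set_vocab_qwen` share the same `list[str]` typing and skip the per-token bytearray allocation.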