|
94 | 94 | "output.weight": "lm_head.weight", |
95 | 95 | "output_norm": "model.norm", |
96 | 96 | }, |
| 97 | + "phi3": { |
| 98 | + "token_embd": "model.embed_tokens", |
| 99 | + "blk": "model.layers", |
| 100 | + "ffn_up": "mlp.gate_up_proj", |
| 101 | + "ffn_down": "mlp.down_proj", |
| 102 | + "ffn_gate": "mlp.gate_up_proj", |
| 103 | + "ffn_norm": "post_attention_layernorm", |
| 104 | + "attn_norm": "input_layernorm", |
| 105 | + "attn_qkv": "self_attn.qkv_proj", |
| 106 | + "attn_output": "self_attn.o_proj", |
| 107 | + "output.weight": "lm_head.weight", |
| 108 | + "output_norm": "model.norm", |
| 109 | + }, |
97 | 110 | } |
98 | 111 |
|
99 | 112 |
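The "phi3" tensor table above is consumed the same way as the existing architectures: each dotted GGUF tensor name is rewritten piece by piece into the corresponding Hugging Face parameter name. A minimal sketch of that substitution, assuming `mapping` holds the "phi3" entry (the real renaming is done by the GGUF loading utilities; this only illustrates how the table is meant to be read):

    mapping = {
        "token_embd": "model.embed_tokens",
        "blk": "model.layers",
        "ffn_up": "mlp.gate_up_proj",
        "ffn_down": "mlp.down_proj",
        "ffn_gate": "mlp.gate_up_proj",
        "ffn_norm": "post_attention_layernorm",
        "attn_norm": "input_layernorm",
        "attn_qkv": "self_attn.qkv_proj",
        "attn_output": "self_attn.o_proj",
        "output.weight": "lm_head.weight",
        "output_norm": "model.norm",
    }

    def rename_gguf_tensor(name: str) -> str:
        # Substitute each GGUF segment with its Hugging Face counterpart.
        # Insertion order matters: "attn_output" is listed before "output.weight",
        # so attention projections are renamed before the lm_head rule is checked.
        for gguf_key, hf_key in mapping.items():
            if gguf_key in name:
                name = name.replace(gguf_key, hf_key)
        return name

    print(rename_gguf_tensor("blk.0.attn_qkv.weight"))  # model.layers.0.self_attn.qkv_proj.weight
    print(rename_gguf_tensor("output_norm.weight"))     # model.norm.weight

Note that both "ffn_up" and "ffn_gate" point at the fused "mlp.gate_up_proj" weight used by the Phi-3 architecture.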
|
|
156 | 169 | "ggml.unknown_token_id": "unk_token_id", |
157 | 170 | "ggml.padding_token_id": "pad_token_id", |
158 | 171 | }, |
| 172 | + "phi3": { |
| 173 | + "context_length": "max_position_embeddings", |
| 174 | + "block_count": "num_hidden_layers", |
| 175 | + "feed_forward_length": "intermediate_size", |
| 176 | + "embedding_length": "hidden_size", |
| 177 | + "rope.dimension_count": None, |
| 178 | + "rope.freq_base": "rope_theta", |
| 179 | + "attention.head_count": "num_attention_heads", |
| 180 | + "attention.head_count_kv": "num_key_value_heads", |
| 181 | + "attention.layer_norm_rms_epsilon": "rms_norm_eps", |
| 182 | + "vocab_size": "vocab_size", |
| 183 | + }, |
159 | 184 | } |
160 | 185 |
|
161 | 186 | GGUF_TOKENIZER_MAPPING = { |
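The config table is applied to the GGUF header in a similar fashion: metadata keys such as "phi3.context_length" are copied onto the model config under their Hugging Face names, while an entry mapped to None (here "rope.dimension_count") is recognised but not forwarded. A minimal sketch of that translation, assuming `metadata` holds already-parsed header fields and `config_map` is the "phi3" entry above; the helper name and its arguments are illustrative, not part of this diff:

    def gguf_metadata_to_config_kwargs(metadata: dict, config_map: dict, arch: str = "phi3") -> dict:
        kwargs = {}
        for key, value in metadata.items():
            if not key.startswith(arch + "."):
                continue
            gguf_name = key[len(arch) + 1 :]   # e.g. "context_length"
            hf_name = config_map.get(gguf_name)
            if hf_name is None:                # unmapped, or explicitly mapped to None
                continue
            kwargs[hf_name] = value            # e.g. max_position_embeddings=4096
        return kwargs

    # gguf_metadata_to_config_kwargs({"phi3.context_length": 4096}, config_map)
    # -> {"max_position_embeddings": 4096}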
@@ -390,10 +415,86 @@ def converted(self) -> Tokenizer: |
390 | 415 | return tokenizer |
391 | 416 |
|
392 | 417 |
|
| 418 | +class GGUFPhi3Converter(LlamaConverter): |
| 419 | + def __init__(self, tokenizer_dict): |
| 420 | + self.proto = GGUFTokenizerSkeleton(tokenizer_dict) |
| 421 | + self.original_tokenizer = self.proto |
| 422 | + self.additional_kwargs = {} |
| 423 | + |
| 424 | + def vocab(self, proto): |
| 425 | + return list(zip(proto.tokens, proto.scores)) |
| 426 | + |
| 427 | + def merges(self, proto): |
| 428 | + return proto.merges |
| 429 | + |
| 430 | + def tokenizer(self, proto): |
| 431 | + vocab_scores = self.vocab(self.proto) |
| 432 | + merges = self.merges(self.proto) |
| 433 | + bpe_vocab = {word: i for i, (word, _score) in enumerate(vocab_scores)} |
| 434 | + |
| 435 | + tokenizer = Tokenizer(BPE(bpe_vocab, merges)) |
| 436 | + # add the special tokens from phi3 tokenizer config |
| 437 | + tokenizer.add_special_tokens( |
| 438 | + [ |
| 439 | + AddedToken("</s>", rstrip=True, lstrip=False, normalized=False, special=True), |
| 440 | + AddedToken("<|endoftext|>", normalized=False, special=True), |
| 441 | + AddedToken("<|assistant|>", rstrip=True, normalized=False, special=True), |
| 442 | + AddedToken("<|placeholder1|>", rstrip=True, normalized=False, special=True), |
| 443 | + AddedToken("<|placeholder2|>", rstrip=True, normalized=False, special=True), |
| 444 | + AddedToken("<|placeholder3|>", rstrip=True, normalized=False, special=True), |
| 445 | + AddedToken("<|placeholder4|>", rstrip=True, normalized=False, special=True), |
| 446 | + AddedToken("<|system|>", rstrip=True, normalized=False, special=True), |
| 447 | + AddedToken("<|end|>", rstrip=True, normalized=False, special=True), |
| 448 | + AddedToken("<|placeholder5|>", rstrip=True, normalized=False, special=True), |
| 449 | + AddedToken("<|placeholder6|>", rstrip=True, normalized=False, special=True), |
| 450 | + AddedToken("<|user|>", rstrip=True, normalized=False, special=True), |
| 451 | + ] |
| 452 | + ) |
| 453 | + |
| 454 | + self.additional_kwargs["unk_token"] = ( |
| 455 | + proto.tokens[proto.unk_token_id] if proto.unk_token_id is not None else None |
| 456 | + ) |
| 457 | + self.additional_kwargs["eos_token"] = ( |
| 458 | + proto.tokens[proto.eos_token_id] if proto.eos_token_id is not None else None |
| 459 | + ) |
| 460 | + self.additional_kwargs["bos_token"] = ( |
| 461 | + proto.tokens[proto.bos_token_id] if proto.bos_token_id is not None else None |
| 462 | + ) |
| 463 | + self.additional_kwargs["pad_token"] = ( |
| 464 | + proto.tokens[proto.pad_token_id] if proto.pad_token_id is not None else None |
| 465 | + ) |
| 466 | + |
| 467 | + return tokenizer |
| 468 | + |
| 469 | + def decoder(self, replacement, add_prefix_space): |
| 470 | + sequence = [ |
| 471 | + decoders.ByteFallback(), |
| 472 | + decoders.Fuse(), |
| 473 | + decoders.Replace(replacement, " "), |
| 474 | + ] |
| 475 | + |
| 476 | + if add_prefix_space: |
| 477 | + sequence += [decoders.Strip(content=" ", left=1)] |
| 478 | + return decoders.Sequence(sequence) |
| 479 | + |
| 480 | + def converted(self) -> Tokenizer: |
| 481 | + tokenizer = self.tokenizer(self.proto) |
| 482 | + |
| 483 | + replacement = "▁" |
| 484 | + add_prefix_space = True |
| 485 | + if hasattr(self.original_tokenizer, "add_prefix_space"): |
| 486 | + add_prefix_space = self.original_tokenizer.add_prefix_space |
| 487 | + |
| 488 | + tokenizer.decoder = self.decoder(replacement, add_prefix_space) |
| 489 | + |
| 490 | + return tokenizer |
| 491 | + |
| 492 | + |
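The converter is not called directly by users: the tokenizer fields parsed out of a Phi-3 GGUF file are dispatched through GGUF_TO_FAST_CONVERTERS below, and the resulting tokenizers.Tokenizer is wrapped in a PreTrainedTokenizerFast. A hedged sketch of that flow, where `tokenizer_dict` stands for the parsed GGUF tokenizer section and the wrapping only approximates what the fast-tokenizer loading path does internally:

    from transformers import PreTrainedTokenizerFast
    from transformers.integrations.ggml import GGUFPhi3Converter

    converter = GGUFPhi3Converter(tokenizer_dict)  # tokenizer_dict: parsed GGUF tokenizer fields
    fast_tokenizer = PreTrainedTokenizerFast(
        tokenizer_object=converter.converted(),    # tokenizers.Tokenizer built by the class above
        **converter.additional_kwargs,             # unk/eos/bos/pad tokens recovered from the file
    )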
393 | 493 | GGUF_TO_FAST_CONVERTERS = { |
394 | 494 | "llama": GGUFLlamaConverter, |
395 | 495 | "qwen2": GGUFQwen2Converter, |
396 | 496 | "qwen2_moe": GGUFQwen2Converter, |
| 497 | + "phi3": GGUFPhi3Converter, |
397 | 498 | } |
398 | 499 |
|
399 | 500 |
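With the "phi3" entry registered in GGUF_TO_FAST_CONVERTERS, a Phi-3 GGUF checkpoint can be loaded through the regular auto classes by passing gguf_file. The repository id and file name below are illustrative examples, not pinned by this diff:

    from transformers import AutoModelForCausalLM, AutoTokenizer

    repo_id = "microsoft/Phi-3-mini-4k-instruct-gguf"   # example GGUF repository
    filename = "Phi-3-mini-4k-instruct-q4.gguf"         # example quantized file

    # The tokenizer is rebuilt via GGUFPhi3Converter; the weights and config go
    # through the "phi3" tensor and config tables added above.
    tokenizer = AutoTokenizer.from_pretrained(repo_id, gguf_file=filename)
    model = AutoModelForCausalLM.from_pretrained(repo_id, gguf_file=filename)

    inputs = tokenizer("Hello, Phi-3!", return_tensors="pt")
    output = model.generate(**inputs, max_new_tokens=20)
    print(tokenizer.decode(output[0], skip_special_tokens=True))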
|
|