@@ -54,6 +54,9 @@ class GgufIntegrationTests(unittest.TestCase):
     gpt2_model_id = "mradermacher/gpt2-GGUF"
     gpt2_original_model_id = "openai-community/gpt2"
     gpt2_xl_model_id = "RichardErkhov/openai-community_-_gpt2-xl-gguf"
+    starcoder2_model_id = "QuantFactory/starcoder2-3b-GGUF"
+    starcoder2_fp16_model_id = "brittlewis12/starcoder2-3b-GGUF"
+    starcoder2_original_model_id = "bigcode/starcoder2-3b"

     # standard quants
     q4_0_gguf_model_id = "tinyllama-1.1b-chat-v1.0.Q4_0.gguf"
@@ -93,6 +96,8 @@ class GgufIntegrationTests(unittest.TestCase):
     fp16_gpt2_model_id = "gpt2.f16.gguf"
     q8_gpt2_model_id = "gpt2.Q8_0.gguf"
     q6_k_gpt2_xl_model_id = "gpt2-xl.Q6_K.gguf"
+    q6_k_starcoder2_model_id = "starcoder2-3b.Q6_K.gguf"
+    fp16_starcoder2_gguf_model_id = "starcoder2-3b.fp16.gguf"

     example_text = "Hello"

@@ -650,6 +655,45 @@ def test_stablelm_weights_conversion_fp16(self):
                 self.assertTrue(original_params.shape == converted_state_dict[layer_name].shape)
                 torch.testing.assert_close(original_params, converted_state_dict[layer_name])

+    def test_starcoder2_weights_conversion_fp16(self):
+        original_model = AutoModelForCausalLM.from_pretrained(
+            self.starcoder2_original_model_id,
+            device_map="auto",
+            torch_dtype=torch.float16,
+        )
+
+        converted_model = AutoModelForCausalLM.from_pretrained(
+            self.starcoder2_fp16_model_id,
+            gguf_file=self.fp16_starcoder2_gguf_model_id,
+            device_map="auto",
+            torch_dtype=torch.float16,
+        )
+
+        converted_state_dict = converted_model.state_dict()
+        original_state_dict = original_model.state_dict()
+
+        for layer_name, original_params in original_state_dict.items():
+            if layer_name in converted_state_dict and layer_name != "lm_head.weight":
+                # quantized models do not contain the "lm_head.weight" layer
+                self.assertTrue(original_params.shape == converted_state_dict[layer_name].shape)
+                torch.testing.assert_close(original_params, converted_state_dict[layer_name])
+
+    def test_starcoder2_q6_k(self):
+        example_function_text = "def print_hello_world():"
+        model = AutoModelForCausalLM.from_pretrained(
+            self.starcoder2_model_id,
+            gguf_file=self.q6_k_starcoder2_model_id,
+            device_map="auto",
+            torch_dtype=torch.float16,
+        )
+
+        tokenizer = AutoTokenizer.from_pretrained(self.starcoder2_model_id, gguf_file=self.q6_k_starcoder2_model_id)
+        text = tokenizer(example_function_text, return_tensors="pt").to(torch_device)
+        out = model.generate(**text, max_new_tokens=10)
+
+        EXPECTED_TEXT = 'def print_hello_world():\n\tprint("Hello World")\n\ndef print'
+        self.assertEqual(tokenizer.decode(out[0], skip_special_tokens=True), EXPECTED_TEXT)
+
     def test_tokenization_xnli(self):
         import tqdm
         from datasets import load_dataset