1818
1919logger = logging .getLogger ("gguf-convert-endian" )
2020
def byteswap_q4_0(tensor, block_offs):
    """Endian-swap one block_q4_0 inside tensor.data at block_offs.

    A block_q4_0 is an f16 delta (scaling factor) followed by 16 bytes of
    packed quants; only the 2-byte delta needs byte-swapping.
    """
    scale = tensor.data[block_offs:block_offs + 2]
    scale.view(dtype=np.uint16).byteswap(inplace=True)
def byteswap_q8_0(tensor, block_offs):
    """Endian-swap one block_q8_0 inside tensor.data at block_offs.

    A block_q8_0 is an f16 delta (scaling factor) followed by 32 int8
    quants; only the 2-byte delta needs byte-swapping.
    """
    scale = tensor.data[block_offs:block_offs + 2]
    scale.view(dtype=np.uint16).byteswap(inplace=True)
def byteswap_q4_k(tensor, block_offs):
    """Endian-swap one block_q4_k inside tensor.data at block_offs.

    A block_q4_k starts with two consecutive f16 fields followed by 140
    int8 values; swap each 2-byte field in turn.
    """
    for field_offs in (block_offs, block_offs + 2):
        field = tensor.data[field_offs:field_offs + 2]
        field.view(dtype=np.uint16).byteswap(inplace=True)
def byteswap_q6_k(tensor, block_offs):
    """Endian-swap one block_q6_k inside tensor.data at block_offs.

    A block_q6_k is 208 int8 values followed by a single trailing f16
    field; only that 2-byte field needs byte-swapping.
    """
    field = tensor.data[block_offs + 208:block_offs + 210]
    field.view(dtype=np.uint16).byteswap(inplace=True)
# Dispatch table for quantized tensor types that need per-block endian
# conversion: maps each GGML quantization type to its block size in bytes
# and the function that byte-swaps the non-int8 fields of one block.
byteswap_tensors = {
    gguf.GGMLQuantizationType.Q4_0: {
        # 18 bytes: f16 delta + 16 bytes of packed quants
        "block_size": 18,
        "byteswap_func": byteswap_q4_0,
    },
    gguf.GGMLQuantizationType.Q8_0: {
        # 34 bytes: f16 delta + 32 int8 quants
        "block_size": 34,
        "byteswap_func": byteswap_q8_0,
    },
    gguf.GGMLQuantizationType.Q4_K: {
        # 144 bytes: 2 f16 fields + 140 int8 values
        "block_size": 144,
        "byteswap_func": byteswap_q4_k,
    },
    gguf.GGMLQuantizationType.Q6_K: {
        # 210 bytes: 208 int8 values + trailing f16 field
        "block_size": 210,
        "byteswap_func": byteswap_q6_k,
    },
}
2170
2271def convert_byteorder (reader : gguf .GGUFReader , args : argparse .Namespace ) -> None :
2372 file_endian = reader .endianess .name
@@ -32,13 +81,10 @@ def convert_byteorder(reader: gguf.GGUFReader, args: argparse.Namespace) -> None
3281 sys .exit (0 )
3382 logger .info ("* Checking tensors for conversion compatibility" )
3483 for tensor in reader .tensors :
35- if tensor .tensor_type not in (
36- gguf .GGMLQuantizationType .F32 ,
37- gguf .GGMLQuantizationType .F16 ,
38- gguf .GGMLQuantizationType .Q4_0 ,
39- gguf .GGMLQuantizationType .Q8_0 ,
40- gguf .GGMLQuantizationType .Q4_K ,
41- gguf .GGMLQuantizationType .Q6_K ,
84+ if tensor .tensor_type not in byteswap_tensors and \
85+ tensor .tensor_type not in (
86+ gguf .GGMLQuantizationType .F32 ,
87+ gguf .GGMLQuantizationType .F16 ,
4288 ):
4389 raise ValueError (f"Cannot handle type { tensor .tensor_type .name } for tensor { repr (tensor .name )} " )
4490 logger .info (f"* Preparing to convert from { file_endian } to { order } " )
@@ -73,69 +119,7 @@ def convert_byteorder(reader: gguf.GGUFReader, args: argparse.Namespace) -> None
73119 part .byteswap (inplace = True )
74120
75121 # Byte-swap tensor data if necessary
76- if tensor .tensor_type == gguf .GGMLQuantizationType .Q4_0 :
77- # Handle Q4_0 tensor blocks (block_q4_0)
78- # Specific handling of block_q4_0 is required.
79- # Each block_q4_0 consists of an f16 delta (scaling factor) followed by 16 int8 quantizations.
80-
81- # first flatten structure
82- oldshape = tensor .data .shape
83- newshape = 1
84- for i in tensor .data .shape :
85- newshape *= i
86-
87- tensor .data .resize (newshape )
88-
89- block_size = 18 # 18 bytes = <f16 delta scaling factor> + 16 * <int8 quant>
90-
91- n_blocks = len (tensor .data ) // block_size
92- for block_num in (inner_pbar := tqdm (range (n_blocks ), desc = "Byte-swapping Blocks" , leave = False )):
93- block_offs = block_num * block_size
94-
95- # Byte-Swap f16 sized delta field
96- delta = tensor .data [block_offs :block_offs + 2 ].view (dtype = np .uint16 )
97- delta .byteswap (inplace = True )
98-
99- # Byte-Swap Q8 weights
100- if block_num % 100000 == 0 :
101- inner_pbar .set_description (f"Byte-swapping Blocks [{ (n_blocks - block_num ) // n_blocks } ]" )
102-
103- # restore old shape in case it's ever used
104- tensor .data .resize (oldshape )
105- elif tensor .tensor_type == gguf .GGMLQuantizationType .Q8_0 :
106- # Handle Q8_0 tensor blocks (block_q8_0)
107- # Specific handling of block_q8_0 is required.
108- # Each block_q8_0 consists of an f16 delta (scaling factor) followed by 32 int8 quantizations.
109-
110- # first flatten structure
111- oldshape = tensor .data .shape
112- newshape = 1
113- for i in tensor .data .shape :
114- newshape *= i
115-
116- tensor .data .resize (newshape )
117-
118- block_size = 34 # 34 bytes = <f16 delta scaling factor> + 32 * <int8 quant>
119-
120- n_blocks = len (tensor .data ) // block_size
121- for block_num in (inner_pbar := tqdm (range (n_blocks ), desc = "Byte-swapping Blocks" , leave = False )):
122- block_offs = block_num * block_size
123-
124- # Byte-Swap f16 sized delta field
125- delta = tensor .data [block_offs :block_offs + 2 ].view (dtype = np .uint16 )
126- delta .byteswap (inplace = True )
127-
128- # Byte-Swap Q8 weights
129- if block_num % 100000 == 0 :
130- inner_pbar .set_description (f"Byte-swapping Blocks [{ (n_blocks - block_num ) // n_blocks } ]" )
131-
132- # restore old shape in case it's ever used
133- tensor .data .resize (oldshape )
134- elif tensor .tensor_type == gguf .GGMLQuantizationType .Q4_K :
135- # Handle Q4_K tensor blocks (block_q4_k)
136- # Specific handling of block_q4_k is required.
137- # Each block_q4_k consists of 2 f16 values followed by 140 int8 values.
138-
122+ if tensor .tensor_type in byteswap_tensors :
139123 # first flatten structure
140124 oldshape = tensor .data .shape
141125 newshape = 1
@@ -144,47 +128,15 @@ def convert_byteorder(reader: gguf.GGUFReader, args: argparse.Namespace) -> None
144128
145129 tensor .data .resize (newshape )
146130
147- block_size = 144
148- n_blocks = len (tensor .data ) // block_size
149- for block_num in (inner_pbar := tqdm (range (n_blocks ), desc = "Byte-swapping Blocks" , leave = False )):
150- block_offs = block_num * block_size
151-
152- # Byte-Swap f16 sized fields
153- delta = tensor .data [block_offs :block_offs + 2 ].view (dtype = np .uint16 )
154- delta .byteswap (inplace = True )
155-
156- delta = tensor .data [block_offs + 2 :block_offs + 4 ].view (dtype = np .uint16 )
157- delta .byteswap (inplace = True )
158-
159- # Byte-Swap
160- if block_num % 100000 == 0 :
161- inner_pbar .set_description (f"Byte-swapping Blocks [{ (n_blocks - block_num ) // n_blocks } ]" )
162-
163- # restore old shape in case it's ever used
164- tensor .data .resize (oldshape )
165- elif tensor .tensor_type == gguf .GGMLQuantizationType .Q6_K :
166- # Handle Q6_K tensor blocks (block_q6_k)
167- # Specific handling of block_q6_k is required.
168- # Each block_q6_k consists of 208 int8 values followed by 1 f16 value.
169-
170- # first flatten structure
171- oldshape = tensor .data .shape
172- newshape = 1
173- for i in tensor .data .shape :
174- newshape *= i
175-
176- tensor .data .resize (newshape )
131+ block_size = byteswap_tensors [tensor .tensor_type ]["block_size" ]
132+ byteswap_func = byteswap_tensors [tensor .tensor_type ]["byteswap_func" ]
177133
178- block_size = 210
179134 n_blocks = len (tensor .data ) // block_size
180135 for block_num in (inner_pbar := tqdm (range (n_blocks ), desc = "Byte-swapping Blocks" , leave = False )):
181136 block_offs = block_num * block_size
182137
183- # Byte-Swap f16 sized field
184- delta = tensor .data [block_offs + 208 :block_offs + 210 ].view (dtype = np .uint16 )
185- delta .byteswap (inplace = True )
138+ byteswap_func (tensor , block_offs )
186139
187- # Byte-Swap
188140 if block_num % 100000 == 0 :
189141 inner_pbar .set_description (f"Byte-swapping Blocks [{ (n_blocks - block_num ) // n_blocks } ]" )
190142
0 commit comments