@@ -261,44 +261,44 @@ def quant_tensor_k_quant_cpu(data, num_bits=4, group_size=32):
261261 scale: scale
262262 zero_point: zero point
263263 """
264- data = np .reshape (data , (- 1 , group_size )).astype (np .float32 ) # nb = data.shape[0], (nb, group_size)
264+ data = np .reshape (data , (- 1 , group_size )).astype (np .float32 ) # nb = data.shape[0], (nb, group_size)
265265 maxq = 2 ** num_bits - 1
266266 minq = 0
267- sum_x2 = np .sum (data ** 2 , axis = 1 , keepdims = True ) # (nb, 1)
268- av_x = np .sqrt (sum_x2 / group_size ) # (nb, 1)
269- weights = np .add (av_x , np .abs (data )) # (nb, group_size)
270- rmin = np .min (data , axis = 1 , keepdims = True ) # (nb, 1)
271- rmax = np .max (data , axis = 1 , keepdims = True ) # (nb, 1)
272- sum_w = np .sum (weights , axis = 1 , keepdims = True ) # (nb, 1)
273- sum_x = np .sum (weights * data , axis = 1 , keepdims = True ) # (nb, group_size)
274- iscale = np .ones (rmax .shape , dtype = data .dtype ) # (nb, 1)
267+ sum_x2 = np .sum (data ** 2 , axis = 1 , keepdims = True ) # (nb, 1)
268+ av_x = np .sqrt (sum_x2 / group_size ) # (nb, 1)
269+ weights = np .add (av_x , np .abs (data )) # (nb, group_size)
270+ rmin = np .min (data , axis = 1 , keepdims = True ) # (nb, 1)
271+ rmax = np .max (data , axis = 1 , keepdims = True ) # (nb, 1)
272+ sum_w = np .sum (weights , axis = 1 , keepdims = True ) # (nb, 1)
273+ sum_x = np .sum (weights * data , axis = 1 , keepdims = True ) # (nb, 1)
274+ iscale = np .ones (rmax .shape , dtype = data .dtype ) # (nb, 1)
275275 mask = rmin != rmax
276276 iscale [mask ] = (maxq - minq ) / (rmax [mask ] - rmin [mask ])
277277 scale = 1 / iscale
278- quant_data = np .clip (np .round (iscale * (data - rmin )), minq , maxq ) # (nb, group_size)
279- diff = scale * quant_data + rmin - data # (nb, group_size)
280- best_mad = np .sum (weights * diff ** 2 , axis = 1 , keepdims = True ) # (nb, 1)
278+ quant_data = np .clip (np .round (iscale * (data - rmin )), minq , maxq ) # (nb, group_size)
279+ diff = scale * quant_data + rmin - data # (nb, group_size)
280+ best_mad = np .sum (weights * diff ** 2 , axis = 1 , keepdims = True ) # (nb, 1)
281281 nstep = 20
282282 rdelta = 0.1
283283 # nstep * rdelta = -2 * rrmin, maxq - minq = 2**num_bits - 1
284284 rrmin = - 1
285285 for is_ in range (nstep ):
286- iscale_new = np .ones (rmax .shape , dtype = data .dtype ) # (nb, 1)
286+ iscale_new = np .ones (rmax .shape , dtype = data .dtype ) # (nb, 1)
287287 factor = np .array ([rrmin + rdelta * is_ + maxq - minq ]).astype (data .dtype )[0 ]
288288 mask = rmin != rmax
289289 iscale_new [mask ] = factor / (rmax [mask ] - rmin [mask ])
290- quant_data_new = np .clip (np .round (iscale_new * (data - rmin )), minq , maxq ) # (nb, group_size)
290+ quant_data_new = np .clip (np .round (iscale_new * (data - rmin )), minq , maxq ) # (nb, group_size)
291291 mul_weights_quant_data_new = weights * quant_data_new
292- sum_l = np .sum (mul_weights_quant_data_new , axis = 1 , keepdims = True ) # (nb, 1)
293- sum_l2 = np .sum (mul_weights_quant_data_new * quant_data_new , axis = 1 , keepdims = True ) # (nb, 1)
294- sum_xl = np .sum (mul_weights_quant_data_new * data , axis = 1 , keepdims = True ) # (nb, 1)
295- D = np .subtract (sum_w * sum_l2 , sum_l ** 2 ) # (nb, 1)
292+ sum_l = np .sum (mul_weights_quant_data_new , axis = 1 , keepdims = True ) # (nb, 1)
293+ sum_l2 = np .sum (mul_weights_quant_data_new * quant_data_new , axis = 1 , keepdims = True ) # (nb, 1)
294+ sum_xl = np .sum (mul_weights_quant_data_new * data , axis = 1 , keepdims = True ) # (nb, 1)
295+ D = np .subtract (sum_w * sum_l2 , sum_l ** 2 ) # (nb, 1)
296296
297- this_scale = (sum_w * sum_xl - sum_x * sum_l ) / D # (nb, 1)
298- this_min = (sum_l2 * sum_x - sum_l * sum_xl ) / D # (nb, 1)
297+ this_scale = (sum_w * sum_xl - sum_x * sum_l ) / D # (nb, 1)
298+ this_min = (sum_l2 * sum_x - sum_l * sum_xl ) / D # (nb, 1)
299299
300- diff = this_scale * quant_data_new + this_min - data # (nb, group_size)
301- mad = np .sum (weights * diff ** 2 , axis = 1 , keepdims = True ) # (nb, 1)
300+ diff = this_scale * quant_data_new + this_min - data # (nb, group_size)
301+ mad = np .sum (weights * diff ** 2 , axis = 1 , keepdims = True ) # (nb, 1)
302302
303303 mad_1 = np .array (mad )
304304 best_mad_1 = np .array (best_mad )
@@ -318,6 +318,7 @@ def quant_tensor_k_quant_cpu(data, num_bits=4, group_size=32):
318318
319319 return q_weight , scale , zero_point
320320
321+
321322def quant_tensor_k_quant_cuda (data , num_bits = 4 , group_size = 32 ):
322323 """Quantize tensor per group based on k quant.
323324 Ref: https://github.com/ggml-org/llama.cpp/blob/64eda5deb9859e87a020e56bab5d2f9ca956f1de/ggml/src/ggml-quants.c
0 commit comments