1 parent 3ced5d5 · commit 8c75966
python/sglang/srt/layers/quantization/fp8_utils.py
```diff
@@ -243,8 +243,8 @@ def apply_fp8_linear(
         if _is_cuda:
             qinput, x_scale = sglang_per_token_quant_fp8(input_2d)
         else:
-            qinput, x_scale = per_token_group_quant_fp8(
-                input_2d, group_size=input_2d.shape[1]
+            qinput, x_scale = ops.scaled_fp8_quant(
+                input_2d, input_scale, use_per_token_if_dynamic=use_per_token_if_dynamic
             )

         if cutlass_fp8_supported:
```
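For context, the sketch below approximates in plain PyTorch what dynamic per-token FP8 activation quantization computes (one scale per row, derived from that row's absolute maximum), which is the behavior the non-CUDA branch falls back to here. This is an illustrative reference only, under the assumption that `ops.scaled_fp8_quant` with `use_per_token_if_dynamic=True` and no precomputed `input_scale` performs dynamic per-token scaling; the real op is a fused kernel, and the function name below is hypothetical.

```python
# Hypothetical reference implementation, not the fused kernel used in the diff.
import torch

def per_token_fp8_quant_reference(x: torch.Tensor):
    """Quantize a 2-D activation tensor to float8_e4m3fn with one dynamic scale per row."""
    fp8_max = torch.finfo(torch.float8_e4m3fn).max
    # Per-token scale: amax over the hidden dimension, clamped to avoid division by zero.
    amax = x.abs().amax(dim=-1, keepdim=True).clamp(min=1e-12).float()
    scale = amax / fp8_max
    # Scale, clamp to the representable FP8 range, then cast.
    q = (x.float() / scale).clamp(-fp8_max, fp8_max).to(torch.float8_e4m3fn)
    return q, scale
```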