1 parent 3ced5d5 · commit 8c75966
python/sglang/srt/layers/quantization/fp8_utils.py
```diff
@@ -243,8 +243,8 @@ def apply_fp8_linear(
         if _is_cuda:
             qinput, x_scale = sglang_per_token_quant_fp8(input_2d)
         else:
-            qinput, x_scale = per_token_group_quant_fp8(
-                input_2d, group_size=input_2d.shape[1]
+            qinput, x_scale = ops.scaled_fp8_quant(
+                input_2d, input_scale, use_per_token_if_dynamic=use_per_token_if_dynamic
             )

         if cutlass_fp8_supported:
```
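For context, the sketch below approximates in plain PyTorch what dynamic per-token FP8 activation quantization computes (one scale per row, derived from that row's absolute maximum), which is the behavior the non-CUDA branch falls back to here. This is an illustrative reference only, under the assumption that `ops.scaled_fp8_quant` with `use_per_token_if_dynamic=True` and no precomputed `input_scale` performs dynamic per-token scaling; the real op is a fused kernel, and the function name below is hypothetical.

```python
# Hypothetical reference implementation, not the fused kernel used in the diff.
import torch

def per_token_fp8_quant_reference(x: torch.Tensor):
    """Quantize a 2-D activation tensor to float8_e4m3fn with one dynamic scale per row."""
    fp8_max = torch.finfo(torch.float8_e4m3fn).max
    # Per-token scale: amax over the hidden dimension, clamped to avoid division by zero.
    amax = x.abs().amax(dim=-1, keepdim=True).clamp(min=1e-12).float()
    scale = amax / fp8_max
    # Scale, clamp to the representable FP8 range, then cast.
    q = (x.float() / scale).clamp(-fp8_max, fp8_max).to(torch.float8_e4m3fn)
    return q, scale
```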