Skip to content

Commit 2b51947

Browse files
committed
[BugFix] Retain original dtype for topk_weights
Signed-off-by: huanghaoyan.hhy <[email protected]>
1 parent ff33895 commit 2b51947

File tree

1 file changed

+4
-0
lines changed

1 file changed

+4
-0
lines changed

vllm/model_executor/layers/fused_moe/flashinfer_cutlass_prepare_finalize.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,7 @@ def flashinfer_alltoall_dispatch(
230230
max_num_token = (
231231
max(global_num_tokens_cpu) if global_num_tokens_cpu is not None else x.shape[0]
232232
)
233+
topk_weights_dtype = topk_weights.dtype
233234
alltoall_info, topk_ids, topk_weights, _ = (
234235
MnnvlMoe.mnnvl_moe_alltoallv_prepare_without_allgather(
235236
topk_ids,
@@ -244,6 +245,9 @@ def flashinfer_alltoall_dispatch(
244245
top_k,
245246
)
246247
)
248+
# NOTE: Restore original dtype, as FlashInfer casts topk_weights
249+
# to int32. Can be removed after the bug is fixed.
250+
topk_weights = topk_weights.view(topk_weights_dtype)
247251

248252
x, x_sf = moe_kernel_quantize_input(
249253
x,

0 commit comments

Comments (0)