From 5044dd4ab0083ac39ecff9c3c1e880c83101bd85 Mon Sep 17 00:00:00 2001
From: Umang Yadav
Date: Thu, 7 Aug 2025 21:07:28 +0000
Subject: [PATCH 1/2] [EXTERNAL] Cherry-pick
 https://github.com/llvm/llvm-project/pull/148205

---
 .../llvm/lib/Target/AMDGPU/SIFoldOperands.cpp    | 12 +++++++++---
 .../bug-multi-operands-to-update-after-fold.mir  | 15 +++++++++++++++
 2 files changed, 24 insertions(+), 3 deletions(-)
 create mode 100644 external/llvm-project/llvm/test/CodeGen/AMDGPU/bug-multi-operands-to-update-after-fold.mir

diff --git a/external/llvm-project/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/external/llvm-project/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 4715906fc8e0..6a6dd1613e1d 100644
--- a/external/llvm-project/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/external/llvm-project/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -1657,6 +1657,7 @@ bool SIFoldOperandsImpl::foldInstOperand(MachineInstr &MI,
   for (MachineInstr *Copy : CopiesToReplace)
     Copy->addImplicitDefUseOperands(*MF);
 
+  SetVector<MachineInstr *> ConstantFoldCandidates;
   for (FoldCandidate &Fold : FoldList) {
     assert(!Fold.isReg() || Fold.OpToFold);
     if (Fold.isReg() && Fold.OpToFold->getReg().isVirtual()) {
@@ -1679,9 +1680,8 @@ bool SIFoldOperandsImpl::foldInstOperand(MachineInstr &MI,
                         << static_cast<int>(Fold.UseOpNo) << " of "
                         << *Fold.UseMI);
 
-      if (Fold.isImm() && tryConstantFoldOp(Fold.UseMI)) {
-        LLVM_DEBUG(dbgs() << "Constant folded " << *Fold.UseMI);
-        Changed = true;
+      if (Fold.isImm()) {
+        ConstantFoldCandidates.insert(Fold.UseMI);
       }
 
     } else if (Fold.Commuted) {
@@ -1689,6 +1689,12 @@ bool SIFoldOperandsImpl::foldInstOperand(MachineInstr &MI,
       TII->commuteInstruction(*Fold.UseMI, false);
     }
   }
 
+  for (MachineInstr *MI : ConstantFoldCandidates) {
+    if (tryConstantFoldOp(MI)) {
+      LLVM_DEBUG(dbgs() << "Constant folded " << *MI);
+      Changed = true;
+    }
+  }
   return true;
 }

diff --git a/external/llvm-project/llvm/test/CodeGen/AMDGPU/bug-multi-operands-to-update-after-fold.mir b/external/llvm-project/llvm/test/CodeGen/AMDGPU/bug-multi-operands-to-update-after-fold.mir
new file mode 100644
index 000000000000..d0c9740c6954
--- /dev/null
+++ b/external/llvm-project/llvm/test/CodeGen/AMDGPU/bug-multi-operands-to-update-after-fold.mir
@@ -0,0 +1,15 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn-amd-hsa -mcpu=gfx1031 -run-pass=si-fold-operands -o - %s | FileCheck %s
+---
+name: snork
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: snork
+    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+    ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_]], %subreg.sub1, [[S_MOV_B32_]], %subreg.sub2, [[S_MOV_B32_]], %subreg.sub3
+    ; CHECK-NEXT: SI_RETURN
+    %0:sreg_32 = S_MOV_B32 0
+    %1:sgpr_128 = REG_SEQUENCE %0, %subreg.sub0, %0, %subreg.sub1, %0, %subreg.sub2, %0, %subreg.sub3
+    %2:sreg_32 = S_OR_B32 %1.sub0, %1.sub3, implicit-def dead $scc
+    SI_RETURN
+...
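Why the two-phase change above matters: while iterating FoldList, the same use instruction (Fold.UseMI) can appear in several fold candidates — in the new .mir test, the single S_MOV_B32-fed REG_SEQUENCE feeds both source operands of the S_OR_B32. Constant-folding a use the moment its first immediate lands can rewrite or erase that instruction while later candidates in the list still point at it. Collecting candidates in a SetVector (deduplicated, insertion-ordered) and folding once after the loop avoids mutating an instruction the loop will visit again. The following is a minimal standalone C++ sketch of that collect-then-fold pattern; the Instr type and fold list are illustrative stand-ins, not LLVM's classes.

// Standalone sketch of the collect-then-fold pattern from the patch above.
// "Instr" and "foldList" are hypothetical stand-ins, not LLVM types.
#include <iostream>
#include <set>
#include <vector>

struct Instr {
  int id;
  bool foldable;
};

int main() {
  // Two fold candidates targeting the same instruction, mirroring the
  // situation the REG_SEQUENCE user in the .mir test creates (both
  // S_OR_B32 source operands fold to the same immediate).
  Instr or32{7, /*foldable=*/true};
  std::vector<Instr *> foldList{&or32, &or32};

  // Phase 1: record unique candidates instead of folding mid-iteration.
  // The set + vector pair mimics llvm::SetVector: deduplicated, and
  // iterated in insertion order for deterministic behavior.
  std::vector<Instr *> candidates;
  std::set<Instr *> seen;
  for (Instr *I : foldList)
    if (seen.insert(I).second)
      candidates.push_back(I);

  // Phase 2: once the fold list is fully processed, fold each candidate
  // exactly once; rewriting or erasing it now is safe because no pending
  // candidate still refers to it.
  for (Instr *I : candidates)
    if (I->foldable)
      std::cout << "constant folded instr " << I->id << '\n';
  return 0;
}

The deduplication is as important as the deferral: without it, the second candidate for the same instruction would trigger a second fold attempt on an instruction the first attempt may already have replaced.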
From 9983a40f2dba52474f42068430016078bf1abfba Mon Sep 17 00:00:00 2001
From: Umang Yadav
Date: Fri, 8 Aug 2025 13:42:05 +0000
Subject: [PATCH 2/2] Add test to ensure compilation passes

---
 .../multioperands_fold_backend_bug.mlir | 14 ++++++++++++++
 1 file changed, 14 insertions(+)
 create mode 100644 mlir/test/rocmlir-driver/multioperands_fold_backend_bug.mlir

diff --git a/mlir/test/rocmlir-driver/multioperands_fold_backend_bug.mlir b/mlir/test/rocmlir-driver/multioperands_fold_backend_bug.mlir
new file mode 100644
index 000000000000..76e2a47ab662
--- /dev/null
+++ b/mlir/test/rocmlir-driver/multioperands_fold_backend_bug.mlir
@@ -0,0 +1,14 @@
+// This test just ensures that the backend LLVM compiler does not crash while compiling this kernel. It was crashing earlier and was fixed by https://github.com/llvm/llvm-project/pull/148205.
+// RUN: sed -e 's/##TOKEN_ARCH##/%arch/g' %s | rocmlir-driver --kernel-pipeline=migraphx,highlevel,gpu,binary --arch %arch | FileCheck %s
+// CHECK: gpu.binary
+
+module {
+  func.func @test(%arg0: !migraphx.shaped<1x40x9419x128xf16, 48225280x1205632x128x1>, %arg1: !migraphx.shaped<1x9419x40x128xf16, 48225280x5120x128x1>) -> !migraphx.shaped<1x40x9419x9419xf16, 3548702440x88717561x9419x1> attributes {kernel = "mixr", arch = "##TOKEN_ARCH##"} {
+    %0 = migraphx.literal(dense<8.831780e-02> : tensor<1xf16>) : <1xf16, 1>
+    %1 = migraphx.transpose %arg1 {permutation = [0, 2, 3, 1]} : <1x9419x40x128xf16, 48225280x5120x128x1> -> <1x40x128x9419xf16, 48225280x128x1x5120>
+    %2 = migraphx.dot %arg0, %1 {perf_config = "v3:128,256,4,64,16,8,1,1,2,1,1"} : <1x40x9419x128xf16, 48225280x1205632x128x1>, <1x40x128x9419xf16, 48225280x128x1x5120> -> <1x40x9419x9419xf16, 3548702440x88717561x9419x1>
+    %3 = migraphx.multibroadcast %0 {out_dyn_dims = [], out_lens = [1, 40, 9419, 9419]} : <1xf16, 1> -> <1x40x9419x9419xf16, 0x0x0x0>
+    %4 = migraphx.mul %2, %3 : <1x40x9419x9419xf16, 3548702440x88717561x9419x1>, <1x40x9419x9419xf16, 0x0x0x0> -> <1x40x9419x9419xf16, 3548702440x88717561x9419x1>
+    return %4 : !migraphx.shaped<1x40x9419x9419xf16, 3548702440x88717561x9419x1>
+  }
+}
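A note on the test's mechanics: the RUN line uses sed to substitute lit's %arch for the ##TOKEN_ARCH## placeholder baked into the kernel's arch attribute, so the same test runs against whatever GPU target the suite is configured for. The single CHECK on gpu.binary asserts only that the migraphx,highlevel,gpu,binary pipeline produced a compiled binary — that is, that the backend no longer crashes on this kernel — without pinning any codegen details that could make the test fragile across architectures.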