1515#include " AMDGPU.h"
1616#include " AMDGPUSubtarget.h"
1717#include " GCNHazardRecognizer.h"
18+ #include " MCTargetDesc/AMDGPUMCTargetDesc.h"
1819#include " SIDefines.h"
1920#include " SIMachineFunctionInfo.h"
2021#include " SIRegisterInfo.h"
21- #include " MCTargetDesc/AMDGPUMCTargetDesc.h"
2222#include " Utils/AMDGPUBaseInfo.h"
2323#include " llvm/ADT/APInt.h"
2424#include " llvm/ADT/ArrayRef.h"
2828#include " llvm/Analysis/AliasAnalysis.h"
2929#include " llvm/Analysis/MemoryLocation.h"
3030#include " llvm/Analysis/ValueTracking.h"
31+ #include " llvm/CodeGen/LiveVariables.h"
3132#include " llvm/CodeGen/MachineBasicBlock.h"
3233#include " llvm/CodeGen/MachineDominators.h"
3334#include " llvm/CodeGen/MachineFrameInfo.h"
@@ -2841,6 +2842,18 @@ static int64_t getFoldableImm(const MachineOperand* MO) {
28412842 return AMDGPU::NoRegister;
28422843}
28432844
2845+ static void updateLiveVariables (LiveVariables *LV, MachineInstr &MI,
2846+ MachineInstr &NewMI) {
2847+ if (LV) {
2848+ unsigned NumOps = MI.getNumOperands ();
2849+ for (unsigned I = 1 ; I < NumOps; ++I) {
2850+ MachineOperand &Op = MI.getOperand (I);
2851+ if (Op.isReg () && Op.isKill ())
2852+ LV->replaceKillInstruction (Op.getReg (), MI, NewMI);
2853+ }
2854+ }
2855+ }
2856+
28442857MachineInstr *SIInstrInfo::convertToThreeAddress (MachineFunction::iterator &MBB,
28452858 MachineInstr &MI,
28462859 LiveVariables *LV) const {
@@ -2888,43 +2901,53 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB,
28882901 const MachineOperand *Src2 = getNamedOperand (MI, AMDGPU::OpName::src2);
28892902 const MachineOperand *Clamp = getNamedOperand (MI, AMDGPU::OpName::clamp);
28902903 const MachineOperand *Omod = getNamedOperand (MI, AMDGPU::OpName::omod);
2904+ MachineInstrBuilder MIB;
28912905
28922906 if (!Src0Mods && !Src1Mods && !Clamp && !Omod &&
28932907 // If we have an SGPR input, we will violate the constant bus restriction.
2894- (ST.getConstantBusLimit (Opc) > 1 ||
2895- !Src0->isReg () ||
2908+ (ST.getConstantBusLimit (Opc) > 1 || !Src0->isReg () ||
28962909 !RI.isSGPRReg (MBB->getParent ()->getRegInfo (), Src0->getReg ()))) {
28972910 if (auto Imm = getFoldableImm (Src2)) {
28982911 unsigned NewOpc =
2899- IsFMA ? (IsF16 ? AMDGPU::V_FMAAK_F16 : AMDGPU::V_FMAAK_F32)
2900- : (IsF16 ? AMDGPU::V_MADAK_F16 : AMDGPU::V_MADAK_F32);
2901- if (pseudoToMCOpcode (NewOpc) != -1 )
2902- return BuildMI (*MBB, MI, MI.getDebugLoc (), get (NewOpc))
2903- .add (*Dst)
2904- .add (*Src0)
2905- .add (*Src1)
2906- .addImm (Imm);
2907- }
2908- unsigned NewOpc =
2909- IsFMA ? (IsF16 ? AMDGPU::V_FMAMK_F16 : AMDGPU::V_FMAMK_F32)
2910- : (IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32);
2912+ IsFMA ? (IsF16 ? AMDGPU::V_FMAAK_F16 : AMDGPU::V_FMAAK_F32)
2913+ : (IsF16 ? AMDGPU::V_MADAK_F16 : AMDGPU::V_MADAK_F32);
2914+ if (pseudoToMCOpcode (NewOpc) != -1 ) {
2915+ MIB = BuildMI (*MBB, MI, MI.getDebugLoc (), get (NewOpc))
2916+ .add (*Dst)
2917+ .add (*Src0)
2918+ .add (*Src1)
2919+ .addImm (Imm);
2920+ updateLiveVariables (LV, MI, *MIB);
2921+ return MIB;
2922+ }
2923+ }
2924+ unsigned NewOpc = IsFMA
2925+ ? (IsF16 ? AMDGPU::V_FMAMK_F16 : AMDGPU::V_FMAMK_F32)
2926+ : (IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32);
29112927 if (auto Imm = getFoldableImm (Src1)) {
2912- if (pseudoToMCOpcode (NewOpc) != -1 )
2913- return BuildMI (*MBB, MI, MI.getDebugLoc (), get (NewOpc))
2914- .add (*Dst)
2915- .add (*Src0)
2916- .addImm (Imm)
2917- .add (*Src2);
2928+ if (pseudoToMCOpcode (NewOpc) != -1 ) {
2929+ MIB = BuildMI (*MBB, MI, MI.getDebugLoc (), get (NewOpc))
2930+ .add (*Dst)
2931+ .add (*Src0)
2932+ .addImm (Imm)
2933+ .add (*Src2);
2934+ updateLiveVariables (LV, MI, *MIB);
2935+ return MIB;
2936+ }
29182937 }
29192938 if (auto Imm = getFoldableImm (Src0)) {
29202939 if (pseudoToMCOpcode (NewOpc) != -1 &&
2921- isOperandLegal (MI, AMDGPU::getNamedOperandIdx (NewOpc,
2922- AMDGPU::OpName::src0), Src1))
2923- return BuildMI (*MBB, MI, MI.getDebugLoc (), get (NewOpc))
2924- .add (*Dst)
2925- .add (*Src1)
2926- .addImm (Imm)
2927- .add (*Src2);
2940+ isOperandLegal (
2941+ MI, AMDGPU::getNamedOperandIdx (NewOpc, AMDGPU::OpName::src0),
2942+ Src1)) {
2943+ MIB = BuildMI (*MBB, MI, MI.getDebugLoc (), get (NewOpc))
2944+ .add (*Dst)
2945+ .add (*Src1)
2946+ .addImm (Imm)
2947+ .add (*Src2);
2948+ updateLiveVariables (LV, MI, *MIB);
2949+ return MIB;
2950+ }
29282951 }
29292952 }
29302953
@@ -2933,16 +2956,18 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB,
29332956 if (pseudoToMCOpcode (NewOpc) == -1 )
29342957 return nullptr ;
29352958
2936- return BuildMI (*MBB, MI, MI.getDebugLoc (), get (NewOpc))
2937- .add (*Dst)
2938- .addImm (Src0Mods ? Src0Mods->getImm () : 0 )
2939- .add (*Src0)
2940- .addImm (Src1Mods ? Src1Mods->getImm () : 0 )
2941- .add (*Src1)
2942- .addImm (0 ) // Src mods
2943- .add (*Src2)
2944- .addImm (Clamp ? Clamp->getImm () : 0 )
2945- .addImm (Omod ? Omod->getImm () : 0 );
2959+ MIB = BuildMI (*MBB, MI, MI.getDebugLoc (), get (NewOpc))
2960+ .add (*Dst)
2961+ .addImm (Src0Mods ? Src0Mods->getImm () : 0 )
2962+ .add (*Src0)
2963+ .addImm (Src1Mods ? Src1Mods->getImm () : 0 )
2964+ .add (*Src1)
2965+ .addImm (0 ) // Src mods
2966+ .add (*Src2)
2967+ .addImm (Clamp ? Clamp->getImm () : 0 )
2968+ .addImm (Omod ? Omod->getImm () : 0 );
2969+ updateLiveVariables (LV, MI, *MIB);
2970+ return MIB;
29462971}
29472972
29482973// It's not generally safe to move VALU instructions across these since it will
0 commit comments