 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/CodeGen/LivePhysRegs.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Type.h"
+#include "llvm/InitializePasses.h"
 #include "llvm/MC/MCInstrDesc.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/Allocator.h"
@@ -1383,6 +1385,38 @@ static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
   case ARM::t2STRi8:
   case ARM::t2STRi12:
     return ARM::t2STR_POST;
+
+  case ARM::MVE_VLDRBS16:
+    return ARM::MVE_VLDRBS16_post;
+  case ARM::MVE_VLDRBS32:
+    return ARM::MVE_VLDRBS32_post;
+  case ARM::MVE_VLDRBU16:
+    return ARM::MVE_VLDRBU16_post;
+  case ARM::MVE_VLDRBU32:
+    return ARM::MVE_VLDRBU32_post;
+  case ARM::MVE_VLDRHS32:
+    return ARM::MVE_VLDRHS32_post;
+  case ARM::MVE_VLDRHU32:
+    return ARM::MVE_VLDRHU32_post;
+  case ARM::MVE_VLDRBU8:
+    return ARM::MVE_VLDRBU8_post;
+  case ARM::MVE_VLDRHU16:
+    return ARM::MVE_VLDRHU16_post;
+  case ARM::MVE_VLDRWU32:
+    return ARM::MVE_VLDRWU32_post;
+  case ARM::MVE_VSTRB16:
+    return ARM::MVE_VSTRB16_post;
+  case ARM::MVE_VSTRB32:
+    return ARM::MVE_VSTRB32_post;
+  case ARM::MVE_VSTRH32:
+    return ARM::MVE_VSTRH32_post;
+  case ARM::MVE_VSTRBU8:
+    return ARM::MVE_VSTRBU8_post;
+  case ARM::MVE_VSTRHU16:
+    return ARM::MVE_VSTRHU16_post;
+  case ARM::MVE_VSTRWU32:
+    return ARM::MVE_VSTRWU32_post;
+
   default: llvm_unreachable("Unhandled opcode!");
   }
 }
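
To make the effect of this mapping concrete: each MVE contiguous load/store is paired with its writeback ("_post") form, which defines the updated base register in addition to the transferred value. A rough sketch of the rewrite this enables, in pseudo machine IR (virtual register names hypothetical):

// Before:
//   %q0 = MVE_VLDRWU32 %r0, 0              ; zero-offset load
//   %r1 = t2ADDri %r0, 16, ...             ; separate base update
// After:
//   %r1, %q0 = MVE_VLDRWU32_post %r0, 16   ; one instruction: load, then r1 = r0 + 16
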
@@ -2046,6 +2080,7 @@ namespace {
     const TargetRegisterInfo *TRI;
     const ARMSubtarget *STI;
     MachineRegisterInfo *MRI;
+    MachineDominatorTree *DT;
     MachineFunction *MF;
 
     ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
@@ -2058,6 +2093,8 @@ namespace {
 
     void getAnalysisUsage(AnalysisUsage &AU) const override {
       AU.addRequired<AAResultsWrapperPass>();
+      AU.addRequired<MachineDominatorTree>();
+      AU.addPreserved<MachineDominatorTree>();
       MachineFunctionPass::getAnalysisUsage(AU);
     }
 
@@ -2071,14 +2108,19 @@ namespace {
                        unsigned Base, bool isLd,
                        DenseMap<MachineInstr*, unsigned> &MI2LocMap);
     bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
+    bool DistributeIncrements();
+    bool DistributeIncrements(Register Base);
   };
 
 } // end anonymous namespace
 
 char ARMPreAllocLoadStoreOpt::ID = 0;
 
-INITIALIZE_PASS(ARMPreAllocLoadStoreOpt, "arm-prera-ldst-opt",
-                ARM_PREALLOC_LOAD_STORE_OPT_NAME, false, false)
+INITIALIZE_PASS_BEGIN(ARMPreAllocLoadStoreOpt, "arm-prera-ldst-opt",
+                      ARM_PREALLOC_LOAD_STORE_OPT_NAME, false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_END(ARMPreAllocLoadStoreOpt, "arm-prera-ldst-opt",
+                    ARM_PREALLOC_LOAD_STORE_OPT_NAME, false, false)
 
 // Limit the number of instructions to be rescheduled.
 // FIXME: tune this limit, and/or come up with some better heuristics.
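
A note on this initialization change: once the pass requires MachineDominatorTree (see getAnalysisUsage above), the single INITIALIZE_PASS macro is no longer enough; the BEGIN/DEPENDENCY/END form registers the dependency with the pass registry so the dominator tree is initialized before this pass runs. It is also preserved, since the transformation below only rewrites instructions within blocks and never edits the CFG, so dominance information stays valid. A sketch of the query pattern this buys, as used later in the patch:

//   if (DT->dominates(BaseAccess, OtherAccess))      -> safe to rewrite OtherAccess
//   else if (DT->dominates(OtherAccess, BaseAccess)) -> access runs first, leave it
//   else                                             -> unknown relation, bail out
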
@@ -2094,10 +2136,11 @@ bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
   TII = STI->getInstrInfo();
   TRI = STI->getRegisterInfo();
   MRI = &Fn.getRegInfo();
+  DT = &getAnalysis<MachineDominatorTree>();
   MF = &Fn;
   AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
 
-  bool Modified = false;
+  bool Modified = DistributeIncrements();
   for (MachineBasicBlock &MFI : Fn)
     Modified |= RescheduleLoadStoreInstrs(&MFI);
 
@@ -2475,6 +2518,198 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
   return RetVal;
 }
 
+// Get the Base register operand index from the memory access MachineInstr if
+// we should attempt to distribute postinc on it. Return -1 if it is not a
+// valid instruction type. If it returns an index, it is assumed that the
+// instruction uses r+i addressing, and getBaseOperandIndex() + 1 is the
+// Offset index.
+static int getBaseOperandIndex(MachineInstr &MI) {
+  switch (MI.getOpcode()) {
+  case ARM::MVE_VLDRBS16:
+  case ARM::MVE_VLDRBS32:
+  case ARM::MVE_VLDRBU16:
+  case ARM::MVE_VLDRBU32:
+  case ARM::MVE_VLDRHS32:
+  case ARM::MVE_VLDRHU32:
+  case ARM::MVE_VLDRBU8:
+  case ARM::MVE_VLDRHU16:
+  case ARM::MVE_VLDRWU32:
+  case ARM::MVE_VSTRB16:
+  case ARM::MVE_VSTRB32:
+  case ARM::MVE_VSTRH32:
+  case ARM::MVE_VSTRBU8:
+  case ARM::MVE_VSTRHU16:
+  case ARM::MVE_VSTRWU32:
+    return 1;
+  }
+  return -1;
+}
+
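
A hedged illustration of the contract established here; the helper below is hypothetical and not part of the patch, but shows how callers are expected to consume the returned index:

// Hypothetical helper, for illustration only.
static bool getBaseAndOffset(MachineInstr &MI, Register &Base, int64_t &Offset) {
  int BaseOp = getBaseOperandIndex(MI);
  if (BaseOp == -1 || !MI.getOperand(BaseOp).isReg())
    return false; // Not a recognised MVE contiguous load/store.
  Base = MI.getOperand(BaseOp).getReg();       // r+i form: base register...
  Offset = MI.getOperand(BaseOp + 1).getImm(); // ...followed by the immediate.
  return true;
}
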
+static MachineInstr *createPostIncLoadStore(MachineInstr *MI, int Offset,
+                                            Register NewReg,
+                                            const TargetInstrInfo *TII,
+                                            const TargetRegisterInfo *TRI) {
+  MachineFunction *MF = MI->getMF();
+  MachineRegisterInfo &MRI = MF->getRegInfo();
+
+  unsigned NewOpcode = getPostIndexedLoadStoreOpcode(
+      MI->getOpcode(), Offset > 0 ? ARM_AM::add : ARM_AM::sub);
+
+  const MCInstrDesc &MCID = TII->get(NewOpcode);
+  // Constrain the def register class
+  const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
+  MRI.constrainRegClass(NewReg, TRC);
+  // And do the same for the base operand
+  TRC = TII->getRegClass(MCID, 2, TRI, *MF);
+  MRI.constrainRegClass(MI->getOperand(1).getReg(), TRC);
+
+  return BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID)
+      .addReg(NewReg, RegState::Define)
+      .add(MI->getOperand(0))
+      .add(MI->getOperand(1))
+      .addImm(Offset)
+      .add(MI->getOperand(3))
+      .add(MI->getOperand(4))
+      .cloneMemRefs(*MI);
+}
+
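
Reading the BuildMI chain back, the rewritten instruction's operand layout is as follows (a sketch derived from this function, assuming the original access has the usual value, base, offset, predicate operand order):

//   op0: writeback def, the post-incremented base (NewReg)
//   op1: the transferred value, old operand 0 (a def for loads, a use for stores)
//   op2: the base register, old operand 1
//   op3: the post-increment immediate (Offset)
//   op4, op5: the predicate operands, old operands 3 and 4
// The constrainRegClass calls above cover op0 and op2, the two GPR operands.
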
+// Given a Base Register, optimise the load/store uses to attempt to create
+// more post-inc accesses. We do this by taking zero offset loads/stores with
+// an add, and converting them to a postinc load/store of the same type. Any
+// subsequent accesses will be adjusted to use and account for the post-inc
+// value.
+// For example:
+// LDR #0            LDR_POSTINC #16
+// LDR #4            LDR #-12
+// LDR #8            LDR #-8
+// LDR #12           LDR #-4
+// ADD #16
+bool ARMPreAllocLoadStoreOpt::DistributeIncrements(Register Base) {
+  // We are looking for:
+  // One zero offset load/store that can become postinc
+  MachineInstr *BaseAccess = nullptr;
+  // An increment that can be folded in
+  MachineInstr *Increment = nullptr;
+  // Other accesses after BaseAccess that will need to be updated to use the
+  // postinc value
+  SmallPtrSet<MachineInstr *, 8> OtherAccesses;
+  for (auto &Use : MRI->use_nodbg_instructions(Base)) {
+    if (!Increment && getAddSubImmediate(Use) != 0) {
+      Increment = &Use;
+      continue;
+    }
+
+    int BaseOp = getBaseOperandIndex(Use);
+    if (BaseOp == -1)
+      return false;
+
+    if (!Use.getOperand(BaseOp).isReg() ||
+        Use.getOperand(BaseOp).getReg() != Base)
+      return false;
+    if (Use.getOperand(BaseOp + 1).getImm() == 0)
+      BaseAccess = &Use;
+    else
+      OtherAccesses.insert(&Use);
+  }
+
+  if (!BaseAccess || !Increment ||
+      BaseAccess->getParent() != Increment->getParent())
+    return false;
+  Register PredReg;
+  if (Increment->definesRegister(ARM::CPSR) ||
+      getInstrPredicate(*Increment, PredReg) != ARMCC::AL)
+    return false;
+
+  LLVM_DEBUG(dbgs() << "\nAttempting to distribute increments on VirtualReg "
+                    << Base.virtRegIndex() << "\n");
+
+  // Make sure that Increment has no uses before BaseAccess.
+  for (MachineInstr &Use :
+       MRI->use_nodbg_instructions(Increment->getOperand(0).getReg())) {
+    if (!DT->dominates(BaseAccess, &Use) || &Use == BaseAccess) {
+      LLVM_DEBUG(dbgs() << "  BaseAccess doesn't dominate use of increment\n");
+      return false;
+    }
+  }
+
+  // Make sure that Increment can be folded into Base
+  int IncrementOffset = getAddSubImmediate(*Increment);
+  unsigned NewPostIncOpcode = getPostIndexedLoadStoreOpcode(
+      BaseAccess->getOpcode(), IncrementOffset > 0 ? ARM_AM::add : ARM_AM::sub);
+  if (!isLegalAddressImm(NewPostIncOpcode, IncrementOffset, TII)) {
+    LLVM_DEBUG(dbgs() << "  Illegal addressing mode immediate on postinc\n");
+    return false;
+  }
+
+  // And make sure that the negative value of the increment can be added to all
+  // other offsets after the BaseAccess. We rely on either
+  // dominates(BaseAccess, OtherAccess) or dominates(OtherAccess, BaseAccess)
+  // to keep things simple.
+  SmallPtrSet<MachineInstr *, 4> SuccessorAccesses;
+  for (auto *Use : OtherAccesses) {
+    if (DT->dominates(BaseAccess, Use)) {
+      SuccessorAccesses.insert(Use);
+      unsigned BaseOp = getBaseOperandIndex(*Use);
+      if (!isLegalAddressImm(
+              Use->getOpcode(),
+              Use->getOperand(BaseOp + 1).getImm() - IncrementOffset, TII)) {
+        LLVM_DEBUG(dbgs() << "  Illegal addressing mode immediate on use\n");
+        return false;
+      }
+    } else if (!DT->dominates(Use, BaseAccess)) {
+      LLVM_DEBUG(
+          dbgs() << "  Unknown dominance relation between Base and Use\n");
+      return false;
+    }
+  }
+
+  // Replace BaseAccess with a post inc
+  LLVM_DEBUG(dbgs() << "Changing: "; BaseAccess->dump());
+  LLVM_DEBUG(dbgs() << "  And   : "; Increment->dump());
+  Register NewBaseReg = Increment->getOperand(0).getReg();
+  MachineInstr *BaseAccessPost =
+      createPostIncLoadStore(BaseAccess, IncrementOffset, NewBaseReg, TII, TRI);
+  BaseAccess->eraseFromParent();
+  Increment->eraseFromParent();
+  LLVM_DEBUG(dbgs() << "  To    : "; BaseAccessPost->dump());
+
+  for (auto *Use : SuccessorAccesses) {
+    LLVM_DEBUG(dbgs() << "Changing: "; Use->dump());
+    unsigned BaseOp = getBaseOperandIndex(*Use);
+    Use->getOperand(BaseOp).setReg(NewBaseReg);
+    int OldOffset = Use->getOperand(BaseOp + 1).getImm();
+    Use->getOperand(BaseOp + 1).setImm(OldOffset - IncrementOffset);
+    LLVM_DEBUG(dbgs() << "  To    : "; Use->dump());
+  }
+
+  // Remove the kill flag from all uses of NewBaseReg, in case any old uses
+  // remain.
+  for (MachineOperand &Op : MRI->use_nodbg_operands(NewBaseReg))
+    Op.setIsKill(false);
+  return true;
+}
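
To make the offset arithmetic concrete, here is the header comment's example worked through this function (IncrementOffset = 16; assumes every access passes the isLegalAddressImm checks):

//   BaseAccess  LDR #0   -> LDR_POSTINC #16 (base advances by 16)
//   successor   LDR #4   -> LDR #(4 - 16)  = LDR #-12
//   successor   LDR #8   -> LDR #(8 - 16)  = LDR #-8
//   successor   LDR #12  -> LDR #(12 - 16) = LDR #-4
//   the ADD #16 is erased; its result register is now defined by the postinc.
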
+
+bool ARMPreAllocLoadStoreOpt::DistributeIncrements() {
+  bool Changed = false;
+  SmallSetVector<Register, 4> Visited;
+  for (auto &MBB : *MF) {
+    for (auto &MI : MBB) {
+      int BaseOp = getBaseOperandIndex(MI);
+      if (BaseOp == -1 || !MI.getOperand(BaseOp).isReg())
+        continue;
+
+      Register Base = MI.getOperand(BaseOp).getReg();
+      if (!Base.isVirtual() || Visited.count(Base))
+        continue;
+
+      Visited.insert(Base);
+    }
+  }
+
+  for (auto Base : Visited)
+    Changed |= DistributeIncrements(Base);
+
+  return Changed;
+}
+
 /// Returns an instance of the load / store optimization pass.
 FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
   if (PreAlloc)