@@ -60,12 +60,13 @@ struct AArch64MIPeepholeOpt : public MachineFunctionPass {
6060 MachineLoopInfo *MLI;
6161 MachineRegisterInfo *MRI;
6262
63+ using OpcodePair = std::pair<unsigned , unsigned >;
6364 template <typename T>
6465 using SplitAndOpcFunc =
65- std::function<Optional<unsigned >(T, unsigned , T &, T &)>;
66+ std::function<Optional<OpcodePair >(T, unsigned , T &, T &)>;
6667 using BuildMIFunc =
67- std::function<void (MachineInstr &, unsigned , unsigned , unsigned , Register ,
68- Register, Register)>;
68+ std::function<void (MachineInstr &, OpcodePair , unsigned , unsigned ,
69+ Register, Register, Register )>;
6970
7071 // / For instructions where an immediate operand could be split into two
7172 // / separate immediate instructions, use the splitTwoPartImm to handle the
@@ -93,6 +94,10 @@ struct AArch64MIPeepholeOpt : public MachineFunctionPass {
9394 bool visitADDSUB (unsigned PosOpc, unsigned NegOpc, MachineInstr &MI,
9495 SmallSetVector<MachineInstr *, 8 > &ToBeRemoved);
9596 template <typename T>
97+ bool visitADDSSUBS (OpcodePair PosOpcs, OpcodePair NegOpcs, MachineInstr &MI,
98+ SmallSetVector<MachineInstr *, 8 > &ToBeRemoved);
99+
100+ template <typename T>
96101 bool visitAND (unsigned Opc, MachineInstr &MI,
97102 SmallSetVector<MachineInstr *, 8 > &ToBeRemoved);
98103 bool visitORR (MachineInstr &MI,
@@ -171,20 +176,20 @@ bool AArch64MIPeepholeOpt::visitAND(
171176
172177 return splitTwoPartImm<T>(
173178 MI, ToBeRemoved,
174- [Opc](T Imm, unsigned RegSize, T &Imm0, T &Imm1) -> Optional<unsigned > {
179+ [Opc](T Imm, unsigned RegSize, T &Imm0, T &Imm1) -> Optional<OpcodePair > {
175180 if (splitBitmaskImm (Imm, RegSize, Imm0, Imm1))
176- return Opc;
181+ return std::make_pair ( Opc, Opc) ;
177182 return None;
178183 },
179- [&TII = TII](MachineInstr &MI, unsigned Opcode, unsigned Imm0,
184+ [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,
180185 unsigned Imm1, Register SrcReg, Register NewTmpReg,
181186 Register NewDstReg) {
182187 DebugLoc DL = MI.getDebugLoc ();
183188 MachineBasicBlock *MBB = MI.getParent ();
184- BuildMI (*MBB, MI, DL, TII->get (Opcode), NewTmpReg)
189+ BuildMI (*MBB, MI, DL, TII->get (Opcode. first ), NewTmpReg)
185190 .addReg (SrcReg)
186191 .addImm (Imm0);
187- BuildMI (*MBB, MI, DL, TII->get (Opcode), NewDstReg)
192+ BuildMI (*MBB, MI, DL, TII->get (Opcode. second ), NewDstReg)
188193 .addReg (NewTmpReg)
189194 .addImm (Imm1);
190195 });
@@ -273,23 +278,64 @@ bool AArch64MIPeepholeOpt::visitADDSUB(
273278 return splitTwoPartImm<T>(
274279 MI, ToBeRemoved,
275280 [PosOpc, NegOpc](T Imm, unsigned RegSize, T &Imm0,
276- T &Imm1) -> Optional<unsigned > {
281+ T &Imm1) -> Optional<OpcodePair > {
277282 if (splitAddSubImm (Imm, RegSize, Imm0, Imm1))
278- return PosOpc;
283+ return std::make_pair ( PosOpc, PosOpc) ;
279284 if (splitAddSubImm (-Imm, RegSize, Imm0, Imm1))
280- return NegOpc;
285+ return std::make_pair ( NegOpc, NegOpc) ;
281286 return None;
282287 },
283- [&TII = TII](MachineInstr &MI, unsigned Opcode, unsigned Imm0,
288+ [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,
289+ unsigned Imm1, Register SrcReg, Register NewTmpReg,
290+ Register NewDstReg) {
291+ DebugLoc DL = MI.getDebugLoc ();
292+ MachineBasicBlock *MBB = MI.getParent ();
293+ BuildMI (*MBB, MI, DL, TII->get (Opcode.first ), NewTmpReg)
294+ .addReg (SrcReg)
295+ .addImm (Imm0)
296+ .addImm (12 );
297+ BuildMI (*MBB, MI, DL, TII->get (Opcode.second ), NewDstReg)
298+ .addReg (NewTmpReg)
299+ .addImm (Imm1)
300+ .addImm (0 );
301+ });
302+ }
303+
304+ template <typename T>
305+ bool AArch64MIPeepholeOpt::visitADDSSUBS (
306+ OpcodePair PosOpcs, OpcodePair NegOpcs, MachineInstr &MI,
307+ SmallSetVector<MachineInstr *, 8 > &ToBeRemoved) {
308+ // Try the same transformation as ADDSUB but with the additional requirement
309+ // that the condition code usages are only for Equal and Not Equal
310+ return splitTwoPartImm<T>(
311+ MI, ToBeRemoved,
312+ [PosOpcs, NegOpcs, &MI, &TRI = TRI, &MRI = MRI](
313+ T Imm, unsigned RegSize, T &Imm0, T &Imm1) -> Optional<OpcodePair> {
314+ OpcodePair OP;
315+ if (splitAddSubImm (Imm, RegSize, Imm0, Imm1))
316+ OP = PosOpcs;
317+ else if (splitAddSubImm (-Imm, RegSize, Imm0, Imm1))
318+ OP = NegOpcs;
319+ else
320+ return None;
321+ // Check conditional uses last since scanning the subsequent
322+ // instructions is expensive
323+ MachineInstr &SrcMI = *MRI->getUniqueVRegDef (MI.getOperand (1 ).getReg ());
324+ Optional<UsedNZCV> NZCVUsed = examineCFlagsUse (SrcMI, MI, *TRI);
325+ if (!NZCVUsed || NZCVUsed->C || NZCVUsed->V )
326+ return None;
327+ return OP;
328+ },
329+ [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,
284330 unsigned Imm1, Register SrcReg, Register NewTmpReg,
285331 Register NewDstReg) {
286332 DebugLoc DL = MI.getDebugLoc ();
287333 MachineBasicBlock *MBB = MI.getParent ();
288- BuildMI (*MBB, MI, DL, TII->get (Opcode), NewTmpReg)
334+ BuildMI (*MBB, MI, DL, TII->get (Opcode. first ), NewTmpReg)
289335 .addReg (SrcReg)
290336 .addImm (Imm0)
291337 .addImm (12 );
292- BuildMI (*MBB, MI, DL, TII->get (Opcode), NewDstReg)
338+ BuildMI (*MBB, MI, DL, TII->get (Opcode. second ), NewDstReg)
293339 .addReg (NewTmpReg)
294340 .addImm (Imm1)
295341 .addImm (0 );
@@ -357,32 +403,49 @@ bool AArch64MIPeepholeOpt::splitTwoPartImm(
357403 // number since it was sign extended when we assign to the 64-bit Imm.
358404 if (SubregToRegMI)
359405 Imm &= 0xFFFFFFFF ;
360- unsigned Opcode;
406+ OpcodePair Opcode;
361407 if (auto R = SplitAndOpc (Imm, RegSize, Imm0, Imm1))
362408 Opcode = R.getValue ();
363409 else
364410 return false ;
365411
366- // Create new ADD/SUB MIs.
412+ // Create new MIs using the first and second opcodes. Opcodes might differ for
413+ // flag-setting operations that should only set flags on the second instruction.
414+ // NewTmpReg = Opcode.first SrcReg Imm0
415+ // NewDstReg = Opcode.second NewTmpReg Imm1
416+
417+ // Determine register classes for destinations and register operands
367418 MachineFunction *MF = MI.getMF ();
368- const TargetRegisterClass *RC =
369- TII->getRegClass (TII->get (Opcode), 0 , TRI, *MF);
370- const TargetRegisterClass *ORC =
371- TII->getRegClass (TII->get (Opcode), 1 , TRI, *MF);
419+ const TargetRegisterClass *FirstInstrDstRC =
420+ TII->getRegClass (TII->get (Opcode.first ), 0 , TRI, *MF);
421+ const TargetRegisterClass *FirstInstrOperandRC =
422+ TII->getRegClass (TII->get (Opcode.first ), 1 , TRI, *MF);
423+ const TargetRegisterClass *SecondInstrDstRC =
424+ (Opcode.first == Opcode.second )
425+ ? FirstInstrDstRC
426+ : TII->getRegClass (TII->get (Opcode.second ), 0 , TRI, *MF);
427+ const TargetRegisterClass *SecondInstrOperandRC =
428+ (Opcode.first == Opcode.second )
429+ ? FirstInstrOperandRC
430+ : TII->getRegClass (TII->get (Opcode.second ), 1 , TRI, *MF);
431+
432+ // Get the old destination/source registers and create the new destination registers
372433 Register DstReg = MI.getOperand (0 ).getReg ();
373434 Register SrcReg = MI.getOperand (1 ).getReg ();
374- Register NewTmpReg = MRI->createVirtualRegister (RC );
375- Register NewDstReg = MRI->createVirtualRegister (RC );
435+ Register NewTmpReg = MRI->createVirtualRegister (FirstInstrDstRC );
436+ Register NewDstReg = MRI->createVirtualRegister (SecondInstrDstRC );
376437
377- MRI->constrainRegClass (SrcReg, RC);
378- MRI->constrainRegClass (NewTmpReg, ORC);
438+ // Constrain registers based on their new uses
439+ MRI->constrainRegClass (SrcReg, FirstInstrOperandRC);
440+ MRI->constrainRegClass (NewTmpReg, SecondInstrOperandRC);
379441 MRI->constrainRegClass (NewDstReg, MRI->getRegClass (DstReg));
380442
443+ // Call the delegating operation to build the instruction
381444 BuildInstr (MI, Opcode, Imm0, Imm1, SrcReg, NewTmpReg, NewDstReg);
382445
383- MRI->replaceRegWith (DstReg, NewDstReg);
384446 // replaceRegWith changes MI's definition register. Keep it for SSA form until
385447 // deleting MI.
448+ MRI->replaceRegWith (DstReg, NewDstReg);
386449 MI.getOperand (0 ).setReg (DstReg);
387450
388451 // Record the MIs that need to be removed.
@@ -439,6 +502,26 @@ bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
439502 Changed = visitADDSUB<uint64_t >(AArch64::SUBXri, AArch64::ADDXri, MI,
440503 ToBeRemoved);
441504 break ;
505+ case AArch64::ADDSWrr:
506+ Changed = visitADDSSUBS<uint32_t >({AArch64::ADDWri, AArch64::ADDSWri},
507+ {AArch64::SUBWri, AArch64::SUBSWri},
508+ MI, ToBeRemoved);
509+ break ;
510+ case AArch64::SUBSWrr:
511+ Changed = visitADDSSUBS<uint32_t >({AArch64::SUBWri, AArch64::SUBSWri},
512+ {AArch64::ADDWri, AArch64::ADDSWri},
513+ MI, ToBeRemoved);
514+ break ;
515+ case AArch64::ADDSXrr:
516+ Changed = visitADDSSUBS<uint64_t >({AArch64::ADDXri, AArch64::ADDSXri},
517+ {AArch64::SUBXri, AArch64::SUBSXri},
518+ MI, ToBeRemoved);
519+ break ;
520+ case AArch64::SUBSXrr:
521+ Changed = visitADDSSUBS<uint64_t >({AArch64::SUBXri, AArch64::SUBSXri},
522+ {AArch64::ADDXri, AArch64::ADDSXri},
523+ MI, ToBeRemoved);
524+ break ;
442525 }
443526 }
444527 }
0 commit comments