// 1. MOVi32imm + ANDWrr ==> ANDWri + ANDWri
//    MOVi64imm + ANDXrr ==> ANDXri + ANDXri
//
// 2. MOVi32imm + ADDWrr ==> ADDWri + ADDWri
//    MOVi64imm + ADDXrr ==> ADDXri + ADDXri
//
// 3. MOVi32imm + SUBWrr ==> SUBWri + SUBWri
//    MOVi64imm + SUBXrr ==> SUBXri + SUBXri
//
// The mov pseudo instruction could be expanded to multiple mov instructions
// later. In this case, we could try to split the constant operand of mov
// instruction into two immediates which can be directly encoded into
// *Wri/*Xri instructions. It makes two AND/ADD/SUB instructions instead of
// multiple `mov` + `and/add/sub` instructions.
//
// 4. Remove redundant ORRWrs which is generated by zero-extend.
//
// %3:gpr32 = ORRWrs $wzr, %2, 0
// %4:gpr64 = SUBREG_TO_REG 0, %3, %subreg.sub_32
@@ -51,6 +58,12 @@ struct AArch64MIPeepholeOpt : public MachineFunctionPass {
5158 MachineLoopInfo *MLI;
5259 MachineRegisterInfo *MRI;
5360
61+ bool checkMovImmInstr (MachineInstr &MI, MachineInstr *&MovMI,
62+ MachineInstr *&SubregToRegMI);
63+
64+ template <typename T>
65+ bool visitADDSUB (MachineInstr &MI,
66+ SmallSetVector<MachineInstr *, 8 > &ToBeRemoved, bool IsAdd);
5467 template <typename T>
5568 bool visitAND (MachineInstr &MI,
5669 SmallSetVector<MachineInstr *, 8 > &ToBeRemoved);
@@ -131,36 +144,9 @@ bool AArch64MIPeepholeOpt::visitAND(
131144 assert ((RegSize == 32 || RegSize == 64 ) &&
132145 " Invalid RegSize for AND bitmask peephole optimization" );
133146
134- // Check whether AND's MBB is in loop and the AND is loop invariant.
135- MachineBasicBlock *MBB = MI.getParent ();
136- MachineLoop *L = MLI->getLoopFor (MBB);
137- if (L && !L->isLoopInvariant (MI))
138- return false ;
139-
140- // Check whether AND's operand is MOV with immediate.
141- MachineInstr *MovMI = MRI->getUniqueVRegDef (MI.getOperand (2 ).getReg ());
142- if (!MovMI)
143- return false ;
144-
145- MachineInstr *SubregToRegMI = nullptr ;
146- // If it is SUBREG_TO_REG, check its operand.
147- if (MovMI->getOpcode () == TargetOpcode::SUBREG_TO_REG) {
148- SubregToRegMI = MovMI;
149- MovMI = MRI->getUniqueVRegDef (MovMI->getOperand (2 ).getReg ());
150- if (!MovMI)
151- return false ;
152- }
153-
154- if (MovMI->getOpcode () != AArch64::MOVi32imm &&
155- MovMI->getOpcode () != AArch64::MOVi64imm)
156- return false ;
157-
158- // If the MOV has multiple uses, do not split the immediate because it causes
159- // more instructions.
160- if (!MRI->hasOneUse (MovMI->getOperand (0 ).getReg ()))
161- return false ;
162-
163- if (SubregToRegMI && !MRI->hasOneUse (SubregToRegMI->getOperand (0 ).getReg ()))
147+ // Perform several essential checks against current MI.
148+ MachineInstr *MovMI = nullptr , *SubregToRegMI = nullptr ;
149+ if (!checkMovImmInstr (MI, MovMI, SubregToRegMI))
164150 return false ;
165151
166152 // Split the bitmask immediate into two.
@@ -177,6 +163,7 @@ bool AArch64MIPeepholeOpt::visitAND(
177163
178164 // Create new AND MIs.
179165 DebugLoc DL = MI.getDebugLoc ();
166+ MachineBasicBlock *MBB = MI.getParent ();
180167 const TargetRegisterClass *ANDImmRC =
181168 (RegSize == 32 ) ? &AArch64::GPR32spRegClass : &AArch64::GPR64spRegClass;
182169 Register DstReg = MI.getOperand (0 ).getReg ();
@@ -251,6 +238,144 @@ bool AArch64MIPeepholeOpt::visitORR(
251238 return true ;
252239}
253240
241+ template <typename T>
242+ static bool splitAddSubImm (T Imm, unsigned RegSize, T &Imm0, T &Imm1) {
243+ // The immediate must be in the form of ((imm0 << 12) + imm1), in which both
244+ // imm0 and imm1 are non-zero 12-bit unsigned int.
245+ if ((Imm & 0xfff000 ) == 0 || (Imm & 0xfff ) == 0 ||
246+ (Imm & ~static_cast <T>(0xffffff )) != 0 )
247+ return false ;
248+
249+ // The immediate can not be composed via a single instruction.
250+ SmallVector<AArch64_IMM::ImmInsnModel, 4 > Insn;
251+ AArch64_IMM::expandMOVImm (Imm, RegSize, Insn);
252+ if (Insn.size () == 1 )
253+ return false ;
254+
255+ // Split Imm into (Imm0 << 12) + Imm1;
256+ Imm0 = (Imm >> 12 ) & 0xfff ;
257+ Imm1 = Imm & 0xfff ;
258+ return true ;
259+ }
260+
261+ template <typename T>
262+ bool AArch64MIPeepholeOpt::visitADDSUB (
263+ MachineInstr &MI, SmallSetVector<MachineInstr *, 8 > &ToBeRemoved,
264+ bool IsAdd) {
265+ // Try below transformation.
266+ //
267+ // MOVi32imm + ADDWrr ==> ADDWri + ADDWri
268+ // MOVi64imm + ADDXrr ==> ADDXri + ADDXri
269+ //
270+ // MOVi32imm + SUBWrr ==> SUBWri + SUBWri
271+ // MOVi64imm + SUBXrr ==> SUBXri + SUBXri
272+ //
273+ // The mov pseudo instruction could be expanded to multiple mov instructions
274+ // later. Let's try to split the constant operand of mov instruction into two
275+ // legal add/sub immediates. It makes only two ADD/SUB instructions intead of
276+ // multiple `mov` + `and/sub` instructions.
277+
278+ unsigned RegSize = sizeof (T) * 8 ;
279+ assert ((RegSize == 32 || RegSize == 64 ) &&
280+ " Invalid RegSize for legal add/sub immediate peephole optimization" );
281+
282+ // Perform several essential checks against current MI.
283+ MachineInstr *MovMI, *SubregToRegMI;
284+ if (!checkMovImmInstr (MI, MovMI, SubregToRegMI))
285+ return false ;
286+
287+ // Split the immediate to Imm0 and Imm1, and calculate the Opcode.
288+ T Imm = static_cast <T>(MovMI->getOperand (1 ).getImm ()), Imm0, Imm1;
289+ unsigned Opcode;
290+ if (splitAddSubImm (Imm, RegSize, Imm0, Imm1)) {
291+ if (IsAdd)
292+ Opcode = RegSize == 32 ? AArch64::ADDWri : AArch64::ADDXri;
293+ else
294+ Opcode = RegSize == 32 ? AArch64::SUBWri : AArch64::SUBXri;
295+ } else if (splitAddSubImm (-Imm, RegSize, Imm0, Imm1)) {
296+ if (IsAdd)
297+ Opcode = RegSize == 32 ? AArch64::SUBWri : AArch64::SUBXri;
298+ else
299+ Opcode = RegSize == 32 ? AArch64::ADDWri : AArch64::ADDXri;
300+ } else {
301+ return false ;
302+ }
303+
304+ // Create new ADD/SUB MIs.
305+ DebugLoc DL = MI.getDebugLoc ();
306+ MachineBasicBlock *MBB = MI.getParent ();
307+ const TargetRegisterClass *RC =
308+ (RegSize == 32 ) ? &AArch64::GPR32spRegClass : &AArch64::GPR64spRegClass;
309+ Register DstReg = MI.getOperand (0 ).getReg ();
310+ Register SrcReg = MI.getOperand (1 ).getReg ();
311+ Register NewTmpReg = MRI->createVirtualRegister (RC);
312+ Register NewDstReg = MRI->createVirtualRegister (RC);
313+
314+ MRI->constrainRegClass (SrcReg, RC);
315+ BuildMI (*MBB, MI, DL, TII->get (Opcode), NewTmpReg)
316+ .addReg (SrcReg)
317+ .addImm (Imm0)
318+ .addImm (12 );
319+
320+ BuildMI (*MBB, MI, DL, TII->get (Opcode), NewDstReg)
321+ .addReg (NewTmpReg)
322+ .addImm (Imm1)
323+ .addImm (0 );
324+
325+ MRI->replaceRegWith (DstReg, NewDstReg);
326+ // replaceRegWith changes MI's definition register. Keep it for SSA form until
327+ // deleting MI.
328+ MI.getOperand (0 ).setReg (DstReg);
329+
330+ // Record the MIs need to be removed.
331+ ToBeRemoved.insert (&MI);
332+ if (SubregToRegMI)
333+ ToBeRemoved.insert (SubregToRegMI);
334+ ToBeRemoved.insert (MovMI);
335+
336+ return true ;
337+ }
338+
339+ // Checks if the corresponding MOV immediate instruction is applicable for
340+ // this peephole optimization.
341+ bool AArch64MIPeepholeOpt::checkMovImmInstr (MachineInstr &MI,
342+ MachineInstr *&MovMI,
343+ MachineInstr *&SubregToRegMI) {
344+ // Check whether current MBB is in loop and the AND is loop invariant.
345+ MachineBasicBlock *MBB = MI.getParent ();
346+ MachineLoop *L = MLI->getLoopFor (MBB);
347+ if (L && !L->isLoopInvariant (MI))
348+ return false ;
349+
350+ // Check whether current MI's operand is MOV with immediate.
351+ MovMI = MRI->getUniqueVRegDef (MI.getOperand (2 ).getReg ());
352+ if (!MovMI)
353+ return false ;
354+
355+ // If it is SUBREG_TO_REG, check its operand.
356+ SubregToRegMI = nullptr ;
357+ if (MovMI->getOpcode () == TargetOpcode::SUBREG_TO_REG) {
358+ SubregToRegMI = MovMI;
359+ MovMI = MRI->getUniqueVRegDef (MovMI->getOperand (2 ).getReg ());
360+ if (!MovMI)
361+ return false ;
362+ }
363+
364+ if (MovMI->getOpcode () != AArch64::MOVi32imm &&
365+ MovMI->getOpcode () != AArch64::MOVi64imm)
366+ return false ;
367+
368+ // If the MOV has multiple uses, do not split the immediate because it causes
369+ // more instructions.
370+ if (!MRI->hasOneUse (MovMI->getOperand (0 ).getReg ()))
371+ return false ;
372+ if (SubregToRegMI && !MRI->hasOneUse (SubregToRegMI->getOperand (0 ).getReg ()))
373+ return false ;
374+
375+ // It is OK to perform this peephole optimization.
376+ return true ;
377+ }
378+
254379bool AArch64MIPeepholeOpt::runOnMachineFunction (MachineFunction &MF) {
255380 if (skipFunction (MF.getFunction ()))
256381 return false ;
@@ -278,6 +403,18 @@ bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
278403 case AArch64::ORRWrs:
279404 Changed = visitORR (MI, ToBeRemoved);
280405 break ;
406+ case AArch64::ADDWrr:
407+ Changed = visitADDSUB<uint32_t >(MI, ToBeRemoved, true );
408+ break ;
409+ case AArch64::SUBWrr:
410+ Changed = visitADDSUB<uint32_t >(MI, ToBeRemoved, false );
411+ break ;
412+ case AArch64::ADDXrr:
413+ Changed = visitADDSUB<uint64_t >(MI, ToBeRemoved, true );
414+ break ;
415+ case AArch64::SUBXrr:
416+ Changed = visitADDSUB<uint64_t >(MI, ToBeRemoved, false );
417+ break ;
281418 }
282419 }
283420 }
0 commit comments