1111// 1. MOVi32imm + ANDWrr ==> ANDWri + ANDWri
1212// MOVi64imm + ANDXrr ==> ANDXri + ANDXri
1313//
14+ // 2. MOVi32imm + ADDWrr ==> ADDWri + ADDWri
15+ // MOVi64imm + ADDXrr ==> ADDXri + ADDXri
16+ //
17+ // 3. MOVi32imm + SUBWrr ==> SUBWri + SUBWri
18+ // MOVi64imm + SUBXrr ==> SUBXri + SUBXri
19+ //
1420// The mov pseudo instruction could be expanded to multiple mov instructions
1521// later. In this case, we could try to split the constant operand of mov
16- // instruction into two bitmask immediates. It makes two AND instructions
17- // intead of multiple `mov` + `and` instructions.
22+ // instruction into two immediates which can be directly encoded into
23+ // *Wri/*Xri instructions. It makes two AND/ADD/SUB instructions instead of
24+ // multiple `mov` + `and/add/sub` instructions.
1825// ===----------------------------------------------------------------------===//
1926
2027#include " AArch64ExpandImm.h"
@@ -41,6 +48,13 @@ struct AArch64MIPeepholeOpt : public MachineFunctionPass {
4148 MachineLoopInfo *MLI;
4249 MachineRegisterInfo *MRI;
4350
51+ bool checkMovImmInstr (MachineInstr &MI, MachineInstr *&MovMI,
52+ MachineInstr *&SubregToRegMI);
53+
54+ template <typename T>
55+ bool visitADDSUB (MachineInstr &MI,
56+ SmallSetVector<MachineInstr *, 8 > &ToBeRemoved, bool IsAdd);
57+
4458 template <typename T>
4559 bool visitAND (MachineInstr &MI,
4660 SmallSetVector<MachineInstr *, 8 > &ToBeRemoved);
@@ -119,31 +133,9 @@ bool AArch64MIPeepholeOpt::visitAND(
119133 assert ((RegSize == 32 || RegSize == 64 ) &&
120134 " Invalid RegSize for AND bitmask peephole optimization" );
121135
122- // Check whether AND's MBB is in loop and the AND is loop invariant.
123- MachineBasicBlock *MBB = MI.getParent ();
124- MachineLoop *L = MLI->getLoopFor (MBB);
125- if (L && !L->isLoopInvariant (MI))
126- return false ;
127-
128- // Check whether AND's operand is MOV with immediate.
129- MachineInstr *MovMI = MRI->getUniqueVRegDef (MI.getOperand (2 ).getReg ());
130- MachineInstr *SubregToRegMI = nullptr ;
131- // If it is SUBREG_TO_REG, check its operand.
132- if (MovMI->getOpcode () == TargetOpcode::SUBREG_TO_REG) {
133- SubregToRegMI = MovMI;
134- MovMI = MRI->getUniqueVRegDef (MovMI->getOperand (2 ).getReg ());
135- }
136-
137- if (MovMI->getOpcode () != AArch64::MOVi32imm &&
138- MovMI->getOpcode () != AArch64::MOVi64imm)
139- return false ;
140-
141- // If the MOV has multiple uses, do not split the immediate because it causes
142- // more instructions.
143- if (!MRI->hasOneUse (MovMI->getOperand (0 ).getReg ()))
144- return false ;
145-
146- if (SubregToRegMI && !MRI->hasOneUse (SubregToRegMI->getOperand (0 ).getReg ()))
136+ // Perform several essential checks against current MI.
137+ MachineInstr *MovMI, *SubregToRegMI;
138+ if (!checkMovImmInstr (MI, MovMI, SubregToRegMI))
147139 return false ;
148140
149141 // Split the bitmask immediate into two.
@@ -160,6 +152,7 @@ bool AArch64MIPeepholeOpt::visitAND(
160152
161153 // Create new AND MIs.
162154 DebugLoc DL = MI.getDebugLoc ();
155+ MachineBasicBlock *MBB = MI.getParent ();
163156 const TargetRegisterClass *ANDImmRC =
164157 (RegSize == 32 ) ? &AArch64::GPR32spRegClass : &AArch64::GPR64spRegClass;
165158 Register DstReg = MI.getOperand (0 ).getReg ();
@@ -185,6 +178,135 @@ bool AArch64MIPeepholeOpt::visitAND(
185178 return true ;
186179}
187180
181+ template <typename T>
182+ static bool splitAddSubImm (T Imm, unsigned RegSize, T &Imm0, T &Imm1) {
183+ // The immediate must be in the form of ((imm0 << 12) + imm1), in which both
184+ // imm0 and imm1 are non-zero 12-bit unsigned int.
185+ if ((Imm & 0xfff000 ) == 0 || (Imm & 0xfff ) == 0 ||
186+ (Imm & ~static_cast <T>(0xffffff )) != 0 )
187+ return false ;
188+
189+ // The immediate can not be composed via a single instruction.
190+ SmallVector<AArch64_IMM::ImmInsnModel, 4 > Insn;
191+ AArch64_IMM::expandMOVImm (Imm, RegSize, Insn);
192+ if (Insn.size () == 1 )
193+ return false ;
194+
195+ // Split Imm into (Imm0 << 12) + Imm1;
196+ Imm0 = (Imm >> 12 ) & 0xfff ;
197+ Imm1 = Imm & 0xfff ;
198+ return true ;
199+ }
200+
201+ template <typename T>
202+ bool AArch64MIPeepholeOpt::visitADDSUB (
203+ MachineInstr &MI, SmallSetVector<MachineInstr *, 8 > &ToBeRemoved,
204+ bool IsAdd) {
205+ // Try below transformation.
206+ //
207+ // MOVi32imm + ADDWrr ==> ANDWri + ANDWri
208+ // MOVi64imm + ADDXrr ==> ANDXri + ANDXri
209+ //
210+ // MOVi32imm + SUBWrr ==> SUBWri + SUBWri
211+ // MOVi64imm + SUBXrr ==> SUBXri + SUBXri
212+ //
213+ // The mov pseudo instruction could be expanded to multiple mov instructions
214+ // later. Let's try to split the constant operand of mov instruction into two
215+ // legal add/sub immediates. It makes only two ADD/SUB instructions intead of
216+ // multiple `mov` + `and/sub` instructions.
217+
218+ unsigned RegSize = sizeof (T) * 8 ;
219+ assert ((RegSize == 32 || RegSize == 64 ) &&
220+ " Invalid RegSize for legal add/sub immediate peephole optimization" );
221+
222+ // Perform several essential checks against current MI.
223+ MachineInstr *MovMI, *SubregToRegMI;
224+ if (!checkMovImmInstr (MI, MovMI, SubregToRegMI))
225+ return false ;
226+
227+ // Split the immediate to Imm0 and Imm1, and calculate the Opcode.
228+ T Imm = static_cast <T>(MovMI->getOperand (1 ).getImm ()), Imm0, Imm1;
229+ unsigned Opcode;
230+ if (splitAddSubImm (Imm, RegSize, Imm0, Imm1)) {
231+ if (IsAdd)
232+ Opcode = RegSize == 32 ? AArch64::ADDWri : AArch64::ADDXri;
233+ else
234+ Opcode = RegSize == 32 ? AArch64::SUBWri : AArch64::SUBXri;
235+ } else if (splitAddSubImm (-Imm, RegSize, Imm0, Imm1)) {
236+ if (IsAdd)
237+ Opcode = RegSize == 32 ? AArch64::SUBWri : AArch64::SUBXri;
238+ else
239+ Opcode = RegSize == 32 ? AArch64::ADDWri : AArch64::ADDXri;
240+ } else {
241+ return false ;
242+ }
243+
244+ // Create new ADD/SUB MIs.
245+ DebugLoc DL = MI.getDebugLoc ();
246+ MachineBasicBlock *MBB = MI.getParent ();
247+ const TargetRegisterClass *RC =
248+ (RegSize == 32 ) ? &AArch64::GPR32spRegClass : &AArch64::GPR64spRegClass;
249+ Register DstReg = MI.getOperand (0 ).getReg ();
250+ Register SrcReg = MI.getOperand (1 ).getReg ();
251+ Register TmpReg = MRI->createVirtualRegister (RC);
252+
253+ MRI->constrainRegClass (SrcReg, RC);
254+ BuildMI (*MBB, MI, DL, TII->get (Opcode), TmpReg)
255+ .addReg (SrcReg)
256+ .addImm (Imm0)
257+ .addImm (12 );
258+
259+ MRI->constrainRegClass (DstReg, RC);
260+ BuildMI (*MBB, MI, DL, TII->get (Opcode), DstReg)
261+ .addReg (TmpReg)
262+ .addImm (Imm1)
263+ .addImm (0 );
264+
265+ // Record the MIs need to be removed.
266+ ToBeRemoved.insert (&MI);
267+ if (SubregToRegMI)
268+ ToBeRemoved.insert (SubregToRegMI);
269+ ToBeRemoved.insert (MovMI);
270+
271+ return true ;
272+ }
273+
274+ // Checks if the corresponding MOV immediate instruction is applicable for
275+ // this peephole optimization.
276+ bool AArch64MIPeepholeOpt::checkMovImmInstr (MachineInstr &MI,
277+ MachineInstr *&MovMI,
278+ MachineInstr *&SubregToRegMI) {
279+ // Check whether current MI is in loop and is loop invariant.
280+ MachineBasicBlock *MBB = MI.getParent ();
281+ MachineLoop *L = MLI->getLoopFor (MBB);
282+ if (L && !L->isLoopInvariant (MI))
283+ return false ;
284+
285+ // Check whether current MI's operand is MOV with immediate.
286+ MovMI = MRI->getUniqueVRegDef (MI.getOperand (2 ).getReg ());
287+ SubregToRegMI = nullptr ;
288+ // If it is SUBREG_TO_REG, check its operand.
289+ if (MovMI->getOpcode () == TargetOpcode::SUBREG_TO_REG) {
290+ SubregToRegMI = MovMI;
291+ MovMI = MRI->getUniqueVRegDef (MovMI->getOperand (2 ).getReg ());
292+ }
293+
294+ if (MovMI->getOpcode () != AArch64::MOVi32imm &&
295+ MovMI->getOpcode () != AArch64::MOVi64imm)
296+ return false ;
297+
298+ // If the MOV has multiple uses, do not split the immediate because it causes
299+ // more instructions.
300+ if (!MRI->hasOneUse (MovMI->getOperand (0 ).getReg ()))
301+ return false ;
302+
303+ if (SubregToRegMI && !MRI->hasOneUse (SubregToRegMI->getOperand (0 ).getReg ()))
304+ return false ;
305+
306+ // It is OK to perform this peephole optimization.
307+ return true ;
308+ }
309+
188310bool AArch64MIPeepholeOpt::runOnMachineFunction (MachineFunction &MF) {
189311 if (skipFunction (MF.getFunction ()))
190312 return false ;
@@ -210,6 +332,18 @@ bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
210332 case AArch64::ANDXrr:
211333 Changed = visitAND<uint64_t >(MI, ToBeRemoved);
212334 break ;
335+ case AArch64::ADDWrr:
336+ Changed = visitADDSUB<uint32_t >(MI, ToBeRemoved, true );
337+ break ;
338+ case AArch64::SUBWrr:
339+ Changed = visitADDSUB<uint32_t >(MI, ToBeRemoved, false );
340+ break ;
341+ case AArch64::ADDXrr:
342+ Changed = visitADDSUB<uint64_t >(MI, ToBeRemoved, true );
343+ break ;
344+ case AArch64::SUBXrr:
345+ Changed = visitADDSUB<uint64_t >(MI, ToBeRemoved, false );
346+ break ;
213347 }
214348 }
215349 }
0 commit comments