Skip to content

Commit c42053f

Browse files
author
Jessica Paquette
committed
[AArch64][GlobalISel] Select arith extended add/sub in manual selection code
The manual selection code for add/sub was not checking if it was possible to fold in shifts + extends (the *rx opcode variants). As a result, we could never select things like `cmp x1, w0, uxtw #2`, because we don't import any patterns for compares. This adds support for the arithmetic extended register forms and updates tests for instructions selected using `emitADD`, `emitADDS`, and `emitSUBS`. This is a 0.1% geomean code size improvement on SPECINT2000 at -Os. Differential Revision: https://reviews.llvm.org/D91207
1 parent 20de182 commit c42053f

File tree

5 files changed

+189
-9
lines changed

5 files changed

+189
-9
lines changed

llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,8 @@ class AArch64InstructionSelector : public InstructionSelector {
188188
/// {{AArch64::ADDXri, AArch64::ADDWri},
189189
/// {AArch64::ADDXrs, AArch64::ADDWrs},
190190
/// {AArch64::ADDXrr, AArch64::ADDWrr},
191-
/// {AArch64::SUBXri, AArch64::SUBWri}}};
191+
/// {AArch64::SUBXri, AArch64::SUBWri},
192+
/// {AArch64::ADDXrx, AArch64::ADDWrx}}};
192193
/// \endcode
193194
///
194195
/// Each row in the table corresponds to a different addressing mode. Each
@@ -199,6 +200,7 @@ class AArch64InstructionSelector : public InstructionSelector {
199200
/// - Row 1: The rs opcode variants
200201
/// - Row 2: The rr opcode variants
201202
/// - Row 3: The ri opcode variants for negative immediates
203+
/// - Row 4: The rx opcode variants
202204
///
203205
/// \attention Columns must be structured as follows:
204206
/// - Column 0: The 64-bit opcode variants
@@ -208,7 +210,7 @@ class AArch64InstructionSelector : public InstructionSelector {
208210
/// \p LHS is the left-hand operand of the binop to emit.
209211
/// \p RHS is the right-hand operand of the binop to emit.
210212
MachineInstr *emitAddSub(
211-
const std::array<std::array<unsigned, 2>, 4> &AddrModeAndSizeToOpcode,
213+
const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
212214
Register Dst, MachineOperand &LHS, MachineOperand &RHS,
213215
MachineIRBuilder &MIRBuilder) const;
214216
MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
@@ -3821,7 +3823,7 @@ MachineInstr *AArch64InstructionSelector::emitInstr(
38213823
}
38223824

38233825
MachineInstr *AArch64InstructionSelector::emitAddSub(
3824-
const std::array<std::array<unsigned, 2>, 4> &AddrModeAndSizeToOpcode,
3826+
const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
38253827
Register Dst, MachineOperand &LHS, MachineOperand &RHS,
38263828
MachineIRBuilder &MIRBuilder) const {
38273829
MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
@@ -3842,6 +3844,11 @@ MachineInstr *AArch64InstructionSelector::emitAddSub(
38423844
return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS},
38433845
MIRBuilder, Fns);
38443846

3847+
// INSTRrx form.
3848+
if (auto Fns = selectArithExtendedRegister(RHS))
3849+
return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS},
3850+
MIRBuilder, Fns);
3851+
38453852
// INSTRrs form.
38463853
if (auto Fns = selectShiftedRegister(RHS))
38473854
return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS},
@@ -3854,35 +3861,38 @@ MachineInstr *
38543861
AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
38553862
MachineOperand &RHS,
38563863
MachineIRBuilder &MIRBuilder) const {
3857-
const std::array<std::array<unsigned, 2>, 4> OpcTable{
3864+
const std::array<std::array<unsigned, 2>, 5> OpcTable{
38583865
{{AArch64::ADDXri, AArch64::ADDWri},
38593866
{AArch64::ADDXrs, AArch64::ADDWrs},
38603867
{AArch64::ADDXrr, AArch64::ADDWrr},
3861-
{AArch64::SUBXri, AArch64::SUBWri}}};
3868+
{AArch64::SUBXri, AArch64::SUBWri},
3869+
{AArch64::ADDXrx, AArch64::ADDWrx}}};
38623870
return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder);
38633871
}
38643872

38653873
MachineInstr *
38663874
AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS,
38673875
MachineOperand &RHS,
38683876
MachineIRBuilder &MIRBuilder) const {
3869-
const std::array<std::array<unsigned, 2>, 4> OpcTable{
3877+
const std::array<std::array<unsigned, 2>, 5> OpcTable{
38703878
{{AArch64::ADDSXri, AArch64::ADDSWri},
38713879
{AArch64::ADDSXrs, AArch64::ADDSWrs},
38723880
{AArch64::ADDSXrr, AArch64::ADDSWrr},
3873-
{AArch64::SUBSXri, AArch64::SUBSWri}}};
3881+
{AArch64::SUBSXri, AArch64::SUBSWri},
3882+
{AArch64::ADDSXrx, AArch64::ADDSWrx}}};
38743883
return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
38753884
}
38763885

38773886
MachineInstr *
38783887
AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS,
38793888
MachineOperand &RHS,
38803889
MachineIRBuilder &MIRBuilder) const {
3881-
const std::array<std::array<unsigned, 2>, 4> OpcTable{
3890+
const std::array<std::array<unsigned, 2>, 5> OpcTable{
38823891
{{AArch64::SUBSXri, AArch64::SUBSWri},
38833892
{AArch64::SUBSXrs, AArch64::SUBSWrs},
38843893
{AArch64::SUBSXrr, AArch64::SUBSWrr},
3885-
{AArch64::ADDSXri, AArch64::ADDSWri}}};
3894+
{AArch64::ADDSXri, AArch64::ADDSWri},
3895+
{AArch64::SUBSXrx, AArch64::SUBSWrx}}};
38863896
return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
38873897
}
38883898

llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-compare.mir

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -603,3 +603,36 @@ body: |
603603
%cmp:gpr(s32) = G_ICMP intpred(ne), %reg0(s32), %sub
604604
$w0 = COPY %cmp(s32)
605605
RET_ReallyLR implicit $w0
606+
607+
...
608+
---
609+
name: cmn_arith_extended_shl
610+
alignment: 4
611+
legalized: true
612+
regBankSelected: true
613+
tracksRegLiveness: true
614+
body: |
615+
bb.0:
616+
liveins: $w0, $x0, $x1
617+
; We should be able to fold away the extend + shift and select ADDSXrx.
618+
619+
; CHECK-LABEL: name: cmn_arith_extended_shl
620+
; CHECK: liveins: $w0, $x0, $x1
621+
; CHECK: %reg0:gpr64sp = COPY $x0
622+
; CHECK: %reg1:gpr32 = COPY $w0
623+
; CHECK: $xzr = ADDSXrx %reg0, %reg1, 50, implicit-def $nzcv
624+
; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 0, implicit $nzcv
625+
; CHECK: $w0 = COPY %cmp
626+
; CHECK: RET_ReallyLR implicit $w0
627+
%reg0:gpr(s64) = COPY $x0
628+
%zero:gpr(s64) = G_CONSTANT i64 0
629+
%sub:gpr(s64) = G_SUB %zero, %reg0
630+
631+
%reg1:gpr(s32) = COPY $w0
632+
%ext:gpr(s64) = G_SEXT %reg1(s32)
633+
%cst:gpr(s64) = G_CONSTANT i64 2
634+
%shift:gpr(s64) = G_SHL %ext, %cst(s64)
635+
636+
%cmp:gpr(s32) = G_ICMP intpred(ne), %sub(s64), %shift
637+
$w0 = COPY %cmp(s32)
638+
RET_ReallyLR implicit $w0

llvm/test/CodeGen/AArch64/GlobalISel/select-cmp.mir

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -182,3 +182,91 @@ body: |
182182
%cmp:gpr(s32) = G_ICMP intpred(eq), %reg0(s32), %cst
183183
$w0 = COPY %cmp(s32)
184184
RET_ReallyLR implicit $w0
185+
...
186+
---
187+
name: cmp_arith_extended_s64
188+
legalized: true
189+
regBankSelected: true
190+
tracksRegLiveness: true
191+
body: |
192+
bb.0:
193+
liveins: $w0, $x1
194+
195+
; CHECK-LABEL: name: cmp_arith_extended_s64
196+
; CHECK: liveins: $w0, $x1
197+
; CHECK: %reg0:gpr32 = COPY $w0
198+
; CHECK: %reg1:gpr64sp = COPY $x1
199+
; CHECK: [[SUBSXrx:%[0-9]+]]:gpr64 = SUBSXrx %reg1, %reg0, 18, implicit-def $nzcv
200+
; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 9, implicit $nzcv
201+
; CHECK: $w0 = COPY %cmp
202+
; CHECK: RET_ReallyLR implicit $w0
203+
%reg0:gpr(s32) = COPY $w0
204+
%reg1:gpr(s64) = COPY $x1
205+
%ext:gpr(s64) = G_ZEXT %reg0(s32)
206+
%cst:gpr(s64) = G_CONSTANT i64 2
207+
%shift:gpr(s64) = G_SHL %ext, %cst(s64)
208+
%cmp:gpr(s32) = G_ICMP intpred(ugt), %reg1(s64), %shift
209+
$w0 = COPY %cmp(s32)
210+
RET_ReallyLR implicit $w0
211+
212+
...
213+
---
214+
name: cmp_arith_extended_s32
215+
legalized: true
216+
regBankSelected: true
217+
tracksRegLiveness: true
218+
body: |
219+
bb.0:
220+
liveins: $w0, $w1, $h0
221+
222+
; CHECK-LABEL: name: cmp_arith_extended_s32
223+
; CHECK: liveins: $w0, $w1, $h0
224+
; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:fpr32 = SUBREG_TO_REG 0, $h0, %subreg.hsub
225+
; CHECK: %reg0:gpr32all = COPY [[SUBREG_TO_REG]]
226+
; CHECK: %reg1:gpr32sp = COPY $w1
227+
; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %reg0
228+
; CHECK: [[SUBSWrx:%[0-9]+]]:gpr32 = SUBSWrx %reg1, [[COPY]], 10, implicit-def $nzcv
229+
; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 9, implicit $nzcv
230+
; CHECK: $w0 = COPY %cmp
231+
; CHECK: RET_ReallyLR implicit $w0
232+
%reg0:gpr(s16) = COPY $h0
233+
%reg1:gpr(s32) = COPY $w1
234+
%ext:gpr(s32) = G_ZEXT %reg0(s16)
235+
%cst:gpr(s32) = G_CONSTANT i32 2
236+
%shift:gpr(s32) = G_SHL %ext, %cst(s32)
237+
%cmp:gpr(s32) = G_ICMP intpred(ugt), %reg1(s32), %shift
238+
$w0 = COPY %cmp(s32)
239+
RET_ReallyLR implicit $w0
240+
241+
...
242+
---
243+
name: cmp_arith_extended_shl_too_large
244+
legalized: true
245+
regBankSelected: true
246+
tracksRegLiveness: true
247+
body: |
248+
bb.0:
249+
liveins: $w0, $x1
250+
251+
; The constant on the G_SHL is > 4, so we won't select SUBSXrx
252+
253+
; CHECK-LABEL: name: cmp_arith_extended_shl_too_large
254+
; CHECK: liveins: $w0, $x1
255+
; CHECK: %reg0:gpr32 = COPY $w0
256+
; CHECK: %reg1:gpr64 = COPY $x1
257+
; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, %reg0, %subreg.sub_32
258+
; CHECK: %ext:gpr64 = UBFMXri [[SUBREG_TO_REG]], 0, 31
259+
; CHECK: [[SUBSXrs:%[0-9]+]]:gpr64 = SUBSXrs %reg1, %ext, 5, implicit-def $nzcv
260+
; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 9, implicit $nzcv
261+
; CHECK: $w0 = COPY %cmp
262+
; CHECK: RET_ReallyLR implicit $w0
263+
%reg0:gpr(s32) = COPY $w0
264+
%reg1:gpr(s64) = COPY $x1
265+
%ext:gpr(s64) = G_ZEXT %reg0(s32)
266+
%cst:gpr(s64) = G_CONSTANT i64 5
267+
%shift:gpr(s64) = G_SHL %ext, %cst(s64)
268+
%cmp:gpr(s32) = G_ICMP intpred(ugt), %reg1(s64), %shift
269+
$w0 = COPY %cmp(s32)
270+
RET_ReallyLR implicit $w0
271+
272+
...

llvm/test/CodeGen/AArch64/GlobalISel/select-ptr-add.mir

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,3 +89,24 @@ body: |
8989
%2:gpr(p0) = G_PTR_ADD %0, %1(s64)
9090
$x0 = COPY %2(p0)
9191
...
92+
---
93+
name: ptr_add_arith_extended
94+
legalized: true
95+
regBankSelected: true
96+
body: |
97+
bb.0:
98+
liveins: $x0
99+
; CHECK-LABEL: name: ptr_add_arith_extended
100+
; CHECK: %reg0:gpr32 = COPY $w0
101+
; CHECK: %ptr:gpr64 = COPY $x1
102+
; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY %ptr
103+
; CHECK: %ptr_add:gpr64sp = ADDXrx [[COPY]], %reg0, 18
104+
; CHECK: $x0 = COPY %ptr_add
105+
%reg0:gpr(s32) = COPY $w0
106+
%ptr:gpr(p0) = COPY $x1
107+
%ext:gpr(s64) = G_ZEXT %reg0(s32)
108+
%cst:gpr(s64) = G_CONSTANT i64 2
109+
%shift:gpr(s64) = G_SHL %ext, %cst(s64)
110+
%ptr_add:gpr(p0) = G_PTR_ADD %ptr, %shift(s64)
111+
$x0 = COPY %ptr_add(p0)
112+
...

llvm/test/CodeGen/AArch64/GlobalISel/select-uaddo.mir

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,3 +136,31 @@ body: |
136136
%add:gpr(s32), %overflow:gpr(s1) = G_UADDO %copy, %constant
137137
$w0 = COPY %add(s32)
138138
RET_ReallyLR implicit $w0
139+
140+
...
141+
---
142+
name: uaddo_arith_extended
143+
alignment: 4
144+
legalized: true
145+
regBankSelected: true
146+
tracksRegLiveness: true
147+
body: |
148+
bb.1.entry:
149+
liveins: $w0, $x0
150+
; Check that we get ADDSXrx.
151+
; CHECK-LABEL: name: uaddo_arith_extended
152+
; CHECK: liveins: $w0, $x0
153+
; CHECK: %reg0:gpr64sp = COPY $x0
154+
; CHECK: %reg1:gpr32 = COPY $w0
155+
; CHECK: %add:gpr64 = ADDSXrx %reg0, %reg1, 18, implicit-def $nzcv
156+
; CHECK: %flags:gpr32 = CSINCWr $wzr, $wzr, 3, implicit $nzcv
157+
; CHECK: $x0 = COPY %add
158+
; CHECK: RET_ReallyLR implicit $x0
159+
%reg0:gpr(s64) = COPY $x0
160+
%reg1:gpr(s32) = COPY $w0
161+
%ext:gpr(s64) = G_ZEXT %reg1(s32)
162+
%cst:gpr(s64) = G_CONSTANT i64 2
163+
%shift:gpr(s64) = G_SHL %ext, %cst(s64)
164+
%add:gpr(s64), %flags:gpr(s1) = G_UADDO %reg0, %shift
165+
$x0 = COPY %add(s64)
166+
RET_ReallyLR implicit $x0

0 commit comments

Comments
 (0)