Skip to content

Commit 2f083b3

Browse files
authored
[AArch64] Fix resource length computation for STP. (#81749)
On some uArchs, `STP [s|d], [s|d]` first combines the 2 input registers into a single register using a vector execution unit. As far as I understand, AArch64StorePairSuppress tries to prevent forming STPs when the vector units are the critical resource, in order to avoid adding more pressure on those units. The implementation, however, simply computes the new critical resource length by adding the resources for another STP. If the load/store units are the critical resource, this increases that length by one and incorrectly prevents forming the STP. This patch adjusts the resource computation by also removing 2 STRs, since introducing an STP removes 2 single stores. This should more accurately reflect the resource usage after introducing an STP, and does not prevent forming STPs when the load/store units are the critical resource; in those cases, an STP can actually help to reduce resource usage. PR: #81749
1 parent 0b1c25c commit 2f083b3

File tree

4 files changed

+40
-58
lines changed

4 files changed

+40
-58
lines changed

llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -81,15 +81,23 @@ bool AArch64StorePairSuppress::shouldAddSTPToBlock(const MachineBasicBlock *BB)
8181
MachineTraceMetrics::Trace BBTrace = MinInstr->getTrace(BB);
8282
unsigned ResLength = BBTrace.getResourceLength();
8383

84-
// Get the machine model's scheduling class for STPQi.
84+
// Get the machine model's scheduling class for STPDi and STRDui.
8585
// Bypass TargetSchedule's SchedClass resolution since we only have an opcode.
8686
unsigned SCIdx = TII->get(AArch64::STPDi).getSchedClass();
87-
const MCSchedClassDesc *SCDesc =
87+
const MCSchedClassDesc *PairSCDesc =
8888
SchedModel.getMCSchedModel()->getSchedClassDesc(SCIdx);
8989

90-
// If a subtarget does not define resources for STPQi, bail here.
91-
if (SCDesc->isValid() && !SCDesc->isVariant()) {
92-
unsigned ResLenWithSTP = BBTrace.getResourceLength(std::nullopt, SCDesc);
90+
unsigned SCIdx2 = TII->get(AArch64::STRDui).getSchedClass();
91+
const MCSchedClassDesc *SingleSCDesc =
92+
SchedModel.getMCSchedModel()->getSchedClassDesc(SCIdx2);
93+
94+
// If a subtarget does not define resources for STPDi or STRDui, bail here.
95+
if (PairSCDesc->isValid() && !PairSCDesc->isVariant() &&
96+
SingleSCDesc->isValid() && !SingleSCDesc->isVariant()) {
97+
// Compute the new critical resource length after replacing 2 separate
98+
// STRDui with one STPDi.
99+
unsigned ResLenWithSTP = BBTrace.getResourceLength(
100+
std::nullopt, PairSCDesc, {SingleSCDesc, SingleSCDesc});
93101
if (ResLenWithSTP > ResLength) {
94102
LLVM_DEBUG(dbgs() << " Suppress STP in BB: " << BB->getNumber()
95103
<< " resources " << ResLength << " -> " << ResLenWithSTP

llvm/test/CodeGen/AArch64/arm64-stur.ll

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -65,9 +65,8 @@ declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1) nounwind
6565

6666
; CHECK-LABEL: unaligned:
6767
; CHECK-NOT: str q0
68-
; CHECK: str d[[REG:[0-9]+]], [x0]
69-
; CHECK: ext.16b v[[REG2:[0-9]+]], v[[REG]], v[[REG]], #8
70-
; CHECK: str d[[REG2]], [x0, #8]
68+
; CHECK: ext.16b v[[REG2:[0-9]+]], v[[REG:[0-9]+]], v[[REG]], #8
69+
; CHECK: stp d[[REG]], d[[REG2]], [x0]
7170
define void @unaligned(ptr %p, <4 x i32> %v) nounwind {
7271
store <4 x i32> %v, ptr %p, align 4
7372
ret void

llvm/test/CodeGen/AArch64/merge-store.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,7 @@ define void @merge_vec_extract_stores(<4 x float> %v1, ptr %ptr) {
4545
; SPLITTING-LABEL: merge_vec_extract_stores:
4646
; SPLITTING: // %bb.0:
4747
; SPLITTING-NEXT: ext v1.16b, v0.16b, v0.16b, #8
48-
; SPLITTING-NEXT: str d0, [x0, #24]
49-
; SPLITTING-NEXT: str d1, [x0, #32]
48+
; SPLITTING-NEXT: stp d0, d1, [x0, #24]
5049
; SPLITTING-NEXT: ret
5150
;
5251
; MISALIGNED-LABEL: merge_vec_extract_stores:

llvm/test/CodeGen/AArch64/storepairsuppress.ll

Lines changed: 24 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -44,12 +44,10 @@ define void @load_store_units_critical(ptr %arg, ptr noundef %arg1, i64 noundef
4444
; SUPPRESS-NEXT: fmadd s0, s5, s0, s1
4545
; SUPPRESS-NEXT: fadd s1, s4, s2
4646
; SUPPRESS-NEXT: fadd s5, s0, s3
47-
; SUPPRESS-NEXT: str s1, [x8]
48-
; SUPPRESS-NEXT: str s5, [x8, #4]
47+
; SUPPRESS-NEXT: stp s1, s5, [x8]
4948
; SUPPRESS-NEXT: fsub s2, s2, s4
5049
; SUPPRESS-NEXT: fsub s0, s3, s0
51-
; SUPPRESS-NEXT: str s2, [x8, #8]
52-
; SUPPRESS-NEXT: str s0, [x8, #12]
50+
; SUPPRESS-NEXT: stp s2, s0, [x8, #8]
5351
; SUPPRESS-NEXT: ldr x9, [x0, #8]
5452
; SUPPRESS-NEXT: ldp s3, s4, [x9]
5553
; SUPPRESS-NEXT: ldp s6, s7, [x8, #16]
@@ -60,12 +58,10 @@ define void @load_store_units_critical(ptr %arg, ptr noundef %arg1, i64 noundef
6058
; SUPPRESS-NEXT: fmadd s3, s17, s3, s4
6159
; SUPPRESS-NEXT: fadd s4, s16, s6
6260
; SUPPRESS-NEXT: fadd s17, s3, s7
63-
; SUPPRESS-NEXT: str s4, [x8, #16]
64-
; SUPPRESS-NEXT: str s17, [x8, #20]
61+
; SUPPRESS-NEXT: stp s4, s17, [x8, #16]
6562
; SUPPRESS-NEXT: fsub s6, s6, s16
6663
; SUPPRESS-NEXT: fsub s3, s7, s3
67-
; SUPPRESS-NEXT: str s6, [x8, #24]
68-
; SUPPRESS-NEXT: str s3, [x8, #28]
64+
; SUPPRESS-NEXT: stp s6, s3, [x8, #24]
6965
; SUPPRESS-NEXT: ldr x9, [x0, #8]
7066
; SUPPRESS-NEXT: ldp s7, s16, [x9]
7167
; SUPPRESS-NEXT: fmul s18, s16, s17
@@ -74,12 +70,10 @@ define void @load_store_units_critical(ptr %arg, ptr noundef %arg1, i64 noundef
7470
; SUPPRESS-NEXT: fmadd s4, s16, s4, s17
7571
; SUPPRESS-NEXT: fadd s16, s7, s1
7672
; SUPPRESS-NEXT: fadd s17, s4, s5
77-
; SUPPRESS-NEXT: str s16, [x8]
78-
; SUPPRESS-NEXT: str s17, [x8, #4]
73+
; SUPPRESS-NEXT: stp s16, s17, [x8]
7974
; SUPPRESS-NEXT: fsub s1, s1, s7
8075
; SUPPRESS-NEXT: fsub s4, s5, s4
81-
; SUPPRESS-NEXT: str s1, [x8, #16]
82-
; SUPPRESS-NEXT: str s4, [x8, #20]
76+
; SUPPRESS-NEXT: stp s1, s4, [x8, #16]
8377
; SUPPRESS-NEXT: ldr x10, [x0, #8]
8478
; SUPPRESS-NEXT: lsl x9, x3, #4
8579
; SUPPRESS-NEXT: add x10, x10, x9
@@ -90,12 +84,10 @@ define void @load_store_units_critical(ptr %arg, ptr noundef %arg1, i64 noundef
9084
; SUPPRESS-NEXT: fmadd s3, s4, s6, s3
9185
; SUPPRESS-NEXT: fadd s4, s1, s2
9286
; SUPPRESS-NEXT: fadd s5, s3, s0
93-
; SUPPRESS-NEXT: str s4, [x8, #8]
94-
; SUPPRESS-NEXT: str s5, [x8, #12]
87+
; SUPPRESS-NEXT: stp s4, s5, [x8, #8]
9588
; SUPPRESS-NEXT: fsub s1, s2, s1
9689
; SUPPRESS-NEXT: fsub s0, s0, s3
97-
; SUPPRESS-NEXT: str s1, [x8, #24]
98-
; SUPPRESS-NEXT: str s0, [x8, #28]
90+
; SUPPRESS-NEXT: stp s1, s0, [x8, #24]
9991
; SUPPRESS-NEXT: ldr x10, [x0, #8]
10092
; SUPPRESS-NEXT: ldp s0, s1, [x10]
10193
; SUPPRESS-NEXT: ldp s2, s3, [x8, #32]
@@ -106,12 +98,10 @@ define void @load_store_units_critical(ptr %arg, ptr noundef %arg1, i64 noundef
10698
; SUPPRESS-NEXT: fmadd s0, s5, s0, s1
10799
; SUPPRESS-NEXT: fadd s1, s4, s2
108100
; SUPPRESS-NEXT: fadd s5, s0, s3
109-
; SUPPRESS-NEXT: str s1, [x8, #32]
110-
; SUPPRESS-NEXT: str s5, [x8, #36]
101+
; SUPPRESS-NEXT: stp s1, s5, [x8, #32]
111102
; SUPPRESS-NEXT: fsub s2, s2, s4
112103
; SUPPRESS-NEXT: fsub s3, s3, s0
113-
; SUPPRESS-NEXT: str s2, [x8, #40]
114-
; SUPPRESS-NEXT: str s3, [x8, #44]
104+
; SUPPRESS-NEXT: stp s2, s3, [x8, #40]
115105
; SUPPRESS-NEXT: ldr x10, [x0, #8]
116106
; SUPPRESS-NEXT: ldp s0, s4, [x10]
117107
; SUPPRESS-NEXT: ldp s6, s7, [x8, #48]
@@ -122,12 +112,10 @@ define void @load_store_units_critical(ptr %arg, ptr noundef %arg1, i64 noundef
122112
; SUPPRESS-NEXT: fmadd s0, s17, s0, s4
123113
; SUPPRESS-NEXT: fadd s4, s16, s6
124114
; SUPPRESS-NEXT: fadd s17, s0, s7
125-
; SUPPRESS-NEXT: str s4, [x8, #48]
126-
; SUPPRESS-NEXT: str s17, [x8, #52]
115+
; SUPPRESS-NEXT: stp s4, s17, [x8, #48]
127116
; SUPPRESS-NEXT: fsub s6, s6, s16
128117
; SUPPRESS-NEXT: fsub s0, s7, s0
129-
; SUPPRESS-NEXT: str s6, [x8, #56]
130-
; SUPPRESS-NEXT: str s0, [x8, #60]
118+
; SUPPRESS-NEXT: stp s6, s0, [x8, #56]
131119
; SUPPRESS-NEXT: ldr x10, [x0, #8]
132120
; SUPPRESS-NEXT: ldp s7, s16, [x10]
133121
; SUPPRESS-NEXT: fmul s18, s16, s17
@@ -136,12 +124,10 @@ define void @load_store_units_critical(ptr %arg, ptr noundef %arg1, i64 noundef
136124
; SUPPRESS-NEXT: fmadd s4, s16, s4, s17
137125
; SUPPRESS-NEXT: fadd s16, s7, s1
138126
; SUPPRESS-NEXT: fadd s17, s4, s5
139-
; SUPPRESS-NEXT: str s16, [x8, #32]
140-
; SUPPRESS-NEXT: str s17, [x8, #36]
127+
; SUPPRESS-NEXT: stp s16, s17, [x8, #32]
141128
; SUPPRESS-NEXT: fsub s7, s1, s7
142129
; SUPPRESS-NEXT: fsub s4, s5, s4
143-
; SUPPRESS-NEXT: str s7, [x8, #48]
144-
; SUPPRESS-NEXT: str s4, [x8, #52]
130+
; SUPPRESS-NEXT: stp s7, s4, [x8, #48]
145131
; SUPPRESS-NEXT: ldr x10, [x0, #8]
146132
; SUPPRESS-NEXT: add x9, x10, x9
147133
; SUPPRESS-NEXT: ldp s1, s5, [x9]
@@ -151,12 +137,10 @@ define void @load_store_units_critical(ptr %arg, ptr noundef %arg1, i64 noundef
151137
; SUPPRESS-NEXT: fmadd s5, s5, s6, s0
152138
; SUPPRESS-NEXT: fadd s6, s1, s2
153139
; SUPPRESS-NEXT: fadd s18, s5, s3
154-
; SUPPRESS-NEXT: str s6, [x8, #40]
155-
; SUPPRESS-NEXT: str s18, [x8, #44]
140+
; SUPPRESS-NEXT: stp s6, s18, [x8, #40]
156141
; SUPPRESS-NEXT: fsub s0, s2, s1
157142
; SUPPRESS-NEXT: fsub s1, s3, s5
158-
; SUPPRESS-NEXT: str s0, [x8, #56]
159-
; SUPPRESS-NEXT: str s1, [x8, #60]
143+
; SUPPRESS-NEXT: stp s0, s1, [x8, #56]
160144
; SUPPRESS-NEXT: ldr x9, [x0, #8]
161145
; SUPPRESS-NEXT: ldp s2, s3, [x9]
162146
; SUPPRESS-NEXT: ldp s5, s19, [x8]
@@ -166,12 +150,10 @@ define void @load_store_units_critical(ptr %arg, ptr noundef %arg1, i64 noundef
166150
; SUPPRESS-NEXT: fmadd s2, s17, s2, s3
167151
; SUPPRESS-NEXT: fadd s3, s16, s5
168152
; SUPPRESS-NEXT: fadd s17, s2, s19
169-
; SUPPRESS-NEXT: str s3, [x8]
170-
; SUPPRESS-NEXT: str s17, [x8, #4]
153+
; SUPPRESS-NEXT: stp s3, s17, [x8]
171154
; SUPPRESS-NEXT: fsub s3, s5, s16
172155
; SUPPRESS-NEXT: fsub s2, s19, s2
173-
; SUPPRESS-NEXT: str s3, [x8, #32]
174-
; SUPPRESS-NEXT: str s2, [x8, #36]
156+
; SUPPRESS-NEXT: stp s3, s2, [x8, #32]
175157
; SUPPRESS-NEXT: ldr x9, [x0, #8]
176158
; SUPPRESS-NEXT: add x9, x9, w3, sxtw #3
177159
; SUPPRESS-NEXT: ldp s2, s3, [x9]
@@ -182,12 +164,10 @@ define void @load_store_units_critical(ptr %arg, ptr noundef %arg1, i64 noundef
182164
; SUPPRESS-NEXT: fmadd s2, s18, s2, s3
183165
; SUPPRESS-NEXT: fadd s3, s6, s5
184166
; SUPPRESS-NEXT: fadd s17, s2, s16
185-
; SUPPRESS-NEXT: str s3, [x8, #8]
186-
; SUPPRESS-NEXT: str s17, [x8, #12]
167+
; SUPPRESS-NEXT: stp s3, s17, [x8, #8]
187168
; SUPPRESS-NEXT: fsub s3, s5, s6
188169
; SUPPRESS-NEXT: fsub s2, s16, s2
189-
; SUPPRESS-NEXT: str s3, [x8, #40]
190-
; SUPPRESS-NEXT: str s2, [x8, #44]
170+
; SUPPRESS-NEXT: stp s3, s2, [x8, #40]
191171
; SUPPRESS-NEXT: lsl x9, x3, #33
192172
; SUPPRESS-NEXT: ldr x10, [x0, #8]
193173
; SUPPRESS-NEXT: add x9, x10, x9, asr #29
@@ -199,12 +179,10 @@ define void @load_store_units_critical(ptr %arg, ptr noundef %arg1, i64 noundef
199179
; SUPPRESS-NEXT: fmadd s2, s4, s2, s3
200180
; SUPPRESS-NEXT: fadd s3, s7, s5
201181
; SUPPRESS-NEXT: fadd s4, s2, s6
202-
; SUPPRESS-NEXT: str s3, [x8, #16]
203-
; SUPPRESS-NEXT: str s4, [x8, #20]
182+
; SUPPRESS-NEXT: stp s3, s4, [x8, #16]
204183
; SUPPRESS-NEXT: fsub s3, s5, s7
205184
; SUPPRESS-NEXT: fsub s2, s6, s2
206-
; SUPPRESS-NEXT: str s3, [x8, #48]
207-
; SUPPRESS-NEXT: str s2, [x8, #52]
185+
; SUPPRESS-NEXT: stp s3, s2, [x8, #48]
208186
; SUPPRESS-NEXT: add w9, w3, w3, lsl #1
209187
; SUPPRESS-NEXT: ldr x10, [x0, #8]
210188
; SUPPRESS-NEXT: add x9, x10, w9, sxtw #3
@@ -216,12 +194,10 @@ define void @load_store_units_critical(ptr %arg, ptr noundef %arg1, i64 noundef
216194
; SUPPRESS-NEXT: fmadd s1, s1, s2, s3
217195
; SUPPRESS-NEXT: fadd s2, s0, s4
218196
; SUPPRESS-NEXT: fadd s3, s1, s5
219-
; SUPPRESS-NEXT: str s2, [x8, #24]
220-
; SUPPRESS-NEXT: str s3, [x8, #28]
197+
; SUPPRESS-NEXT: stp s2, s3, [x8, #24]
221198
; SUPPRESS-NEXT: fsub s0, s4, s0
222199
; SUPPRESS-NEXT: fsub s1, s5, s1
223-
; SUPPRESS-NEXT: str s0, [x8, #56]
224-
; SUPPRESS-NEXT: str s1, [x8, #60]
200+
; SUPPRESS-NEXT: stp s0, s1, [x8, #56]
225201
; SUPPRESS-NEXT: ret
226202
;
227203
; NOSUPPRESS-LABEL: load_store_units_critical:

0 commit comments

Comments
 (0)