Skip to content

Commit 97a04ec

Browse files
lukel97 authored and paulhuggett committed
[RISCV] Don't commute with shift if it would break sh{1,2,3}add pattern (llvm#119527)
This fixes a regression from llvm#101294 by checking if we might be clobbering a sh{1,2,3}add pattern. Only do this if the underlying add isn't going to be folded away into an address offset.
1 parent 06d3d81 commit 97a04ec

File tree

3 files changed

+446
-138
lines changed

3 files changed

+446
-138
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18386,6 +18386,15 @@ bool RISCVTargetLowering::isDesirableToCommuteWithShift(
1838618386

1838718387
auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
1838818388
auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
18389+
18390+
// Bail if we might break a sh{1,2,3}add pattern.
18391+
if (Subtarget.hasStdExtZba() && C2->getZExtValue() >= 1 &&
18392+
C2->getZExtValue() <= 3 && N->hasOneUse() &&
18393+
N->user_begin()->getOpcode() == ISD::ADD &&
18394+
!isUsedByLdSt(*N->user_begin(), nullptr) &&
18395+
!isa<ConstantSDNode>(N->user_begin()->getOperand(1)))
18396+
return false;
18397+
1838918398
if (C1 && C2) {
1839018399
const APInt &C1Int = C1->getAPIntValue();
1839118400
APInt ShiftedC1Int = C1Int << C2->getAPIntValue();
Lines changed: 245 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,28 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
2-
; RUN: llc -mtriple=riscv64 < %s | FileCheck -check-prefix=RV64 %s
2+
; RUN: llc -mtriple=riscv64 < %s | FileCheck -check-prefixes=RV64,NO-ZBA %s
3+
; RUN: llc -mtriple=riscv64 -mattr=+zba < %s | FileCheck -check-prefixes=RV64,ZBA %s
34

45
define void @add_sext_shl_moreOneUse_add(ptr %array1, i32 %a, i32 %b) {
5-
; RV64-LABEL: add_sext_shl_moreOneUse_add:
6-
; RV64: # %bb.0: # %entry
7-
; RV64-NEXT: addi a3, a1, 5
8-
; RV64-NEXT: sext.w a1, a1
9-
; RV64-NEXT: slli a1, a1, 2
10-
; RV64-NEXT: add a0, a1, a0
11-
; RV64-NEXT: sw a2, 20(a0)
12-
; RV64-NEXT: sw a2, 24(a0)
13-
; RV64-NEXT: sw a3, 140(a0)
14-
; RV64-NEXT: ret
6+
; NO-ZBA-LABEL: add_sext_shl_moreOneUse_add:
7+
; NO-ZBA: # %bb.0: # %entry
8+
; NO-ZBA-NEXT: addi a3, a1, 5
9+
; NO-ZBA-NEXT: sext.w a1, a1
10+
; NO-ZBA-NEXT: slli a1, a1, 2
11+
; NO-ZBA-NEXT: add a0, a1, a0
12+
; NO-ZBA-NEXT: sw a2, 20(a0)
13+
; NO-ZBA-NEXT: sw a2, 24(a0)
14+
; NO-ZBA-NEXT: sw a3, 140(a0)
15+
; NO-ZBA-NEXT: ret
16+
;
17+
; ZBA-LABEL: add_sext_shl_moreOneUse_add:
18+
; ZBA: # %bb.0: # %entry
19+
; ZBA-NEXT: addi a3, a1, 5
20+
; ZBA-NEXT: sext.w a1, a1
21+
; ZBA-NEXT: sh2add a0, a1, a0
22+
; ZBA-NEXT: sw a2, 20(a0)
23+
; ZBA-NEXT: sw a2, 24(a0)
24+
; ZBA-NEXT: sw a3, 140(a0)
25+
; ZBA-NEXT: ret
1526
entry:
1627
%add = add nsw i32 %a, 5
1728
%idxprom = sext i32 %add to i64
@@ -29,19 +40,32 @@ entry:
2940
}
3041

3142
define void @add_sext_shl_moreOneUse_addexceedsign12(ptr %array1, i32 %a, i32 %b) {
32-
; RV64-LABEL: add_sext_shl_moreOneUse_addexceedsign12:
33-
; RV64: # %bb.0: # %entry
34-
; RV64-NEXT: addi a3, a1, 2047
35-
; RV64-NEXT: lui a4, 2
36-
; RV64-NEXT: sext.w a1, a1
37-
; RV64-NEXT: addi a3, a3, 1
38-
; RV64-NEXT: slli a1, a1, 2
39-
; RV64-NEXT: add a0, a0, a4
40-
; RV64-NEXT: add a0, a0, a1
41-
; RV64-NEXT: sw a2, 0(a0)
42-
; RV64-NEXT: sw a3, 4(a0)
43-
; RV64-NEXT: sw a2, 120(a0)
44-
; RV64-NEXT: ret
43+
; NO-ZBA-LABEL: add_sext_shl_moreOneUse_addexceedsign12:
44+
; NO-ZBA: # %bb.0: # %entry
45+
; NO-ZBA-NEXT: addi a3, a1, 2047
46+
; NO-ZBA-NEXT: lui a4, 2
47+
; NO-ZBA-NEXT: sext.w a1, a1
48+
; NO-ZBA-NEXT: addi a3, a3, 1
49+
; NO-ZBA-NEXT: slli a1, a1, 2
50+
; NO-ZBA-NEXT: add a0, a0, a4
51+
; NO-ZBA-NEXT: add a0, a0, a1
52+
; NO-ZBA-NEXT: sw a2, 0(a0)
53+
; NO-ZBA-NEXT: sw a3, 4(a0)
54+
; NO-ZBA-NEXT: sw a2, 120(a0)
55+
; NO-ZBA-NEXT: ret
56+
;
57+
; ZBA-LABEL: add_sext_shl_moreOneUse_addexceedsign12:
58+
; ZBA: # %bb.0: # %entry
59+
; ZBA-NEXT: addi a3, a1, 2047
60+
; ZBA-NEXT: lui a4, 2
61+
; ZBA-NEXT: sext.w a1, a1
62+
; ZBA-NEXT: addi a3, a3, 1
63+
; ZBA-NEXT: sh2add a0, a1, a0
64+
; ZBA-NEXT: add a0, a0, a4
65+
; ZBA-NEXT: sw a2, 0(a0)
66+
; ZBA-NEXT: sw a3, 4(a0)
67+
; ZBA-NEXT: sw a2, 120(a0)
68+
; ZBA-NEXT: ret
4569
entry:
4670
%add = add nsw i32 %a, 2048
4771
%idxprom = sext i32 %add to i64
@@ -57,16 +81,26 @@ entry:
5781
}
5882

5983
define void @add_sext_shl_moreOneUse_sext(ptr %array1, i32 %a, i32 %b) {
60-
; RV64-LABEL: add_sext_shl_moreOneUse_sext:
61-
; RV64: # %bb.0: # %entry
62-
; RV64-NEXT: sext.w a1, a1
63-
; RV64-NEXT: addi a3, a1, 5
64-
; RV64-NEXT: slli a1, a1, 2
65-
; RV64-NEXT: add a0, a1, a0
66-
; RV64-NEXT: sw a2, 20(a0)
67-
; RV64-NEXT: sw a2, 24(a0)
68-
; RV64-NEXT: sd a3, 140(a0)
69-
; RV64-NEXT: ret
84+
; NO-ZBA-LABEL: add_sext_shl_moreOneUse_sext:
85+
; NO-ZBA: # %bb.0: # %entry
86+
; NO-ZBA-NEXT: sext.w a1, a1
87+
; NO-ZBA-NEXT: addi a3, a1, 5
88+
; NO-ZBA-NEXT: slli a1, a1, 2
89+
; NO-ZBA-NEXT: add a0, a1, a0
90+
; NO-ZBA-NEXT: sw a2, 20(a0)
91+
; NO-ZBA-NEXT: sw a2, 24(a0)
92+
; NO-ZBA-NEXT: sd a3, 140(a0)
93+
; NO-ZBA-NEXT: ret
94+
;
95+
; ZBA-LABEL: add_sext_shl_moreOneUse_sext:
96+
; ZBA: # %bb.0: # %entry
97+
; ZBA-NEXT: sext.w a1, a1
98+
; ZBA-NEXT: addi a3, a1, 5
99+
; ZBA-NEXT: sh2add a0, a1, a0
100+
; ZBA-NEXT: sw a2, 20(a0)
101+
; ZBA-NEXT: sw a2, 24(a0)
102+
; ZBA-NEXT: sd a3, 140(a0)
103+
; ZBA-NEXT: ret
70104
entry:
71105
%add = add nsw i32 %a, 5
72106
%idxprom = sext i32 %add to i64
@@ -85,20 +119,34 @@ entry:
85119

86120
; test of jumping, find add's operand has one more use can be simplified
87121
define void @add_sext_shl_moreOneUse_add_inSelect(ptr %array1, i32 signext %a, i32 %b, i32 signext %x) {
88-
; RV64-LABEL: add_sext_shl_moreOneUse_add_inSelect:
89-
; RV64: # %bb.0: # %entry
90-
; RV64-NEXT: addi a4, a1, 5
91-
; RV64-NEXT: mv a5, a4
92-
; RV64-NEXT: bgtz a3, .LBB3_2
93-
; RV64-NEXT: # %bb.1: # %entry
94-
; RV64-NEXT: mv a5, a2
95-
; RV64-NEXT: .LBB3_2: # %entry
96-
; RV64-NEXT: slli a1, a1, 2
97-
; RV64-NEXT: add a0, a1, a0
98-
; RV64-NEXT: sw a5, 20(a0)
99-
; RV64-NEXT: sw a5, 24(a0)
100-
; RV64-NEXT: sw a4, 140(a0)
101-
; RV64-NEXT: ret
122+
; NO-ZBA-LABEL: add_sext_shl_moreOneUse_add_inSelect:
123+
; NO-ZBA: # %bb.0: # %entry
124+
; NO-ZBA-NEXT: addi a4, a1, 5
125+
; NO-ZBA-NEXT: mv a5, a4
126+
; NO-ZBA-NEXT: bgtz a3, .LBB3_2
127+
; NO-ZBA-NEXT: # %bb.1: # %entry
128+
; NO-ZBA-NEXT: mv a5, a2
129+
; NO-ZBA-NEXT: .LBB3_2: # %entry
130+
; NO-ZBA-NEXT: slli a1, a1, 2
131+
; NO-ZBA-NEXT: add a0, a1, a0
132+
; NO-ZBA-NEXT: sw a5, 20(a0)
133+
; NO-ZBA-NEXT: sw a5, 24(a0)
134+
; NO-ZBA-NEXT: sw a4, 140(a0)
135+
; NO-ZBA-NEXT: ret
136+
;
137+
; ZBA-LABEL: add_sext_shl_moreOneUse_add_inSelect:
138+
; ZBA: # %bb.0: # %entry
139+
; ZBA-NEXT: addi a4, a1, 5
140+
; ZBA-NEXT: mv a5, a4
141+
; ZBA-NEXT: bgtz a3, .LBB3_2
142+
; ZBA-NEXT: # %bb.1: # %entry
143+
; ZBA-NEXT: mv a5, a2
144+
; ZBA-NEXT: .LBB3_2: # %entry
145+
; ZBA-NEXT: sh2add a0, a1, a0
146+
; ZBA-NEXT: sw a5, 20(a0)
147+
; ZBA-NEXT: sw a5, 24(a0)
148+
; ZBA-NEXT: sw a4, 140(a0)
149+
; ZBA-NEXT: ret
102150
entry:
103151
%add = add nsw i32 %a, 5
104152
%cmp = icmp sgt i32 %x, 0
@@ -118,23 +166,40 @@ entry:
118166
}
119167

120168
define void @add_sext_shl_moreOneUse_add_inSelect_addexceedsign12(ptr %array1, i32 signext %a, i32 %b, i32 signext %x) {
121-
; RV64-LABEL: add_sext_shl_moreOneUse_add_inSelect_addexceedsign12:
122-
; RV64: # %bb.0: # %entry
123-
; RV64-NEXT: addi a4, a1, 2047
124-
; RV64-NEXT: lui a5, 2
125-
; RV64-NEXT: slli a6, a1, 2
126-
; RV64-NEXT: addi a1, a4, 1
127-
; RV64-NEXT: add a0, a0, a6
128-
; RV64-NEXT: add a0, a0, a5
129-
; RV64-NEXT: mv a4, a1
130-
; RV64-NEXT: bgtz a3, .LBB4_2
131-
; RV64-NEXT: # %bb.1: # %entry
132-
; RV64-NEXT: mv a4, a2
133-
; RV64-NEXT: .LBB4_2: # %entry
134-
; RV64-NEXT: sw a4, 0(a0)
135-
; RV64-NEXT: sw a4, 4(a0)
136-
; RV64-NEXT: sw a1, 120(a0)
137-
; RV64-NEXT: ret
169+
; NO-ZBA-LABEL: add_sext_shl_moreOneUse_add_inSelect_addexceedsign12:
170+
; NO-ZBA: # %bb.0: # %entry
171+
; NO-ZBA-NEXT: addi a4, a1, 2047
172+
; NO-ZBA-NEXT: lui a5, 2
173+
; NO-ZBA-NEXT: slli a6, a1, 2
174+
; NO-ZBA-NEXT: addi a1, a4, 1
175+
; NO-ZBA-NEXT: add a0, a0, a6
176+
; NO-ZBA-NEXT: add a0, a0, a5
177+
; NO-ZBA-NEXT: mv a4, a1
178+
; NO-ZBA-NEXT: bgtz a3, .LBB4_2
179+
; NO-ZBA-NEXT: # %bb.1: # %entry
180+
; NO-ZBA-NEXT: mv a4, a2
181+
; NO-ZBA-NEXT: .LBB4_2: # %entry
182+
; NO-ZBA-NEXT: sw a4, 0(a0)
183+
; NO-ZBA-NEXT: sw a4, 4(a0)
184+
; NO-ZBA-NEXT: sw a1, 120(a0)
185+
; NO-ZBA-NEXT: ret
186+
;
187+
; ZBA-LABEL: add_sext_shl_moreOneUse_add_inSelect_addexceedsign12:
188+
; ZBA: # %bb.0: # %entry
189+
; ZBA-NEXT: addi a4, a1, 2047
190+
; ZBA-NEXT: lui a5, 2
191+
; ZBA-NEXT: addi a4, a4, 1
192+
; ZBA-NEXT: sh2add a0, a1, a0
193+
; ZBA-NEXT: add a0, a0, a5
194+
; ZBA-NEXT: mv a1, a4
195+
; ZBA-NEXT: bgtz a3, .LBB4_2
196+
; ZBA-NEXT: # %bb.1: # %entry
197+
; ZBA-NEXT: mv a1, a2
198+
; ZBA-NEXT: .LBB4_2: # %entry
199+
; ZBA-NEXT: sw a1, 0(a0)
200+
; ZBA-NEXT: sw a1, 4(a0)
201+
; ZBA-NEXT: sw a4, 120(a0)
202+
; ZBA-NEXT: ret
138203
entry:
139204
%add = add nsw i32 %a, 2048
140205
%cmp = icmp sgt i32 %x, 0
@@ -152,20 +217,34 @@ entry:
152217
}
153218

154219
define void @add_shl_moreOneUse_inSelect(ptr %array1, i64 %a, i64 %b, i64 %x) {
155-
; RV64-LABEL: add_shl_moreOneUse_inSelect:
156-
; RV64: # %bb.0: # %entry
157-
; RV64-NEXT: addi a4, a1, 5
158-
; RV64-NEXT: mv a5, a4
159-
; RV64-NEXT: bgtz a3, .LBB5_2
160-
; RV64-NEXT: # %bb.1: # %entry
161-
; RV64-NEXT: mv a5, a2
162-
; RV64-NEXT: .LBB5_2: # %entry
163-
; RV64-NEXT: slli a1, a1, 3
164-
; RV64-NEXT: add a0, a1, a0
165-
; RV64-NEXT: sd a5, 40(a0)
166-
; RV64-NEXT: sd a5, 48(a0)
167-
; RV64-NEXT: sd a4, 280(a0)
168-
; RV64-NEXT: ret
220+
; NO-ZBA-LABEL: add_shl_moreOneUse_inSelect:
221+
; NO-ZBA: # %bb.0: # %entry
222+
; NO-ZBA-NEXT: addi a4, a1, 5
223+
; NO-ZBA-NEXT: mv a5, a4
224+
; NO-ZBA-NEXT: bgtz a3, .LBB5_2
225+
; NO-ZBA-NEXT: # %bb.1: # %entry
226+
; NO-ZBA-NEXT: mv a5, a2
227+
; NO-ZBA-NEXT: .LBB5_2: # %entry
228+
; NO-ZBA-NEXT: slli a1, a1, 3
229+
; NO-ZBA-NEXT: add a0, a1, a0
230+
; NO-ZBA-NEXT: sd a5, 40(a0)
231+
; NO-ZBA-NEXT: sd a5, 48(a0)
232+
; NO-ZBA-NEXT: sd a4, 280(a0)
233+
; NO-ZBA-NEXT: ret
234+
;
235+
; ZBA-LABEL: add_shl_moreOneUse_inSelect:
236+
; ZBA: # %bb.0: # %entry
237+
; ZBA-NEXT: addi a4, a1, 5
238+
; ZBA-NEXT: mv a5, a4
239+
; ZBA-NEXT: bgtz a3, .LBB5_2
240+
; ZBA-NEXT: # %bb.1: # %entry
241+
; ZBA-NEXT: mv a5, a2
242+
; ZBA-NEXT: .LBB5_2: # %entry
243+
; ZBA-NEXT: sh3add a0, a1, a0
244+
; ZBA-NEXT: sd a5, 40(a0)
245+
; ZBA-NEXT: sd a5, 48(a0)
246+
; ZBA-NEXT: sd a4, 280(a0)
247+
; ZBA-NEXT: ret
169248
entry:
170249
%add = add nsw i64 %a, 5
171250
%cmp = icmp sgt i64 %x, 0
@@ -180,3 +259,90 @@ entry:
180259
store i64 %add, ptr %arrayidx6
181260
ret void
182261
}
262+
263+
define i64 @add_shl_moreOneUse_sh1add(i64 %x) {
264+
; NO-ZBA-LABEL: add_shl_moreOneUse_sh1add:
265+
; NO-ZBA: # %bb.0:
266+
; NO-ZBA-NEXT: ori a1, a0, 1
267+
; NO-ZBA-NEXT: slli a0, a0, 1
268+
; NO-ZBA-NEXT: ori a0, a0, 2
269+
; NO-ZBA-NEXT: add a0, a0, a1
270+
; NO-ZBA-NEXT: ret
271+
;
272+
; ZBA-LABEL: add_shl_moreOneUse_sh1add:
273+
; ZBA: # %bb.0:
274+
; ZBA-NEXT: ori a0, a0, 1
275+
; ZBA-NEXT: sh1add a0, a0, a0
276+
; ZBA-NEXT: ret
277+
%or = or i64 %x, 1
278+
%mul = shl i64 %or, 1
279+
%add = add i64 %mul, %or
280+
ret i64 %add
281+
}
282+
283+
define i64 @add_shl_moreOneUse_sh2add(i64 %x) {
284+
; NO-ZBA-LABEL: add_shl_moreOneUse_sh2add:
285+
; NO-ZBA: # %bb.0:
286+
; NO-ZBA-NEXT: ori a1, a0, 1
287+
; NO-ZBA-NEXT: slli a0, a0, 2
288+
; NO-ZBA-NEXT: ori a0, a0, 4
289+
; NO-ZBA-NEXT: add a0, a0, a1
290+
; NO-ZBA-NEXT: ret
291+
;
292+
; ZBA-LABEL: add_shl_moreOneUse_sh2add:
293+
; ZBA: # %bb.0:
294+
; ZBA-NEXT: ori a0, a0, 1
295+
; ZBA-NEXT: sh2add a0, a0, a0
296+
; ZBA-NEXT: ret
297+
%or = or i64 %x, 1
298+
%mul = shl i64 %or, 2
299+
%add = add i64 %mul, %or
300+
ret i64 %add
301+
}
302+
303+
define i64 @add_shl_moreOneUse_sh3add(i64 %x) {
304+
; NO-ZBA-LABEL: add_shl_moreOneUse_sh3add:
305+
; NO-ZBA: # %bb.0:
306+
; NO-ZBA-NEXT: ori a1, a0, 1
307+
; NO-ZBA-NEXT: slli a0, a0, 3
308+
; NO-ZBA-NEXT: ori a0, a0, 8
309+
; NO-ZBA-NEXT: add a0, a0, a1
310+
; NO-ZBA-NEXT: ret
311+
;
312+
; ZBA-LABEL: add_shl_moreOneUse_sh3add:
313+
; ZBA: # %bb.0:
314+
; ZBA-NEXT: ori a0, a0, 1
315+
; ZBA-NEXT: sh3add a0, a0, a0
316+
; ZBA-NEXT: ret
317+
%or = or i64 %x, 1
318+
%mul = shl i64 %or, 3
319+
%add = add i64 %mul, %or
320+
ret i64 %add
321+
}
322+
323+
define i64 @add_shl_moreOneUse_sh4add(i64 %x) {
324+
; RV64-LABEL: add_shl_moreOneUse_sh4add:
325+
; RV64: # %bb.0:
326+
; RV64-NEXT: ori a1, a0, 1
327+
; RV64-NEXT: slli a0, a0, 4
328+
; RV64-NEXT: ori a0, a0, 16
329+
; RV64-NEXT: add a0, a0, a1
330+
; RV64-NEXT: ret
331+
%or = or i64 %x, 1
332+
%mul = shl i64 %or, 4
333+
%add = add i64 %mul, %or
334+
ret i64 %add
335+
}
336+
337+
define i64 @add_shl_rhs_constant(i64 %x, i64 %y) {
338+
; RV64-LABEL: add_shl_rhs_constant:
339+
; RV64: # %bb.0:
340+
; RV64-NEXT: add a0, a0, a1
341+
; RV64-NEXT: slli a0, a0, 3
342+
; RV64-NEXT: ret
343+
%a = add i64 %x, 1
344+
%b = add i64 %y, %a
345+
%c = shl i64 %b, 3
346+
%d = add i64 %c, -8
347+
ret i64 %d
348+
}

0 commit comments

Comments
 (0)