Commit 3083acc
[DAGCombine] Remove oneuse restrictions for RISCV in folding (shl (add_nsw x, c1), c2) and folding (shl (sext (add x, c1)), c2) in some scenarios (#101294)
This patch removes the one-use restriction on folding (shl (add_nsw x, c1), c2) and (shl (sext (add x, c1)), c2) in some scenarios. The motivating test case comes from Dhrystone; see riscv32: https://godbolt.org/z/o8GdMKrae and riscv64: https://godbolt.org/z/Yh5bPz56z
1 parent 9a06fb7 commit 3083acc
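
For context, the fold being unrestricted here distributes a shift over an add so that the constant term can later be absorbed into load/store offsets. A minimal C++ sketch of the two identities involved (function names are illustrative, not from the patch):

#include <cstdint>

// Base identity: (x + c1) << c2  ==  (x << c2) + (c1 << c2).
// After the rewrite, (x << c2) is a reusable base and (c1 << c2) is a plain
// constant that addressing modes can absorb.
int64_t shlOfAdd(int64_t x) { return (x + 5) << 2; }  // before the fold
int64_t addOfShl(int64_t x) { return (x << 2) + 20; } // after: 20 == 5 << 2

// Sext variant: sign-extending (x + c1) from i32 to i64 equals sext(x) + c1
// only if the 32-bit add cannot wrap, hence the nsw requirement on
// (shl (sext (add nsw x, c1)), c2).
int64_t shlOfSextAdd(int32_t x) { return (int64_t)(x + 5) << 2; }  // before
int64_t addOfShlSext(int32_t x) { return ((int64_t)x << 2) + 20; } // after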

File tree: 9 files changed, +384 −127 lines changed

llvm/include/llvm/CodeGen/TargetLowering.h

Lines changed: 6 additions & 0 deletions
@@ -4304,6 +4304,12 @@ class TargetLowering : public TargetLoweringBase {
   /// @param Level the current DAGCombine legalization level.
   virtual bool isDesirableToCommuteWithShift(const SDNode *N,
                                              CombineLevel Level) const {
+    SDValue ShiftLHS = N->getOperand(0);
+    if (!ShiftLHS->hasOneUse())
+      return false;
+    if (ShiftLHS.getOpcode() == ISD::SIGN_EXTEND &&
+        !ShiftLHS.getOperand(0)->hasOneUse())
+      return false;
     return true;
   }
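
With this default in place, the one-use checks formerly hard-coded in DAGCombiner become the baseline policy, and a target that can profitably commute multi-use nodes overrides the hook. A sketch of such an override (MyTargetLowering is a hypothetical target class, not part of this patch):

// Hypothetical target: always commute when the shifted operand is an ADD,
// even if it has several uses; otherwise fall back to the default policy.
bool MyTargetLowering::isDesirableToCommuteWithShift(const SDNode *N,
                                                     CombineLevel Level) const {
  if (N->getOperand(0).getOpcode() == ISD::ADD)
    return true;
  return TargetLowering::isDesirableToCommuteWithShift(N, Level);
}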

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 2 additions & 3 deletions
@@ -10233,7 +10233,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
   // Variant of version done on multiply, except mul by a power of 2 is turned
   // into a shift.
   if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
-      N0->hasOneUse() && TLI.isDesirableToCommuteWithShift(N, Level)) {
+      TLI.isDesirableToCommuteWithShift(N, Level)) {
     SDValue N01 = N0.getOperand(1);
     if (SDValue Shl1 =
             DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, {N01, N1})) {
@@ -10252,8 +10252,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
   // TODO: Should we limit this with isLegalAddImmediate?
   if (N0.getOpcode() == ISD::SIGN_EXTEND &&
       N0.getOperand(0).getOpcode() == ISD::ADD &&
-      N0.getOperand(0)->getFlags().hasNoSignedWrap() && N0->hasOneUse() &&
-      N0.getOperand(0)->hasOneUse() &&
+      N0.getOperand(0)->getFlags().hasNoSignedWrap() &&
       TLI.isDesirableToCommuteWithShift(N, Level)) {
     SDValue Add = N0.getOperand(0);
     SDLoc DL(N0);
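
When the hook approves, the combiner performs the rewrite just below the first condition; roughly (a simplified sketch, not a verbatim excerpt of the surrounding code):

// Shl1 already holds the folded constant c1 << c2 (from FoldConstantArithmetic
// above); rebuild the expression as (add/or (shl x, c2), c1 << c2).
SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1);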

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 7 additions & 0 deletions
@@ -17931,6 +17931,13 @@ AArch64TargetLowering::isDesirableToCommuteWithShift(const SDNode *N,
   SDValue ShiftLHS = N->getOperand(0);
   EVT VT = N->getValueType(0);
 
+  if (!ShiftLHS->hasOneUse())
+    return false;
+
+  if (ShiftLHS.getOpcode() == ISD::SIGN_EXTEND &&
+      !ShiftLHS.getOperand(0)->hasOneUse())
+    return false;
+
   // If ShiftLHS is unsigned bit extraction: ((x >> C) & mask), then do not
   // combine it with shift 'N' to let it be lowered to UBFX except:
   // ((x >> C) & mask) << C.

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

Lines changed: 9 additions & 0 deletions
@@ -1072,6 +1072,15 @@ bool AMDGPUTargetLowering::isDesirableToCommuteWithShift(
   assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
           N->getOpcode() == ISD::SRL) &&
          "Expected shift op");
+
+  SDValue ShiftLHS = N->getOperand(0);
+  if (!ShiftLHS->hasOneUse())
+    return false;
+
+  if (ShiftLHS.getOpcode() == ISD::SIGN_EXTEND &&
+      !ShiftLHS.getOperand(0)->hasOneUse())
+    return false;
+
   // Always commute pre-type legalization and right shifts.
   // We're looking for shl(or(x,y),z) patterns.
   if (Level < CombineLevel::AfterLegalizeTypes ||

llvm/lib/Target/ARM/ARMISelLowering.cpp

Lines changed: 8 additions & 0 deletions
@@ -13826,6 +13826,14 @@ ARMTargetLowering::isDesirableToCommuteWithShift(const SDNode *N,
           N->getOpcode() == ISD::SRL) &&
          "Expected shift op");
 
+  SDValue ShiftLHS = N->getOperand(0);
+  if (!ShiftLHS->hasOneUse())
+    return false;
+
+  if (ShiftLHS.getOpcode() == ISD::SIGN_EXTEND &&
+      !ShiftLHS.getOperand(0)->hasOneUse())
+    return false;
+
   if (Level == BeforeLegalizeTypes)
     return true;

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 47 additions & 0 deletions
@@ -18233,8 +18233,46 @@ bool RISCVTargetLowering::isDesirableToCommuteWithShift(
   // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
   SDValue N0 = N->getOperand(0);
   EVT Ty = N0.getValueType();
+
+  // LD/ST will optimize constant Offset extraction, so when AddNode is used by
+  // LD/ST, it can still complete the folding optimization operation performed
+  // above.
+  auto isUsedByLdSt = [&]() {
+    bool CanOptAlways = false;
+    if (N0->getOpcode() == ISD::ADD && !N0->hasOneUse()) {
+      for (SDNode *Use : N0->uses()) {
+        // This use is the one we're on right now. Skip it
+        if (Use == N || Use->getOpcode() == ISD::SELECT)
+          continue;
+        if (!isa<StoreSDNode>(Use) && !isa<LoadSDNode>(Use)) {
+          CanOptAlways = false;
+          break;
+        }
+        CanOptAlways = true;
+      }
+    }
+
+    if (N0->getOpcode() == ISD::SIGN_EXTEND &&
+        !N0->getOperand(0)->hasOneUse()) {
+      for (SDNode *Use : N0->getOperand(0)->uses()) {
+        // This use is the one we're on right now. Skip it
+        if (Use == N0.getNode() || Use->getOpcode() == ISD::SELECT)
+          continue;
+        if (!isa<StoreSDNode>(Use) && !isa<LoadSDNode>(Use)) {
+          CanOptAlways = false;
+          break;
+        }
+        CanOptAlways = true;
+      }
+    }
+    return CanOptAlways;
+  };
+
   if (Ty.isScalarInteger() &&
       (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
+    if (N0.getOpcode() == ISD::ADD && !N0->hasOneUse())
+      return isUsedByLdSt();
+
     auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
     auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
     if (C1 && C2) {
@@ -18269,6 +18307,15 @@ bool RISCVTargetLowering::isDesirableToCommuteWithShift(
       return false;
     }
   }
+
+  if (!N0->hasOneUse())
+    return false;
+
+  if (N0->getOpcode() == ISD::SIGN_EXTEND &&
+      N0->getOperand(0)->getOpcode() == ISD::ADD &&
+      !N0->getOperand(0)->hasOneUse())
+    return isUsedByLdSt();
+
   return true;
 }
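
The idea behind isUsedByLdSt: if every remaining user of the add (or of the multiply-used value under the sext) is a load or store, the leftover constant folds into the 12-bit immediate of each ld/st, so nothing is lost by commuting the shift anyway. An illustrative C++ analogue of the pattern (mirrors the first test below; names are not from the patch):

// Every extra use of (a + k) feeds a store: the shifted constants become the
// sw offsets 20, 24 and 140, and the base address array1 + 4*a is computed
// only once.
void storePattern(int *array1, int a, int b) {
  array1[a + 5] = b;       // sw b, 20(base)
  array1[a + 6] = b;       // sw b, 24(base)
  array1[a + 35] = a + 5;  // the add result itself is stored, too
}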

Lines changed: 182 additions & 0 deletions
@@ -0,0 +1,182 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc -mtriple=riscv64 < %s | FileCheck -check-prefix=RV64 %s

define void @add_sext_shl_moreOneUse_add(ptr %array1, i32 %a, i32 %b) {
; RV64-LABEL: add_sext_shl_moreOneUse_add:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    addi a3, a1, 5
; RV64-NEXT:    sext.w a1, a1
; RV64-NEXT:    slli a1, a1, 2
; RV64-NEXT:    add a0, a1, a0
; RV64-NEXT:    sw a2, 20(a0)
; RV64-NEXT:    sw a2, 24(a0)
; RV64-NEXT:    sw a3, 140(a0)
; RV64-NEXT:    ret
entry:
  %add = add nsw i32 %a, 5
  %idxprom = sext i32 %add to i64
  %arrayidx = getelementptr inbounds i32, ptr %array1, i64 %idxprom
  store i32 %b, ptr %arrayidx
  %add3 = add nsw i32 %a, 6
  %idxprom4 = sext i32 %add3 to i64
  %arrayidx5 = getelementptr inbounds i32, ptr %array1, i64 %idxprom4
  store i32 %b, ptr %arrayidx5
  %add6 = add nsw i32 %a, 35
  %idxprom7 = sext i32 %add6 to i64
  %arrayidx8 = getelementptr inbounds i32, ptr %array1, i64 %idxprom7
  store i32 %add, ptr %arrayidx8
  ret void
}

define void @add_sext_shl_moreOneUse_addexceedsign12(ptr %array1, i32 %a, i32 %b) {
; RV64-LABEL: add_sext_shl_moreOneUse_addexceedsign12:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    addi a3, a1, 2047
; RV64-NEXT:    lui a4, 2
; RV64-NEXT:    sext.w a1, a1
; RV64-NEXT:    addi a3, a3, 1
; RV64-NEXT:    slli a1, a1, 2
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    sw a2, 0(a0)
; RV64-NEXT:    sw a3, 4(a0)
; RV64-NEXT:    sw a2, 120(a0)
; RV64-NEXT:    ret
entry:
  %add = add nsw i32 %a, 2048
  %idxprom = sext i32 %add to i64
  %arrayidx = getelementptr inbounds i32, ptr %array1, i64 %idxprom
  store i32 %b, ptr %arrayidx
  %0 = sext i32 %a to i64
  %1 = getelementptr i32, ptr %array1, i64 %0
  %arrayidx3 = getelementptr i8, ptr %1, i64 8196
  store i32 %add, ptr %arrayidx3
  %arrayidx6 = getelementptr i8, ptr %1, i64 8312
  store i32 %b, ptr %arrayidx6
  ret void
}

define void @add_sext_shl_moreOneUse_sext(ptr %array1, i32 %a, i32 %b) {
; RV64-LABEL: add_sext_shl_moreOneUse_sext:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    sext.w a1, a1
; RV64-NEXT:    addi a3, a1, 5
; RV64-NEXT:    slli a1, a1, 2
; RV64-NEXT:    add a0, a1, a0
; RV64-NEXT:    sw a2, 20(a0)
; RV64-NEXT:    sw a2, 24(a0)
; RV64-NEXT:    sd a3, 140(a0)
; RV64-NEXT:    ret
entry:
  %add = add nsw i32 %a, 5
  %idxprom = sext i32 %add to i64
  %arrayidx = getelementptr inbounds i32, ptr %array1, i64 %idxprom
  store i32 %b, ptr %arrayidx
  %add3 = add nsw i32 %a, 6
  %idxprom4 = sext i32 %add3 to i64
  %arrayidx5 = getelementptr inbounds i32, ptr %array1, i64 %idxprom4
  store i32 %b, ptr %arrayidx5
  %add6 = add nsw i32 %a, 35
  %idxprom7 = sext i32 %add6 to i64
  %arrayidx8 = getelementptr inbounds i32, ptr %array1, i64 %idxprom7
  store i64 %idxprom, ptr %arrayidx8
  ret void
}

; Test with a jump (select): the add's result has an extra use in the select,
; but the fold can still be applied.
define void @add_sext_shl_moreOneUse_add_inSelect(ptr %array1, i32 signext %a, i32 %b, i32 signext %x) {
; RV64-LABEL: add_sext_shl_moreOneUse_add_inSelect:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    addi a4, a1, 5
; RV64-NEXT:    mv a5, a4
; RV64-NEXT:    bgtz a3, .LBB3_2
; RV64-NEXT:  # %bb.1: # %entry
; RV64-NEXT:    mv a5, a2
; RV64-NEXT:  .LBB3_2: # %entry
; RV64-NEXT:    slli a1, a1, 2
; RV64-NEXT:    add a0, a1, a0
; RV64-NEXT:    sw a5, 20(a0)
; RV64-NEXT:    sw a5, 24(a0)
; RV64-NEXT:    sw a4, 140(a0)
; RV64-NEXT:    ret
entry:
  %add = add nsw i32 %a, 5
  %cmp = icmp sgt i32 %x, 0
  %idxprom = sext i32 %add to i64
  %arrayidx = getelementptr inbounds i32, ptr %array1, i64 %idxprom
  %add.b = select i1 %cmp, i32 %add, i32 %b
  store i32 %add.b, ptr %arrayidx
  %add5 = add nsw i32 %a, 6
  %idxprom6 = sext i32 %add5 to i64
  %arrayidx7 = getelementptr inbounds i32, ptr %array1, i64 %idxprom6
  store i32 %add.b, ptr %arrayidx7
  %add8 = add nsw i32 %a, 35
  %idxprom9 = sext i32 %add8 to i64
  %arrayidx10 = getelementptr inbounds i32, ptr %array1, i64 %idxprom9
  store i32 %add, ptr %arrayidx10
  ret void
}

define void @add_sext_shl_moreOneUse_add_inSelect_addexceedsign12(ptr %array1, i32 signext %a, i32 %b, i32 signext %x) {
; RV64-LABEL: add_sext_shl_moreOneUse_add_inSelect_addexceedsign12:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    addi a4, a1, 2047
; RV64-NEXT:    lui a5, 2
; RV64-NEXT:    slli a6, a1, 2
; RV64-NEXT:    addi a1, a4, 1
; RV64-NEXT:    add a0, a0, a6
; RV64-NEXT:    add a0, a0, a5
; RV64-NEXT:    mv a4, a1
; RV64-NEXT:    bgtz a3, .LBB4_2
; RV64-NEXT:  # %bb.1: # %entry
; RV64-NEXT:    mv a4, a2
; RV64-NEXT:  .LBB4_2: # %entry
; RV64-NEXT:    sw a4, 0(a0)
; RV64-NEXT:    sw a4, 4(a0)
; RV64-NEXT:    sw a1, 120(a0)
; RV64-NEXT:    ret
entry:
  %add = add nsw i32 %a, 2048
  %cmp = icmp sgt i32 %x, 0
  %idxprom = sext i32 %add to i64
  %arrayidx = getelementptr inbounds i32, ptr %array1, i64 %idxprom
  %add.b = select i1 %cmp, i32 %add, i32 %b
  store i32 %add.b, ptr %arrayidx
  %0 = sext i32 %a to i64
  %1 = getelementptr i32, ptr %array1, i64 %0
  %arrayidx7 = getelementptr i8, ptr %1, i64 8196
  store i32 %add.b, ptr %arrayidx7
  %arrayidx10 = getelementptr i8, ptr %1, i64 8312
  store i32 %add, ptr %arrayidx10
  ret void
}

define void @add_shl_moreOneUse_inSelect(ptr %array1, i64 %a, i64 %b, i64 %x) {
; RV64-LABEL: add_shl_moreOneUse_inSelect:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    addi a4, a1, 5
; RV64-NEXT:    mv a5, a4
; RV64-NEXT:    bgtz a3, .LBB5_2
; RV64-NEXT:  # %bb.1: # %entry
; RV64-NEXT:    mv a5, a2
; RV64-NEXT:  .LBB5_2: # %entry
; RV64-NEXT:    slli a1, a1, 3
; RV64-NEXT:    add a0, a1, a0
; RV64-NEXT:    sd a5, 40(a0)
; RV64-NEXT:    sd a5, 48(a0)
; RV64-NEXT:    sd a4, 280(a0)
; RV64-NEXT:    ret
entry:
  %add = add nsw i64 %a, 5
  %cmp = icmp sgt i64 %x, 0
  %spec.select = select i1 %cmp, i64 %add, i64 %b
  %0 = getelementptr inbounds i64, ptr %array1, i64 %add
  store i64 %spec.select, ptr %0
  %add3 = add nsw i64 %a, 6
  %arrayidx4 = getelementptr inbounds i64, ptr %array1, i64 %add3
  store i64 %spec.select, ptr %arrayidx4
  %add5 = add nsw i64 %a, 35
  %arrayidx6 = getelementptr inbounds i64, ptr %array1, i64 %add5
  store i64 %add, ptr %arrayidx6
  ret void
}
