Skip to content

Commit 32f911f

Browse files
el-ev and dtcxzyw
authored
[InstCombine] Fold ceil(X / (2 ^ C)) == 0 -> X == 0 (#143683)
Co-authored-by: Yingwei Zheng <[email protected]>
1 parent 76ae9aa commit 32f911f

File tree

4 files changed

+347
-0
lines changed

4 files changed

+347
-0
lines changed

llvm/include/llvm/Analysis/ValueTracking.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -999,6 +999,11 @@ LLVM_ABI void
999999
findValuesAffectedByCondition(Value *Cond, bool IsAssume,
10001000
function_ref<void(Value *)> InsertAffected);
10011001

1002+
/// Returns the inner value X if the expression has the form f(X)
1003+
/// where f(X) == 0 if and only if X == 0, otherwise returns nullptr.
/// A non-null result can therefore be compared against zero in place of
/// \p V without changing the outcome of the comparison.
1004+
LLVM_ABI Value *stripNullTest(Value *V);
1005+
/// Const-qualified overload of stripNullTest.
LLVM_ABI const Value *stripNullTest(const Value *V);
1006+
10021007
} // end namespace llvm
10031008

10041009
#endif // LLVM_ANALYSIS_VALUETRACKING_H

llvm/lib/Analysis/ValueTracking.cpp

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3521,6 +3521,9 @@ bool isKnownNonZero(const Value *V, const APInt &DemandedElts,
35213521
isKnownNonNullFromDominatingCondition(V, Q.CxtI, Q.DT))
35223522
return true;
35233523

3524+
if (const Value *Stripped = stripNullTest(V))
3525+
return isKnownNonZero(Stripped, DemandedElts, Q, Depth);
3526+
35243527
return false;
35253528
}
35263529

@@ -10170,3 +10173,26 @@ void llvm::findValuesAffectedByCondition(
1017010173
}
1017110174
}
1017210175
}
10176+
10177+
/// Looks through an expression f(X) that is zero exactly when X is zero.
///
/// The only form recognized here is the "round-up shift"
///   (X >> C) or/add zext((X & mask(C)) != 0)
/// with the two operands in either order, where mask(C) is the constant with
/// exactly the low C bits set.
///
/// \param V the value to inspect.
/// \returns X if \p V matches the form above, otherwise nullptr.
const Value *llvm::stripNullTest(const Value *V) {
  // (X >> C) or/add (X & mask(C) != 0)
  if (const auto *BO = dyn_cast<BinaryOperator>(V)) {
    if (BO->getOpcode() == Instruction::Add ||
        BO->getOpcode() == Instruction::Or) {
      const Value *X;
      const APInt *C1, *C2;
      // The shifted operand carries the high bits of X and the zext'd compare
      // is 1 exactly when one of the low bits selected by the mask is set, so
      // when the mask covers precisely the bits that were shifted out the
      // combination is zero only if every bit of X is zero.
      //
      // The shift amount is compared via APInt's operator==(uint64_t) instead
      // of C1->getZExtValue(): the latter asserts when the constant needs
      // more than 64 bits (e.g. an out-of-range shift amount on a wide
      // integer type), whereas the comparison simply yields false.
      if (match(BO, m_c_BinOp(m_LShr(m_Value(X), m_APInt(C1)),
                              m_ZExt(m_SpecificICmp(
                                  ICmpInst::ICMP_NE,
                                  m_And(m_Deferred(X), m_LowBitMask(C2)),
                                  m_Zero())))) &&
          *C1 == C2->popcount())
        return X;
    }
  }
  return nullptr;
}
10195+
10196+
/// Mutable-pointer overload: defers to the const implementation and removes
/// the const qualifier from whatever it returns (possibly nullptr).
Value *llvm::stripNullTest(Value *V) {
  const Value *ConstV = V;
  const Value *Inner = stripNullTest(ConstV);
  return const_cast<Value *>(Inner);
}

llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1298,6 +1298,14 @@ Instruction *InstCombinerImpl::foldICmpWithZero(ICmpInst &Cmp) {
12981298
// eq/ne (mul X, Y)) with (icmp eq/ne X/Y) and if X/Y is known non-zero that
12991299
// will fold to a constant elsewhere.
13001300
}
1301+
1302+
// (icmp eq/ne f(X), 0) -> (icmp eq/ne X, 0)
1303+
// where f(X) == 0 if and only if X == 0
1304+
if (ICmpInst::isEquality(Pred))
1305+
if (Value *Stripped = stripNullTest(Cmp.getOperand(0)))
1306+
return new ICmpInst(Pred, Stripped,
1307+
Constant::getNullValue(Stripped->getType()));
1308+
13011309
return nullptr;
13021310
}
13031311

Lines changed: 308 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,308 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt < %s -passes=instcombine -S | FileCheck %s
3+
4+
define i1 @ceil_shift4(i32 %arg0) {
5+
; CHECK-LABEL: define i1 @ceil_shift4(
6+
; CHECK-SAME: i32 [[ARG0:%.*]]) {
7+
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[ARG0]], 0
8+
; CHECK-NEXT: ret i1 [[TMP1]]
9+
;
10+
%quot = lshr i32 %arg0, 4
11+
%rem = and i32 %arg0, 15
12+
%has_rem = icmp ne i32 %rem, 0
13+
%zext_has_rem = zext i1 %has_rem to i32
14+
%quot_or_rem = or i32 %quot, %zext_has_rem
15+
%is_zero = icmp eq i32 %quot_or_rem, 0
16+
ret i1 %is_zero
17+
}
18+
19+
define i1 @ceil_shift4_add(i32 %arg0) {
20+
; CHECK-LABEL: define i1 @ceil_shift4_add(
21+
; CHECK-SAME: i32 [[ARG0:%.*]]) {
22+
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[ARG0]], 0
23+
; CHECK-NEXT: ret i1 [[TMP6]]
24+
;
25+
%quot = lshr i32 %arg0, 4
26+
%rem = and i32 %arg0, 15
27+
%has_rem = icmp ne i32 %rem, 0
28+
%zext_has_rem = zext i1 %has_rem to i32
29+
%ceil = add i32 %quot, %zext_has_rem
30+
%res = icmp eq i32 %ceil, 0
31+
ret i1 %res
32+
}
33+
34+
define i1 @ceil_shift6(i32 %arg0) {
35+
; CHECK-LABEL: define i1 @ceil_shift6(
36+
; CHECK-SAME: i32 [[ARG0:%.*]]) {
37+
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[ARG0]], 0
38+
; CHECK-NEXT: ret i1 [[TMP1]]
39+
;
40+
%quot = lshr i32 %arg0, 6
41+
%rem = and i32 %arg0, 63
42+
%has_rem = icmp ne i32 %rem, 0
43+
%zext_has_rem = zext i1 %has_rem to i32
44+
%quot_or_rem = or i32 %quot, %zext_has_rem
45+
%res = icmp eq i32 %quot_or_rem, 0
46+
ret i1 %res
47+
}
48+
49+
define i1 @ceil_shift6_ne(i32 %arg0) {
50+
; CHECK-LABEL: define i1 @ceil_shift6_ne(
51+
; CHECK-SAME: i32 [[ARG0:%.*]]) {
52+
; CHECK-NEXT: [[RES:%.*]] = icmp ne i32 [[ARG0]], 0
53+
; CHECK-NEXT: ret i1 [[RES]]
54+
;
55+
%quot = lshr i32 %arg0, 6
56+
%rem = and i32 %arg0, 63
57+
%has_rem = icmp ne i32 %rem, 0
58+
%zext_has_rem = zext i1 %has_rem to i32
59+
%quot_or_rem = or i32 %quot, %zext_has_rem
60+
%res = icmp ne i32 %quot_or_rem, 0
61+
ret i1 %res
62+
}
63+
64+
define i1 @ceil_shift11(i32 %arg0) {
65+
; CHECK-LABEL: define i1 @ceil_shift11(
66+
; CHECK-SAME: i32 [[ARG0:%.*]]) {
67+
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[ARG0]], 0
68+
; CHECK-NEXT: ret i1 [[TMP1]]
69+
;
70+
%quot = lshr i32 %arg0, 11
71+
%rem = and i32 %arg0, 2047
72+
%has_rem = icmp ne i32 %rem, 0
73+
%zext_has_rem = zext i1 %has_rem to i32
74+
%quot_or_rem = or i32 %quot, %zext_has_rem
75+
%res = icmp eq i32 %quot_or_rem, 0
76+
ret i1 %res
77+
}
78+
79+
; Same as @ceil_shift11 but with an inequality compare: shift by 11 with the
; matching 11-bit low mask (2047).
define i1 @ceil_shift11_ne(i32 %arg0) {
; CHECK-LABEL: define i1 @ceil_shift11_ne(
; CHECK-SAME: i32 [[ARG0:%.*]]) {
; CHECK-NEXT: [[RES:%.*]] = icmp ne i32 [[ARG0]], 0
; CHECK-NEXT: ret i1 [[RES]]
;
  %quot = lshr i32 %arg0, 11
  %rem = and i32 %arg0, 2047
  %has_rem = icmp ne i32 %rem, 0
  %zext_has_rem = zext i1 %has_rem to i32
  %quot_or_rem = or i32 %quot, %zext_has_rem
  %res = icmp ne i32 %quot_or_rem, 0
  ret i1 %res
}
93+
94+
define i1 @ceil_shift0(i32 %arg0) {
95+
; CHECK-LABEL: define i1 @ceil_shift0(
96+
; CHECK-SAME: i32 [[ARG0:%.*]]) {
97+
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[ARG0]], 0
98+
; CHECK-NEXT: ret i1 [[TMP1]]
99+
;
100+
%quot = lshr i32 %arg0, 0
101+
%rem = and i32 %arg0, 0
102+
%has_rem = icmp ne i32 %rem, 0
103+
%zext_has_rem = zext i1 %has_rem to i32
104+
%quot_or_rem = or i32 %quot, %zext_has_rem
105+
%res = icmp eq i32 %quot_or_rem, 0
106+
ret i1 %res
107+
}
108+
109+
define i1 @ceil_shift4_comm(i32 %arg0) {
110+
; CHECK-LABEL: define i1 @ceil_shift4_comm(
111+
; CHECK-SAME: i32 [[ARG0:%.*]]) {
112+
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[ARG0]], 0
113+
; CHECK-NEXT: ret i1 [[TMP6]]
114+
;
115+
%quot = lshr i32 %arg0, 4
116+
%rem = and i32 %arg0, 15
117+
%has_rem = icmp ne i32 %rem, 0
118+
%zext_has_rem = zext i1 %has_rem to i32
119+
%quot_or_rem = or i32 %zext_has_rem, %quot
120+
%res = icmp eq i32 %quot_or_rem, 0
121+
ret i1 %res
122+
}
123+
124+
declare void @use(i32)
125+
126+
define i1 @ceil_shift4_used_1(i32 %arg0) {
127+
; CHECK-LABEL: define i1 @ceil_shift4_used_1(
128+
; CHECK-SAME: i32 [[ARG0:%.*]]) {
129+
; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[ARG0]], 4
130+
; CHECK-NEXT: call void @use(i32 [[TMP1]])
131+
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[ARG0]], 0
132+
; CHECK-NEXT: ret i1 [[TMP6]]
133+
;
134+
%quot = lshr i32 %arg0, 4
135+
call void @use(i32 %quot)
136+
%rem = and i32 %arg0, 15
137+
%has_rem = icmp ne i32 %rem, 0
138+
%zext_has_rem = zext i1 %has_rem to i32
139+
%quot_or_rem = or i32 %quot, %zext_has_rem
140+
%res = icmp eq i32 %quot_or_rem, 0
141+
ret i1 %res
142+
}
143+
144+
define i1 @ceil_shift4_used_5(i32 %arg0) {
145+
; CHECK-LABEL: define i1 @ceil_shift4_used_5(
146+
; CHECK-SAME: i32 [[ARG0:%.*]]) {
147+
; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[ARG0]], 4
148+
; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[ARG0]], 15
149+
; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
150+
; CHECK-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
151+
; CHECK-NEXT: [[TMP5:%.*]] = or i32 [[TMP1]], [[TMP4]]
152+
; CHECK-NEXT: call void @use(i32 [[TMP5]])
153+
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[ARG0]], 0
154+
; CHECK-NEXT: ret i1 [[TMP6]]
155+
;
156+
%quot = lshr i32 %arg0, 4
157+
%rem = and i32 %arg0, 15
158+
%has_rem = icmp ne i32 %rem, 0
159+
%zext_has_rem = zext i1 %has_rem to i32
160+
%quot_or_rem = or i32 %quot, %zext_has_rem
161+
call void @use(i32 %quot_or_rem)
162+
%res = icmp eq i32 %quot_or_rem, 0
163+
ret i1 %res
164+
}
165+
166+
define i1 @ceil_shift4_used_add_nuw_nsw(i32 %arg0) {
167+
; CHECK-LABEL: define i1 @ceil_shift4_used_add_nuw_nsw(
168+
; CHECK-SAME: i32 [[ARG0:%.*]]) {
169+
; CHECK-NEXT: [[QUOT:%.*]] = lshr i32 [[ARG0]], 4
170+
; CHECK-NEXT: [[REM:%.*]] = and i32 [[ARG0]], 15
171+
; CHECK-NEXT: [[HAS_REM:%.*]] = icmp ne i32 [[REM]], 0
172+
; CHECK-NEXT: [[ZEXT_HAS_REM:%.*]] = zext i1 [[HAS_REM]] to i32
173+
; CHECK-NEXT: [[CEIL:%.*]] = add nuw nsw i32 [[QUOT]], [[ZEXT_HAS_REM]]
174+
; CHECK-NEXT: call void @use(i32 [[CEIL]])
175+
; CHECK-NEXT: [[RES:%.*]] = icmp eq i32 [[ARG0]], 0
176+
; CHECK-NEXT: ret i1 [[RES]]
177+
;
178+
%quot = lshr i32 %arg0, 4
179+
%rem = and i32 %arg0, 15
180+
%has_rem = icmp ne i32 %rem, 0
181+
%zext_has_rem = zext i1 %has_rem to i32
182+
%ceil = add nuw nsw i32 %quot, %zext_has_rem
183+
call void @use(i32 %ceil)
184+
%res = icmp eq i32 %ceil, 0
185+
ret i1 %res
186+
}
187+
188+
; Vector (splat constant) variant. Note that, despite the "shift4" in the
; name, the body shifts by 16 and masks with 65535 (the low 16 bits).
define <4 x i1> @ceil_shift4_v4i32(<4 x i32> %arg0) {
189+
; CHECK-LABEL: define <4 x i1> @ceil_shift4_v4i32(
190+
; CHECK-SAME: <4 x i32> [[ARG0:%.*]]) {
191+
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <4 x i32> [[ARG0]], zeroinitializer
192+
; CHECK-NEXT: ret <4 x i1> [[TMP1]]
193+
;
194+
%quot = lshr <4 x i32> %arg0, splat (i32 16)
195+
%rem = and <4 x i32> %arg0, splat (i32 65535)
196+
%has_rem = icmp ne <4 x i32> %rem, zeroinitializer
197+
%zext_has_rem = zext <4 x i1> %has_rem to <4 x i32>
198+
%quot_or_rem = or <4 x i32> %quot, %zext_has_rem
199+
%res = icmp eq <4 x i32> %quot_or_rem, zeroinitializer
200+
ret <4 x i1> %res
201+
}
202+
203+
define <8 x i1> @ceil_shift4_v8i16(<8 x i16> %arg0) {
204+
; CHECK-LABEL: define <8 x i1> @ceil_shift4_v8i16(
205+
; CHECK-SAME: <8 x i16> [[ARG0:%.*]]) {
206+
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <8 x i16> [[ARG0]], zeroinitializer
207+
; CHECK-NEXT: ret <8 x i1> [[TMP1]]
208+
;
209+
%quot = lshr <8 x i16> %arg0, splat (i16 4)
210+
%rem = and <8 x i16> %arg0, splat (i16 15)
211+
%has_rem = icmp ne <8 x i16> %rem, zeroinitializer
212+
%zext_has_rem = zext <8 x i1> %has_rem to <8 x i16>
213+
%quot_or_rem = or <8 x i16> %quot, %zext_has_rem
214+
%res = icmp eq <8 x i16> %quot_or_rem, zeroinitializer
215+
ret <8 x i1> %res
216+
}
217+
218+
; negative tests
219+
220+
; Negative test: the mask (31) has five low bits set while the shift amount
; is 4, so the mask width does not equal the shift amount and the sequence is
; expected to be left as is.
define i1 @ceil_shift_not_mask_1(i32 %arg0) {
221+
; CHECK-LABEL: define i1 @ceil_shift_not_mask_1(
222+
; CHECK-SAME: i32 [[ARG0:%.*]]) {
223+
; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[ARG0]], 4
224+
; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[ARG0]], 31
225+
; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
226+
; CHECK-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
227+
; CHECK-NEXT: [[TMP5:%.*]] = or i32 [[TMP1]], [[TMP4]]
228+
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 0
229+
; CHECK-NEXT: ret i1 [[TMP6]]
230+
;
231+
%quot = lshr i32 %arg0, 4
232+
%rem = and i32 %arg0, 31
233+
%has_rem = icmp ne i32 %rem, 0
234+
%zext_has_rem = zext i1 %has_rem to i32
235+
%quot_or_rem = or i32 %quot, %zext_has_rem
236+
%res = icmp eq i32 %quot_or_rem, 0
237+
ret i1 %res
238+
}
239+
240+
; Negative test: the shift amount is 5 but the mask (15) only covers the low
; four bits, so bit 4 of the input reaches neither operand of the `or`
; (e.g. an input of 16 yields 0). Folding to a compare of the input against
; zero would be wrong, and the sequence is expected to be left as is.
define i1 @ceil_shift_not_mask_2(i32 %arg0) {
241+
; CHECK-LABEL: define i1 @ceil_shift_not_mask_2(
242+
; CHECK-SAME: i32 [[ARG0:%.*]]) {
243+
; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[ARG0]], 5
244+
; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[ARG0]], 15
245+
; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
246+
; CHECK-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
247+
; CHECK-NEXT: [[TMP5:%.*]] = or i32 [[TMP1]], [[TMP4]]
248+
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 0
249+
; CHECK-NEXT: ret i1 [[TMP6]]
250+
;
251+
%quot = lshr i32 %arg0, 5
252+
%rem = and i32 %arg0, 15
253+
%has_rem = icmp ne i32 %rem, 0
254+
%zext_has_rem = zext i1 %has_rem to i32
255+
%quot_or_rem = or i32 %quot, %zext_has_rem
256+
%res = icmp eq i32 %quot_or_rem, 0
257+
ret i1 %res
258+
}
259+
260+
; Negative test: the two halves are combined with `and` rather than `or` or
; `add`, so this is not the round-up-shift form. The expected output is
; whatever the other folds produce, not a compare of the input against zero.
define i1 @ceil_shift_not_add_or(i32 %arg0) {
261+
; CHECK-LABEL: define i1 @ceil_shift_not_add_or(
262+
; CHECK-SAME: i32 [[ARG0:%.*]]) {
263+
; CHECK-NEXT: [[REM:%.*]] = and i32 [[ARG0]], 15
264+
; CHECK-NEXT: [[HAS_REM_NOT:%.*]] = icmp eq i32 [[REM]], 0
265+
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARG0]], 32
266+
; CHECK-NEXT: [[RES1:%.*]] = icmp eq i32 [[TMP1]], 0
267+
; CHECK-NEXT: [[RES:%.*]] = or i1 [[HAS_REM_NOT]], [[RES1]]
268+
; CHECK-NEXT: ret i1 [[RES]]
269+
;
270+
%quot = lshr i32 %arg0, 5
271+
%rem = and i32 %arg0, 15
272+
%has_rem = icmp ne i32 %rem, 0
273+
%zext_has_rem = zext i1 %has_rem to i32
274+
%quot_and_rem = and i32 %quot, %zext_has_rem
275+
%res = icmp eq i32 %quot_and_rem, 0
276+
ret i1 %res
277+
}
278+
279+
; Known-bits style test: if.then is only reached when %x is non-zero, and the
; round-up shift of a non-zero value is itself non-zero. The expected output
; therefore returns the add directly, with the umax against 1 removed.
define i32 @ceil_shift_should_infer_ge_zero(i32 %x) {
280+
; CHECK-LABEL: define i32 @ceil_shift_should_infer_ge_zero(
281+
; CHECK-SAME: i32 [[X:%.*]]) {
282+
; CHECK-NEXT: [[COND_NOT:%.*]] = icmp eq i32 [[X]], 0
283+
; CHECK-NEXT: br i1 [[COND_NOT]], label %[[IF_ELSE:.*]], label %[[IF_THEN:.*]]
284+
; CHECK: [[IF_THEN]]:
285+
; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[X]], 20
286+
; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[X]], 1048575
287+
; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
288+
; CHECK-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
289+
; CHECK-NEXT: [[TMP5:%.*]] = add nuw nsw i32 [[TMP1]], [[TMP4]]
290+
; CHECK-NEXT: ret i32 [[TMP5]]
291+
; CHECK: [[IF_ELSE]]:
292+
; CHECK-NEXT: ret i32 0
293+
;
294+
%cond = icmp ne i32 %x, 0
295+
br i1 %cond, label %if.then, label %if.else
296+
297+
if.then:
298+
%quot = lshr i32 %x, 20
299+
%rem = and i32 %x, 1048575
300+
%has_rem = icmp ne i32 %rem, 0
301+
%zext_has_rem = zext i1 %has_rem to i32
302+
%ceil = add nuw nsw i32 %quot, %zext_has_rem
303+
%max = call i32 @llvm.umax.i32(i32 %ceil, i32 1)
304+
ret i32 %max
305+
306+
if.else:
307+
ret i32 0
308+
}

0 commit comments

Comments
 (0)