Skip to content

Commit 411cf42

Browse files
authored
Backport "eq of parts" fold with logical ops (#113)
* [InstCombine] Add tests for "eq of parts" with logical op (NFC)

  We currently only handle this with a bitwise and/or instruction, but not a logical.

  (cherry picked from commit be4b836)

* [InstCombine] Perform "eq of parts" fold with logical ops

  The pattern matched here is too complex for the general logical and/or to bitwise and/or conversion to trigger. However, the fold is poison-safe, so match it with a select root as well:

  https://alive2.llvm.org/ce/z/vNzzSg
  https://alive2.llvm.org/ce/z/Beyumt

  (cherry picked from commit fafe5a6)
1 parent a93e47b commit 411cf42

File tree

4 files changed

+63
-6
lines changed

4 files changed

+63
-6
lines changed

llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp

+4-4
Original file line numberDiff line numberDiff line change
@@ -1113,8 +1113,8 @@ static Value *extractIntPart(const IntPart &P, IRBuilderBase &Builder) {
11131113
/// (icmp eq X0, Y0) & (icmp eq X1, Y1) -> icmp eq X01, Y01
11141114
/// (icmp ne X0, Y0) | (icmp ne X1, Y1) -> icmp ne X01, Y01
11151115
/// where X0, X1 and Y0, Y1 are adjacent parts extracted from an integer.
1116-
static Value *foldEqOfParts(ICmpInst *Cmp0, ICmpInst *Cmp1, bool IsAnd,
1117-
InstCombiner::BuilderTy &Builder) {
1116+
Value *InstCombinerImpl::foldEqOfParts(ICmpInst *Cmp0, ICmpInst *Cmp1,
1117+
bool IsAnd) {
11181118
if (!Cmp0->hasOneUse() || !Cmp1->hasOneUse())
11191119
return nullptr;
11201120

@@ -1262,7 +1262,7 @@ Value *InstCombinerImpl::foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS,
12621262
foldUnsignedUnderflowCheck(RHS, LHS, /*IsAnd=*/true, Q, Builder))
12631263
return X;
12641264

1265-
if (Value *X = foldEqOfParts(LHS, RHS, /*IsAnd=*/true, Builder))
1265+
if (Value *X = foldEqOfParts(LHS, RHS, /*IsAnd=*/true))
12661266
return X;
12671267

12681268
// This only handles icmp of constants: (icmp1 A, C1) & (icmp2 B, C2).
@@ -2496,7 +2496,7 @@ Value *InstCombinerImpl::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
24962496
foldUnsignedUnderflowCheck(RHS, LHS, /*IsAnd=*/false, Q, Builder))
24972497
return X;
24982498

2499-
if (Value *X = foldEqOfParts(LHS, RHS, /*IsAnd=*/false, Builder))
2499+
if (Value *X = foldEqOfParts(LHS, RHS, /*IsAnd=*/false))
25002500
return X;
25012501

25022502
// (icmp ne A, 0) | (icmp ne B, 0) --> (icmp ne (A|B), 0)

llvm/lib/Transforms/InstCombine/InstCombineInternal.h

+2
Original file line numberDiff line numberDiff line change
@@ -347,6 +347,8 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
347347
Value *foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, BinaryOperator &Or);
348348
Value *foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS, BinaryOperator &Xor);
349349

350+
Value *foldEqOfParts(ICmpInst *Cmp0, ICmpInst *Cmp1, bool IsAnd);
351+
350352
/// Optimize (fcmp)&(fcmp) or (fcmp)|(fcmp).
351353
/// NOTE: Unlike most of instcombine, this returns a Value which should
352354
/// already be inserted into the function.

llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp

+7-2
Original file line numberDiff line numberDiff line change
@@ -2754,11 +2754,16 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
27542754
/* IsAnd */ IsAnd))
27552755
return I;
27562756

2757-
if (auto *ICmp0 = dyn_cast<ICmpInst>(CondVal))
2758-
if (auto *ICmp1 = dyn_cast<ICmpInst>(Op1))
2757+
if (auto *ICmp0 = dyn_cast<ICmpInst>(CondVal)) {
2758+
if (auto *ICmp1 = dyn_cast<ICmpInst>(Op1)) {
27592759
if (auto *V = foldAndOrOfICmpsOfAndWithPow2(ICmp0, ICmp1, &SI, IsAnd,
27602760
/* IsLogical */ true))
27612761
return replaceInstUsesWith(SI, V);
2762+
2763+
if (auto *V = foldEqOfParts(ICmp0, ICmp1, IsAnd))
2764+
return replaceInstUsesWith(SI, V);
2765+
}
2766+
}
27622767
}
27632768

27642769
// select (select a, true, b), c, false -> select a, c, false

llvm/test/Transforms/InstCombine/eq-of-parts.ll

+50
Original file line numberDiff line numberDiff line change
@@ -352,6 +352,31 @@ define i1 @eq_21_extra_use_eq2(i32 %x, i32 %y) {
352352
ret i1 %c.210
353353
}
354354

355+
; Logical and instead of bitwise and.
356+
357+
define i1 @eq_21_logical(i32 %x, i32 %y) {
358+
; CHECK-LABEL: @eq_21_logical(
359+
; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[X:%.*]], 8
360+
; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
361+
; CHECK-NEXT: [[TMP3:%.*]] = lshr i32 [[Y:%.*]], 8
362+
; CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
363+
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i16 [[TMP2]], [[TMP4]]
364+
; CHECK-NEXT: ret i1 [[TMP5]]
365+
;
366+
%x.321 = lshr i32 %x, 8
367+
%x.1 = trunc i32 %x.321 to i8
368+
%x.32 = lshr i32 %x, 16
369+
%x.2 = trunc i32 %x.32 to i8
370+
%y.321 = lshr i32 %y, 8
371+
%y.1 = trunc i32 %y.321 to i8
372+
%y.32 = lshr i32 %y, 16
373+
%y.2 = trunc i32 %y.32 to i8
374+
%c.1 = icmp eq i8 %x.1, %y.1
375+
%c.2 = icmp eq i8 %x.2, %y.2
376+
%c.210 = select i1 %c.2, i1 %c.1, i1 false
377+
ret i1 %c.210
378+
}
379+
355380
; Negative tests.
356381

357382
define i1 @eq_21_wrong_op1(i32 %x, i32 %y, i32 %z) {
@@ -992,6 +1017,31 @@ define i1 @ne_21_extra_use_ne2(i32 %x, i32 %y) {
9921017
ret i1 %c.210
9931018
}
9941019

1020+
; Logical or instead of bitwise or.
1021+
1022+
define i1 @ne_21_logical(i32 %x, i32 %y) {
1023+
; CHECK-LABEL: @ne_21_logical(
1024+
; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[X:%.*]], 8
1025+
; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
1026+
; CHECK-NEXT: [[TMP3:%.*]] = lshr i32 [[Y:%.*]], 8
1027+
; CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
1028+
; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i16 [[TMP2]], [[TMP4]]
1029+
; CHECK-NEXT: ret i1 [[TMP5]]
1030+
;
1031+
%x.321 = lshr i32 %x, 8
1032+
%x.1 = trunc i32 %x.321 to i8
1033+
%x.32 = lshr i32 %x, 16
1034+
%x.2 = trunc i32 %x.32 to i8
1035+
%y.321 = lshr i32 %y, 8
1036+
%y.1 = trunc i32 %y.321 to i8
1037+
%y.32 = lshr i32 %y, 16
1038+
%y.2 = trunc i32 %y.32 to i8
1039+
%c.1 = icmp ne i8 %x.1, %y.1
1040+
%c.2 = icmp ne i8 %x.2, %y.2
1041+
%c.210 = select i1 %c.2, i1 true, i1 %c.1
1042+
ret i1 %c.210
1043+
}
1044+
9951045
; Negative tests.
9961046

9971047
define i1 @ne_21_wrong_op1(i32 %x, i32 %y, i32 %z) {

0 commit comments

Comments
 (0)