Skip to content

Commit fafe5a6

Browse files
committed
[InstCombine] Perform "eq of parts" fold with logical ops
The pattern matched here is too complex for the general conversion of logical and/or into bitwise and/or to trigger. However, the fold is poison-safe, so match it with a select root as well (see https://alive2.llvm.org/ce/z/vNzzSg and https://alive2.llvm.org/ce/z/Beyumt).
1 parent be4b836 commit fafe5a6

File tree

4 files changed

+25
-30
lines changed

4 files changed

+25
-30
lines changed

llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp

+4-4
Original file line numberDiff line numberDiff line change
@@ -1113,8 +1113,8 @@ static Value *extractIntPart(const IntPart &P, IRBuilderBase &Builder) {
11131113
/// (icmp eq X0, Y0) & (icmp eq X1, Y1) -> icmp eq X01, Y01
11141114
/// (icmp ne X0, Y0) | (icmp ne X1, Y1) -> icmp ne X01, Y01
11151115
/// where X0, X1 and Y0, Y1 are adjacent parts extracted from an integer.
1116-
static Value *foldEqOfParts(ICmpInst *Cmp0, ICmpInst *Cmp1, bool IsAnd,
1117-
InstCombiner::BuilderTy &Builder) {
1116+
Value *InstCombinerImpl::foldEqOfParts(ICmpInst *Cmp0, ICmpInst *Cmp1,
1117+
bool IsAnd) {
11181118
if (!Cmp0->hasOneUse() || !Cmp1->hasOneUse())
11191119
return nullptr;
11201120

@@ -1262,7 +1262,7 @@ Value *InstCombinerImpl::foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS,
12621262
foldUnsignedUnderflowCheck(RHS, LHS, /*IsAnd=*/true, Q, Builder))
12631263
return X;
12641264

1265-
if (Value *X = foldEqOfParts(LHS, RHS, /*IsAnd=*/true, Builder))
1265+
if (Value *X = foldEqOfParts(LHS, RHS, /*IsAnd=*/true))
12661266
return X;
12671267

12681268
// This only handles icmp of constants: (icmp1 A, C1) & (icmp2 B, C2).
@@ -2496,7 +2496,7 @@ Value *InstCombinerImpl::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
24962496
foldUnsignedUnderflowCheck(RHS, LHS, /*IsAnd=*/false, Q, Builder))
24972497
return X;
24982498

2499-
if (Value *X = foldEqOfParts(LHS, RHS, /*IsAnd=*/false, Builder))
2499+
if (Value *X = foldEqOfParts(LHS, RHS, /*IsAnd=*/false))
25002500
return X;
25012501

25022502
// (icmp ne A, 0) | (icmp ne B, 0) --> (icmp ne (A|B), 0)

llvm/lib/Transforms/InstCombine/InstCombineInternal.h

+2
Original file line numberDiff line numberDiff line change
@@ -348,6 +348,8 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
348348
Value *foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, BinaryOperator &Or);
349349
Value *foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS, BinaryOperator &Xor);
350350

351+
Value *foldEqOfParts(ICmpInst *Cmp0, ICmpInst *Cmp1, bool IsAnd);
352+
351353
/// Optimize (fcmp)&(fcmp) or (fcmp)|(fcmp).
352354
/// NOTE: Unlike most of instcombine, this returns a Value which should
353355
/// already be inserted into the function.

llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp

+7-2
Original file line numberDiff line numberDiff line change
@@ -2755,11 +2755,16 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
27552755
/* IsAnd */ IsAnd))
27562756
return I;
27572757

2758-
if (auto *ICmp0 = dyn_cast<ICmpInst>(CondVal))
2759-
if (auto *ICmp1 = dyn_cast<ICmpInst>(Op1))
2758+
if (auto *ICmp0 = dyn_cast<ICmpInst>(CondVal)) {
2759+
if (auto *ICmp1 = dyn_cast<ICmpInst>(Op1)) {
27602760
if (auto *V = foldAndOrOfICmpsOfAndWithPow2(ICmp0, ICmp1, &SI, IsAnd,
27612761
/* IsLogical */ true))
27622762
return replaceInstUsesWith(SI, V);
2763+
2764+
if (auto *V = foldEqOfParts(ICmp0, ICmp1, IsAnd))
2765+
return replaceInstUsesWith(SI, V);
2766+
}
2767+
}
27632768
}
27642769

27652770
// select (select a, true, b), c, false -> select a, c, false

llvm/test/Transforms/InstCombine/eq-of-parts.ll

+12-24
Original file line numberDiff line numberDiff line change
@@ -356,18 +356,12 @@ define i1 @eq_21_extra_use_eq2(i32 %x, i32 %y) {
356356

357357
define i1 @eq_21_logical(i32 %x, i32 %y) {
358358
; CHECK-LABEL: @eq_21_logical(
359-
; CHECK-NEXT: [[X_321:%.*]] = lshr i32 [[X:%.*]], 8
360-
; CHECK-NEXT: [[X_1:%.*]] = trunc i32 [[X_321]] to i8
361-
; CHECK-NEXT: [[X_32:%.*]] = lshr i32 [[X]], 16
362-
; CHECK-NEXT: [[X_2:%.*]] = trunc i32 [[X_32]] to i8
363-
; CHECK-NEXT: [[Y_321:%.*]] = lshr i32 [[Y:%.*]], 8
364-
; CHECK-NEXT: [[Y_1:%.*]] = trunc i32 [[Y_321]] to i8
365-
; CHECK-NEXT: [[Y_32:%.*]] = lshr i32 [[Y]], 16
366-
; CHECK-NEXT: [[Y_2:%.*]] = trunc i32 [[Y_32]] to i8
367-
; CHECK-NEXT: [[C_1:%.*]] = icmp eq i8 [[X_1]], [[Y_1]]
368-
; CHECK-NEXT: [[C_2:%.*]] = icmp eq i8 [[X_2]], [[Y_2]]
369-
; CHECK-NEXT: [[C_210:%.*]] = select i1 [[C_2]], i1 [[C_1]], i1 false
370-
; CHECK-NEXT: ret i1 [[C_210]]
359+
; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[X:%.*]], 8
360+
; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
361+
; CHECK-NEXT: [[TMP3:%.*]] = lshr i32 [[Y:%.*]], 8
362+
; CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
363+
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i16 [[TMP2]], [[TMP4]]
364+
; CHECK-NEXT: ret i1 [[TMP5]]
371365
;
372366
%x.321 = lshr i32 %x, 8
373367
%x.1 = trunc i32 %x.321 to i8
@@ -1027,18 +1021,12 @@ define i1 @ne_21_extra_use_ne2(i32 %x, i32 %y) {
10271021

10281022
define i1 @ne_21_logical(i32 %x, i32 %y) {
10291023
; CHECK-LABEL: @ne_21_logical(
1030-
; CHECK-NEXT: [[X_321:%.*]] = lshr i32 [[X:%.*]], 8
1031-
; CHECK-NEXT: [[X_1:%.*]] = trunc i32 [[X_321]] to i8
1032-
; CHECK-NEXT: [[X_32:%.*]] = lshr i32 [[X]], 16
1033-
; CHECK-NEXT: [[X_2:%.*]] = trunc i32 [[X_32]] to i8
1034-
; CHECK-NEXT: [[Y_321:%.*]] = lshr i32 [[Y:%.*]], 8
1035-
; CHECK-NEXT: [[Y_1:%.*]] = trunc i32 [[Y_321]] to i8
1036-
; CHECK-NEXT: [[Y_32:%.*]] = lshr i32 [[Y]], 16
1037-
; CHECK-NEXT: [[Y_2:%.*]] = trunc i32 [[Y_32]] to i8
1038-
; CHECK-NEXT: [[C_1:%.*]] = icmp ne i8 [[X_1]], [[Y_1]]
1039-
; CHECK-NEXT: [[C_2:%.*]] = icmp ne i8 [[X_2]], [[Y_2]]
1040-
; CHECK-NEXT: [[C_210:%.*]] = select i1 [[C_2]], i1 true, i1 [[C_1]]
1041-
; CHECK-NEXT: ret i1 [[C_210]]
1024+
; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[X:%.*]], 8
1025+
; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
1026+
; CHECK-NEXT: [[TMP3:%.*]] = lshr i32 [[Y:%.*]], 8
1027+
; CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
1028+
; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i16 [[TMP2]], [[TMP4]]
1029+
; CHECK-NEXT: ret i1 [[TMP5]]
10421030
;
10431031
%x.321 = lshr i32 %x, 8
10441032
%x.1 = trunc i32 %x.321 to i8

0 commit comments

Comments
 (0)