-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[CVP]: Fold icmp eq X, C
to trunc X to i1
if C=2k+1 and X in [2k, 2k+1]
#83829
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
Thank you for submitting a Pull Request (PR) to the LLVM Project! This PR will be automatically labeled and the relevant teams will be notified. If you wish to, you can add reviewers by using the "Reviewers" section on this page. If this is not working for you, it is probably because you do not have write permissions for the repository. If you have received no comments on your PR for a week, you can request a review by pinging the PR with a comment. If you have further questions, they may be answered by the LLVM GitHub User Guide. You can also ask questions in a comment on this PR, on the LLVM Discord or on the forums. |
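@llvm/pr-subscribers-llvm-transforms Author: Monad (YanWQ-monad) Changes: For an `i8 %x` known to be in [0, 1], we can fold `icmp eq i8 %x, 1` to `trunc i8 %x to i1`.
Generally, for `icmp eq X, C` if C = 2k+1 and X is in [2k, 2k+1], or for `icmp ne X, C` if C = 2k and X is in [2k, 2k+1],
we can fold it to `trunc X to i1`. With this fold, RISC-V can eliminate two instructions, while ARM can eliminate one instruction on the hot path. Full diff: https://github.com/llvm/llvm-project/pull/83829.diff 3 Files Affected: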
diff --git a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index 490cb7e528eb6f..73be5f0b016603 100644
--- a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -332,6 +332,39 @@ static bool constantFoldCmp(CmpInst *Cmp, LazyValueInfo *LVI) {
return true;
}
+/// Fold `icmp eq X, C` (C odd) or `icmp ne X, C` (C even) to `trunc X to i1`
+/// when LVI bounds X to a two-value range containing C: the low bit of X then
+/// decides the result. Returns true if Cmp was replaced and erased.
+static bool processEqualityICmp(CmpInst *Cmp, LazyValueInfo *LVI) {
+ if (Cmp->getType()->isVectorTy() ||
+ !Cmp->getOperand(0)->getType()->isIntegerTy() || !Cmp->isEquality())
+ return false;
+
+ Value *Op0 = Cmp->getOperand(0);
+ Value *Op1 = Cmp->getOperand(1);
+ ConstantInt *CI = dyn_cast<ConstantInt>(Op1);
+ if (!CI)
+ return false;
+
+ ConstantRange Range =
+ LVI->getConstantRangeAtUse(Cmp->getOperandUse(0), /*UndefAllowed*/ true);
+ APInt RangeSize = Range.getUpper() - Range.getLower();
+ APInt Value = CI->getValue();
+ if (RangeSize != 2 || !Range.contains(Value))
+ return false;
+
+ bool ShouldBeOdd = Cmp->getPredicate() == ICmpInst::Predicate::ICMP_EQ;
+ if ((CI->getValue() & 1) == ShouldBeOdd) {
+ IRBuilder<> B{Cmp};
+ auto *Trunc = B.CreateTruncOrBitCast(Op0, Cmp->getType());
+ Cmp->replaceAllUsesWith(Trunc);
+ Cmp->eraseFromParent();
+ return true;
+ }
+
+ return false;
+}
+
static bool processCmp(CmpInst *Cmp, LazyValueInfo *LVI) {
if (constantFoldCmp(Cmp, LVI))
return true;
@@ -340,6 +373,9 @@ static bool processCmp(CmpInst *Cmp, LazyValueInfo *LVI) {
if (processICmp(ICmp, LVI))
return true;
+ if (processEqualityICmp(Cmp, LVI))
+ return true;
+
return false;
}
diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/icmp.ll b/llvm/test/Transforms/CorrelatedValuePropagation/icmp.ll
index 101820a4c65f23..ccfbc274d570ce 100644
--- a/llvm/test/Transforms/CorrelatedValuePropagation/icmp.ll
+++ b/llvm/test/Transforms/CorrelatedValuePropagation/icmp.ll
@@ -594,10 +594,10 @@ define void @test_cmp_phi(i8 %a) {
; CHECK-NEXT: br i1 [[C0]], label [[LOOP:%.*]], label [[EXIT:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[P:%.*]] = phi i8 [ [[A]], [[ENTRY:%.*]] ], [ [[B:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[C1:%.*]] = icmp ne i8 [[P]], 0
+; CHECK-NEXT: [[TMP0:%.*]] = trunc i8 [[P]] to i1
; CHECK-NEXT: [[C4:%.*]] = call i1 @get_bool()
; CHECK-NEXT: [[B]] = zext i1 [[C4]] to i8
-; CHECK-NEXT: br i1 [[C1]], label [[LOOP]], label [[EXIT]]
+; CHECK-NEXT: br i1 [[TMP0]], label [[LOOP]], label [[EXIT]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
@@ -1455,3 +1455,63 @@ entry:
%select = select i1 %cmp1, i1 %cmp2, i1 false
ret i1 %select
}
+
+define i1 @test_icmp_eq_on_valid_bool_range(i8 %x) {
+; CHECK-LABEL: @test_icmp_eq_on_valid_bool_range(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i8 [[X:%.*]], 2
+; CHECK-NEXT: br i1 [[TMP1]], label [[BB1:%.*]], label [[BB2:%.*]]
+; CHECK: bb2:
+; CHECK-NEXT: [[TMP2:%.*]] = tail call i1 @get_bool()
+; CHECK-NEXT: br label [[BB3:%.*]]
+; CHECK: bb1:
+; CHECK-NEXT: [[TMP3:%.*]] = trunc i8 [[X]] to i1
+; CHECK-NEXT: br label [[BB3]]
+; CHECK: bb3:
+; CHECK-NEXT: [[TMP4:%.*]] = phi i1 [ [[TMP3]], [[BB1]] ], [ [[TMP2]], [[BB2]] ]
+; CHECK-NEXT: ret i1 [[TMP4]]
+;
+ %1 = icmp ult i8 %x, 2
+ br i1 %1, label %bb1, label %bb2
+
+bb2:
+ %2 = tail call i1 @get_bool()
+ br label %bb3
+
+bb1:
+ %3 = icmp eq i8 %x, 1
+ br label %bb3
+
+bb3:
+ %4 = phi i1 [ %3, %bb1 ], [ %2, %bb2 ]
+ ret i1 %4
+}
+
+define i1 @test_icmp_ne_on_valid_bool_range(i8 %x) {
+; CHECK-LABEL: @test_icmp_ne_on_valid_bool_range(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i8 [[X:%.*]], 2
+; CHECK-NEXT: br i1 [[TMP1]], label [[BB1:%.*]], label [[BB2:%.*]]
+; CHECK: bb2:
+; CHECK-NEXT: [[TMP2:%.*]] = tail call i1 @get_bool()
+; CHECK-NEXT: br label [[BB3:%.*]]
+; CHECK: bb1:
+; CHECK-NEXT: [[TMP3:%.*]] = trunc i8 [[X]] to i1
+; CHECK-NEXT: br label [[BB3]]
+; CHECK: bb3:
+; CHECK-NEXT: [[TMP4:%.*]] = phi i1 [ [[TMP3]], [[BB1]] ], [ [[TMP2]], [[BB2]] ]
+; CHECK-NEXT: ret i1 [[TMP4]]
+;
+ %1 = icmp ult i8 %x, 2
+ br i1 %1, label %bb1, label %bb2
+
+bb2:
+ %2 = tail call i1 @get_bool()
+ br label %bb3
+
+bb1:
+ %3 = icmp ne i8 %x, 0
+ br label %bb3
+
+bb3:
+ %4 = phi i1 [ %3, %bb1 ], [ %2, %bb2 ]
+ ret i1 %4
+}
diff --git a/llvm/test/Transforms/JumpThreading/pr33917.ll b/llvm/test/Transforms/JumpThreading/pr33917.ll
index 7d21a4e1781519..20380c769bf173 100644
--- a/llvm/test/Transforms/JumpThreading/pr33917.ll
+++ b/llvm/test/Transforms/JumpThreading/pr33917.ll
@@ -15,16 +15,16 @@ define void @patatino() personality ptr @rust_eh_personality {
; CHECK-LABEL: @patatino(
; CHECK-NEXT: bb9:
; CHECK-NEXT: [[T9:%.*]] = invoke ptr @foo()
-; CHECK-NEXT: to label [[GOOD:%.*]] unwind label [[BAD:%.*]]
+; CHECK-NEXT: to label [[GOOD:%.*]] unwind label [[BAD:%.*]]
; CHECK: bad:
; CHECK-NEXT: [[T10:%.*]] = landingpad { ptr, i32 }
-; CHECK-NEXT: cleanup
+; CHECK-NEXT: cleanup
; CHECK-NEXT: resume { ptr, i32 } [[T10]]
; CHECK: good:
; CHECK-NEXT: [[T11:%.*]] = icmp ne ptr [[T9]], null
; CHECK-NEXT: [[T12:%.*]] = zext i1 [[T11]] to i64
-; CHECK-NEXT: [[COND:%.*]] = icmp eq i64 [[T12]], 1
-; CHECK-NEXT: br i1 [[COND]], label [[IF_TRUE:%.*]], label [[DONE:%.*]]
+; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[T12]] to i1
+; CHECK-NEXT: br i1 [[TMP0]], label [[IF_TRUE:%.*]], label [[DONE:%.*]]
; CHECK: if_true:
; CHECK-NEXT: call void @llvm.assume(i1 [[T11]])
; CHECK-NEXT: br label [[DONE]]
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
Could you please provide the godbolt link? |
Sure, the link is https://godbolt.org/z/xMTbP94Yn. |
FYI, |
For most of the regression cases, they probably follow this form: - %cmp18 = icmp eq i32 %encoding, 5
- br i1 %cmp18, label %cond.true19, label %cond.false22
+ %17 = and i32 %encoding, 1
+ %.not = icmp eq i32 %17, 0
+ br i1 %.not, label %cond.false22, label %cond.true19 It seems that it's not necessarily a regression. In terms of this IR, define i1 @src(i32 %x) {
%y = icmp ne i32 %x, 0
ret i1 %y
}
define i1 @tgt(i32 %x) {
%1 = and i32 %x, 1
%y = icmp ne i32 %1, 0
ret i1 %y
} On x86 and RISC-V, it generates the same number of instructions. On AArch64, the number of instructions it generates is reduced by one. For the other regressions, I'm still trying to resolve them. |
@YanWQ-monad Looking at your godbolt, doesn't that still have the useless |
Yes, it seems that ARM backend generates an Putting that aside, the canonicalization of |
Could you create an issue and link back to rust-lang/rust#121673? |
Sure, is it done like this: #84605? |
Relevant fold seems to be: llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp Lines 737 to 745 in a84e66a
Quite the unusual choice, probably worth trying to remove. |
Remove the canonicalization of `trunc` to `i1` according to the suggestion of #83829 (comment) https://github.com/llvm/llvm-project/blob/a84e66a92d7b97f68aa3ae7d2c5839f3fb0d291d/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp#L737-L745 Alive2: https://alive2.llvm.org/ce/z/cacYVA
rebased, and add |
ping? How about only fold those that can be folded to |
For an `i8 %x` known to be in [0, 1], we can fold
`icmp eq i8 %x, 1`
to `trunc i8 %x to i1`,
since x is in [0, 1]. The alive2 proof is https://alive2.llvm.org/ce/z/sGig3-.
Generally, for
- `icmp eq X, C` if C = 2k+1 and X is in [2k, 2k+1]
- `icmp ne X, C` if C = 2k and X is in [2k, 2k+1]
we can fold it to `trunc X to i1`.
With this fold, RISC-V can eliminate two instructions, while ARM can eliminate one instruction on the hot path. Link: https://godbolt.org/z/xMTbP94Yn.
The real-world case: rust-lang/rust#121673