Skip to content

Commit f400daa

Browse files
committed
[InstCombine] limit zext-of-icmp folds to bit-hacks
In the changed tests, we avoid creating extra instructions, and there are no obvious regressions in IR tests at least. Codegen should be able to create the shift+mask form if that is profitable. This is a more general fix for issue #59897 than 0eedc9e.
1 parent a4f3b23 commit f400daa

File tree

2 files changed

+9
-11
lines changed

2 files changed

+9
-11
lines changed

llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1024,7 +1024,9 @@ Instruction *InstCombinerImpl::transformZExtICmp(ICmpInst *Cmp,
10241024
// zext (X == 0) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
10251025
// zext (X != 0) to i32 --> X iff X has only the low bit set.
10261026
// zext (X != 0) to i32 --> X>>1 iff X has only the 2nd bit set.
1027-
if (Op1CV->isZero() && Cmp->isEquality()) {
1027+
if (Op1CV->isZero() && Cmp->isEquality() &&
1028+
(Cmp->getOperand(0)->getType() == Zext.getType() ||
1029+
Cmp->getPredicate() == ICmpInst::ICMP_NE)) {
10281030
// If Op1C some other power of two, convert:
10291031
KnownBits Known = computeKnownBits(Cmp->getOperand(0), 0, &Zext);
10301032

llvm/test/Transforms/InstCombine/zext.ll

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -645,10 +645,9 @@ define i64 @and_trunc_extra_use1_wider_src(i65 %x, i32 %y) {
645645

646646
define i16 @zext_icmp_eq0_pow2(i32 %x) {
647647
; CHECK-LABEL: @zext_icmp_eq0_pow2(
648-
; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[X:%.*]] to i16
649-
; CHECK-NEXT: [[TMP2:%.*]] = lshr i16 [[TMP1]], 2
650-
; CHECK-NEXT: [[TMP3:%.*]] = and i16 [[TMP2]], 1
651-
; CHECK-NEXT: [[Z:%.*]] = xor i16 [[TMP3]], 1
648+
; CHECK-NEXT: [[M:%.*]] = and i32 [[X:%.*]], 4
649+
; CHECK-NEXT: [[I:%.*]] = icmp eq i32 [[M]], 0
650+
; CHECK-NEXT: [[Z:%.*]] = zext i1 [[I]] to i16
652651
; CHECK-NEXT: ret i16 [[Z]]
653652
;
654653
%m = and i32 %x, 4
@@ -661,9 +660,8 @@ define i16 @zext_icmp_eq0_pow2_use1(i32 %x) {
661660
; CHECK-LABEL: @zext_icmp_eq0_pow2_use1(
662661
; CHECK-NEXT: [[M:%.*]] = and i32 [[X:%.*]], 4
663662
; CHECK-NEXT: call void @use32(i32 [[M]])
664-
; CHECK-NEXT: [[M_LOBIT:%.*]] = lshr exact i32 [[M]], 2
665-
; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[M_LOBIT]] to i16
666-
; CHECK-NEXT: [[Z:%.*]] = xor i16 [[TMP1]], 1
663+
; CHECK-NEXT: [[I:%.*]] = icmp eq i32 [[M]], 0
664+
; CHECK-NEXT: [[Z:%.*]] = zext i1 [[I]] to i16
667665
; CHECK-NEXT: ret i16 [[Z]]
668666
;
669667
%m = and i32 %x, 4
@@ -678,9 +676,7 @@ define i16 @zext_icmp_eq0_pow2_use2(i32 %x) {
678676
; CHECK-NEXT: [[M:%.*]] = and i32 [[X:%.*]], 4
679677
; CHECK-NEXT: [[I:%.*]] = icmp eq i32 [[M]], 0
680678
; CHECK-NEXT: call void @use1(i1 [[I]])
681-
; CHECK-NEXT: [[M_LOBIT:%.*]] = lshr exact i32 [[M]], 2
682-
; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[M_LOBIT]] to i16
683-
; CHECK-NEXT: [[Z:%.*]] = xor i16 [[TMP1]], 1
679+
; CHECK-NEXT: [[Z:%.*]] = zext i1 [[I]] to i16
684680
; CHECK-NEXT: ret i16 [[Z]]
685681
;
686682
%m = and i32 %x, 4

0 commit comments

Comments
 (0)