
[AArch64] - Fold and and cmp into tst #110347


Merged: 3 commits on Oct 3, 2024
26 changes: 26 additions & 0 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -4301,6 +4301,29 @@ static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) {
                     Op.getOperand(1));
}

// Converts SETCC (AND X Y) Z ULT -> SETCC (AND X (Y & ~(Z - 1))) 0 EQ when Z
// is a power of 2. This is then lowered to ANDS X (Y & ~(Z - 1)) instead of
// SUBS (AND X Y) Z, which EmitComparison can select as a single TST.
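// For example, with Y = 21 (0b10101) and Z = 16: (X & 21) ult 16 becomes
// (X & (21 & ~15)) == 0, i.e. (X & 16) == 0, which lowers to a single tst of
// bit 4 (see lt32_u16_and_21 in the new test file).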
static void simplifySetCCIntoEq(ISD::CondCode &CC, SDValue &LHS, SDValue &RHS,
                                SelectionDAG &DAG, const SDLoc dl) {
  if (CC == ISD::SETULT && LHS.getOpcode() == ISD::AND && LHS->hasOneUse()) {
    ConstantSDNode *LHSConstOp = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
    ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
    if (LHSConstOp && RHSConst) {
      uint64_t LHSConstValue = LHSConstOp->getZExtValue();
[Inline review, Collaborator]: LLVM capitalizes variable names, so maybe use
LHSConstValue and RHSConstValue. Same for NewMaskValue below.
[Author]: Apologies, fixed it.

      uint64_t RHSConstant = RHSConst->getZExtValue();
      if (isPowerOf2_64(RHSConstant)) {
        uint64_t NewMaskValue = LHSConstValue & ~(RHSConstant - 1);
        LHS =
            DAG.getNode(ISD::AND, dl, LHS.getValueType(), LHS.getOperand(0),
                        DAG.getConstant(NewMaskValue, dl, LHS.getValueType()));
        RHS = DAG.getConstant(0, dl, RHS.getValueType());
        CC = ISD::SETEQ;
      }
    }
  }
}

SDValue AArch64TargetLowering::LowerFP_EXTEND(SDValue Op,
                                              SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
@@ -10589,6 +10612,9 @@ SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
}

  if (LHS.getValueType().isInteger()) {

    simplifySetCCIntoEq(CC, LHS, RHS, DAG, dl);

    SDValue CCVal;
    SDValue Cmp = getAArch64Cmp(
        LHS, RHS, ISD::getSetCCInverse(CC, LHS.getValueType()), CCVal, DAG, dl);
216 changes: 216 additions & 0 deletions llvm/test/CodeGen/AArch64/icmp-ult-eq-fold.ll
@@ -0,0 +1,216 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=aarch64 | FileCheck %s


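; For i8, the operand is legalized with an implicit and against 0xff, so an
; ult compare with a power of 2 folds to a tst of the remaining high bits.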
define i1 @lt8_u8(i8 %0) {
; CHECK-LABEL: lt8_u8:
; CHECK: // %bb.0:
; CHECK-NEXT: tst w0, #0xf8
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%2 = icmp ult i8 %0, 8
ret i1 %2
}

define i1 @lt32_u8(i8 %0) {
; CHECK-LABEL: lt32_u8:
; CHECK: // %bb.0:
; CHECK-NEXT: tst w0, #0xe0
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%2 = icmp ult i8 %0, 32
ret i1 %2
}

define i1 @lt64_u8(i8 %0) {
; CHECK-LABEL: lt64_u8:
; CHECK: // %bb.0:
; CHECK-NEXT: tst w0, #0xc0
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%2 = icmp ult i8 %0, 64
ret i1 %2
}

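; For i32 and i64 there is no implicit mask, so the setcc operand is not an
; and node, the fold does not fire, and a plain cmp is emitted.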
define i1 @lt8_u32(i32 %0) {
; CHECK-LABEL: lt8_u32:
; CHECK: // %bb.0:
; CHECK-NEXT: cmp w0, #8
; CHECK-NEXT: cset w0, lo
; CHECK-NEXT: ret
%2 = icmp ult i32 %0, 8
ret i1 %2
}

define i1 @lt32_u32(i32 %0) {
; CHECK-LABEL: lt32_u32:
; CHECK: // %bb.0:
; CHECK-NEXT: cmp w0, #32
; CHECK-NEXT: cset w0, lo
; CHECK-NEXT: ret
%2 = icmp ult i32 %0, 32
ret i1 %2
}

define i1 @lt64_u32(i32 %0) {
; CHECK-LABEL: lt64_u32:
; CHECK: // %bb.0:
; CHECK-NEXT: cmp w0, #64
; CHECK-NEXT: cset w0, lo
; CHECK-NEXT: ret
%2 = icmp ult i32 %0, 64
ret i1 %2
}

define i1 @lt8_u64(i64 %0) {
; CHECK-LABEL: lt8_u64:
; CHECK: // %bb.0:
; CHECK-NEXT: cmp x0, #8
; CHECK-NEXT: cset w0, lo
; CHECK-NEXT: ret
%2 = icmp ult i64 %0, 8
ret i1 %2
}

define i1 @lt32_u64(i64 %0) {
; CHECK-LABEL: lt32_u64:
; CHECK: // %bb.0:
; CHECK-NEXT: cmp x0, #32
; CHECK-NEXT: cset w0, lo
; CHECK-NEXT: ret
%2 = icmp ult i64 %0, 32
ret i1 %2
}

define i1 @lt64_u64(i64 %0) {
; CHECK-LABEL: lt64_u64:
; CHECK: // %bb.0:
; CHECK-NEXT: cmp x0, #64
; CHECK-NEXT: cset w0, lo
; CHECK-NEXT: ret
%2 = icmp ult i64 %0, 64
ret i1 %2
}

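; Explicit and + ult against a power of 2: the mask is rewritten to
; mask & ~(16 - 1), keeping only the bits that could make the compare fail;
; when no bit survives (e.g. 5 & ~15 == 0), the result is known to be true.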
define i1 @lt8_u16_and_5(i8 %0) {
; CHECK-LABEL: lt8_u16_and_5:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, wzr
; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%2 = and i8 %0, 5
%3 = icmp ult i8 %2, 16
ret i1 %3
}

define i1 @lt8_u16_and_19(i8 %0) {
; CHECK-LABEL: lt8_u16_and_19:
; CHECK: // %bb.0:
; CHECK-NEXT: tst w0, #0x10
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%2 = and i8 %0, 19
%3 = icmp ult i8 %2, 16
ret i1 %3
}

define i1 @lt32_u16_and_7(i32 %0) {
; CHECK-LABEL: lt32_u16_and_7:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, wzr
; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%2 = and i32 %0, 7
%3 = icmp ult i32 %2, 16
ret i1 %3
}

define i1 @lt32_u16_and_21(i32 %0) {
; CHECK-LABEL: lt32_u16_and_21:
; CHECK: // %bb.0:
; CHECK-NEXT: tst w0, #0x10
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%2 = and i32 %0, 21
%3 = icmp ult i32 %2, 16
ret i1 %3
}

define i1 @lt64_u16_and_9(i64 %0) {
; CHECK-LABEL: lt64_u16_and_9:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: cmp x8, #0
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%2 = and i64 %0, 9
%3 = icmp ult i64 %2, 16
ret i1 %3
}

define i1 @lt64_u16_and_23(i64 %0) {
; CHECK-LABEL: lt64_u16_and_23:
; CHECK: // %bb.0:
; CHECK-NEXT: tst x0, #0x10
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%2 = and i64 %0, 23
%3 = icmp ult i64 %2, 16
ret i1 %3
}

; negative test: 3 is not a power of 2, so the fold does not apply
define i1 @lt3_u8(i8 %0) {
; CHECK-LABEL: lt3_u8:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xff
; CHECK-NEXT: cmp w8, #3
; CHECK-NEXT: cset w0, lo
; CHECK-NEXT: ret
%2 = icmp ult i8 %0, 3
ret i1 %2
}

; negative test: 3 is not a power of 2, so the fold does not apply
define i1 @lt3_u32(i32 %0) {
; CHECK-LABEL: lt3_u32:
; CHECK: // %bb.0:
; CHECK-NEXT: cmp w0, #3
; CHECK-NEXT: cset w0, lo
; CHECK-NEXT: ret
%2 = icmp ult i32 %0, 3
ret i1 %2
}

; negative test: 3 is not a power of 2, so the fold does not apply
define i1 @lt3_u64(i64 %0) {
; CHECK-LABEL: lt3_u64:
; CHECK: // %bb.0:
; CHECK-NEXT: cmp x0, #3
; CHECK-NEXT: cset w0, lo
; CHECK-NEXT: ret
%2 = icmp ult i64 %0, 3
ret i1 %2
}

; negative test: the and result has a second use, so hasOneUse() blocks the fold
define i32 @lt32_u16_multiple_use(i32 %0) {
; CHECK-LABEL: lt32_u16_multiple_use:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #21 // =0x15
; CHECK-NEXT: mov w9, #10 // =0xa
; CHECK-NEXT: and w8, w0, w8
; CHECK-NEXT: cmp w8, #16
; CHECK-NEXT: orr w8, w8, w9
; CHECK-NEXT: cset w10, lo
; CHECK-NEXT: mul w0, w8, w10
; CHECK-NEXT: ret
%2 = and i32 %0, 21
%3 = icmp ult i32 %2, 16
%4 = add i32 %2, 10
%5 = zext i1 %3 to i32
%6 = mul i32 %4, %5
ret i32 %6
}
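The new fold also updates existing expectations in signed-truncation-check.ll
below: each and + cmp + cset, lo sequence becomes tst + cset, eq.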
25 changes: 10 additions & 15 deletions llvm/test/CodeGen/AArch64/signed-truncation-check.ll
@@ -287,9 +287,8 @@ define i1 @add_ultcmp_bad_i16_i8_add(i16 %x, i16 %y) nounwind {
 ; CHECK-LABEL: add_ultcmp_bad_i16_i8_add:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: add w8, w0, w1
-; CHECK-NEXT: and w8, w8, #0xffff
-; CHECK-NEXT: cmp w8, #256
-; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: tst w8, #0xff00
+; CHECK-NEXT: cset w0, eq
 ; CHECK-NEXT: ret
 %tmp0 = add i16 %x, %y
 %tmp1 = icmp ult i16 %tmp0, 256 ; 1U << 8
@@ -328,9 +327,8 @@ define i1 @add_ultcmp_bad_i16_i8_c0notpoweroftwo(i16 %x) nounwind {
 ; CHECK-LABEL: add_ultcmp_bad_i16_i8_c0notpoweroftwo:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: add w8, w0, #192
-; CHECK-NEXT: and w8, w8, #0xffff
-; CHECK-NEXT: cmp w8, #256
-; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: tst w8, #0xff00
+; CHECK-NEXT: cset w0, eq
 ; CHECK-NEXT: ret
 %tmp0 = add i16 %x, 192 ; (1U << (8-1)) + (1U << (8-1-1))
 %tmp1 = icmp ult i16 %tmp0, 256 ; 1U << 8
 ; CHECK-LABEL: add_ultcmp_bad_i16_i8_magic:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: add w8, w0, #64
-; CHECK-NEXT: and w8, w8, #0xffff
-; CHECK-NEXT: cmp w8, #256
-; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: tst w8, #0xff00
+; CHECK-NEXT: cset w0, eq
 ; CHECK-NEXT: ret
 %tmp0 = add i16 %x, 64 ; 1U << (8-1-1)
 %tmp1 = icmp ult i16 %tmp0, 256 ; 1U << 8
 ; CHECK-LABEL: add_ultcmp_bad_i16_i4:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: add w8, w0, #8
-; CHECK-NEXT: and w8, w8, #0xffff
-; CHECK-NEXT: cmp w8, #16
-; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: tst w8, #0xfff0
+; CHECK-NEXT: cset w0, eq
 ; CHECK-NEXT: ret
 %tmp0 = add i16 %x, 8 ; 1U << (4-1)
 %tmp1 = icmp ult i16 %tmp0, 16 ; 1U << 4
 ; CHECK-LABEL: add_ultcmp_bad_i24_i8:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: add w8, w0, #128
-; CHECK-NEXT: and w8, w8, #0xffffff
-; CHECK-NEXT: cmp w8, #256
-; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: tst w8, #0xffff00
+; CHECK-NEXT: cset w0, eq
 ; CHECK-NEXT: ret
 %tmp0 = add i24 %x, 128 ; 1U << (8-1)
 %tmp1 = icmp ult i24 %tmp0, 256 ; 1U << 8