From 0ca5720aa20a45ba9d14ac4760df9251a49a39fe Mon Sep 17 00:00:00 2001
From: Jorge Botto
Date: Wed, 2 Oct 2024 22:26:02 +0100
Subject: [PATCH 1/3] Precommit test

---
 llvm/test/CodeGen/AArch64/icmp-ult-eq-fold.ll | 227 ++++++++++++++++++
 1 file changed, 227 insertions(+)
 create mode 100644 llvm/test/CodeGen/AArch64/icmp-ult-eq-fold.ll

diff --git a/llvm/test/CodeGen/AArch64/icmp-ult-eq-fold.ll b/llvm/test/CodeGen/AArch64/icmp-ult-eq-fold.ll
new file mode 100644
index 0000000000000..a5d7d1cc0ceb7
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/icmp-ult-eq-fold.ll
@@ -0,0 +1,227 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=aarch64 | FileCheck %s
+
+
+define i1 @lt8_u8(i8 %0) {
+; CHECK-LABEL: lt8_u8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: and w8, w0, #0xff
+; CHECK-NEXT: cmp w8, #8
+; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: ret
+  %2 = icmp ult i8 %0, 8
+  ret i1 %2
+}
+
+define i1 @lt32_u8(i8 %0) {
+; CHECK-LABEL: lt32_u8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: and w8, w0, #0xff
+; CHECK-NEXT: cmp w8, #32
+; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: ret
+  %2 = icmp ult i8 %0, 32
+  ret i1 %2
+}
+
+define i1 @lt64_u8(i8 %0) {
+; CHECK-LABEL: lt64_u8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: and w8, w0, #0xff
+; CHECK-NEXT: cmp w8, #64
+; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: ret
+  %2 = icmp ult i8 %0, 64
+  ret i1 %2
+}
+
+define i1 @lt8_u32(i32 %0) {
+; CHECK-LABEL: lt8_u32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmp w0, #8
+; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: ret
+  %2 = icmp ult i32 %0, 8
+  ret i1 %2
+}
+
+define i1 @lt32_u32(i32 %0) {
+; CHECK-LABEL: lt32_u32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmp w0, #32
+; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: ret
+  %2 = icmp ult i32 %0, 32
+  ret i1 %2
+}
+
+define i1 @lt64_u32(i32 %0) {
+; CHECK-LABEL: lt64_u32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmp w0, #64
+; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: ret
+  %2 = icmp ult i32 %0, 64
+  ret i1 %2
+}
+
+define i1 @lt8_u64(i64 %0) {
+; CHECK-LABEL: lt8_u64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmp x0, #8
+; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: ret
+  %2 = icmp ult i64 %0, 8
+  ret i1 %2
+}
+
+define i1 @lt32_u64(i64 %0) {
+; CHECK-LABEL: lt32_u64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmp x0, #32
+; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: ret
+  %2 = icmp ult i64 %0, 32
+  ret i1 %2
+}
+
+define i1 @lt64_u64(i64 %0) {
+; CHECK-LABEL: lt64_u64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmp x0, #64
+; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: ret
+  %2 = icmp ult i64 %0, 64
+  ret i1 %2
+}
+
+define i1 @lt8_u16_and_5(i8 %0) {
+; CHECK-LABEL: lt8_u16_and_5:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #5 // =0x5
+; CHECK-NEXT: and w8, w0, w8
+; CHECK-NEXT: cmp w8, #16
+; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: ret
+  %2 = and i8 %0, 5
+  %3 = icmp ult i8 %2, 16
+  ret i1 %3
+}
+
+define i1 @lt8_u16_and_19(i8 %0) {
+; CHECK-LABEL: lt8_u16_and_19:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #19 // =0x13
+; CHECK-NEXT: and w8, w0, w8
+; CHECK-NEXT: cmp w8, #16
+; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: ret
+  %2 = and i8 %0, 19
+  %3 = icmp ult i8 %2, 16
+  ret i1 %3
+}
+
+define i1 @lt32_u16_and_7(i32 %0) {
+; CHECK-LABEL: lt32_u16_and_7:
+; CHECK: // %bb.0:
+; CHECK-NEXT: and w8, w0, #0x7
+; CHECK-NEXT: cmp w8, #16
+; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: ret
+  %2 = and i32 %0, 7
+  %3 = icmp ult i32 %2, 16
+  ret i1 %3
+}
+
+define i1 @lt32_u16_and_21(i32 %0) {
+; CHECK-LABEL: lt32_u16_and_21:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #21 // =0x15
+; CHECK-NEXT: and w8, w0, w8
+; CHECK-NEXT: cmp w8, #16
+; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: ret
+  %2 = and i32 %0, 21
+  %3 = icmp ult i32 %2, 16
+  ret i1 %3
+}
+
+define i1 @lt64_u16_and_9(i64 %0) {
+; CHECK-LABEL: lt64_u16_and_9:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #9 // =0x9
+; CHECK-NEXT: and x8, x0, x8
+; CHECK-NEXT: cmp x8, #16
+; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: ret
+  %2 = and i64 %0, 9
+  %3 = icmp ult i64 %2, 16
+  ret i1 %3
+}
+
+define i1 @lt64_u16_and_23(i64 %0) {
+; CHECK-LABEL: lt64_u16_and_23:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #23 // =0x17
+; CHECK-NEXT: and x8, x0, x8
+; CHECK-NEXT: cmp x8, #16
+; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: ret
+  %2 = and i64 %0, 23
+  %3 = icmp ult i64 %2, 16
+  ret i1 %3
+}
+
+; negative test
+define i1 @lt3_u8(i8 %0) {
+; CHECK-LABEL: lt3_u8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: and w8, w0, #0xff
+; CHECK-NEXT: cmp w8, #3
+; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: ret
+  %2 = icmp ult i8 %0, 3
+  ret i1 %2
+}
+
+; negative test
+define i1 @lt3_u32(i32 %0) {
+; CHECK-LABEL: lt3_u32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmp w0, #3
+; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: ret
+  %2 = icmp ult i32 %0, 3
+  ret i1 %2
+}
+
+; negative test
+define i1 @lt3_u64(i64 %0) {
+; CHECK-LABEL: lt3_u64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmp x0, #3
+; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: ret
+  %2 = icmp ult i64 %0, 3
+  ret i1 %2
+}
+
+; negative test
+define i32 @lt32_u16_multiple_use(i32 %0) {
+; CHECK-LABEL: lt32_u16_multiple_use:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #21 // =0x15
+; CHECK-NEXT: mov w9, #10 // =0xa
+; CHECK-NEXT: and w8, w0, w8
+; CHECK-NEXT: cmp w8, #16
+; CHECK-NEXT: orr w8, w8, w9
+; CHECK-NEXT: cset w10, lo
+; CHECK-NEXT: mul w0, w8, w10
+; CHECK-NEXT: ret
+  %2 = and i32 %0, 21
+  %3 = icmp ult i32 %2, 16
+  %4 = add i32 %2, 10
+  %5 = zext i1 %3 to i32
+  %6 = mul i32 %4, %5
+  ret i32 %6
+}

From 1ccc6c82623ce44ce1860bbd5c9fda1126b8c2ab Mon Sep 17 00:00:00 2001
From: Jorge Botto
Date: Wed, 2 Oct 2024 22:33:36 +0100
Subject: [PATCH 2/3] Adding missed optimisation

---
 .../Target/AArch64/AArch64ISelLowering.cpp    | 26 +++++++++
 llvm/test/CodeGen/AArch64/icmp-ult-eq-fold.ll | 53 ++++++++-----------
 .../AArch64/signed-truncation-check.ll        | 25 ++++-----
 3 files changed, 57 insertions(+), 47 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 3de09eca94233..66900507b129f 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -4301,6 +4301,29 @@ static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) {
                      Op.getOperand(1));
 }
 
+// Converts SETCC (AND X Y) Z ULT -> SETCC (AND X (Y & ~(Z - 1))) 0 EQ when Z
+// is a power of 2. This is then lowered to ANDS X (Y & ~(Z - 1)) instead of
+// SUBS (AND X Y) Z, which produces better code through EmitComparison.
+static void simplifySetCCIntoEq(ISD::CondCode &CC, SDValue &LHS, SDValue &RHS,
+                                SelectionDAG &DAG, const SDLoc dl) {
+  if (CC == ISD::SETULT && LHS.getOpcode() == ISD::AND && LHS->hasOneUse()) {
+    ConstantSDNode *LHSConstOp = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
+    ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
+    if (LHSConstOp && RHSConst) {
+      uint64_t lhsConstValue = LHSConstOp->getZExtValue();
+      uint64_t rhsConstant = RHSConst->getZExtValue();
+      if (isPowerOf2_64(rhsConstant)) {
+        uint64_t newMaskValue = lhsConstValue & ~(rhsConstant - 1);
+        LHS =
+            DAG.getNode(ISD::AND, dl, LHS.getValueType(), LHS.getOperand(0),
+                        DAG.getConstant(newMaskValue, dl, LHS.getValueType()));
+        RHS = DAG.getConstant(0, dl, RHS.getValueType());
+        CC = ISD::SETEQ;
+      }
+    }
+  }
+}
+
 SDValue AArch64TargetLowering::LowerFP_EXTEND(SDValue Op,
                                               SelectionDAG &DAG) const {
   EVT VT = Op.getValueType();
@@ -10589,6 +10612,9 @@ SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
   }
 
   if (LHS.getValueType().isInteger()) {
+
+    simplifySetCCIntoEq(CC, LHS, RHS, DAG, dl);
+
     SDValue CCVal;
     SDValue Cmp = getAArch64Cmp(
         LHS, RHS, ISD::getSetCCInverse(CC, LHS.getValueType()), CCVal, DAG, dl);
diff --git a/llvm/test/CodeGen/AArch64/icmp-ult-eq-fold.ll b/llvm/test/CodeGen/AArch64/icmp-ult-eq-fold.ll
index a5d7d1cc0ceb7..33c5ba7987974 100644
--- a/llvm/test/CodeGen/AArch64/icmp-ult-eq-fold.ll
+++ b/llvm/test/CodeGen/AArch64/icmp-ult-eq-fold.ll
@@ -5,9 +5,8 @@
 define i1 @lt8_u8(i8 %0) {
 ; CHECK-LABEL: lt8_u8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0xff
-; CHECK-NEXT: cmp w8, #8
-; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: tst w0, #0xf8
+; CHECK-NEXT: cset w0, eq
 ; CHECK-NEXT: ret
   %2 = icmp ult i8 %0, 8
   ret i1 %2
@@ -16,9 +15,8 @@ define i1 @lt8_u8(i8 %0) {
 define i1 @lt32_u8(i8 %0) {
 ; CHECK-LABEL: lt32_u8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0xff
-; CHECK-NEXT: cmp w8, #32
-; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: tst w0, #0xe0
+; CHECK-NEXT: cset w0, eq
 ; CHECK-NEXT: ret
   %2 = icmp ult i8 %0, 32
   ret i1 %2
@@ -27,9 +25,8 @@ define i1 @lt32_u8(i8 %0) {
 define i1 @lt64_u8(i8 %0) {
 ; CHECK-LABEL: lt64_u8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0xff
-; CHECK-NEXT: cmp w8, #64
-; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: tst w0, #0xc0
+; CHECK-NEXT: cset w0, eq
 ; CHECK-NEXT: ret
   %2 = icmp ult i8 %0, 64
   ret i1 %2
@@ -98,10 +95,9 @@ define i1 @lt64_u64(i64 %0) {
 define i1 @lt8_u16_and_5(i8 %0) {
 ; CHECK-LABEL: lt8_u16_and_5:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #5 // =0x5
-; CHECK-NEXT: and w8, w0, w8
-; CHECK-NEXT: cmp w8, #16
-; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: mov w8, wzr
+; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: cset w0, eq
 ; CHECK-NEXT: ret
   %2 = and i8 %0, 5
   %3 = icmp ult i8 %2, 16
@@ -111,10 +107,8 @@ define i1 @lt8_u16_and_5(i8 %0) {
 define i1 @lt8_u16_and_19(i8 %0) {
 ; CHECK-LABEL: lt8_u16_and_19:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #19 // =0x13
-; CHECK-NEXT: and w8, w0, w8
-; CHECK-NEXT: cmp w8, #16
-; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: tst w0, #0x10
+; CHECK-NEXT: cset w0, eq
 ; CHECK-NEXT: ret
   %2 = and i8 %0, 19
   %3 = icmp ult i8 %2, 16
@@ -124,9 +118,9 @@ define i1 @lt8_u16_and_19(i8 %0) {
 define i1 @lt32_u16_and_7(i32 %0) {
 ; CHECK-LABEL: lt32_u16_and_7:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0x7
-; CHECK-NEXT: cmp w8, #16
-; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: mov w8, wzr
+; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: cset w0, eq
 ; CHECK-NEXT: ret
   %2 = and i32 %0, 7
   %3 = icmp ult i32 %2, 16
@@ -136,10 +130,8 @@ define i1 @lt32_u16_and_7(i32 %0) {
 define i1 @lt32_u16_and_21(i32 %0) {
 ; CHECK-LABEL: lt32_u16_and_21:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #21 // =0x15
-; CHECK-NEXT: and w8, w0, w8
-; CHECK-NEXT: cmp w8, #16
-; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: tst w0, #0x10
+; CHECK-NEXT: cset w0, eq
 ; CHECK-NEXT: ret
   %2 = and i32 %0, 21
   %3 = icmp ult i32 %2, 16
@@ -149,10 +141,9 @@ define i1 @lt32_u16_and_21(i32 %0) {
 define i1 @lt64_u16_and_9(i64 %0) {
 ; CHECK-LABEL: lt64_u16_and_9:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #9 // =0x9
-; CHECK-NEXT: and x8, x0, x8
-; CHECK-NEXT: cmp x8, #16
-; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: cmp x8, #0
+; CHECK-NEXT: cset w0, eq
 ; CHECK-NEXT: ret
   %2 = and i64 %0, 9
   %3 = icmp ult i64 %2, 16
@@ -162,10 +153,8 @@ define i1 @lt64_u16_and_9(i64 %0) {
 define i1 @lt64_u16_and_23(i64 %0) {
 ; CHECK-LABEL: lt64_u16_and_23:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #23 // =0x17
-; CHECK-NEXT: and x8, x0, x8
-; CHECK-NEXT: cmp x8, #16
-; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: tst x0, #0x10
+; CHECK-NEXT: cset w0, eq
 ; CHECK-NEXT: ret
   %2 = and i64 %0, 23
   %3 = icmp ult i64 %2, 16
diff --git a/llvm/test/CodeGen/AArch64/signed-truncation-check.ll b/llvm/test/CodeGen/AArch64/signed-truncation-check.ll
index bb4df6d8935b1..7c80f9320faec 100644
--- a/llvm/test/CodeGen/AArch64/signed-truncation-check.ll
+++ b/llvm/test/CodeGen/AArch64/signed-truncation-check.ll
@@ -287,9 +287,8 @@ define i1 @add_ultcmp_bad_i16_i8_add(i16 %x, i16 %y) nounwind {
 ; CHECK-LABEL: add_ultcmp_bad_i16_i8_add:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: add w8, w0, w1
-; CHECK-NEXT: and w8, w8, #0xffff
-; CHECK-NEXT: cmp w8, #256
-; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: tst w8, #0xff00
+; CHECK-NEXT: cset w0, eq
 ; CHECK-NEXT: ret
   %tmp0 = add i16 %x, %y
   %tmp1 = icmp ult i16 %tmp0, 256 ; 1U << 8
@@ -328,9 +327,8 @@ define i1 @add_ultcmp_bad_i16_i8_c0notpoweroftwo(i16 %x) nounwind {
 ; CHECK-LABEL: add_ultcmp_bad_i16_i8_c0notpoweroftwo:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: add w8, w0, #192
-; CHECK-NEXT: and w8, w8, #0xffff
-; CHECK-NEXT: cmp w8, #256
-; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: tst w8, #0xff00
+; CHECK-NEXT: cset w0, eq
 ; CHECK-NEXT: ret
   %tmp0 = add i16 %x, 192 ; (1U << (8-1)) + (1U << (8-1-1))
   %tmp1 = icmp ult i16 %tmp0, 256 ; 1U << 8
@@ -356,9 +354,8 @@ define i1 @add_ultcmp_bad_i16_i8_magic(i16 %x) nounwind {
 ; CHECK-LABEL: add_ultcmp_bad_i16_i8_magic:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: add w8, w0, #64
-; CHECK-NEXT: and w8, w8, #0xffff
-; CHECK-NEXT: cmp w8, #256
-; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: tst w8, #0xff00
+; CHECK-NEXT: cset w0, eq
 ; CHECK-NEXT: ret
   %tmp0 = add i16 %x, 64 ; 1U << (8-1-1)
   %tmp1 = icmp ult i16 %tmp0, 256 ; 1U << 8
@@ -370,9 +367,8 @@ define i1 @add_ultcmp_bad_i16_i4(i16 %x) nounwind {
 ; CHECK-LABEL: add_ultcmp_bad_i16_i4:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: add w8, w0, #8
-; CHECK-NEXT: and w8, w8, #0xffff
-; CHECK-NEXT: cmp w8, #16
-; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: tst w8, #0xfff0
+; CHECK-NEXT: cset w0, eq
 ; CHECK-NEXT: ret
   %tmp0 = add i16 %x, 8 ; 1U << (4-1)
   %tmp1 = icmp ult i16 %tmp0, 16 ; 1U << 4
@@ -384,9 +380,8 @@ define i1 @add_ultcmp_bad_i24_i8(i24 %x) nounwind {
 ; CHECK-LABEL: add_ultcmp_bad_i24_i8:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: add w8, w0, #128
-; CHECK-NEXT: and w8, w8, #0xffffff
-; CHECK-NEXT: cmp w8, #256
-; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: tst w8, #0xffff00
+; CHECK-NEXT: cset w0, eq
 ; CHECK-NEXT: ret
   %tmp0 = add i24 %x, 128 ; 1U << (8-1)
   %tmp1 = icmp ult i24 %tmp0, 256 ; 1U << 8

From 2a167dbba18ef2e262cbe7835949a024f55f9f9c Mon Sep 17 00:00:00 2001
From: Jorge Botto
Date: Thu, 3 Oct 2024 15:02:38 +0100
Subject: [PATCH 3/3] Fixing lowercase variable names

---
 llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 66900507b129f..5f9a973e2524b 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -4310,13 +4310,13 @@ static void simplifySetCCIntoEq(ISD::CondCode &CC, SDValue &LHS, SDValue &RHS,
     ConstantSDNode *LHSConstOp = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
     ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
     if (LHSConstOp && RHSConst) {
-      uint64_t lhsConstValue = LHSConstOp->getZExtValue();
-      uint64_t rhsConstant = RHSConst->getZExtValue();
-      if (isPowerOf2_64(rhsConstant)) {
-        uint64_t newMaskValue = lhsConstValue & ~(rhsConstant - 1);
+      uint64_t LHSConstValue = LHSConstOp->getZExtValue();
+      uint64_t RHSConstant = RHSConst->getZExtValue();
+      if (isPowerOf2_64(RHSConstant)) {
+        uint64_t NewMaskValue = LHSConstValue & ~(RHSConstant - 1);
         LHS =
             DAG.getNode(ISD::AND, dl, LHS.getValueType(), LHS.getOperand(0),
-                        DAG.getConstant(newMaskValue, dl, LHS.getValueType()));
+                        DAG.getConstant(NewMaskValue, dl, LHS.getValueType()));
         RHS = DAG.getConstant(0, dl, RHS.getValueType());
         CC = ISD::SETEQ;
       }
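
Note on the rewrite, for anyone who wants to sanity-check it outside of
SelectionDAG: for a power-of-two Z, (X & Y) ult Z holds exactly when
X & (Y & ~(Z - 1)) == 0, because clearing the low log2(Z) bits of the mask
leaves only the bits that could push X & Y up to Z or beyond. The C++ sketch
below is illustrative only and mirrors the mask arithmetic of
simplifySetCCIntoEq; the helper names ultViaCmp and ultViaTst are hypothetical
and not part of these patches.

#include <cassert>
#include <cstdint>

// Original lowering: AND then compare, i.e. SUBS (AND X Y) Z.
static bool ultViaCmp(uint64_t X, uint64_t Y, uint64_t Z) {
  return (X & Y) < Z;
}

// Rewritten lowering: a single TST/ANDS against the reduced mask.
// NewMask = Y & ~(Z - 1) keeps only the bits of Y at or above log2(Z).
static bool ultViaTst(uint64_t X, uint64_t Y, uint64_t Z) {
  uint64_t NewMask = Y & ~(Z - 1);
  return (X & NewMask) == 0;
}

int main() {
  // Mirrors the lt32_u16_and_21 test: Y = 21, Z = 16 (a power of two).
  for (uint64_t X = 0; X < 1024; ++X)
    assert(ultViaCmp(X, 21, 16) == ultViaTst(X, 21, 16));
  return 0;
}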