diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 9a916a663a64c..f6d358009604f 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -53600,6 +53600,41 @@ static SDValue combineLRINT_LLRINT(SDNode *N, SelectionDAG &DAG,
                                  DAG.getUNDEF(SrcVT)));
 }
 
+// Attempt to fold some (truncate (srl (add X, C1), C2)) patterns to
+// (add (truncate (srl X, C2)), C1'). C1' will be smaller than C1 so we are able
+// to avoid generating code with MOVABS and large constants in certain cases.
+static SDValue combinei64TruncSrlAdd(SDValue N, EVT VT, SelectionDAG &DAG,
+                                     const SDLoc &DL) {
+  using namespace llvm::SDPatternMatch;
+
+  SDValue AddLhs;
+  APInt AddConst, SrlConst;
+  if (VT != MVT::i32 ||
+      !sd_match(N, m_AllOf(m_SpecificVT(MVT::i64),
+                           m_Srl(m_OneUse(m_Add(m_Value(AddLhs),
+                                                m_ConstInt(AddConst))),
+                                 m_ConstInt(SrlConst)))))
+    return SDValue();
+
+  if (SrlConst.ule(31) || AddConst.lshr(SrlConst).shl(SrlConst) != AddConst)
+    return SDValue();
+
+  SDValue AddLHSSrl =
+      DAG.getNode(ISD::SRL, DL, MVT::i64, AddLhs, N.getOperand(1));
+  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, AddLHSSrl);
+
+  APInt NewAddConstVal =
+      (~((~AddConst).lshr(SrlConst))).trunc(VT.getSizeInBits());
+  SDValue NewAddConst = DAG.getConstant(NewAddConstVal, DL, VT);
+  SDValue NewAddNode = DAG.getNode(ISD::ADD, DL, VT, Trunc, NewAddConst);
+
+  APInt CleanupSizeConstVal = (SrlConst - 32).zextOrTrunc(VT.getSizeInBits());
+  EVT CleanUpVT =
+      EVT::getIntegerVT(*DAG.getContext(), CleanupSizeConstVal.getZExtValue());
+  SDValue CleanUp = DAG.getAnyExtOrTrunc(NewAddNode, DL, CleanUpVT);
+  return DAG.getAnyExtOrTrunc(CleanUp, DL, VT);
+}
+
 /// Attempt to pre-truncate inputs to arithmetic ops if it will simplify
 /// the codegen.
 /// e.g. TRUNC( BINOP( X, Y ) ) --> BINOP( TRUNC( X ), TRUNC( Y ) )
@@ -53645,6 +53680,9 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
   if (!Src.hasOneUse())
     return SDValue();
 
+  if (SDValue R = combinei64TruncSrlAdd(Src, VT, DAG, DL))
+    return R;
+
   // Only support vector truncation for now.
   // TODO: i64 scalar math would benefit as well.
   if (!VT.isVector())
diff --git a/llvm/test/CodeGen/X86/combine-i64-trunc-srl-add.ll b/llvm/test/CodeGen/X86/combine-i64-trunc-srl-add.ll
new file mode 100644
index 0000000000000..1ce1e7e1c2b9f
--- /dev/null
+++ b/llvm/test/CodeGen/X86/combine-i64-trunc-srl-add.ll
@@ -0,0 +1,125 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefixes=X64
+
+; Test for https://github.com/llvm/llvm-project/issues/123239
+
+define i1 @test_ult_trunc_add(i64 %x) {
+; X64-LABEL: test_ult_trunc_add:
+; X64: # %bb.0:
+; X64-NEXT: shrq $48, %rdi
+; X64-NEXT: addl $-65522, %edi # imm = 0xFFFF000E
+; X64-NEXT: cmpl $3, %edi
+; X64-NEXT: setb %al
+; X64-NEXT: retq
+  %add = add i64 %x, 3940649673949184
+  %shr = lshr i64 %add, 48
+  %conv = trunc i64 %shr to i32
+  %res = icmp ult i32 %conv, 3
+  ret i1 %res
+}
+
+define i1 @test_ult_add(i64 %x) {
+; X64-LABEL: test_ult_add:
+; X64: # %bb.0:
+; X64-NEXT: shrq $48, %rdi
+; X64-NEXT: addl $-65522, %edi # imm = 0xFFFF000E
+; X64-NEXT: cmpl $3, %edi
+; X64-NEXT: setb %al
+; X64-NEXT: retq
+  %add = add i64 3940649673949184, %x
+  %cmp = icmp ult i64 %add, 844424930131968
+  ret i1 %cmp
+}
+
+define i1 @test_ugt_trunc_add(i64 %x) {
+; X64-LABEL: test_ugt_trunc_add:
+; X64: # %bb.0:
+; X64-NEXT: shrq $48, %rdi
+; X64-NEXT: addl $-65522, %edi # imm = 0xFFFF000E
+; X64-NEXT: cmpl $4, %edi
+; X64-NEXT: setae %al
+; X64-NEXT: retq
+  %add = add i64 %x, 3940649673949184
+  %shr = lshr i64 %add, 48
+  %conv = trunc i64 %shr to i32
+  %res = icmp ugt i32 %conv, 3
+  ret i1 %res
+}
+
+define i1 @test_ugt_add(i64 %x) {
+; X64-LABEL: test_ugt_add:
+; X64: # %bb.0:
+; X64-NEXT: movabsq $3940649673949184, %rax # imm = 0xE000000000000
+; X64-NEXT: addq %rdi, %rax
+; X64-NEXT: movabsq $844424930131968, %rcx # imm = 0x3000000000000
+; X64-NEXT: cmpq %rcx, %rax
+; X64-NEXT: seta %al
+; X64-NEXT: retq
+  %add = add i64 3940649673949184, %x
+  %cmp = icmp ugt i64 %add, 844424930131968
+  ret i1 %cmp
+}
+
+define i1 @test_eq_trunc_add(i64 %x) {
+; X64-LABEL: test_eq_trunc_add:
+; X64: # %bb.0:
+; X64-NEXT: shrq $48, %rdi
+; X64-NEXT: cmpl $65525, %edi # imm = 0xFFF5
+; X64-NEXT: sete %al
+; X64-NEXT: retq
+  %add = add i64 %x, 3940649673949184
+  %shr = lshr i64 %add, 48
+  %conv = trunc i64 %shr to i32
+  %res = icmp eq i32 %conv, 3
+  ret i1 %res
+}
+
+define i1 @test_eq_add(i64 %x) {
+; X64-LABEL: test_eq_add:
+; X64: # %bb.0:
+; X64-NEXT: movabsq $-3096224743817216, %rax # imm = 0xFFF5000000000000
+; X64-NEXT: cmpq %rax, %rdi
+; X64-NEXT: sete %al
+; X64-NEXT: retq
+  %add = add i64 3940649673949184, %x
+  %cmp = icmp eq i64 %add, 844424930131968
+  ret i1 %cmp
+}
+
+define i1 @test_ne_trunc_add(i64 %x) {
+; X64-LABEL: test_ne_trunc_add:
+; X64: # %bb.0:
+; X64-NEXT: shrq $48, %rdi
+; X64-NEXT: cmpl $65525, %edi # imm = 0xFFF5
+; X64-NEXT: setne %al
+; X64-NEXT: retq
+  %add = add i64 %x, 3940649673949184
+  %shr = lshr i64 %add, 48
+  %conv = trunc i64 %shr to i32
+  %res = icmp ne i32 %conv, 3
+  ret i1 %res
+}
+
+define i1 @test_ne_add(i64 %x) {
+; X64-LABEL: test_ne_add:
+; X64: # %bb.0:
+; X64-NEXT: movabsq $-3096224743817216, %rax # imm = 0xFFF5000000000000
+; X64-NEXT: cmpq %rax, %rdi
+; X64-NEXT: setne %al
+; X64-NEXT: retq
+  %add = add i64 3940649673949184, %x
+  %cmp = icmp ne i64 %add, 844424930131968
+  ret i1 %cmp
+}
+
+define i32 @test_trunc_add(i64 %x) {
+; X64-LABEL: test_trunc_add:
+; X64: # %bb.0:
+; X64-NEXT: shrq $48, %rdi
+; X64-NEXT: leal -65522(%rdi), %eax
+; X64-NEXT: retq
+  %add = add i64 %x, 3940649673949184
+  %shr = lshr i64 %add, 48
+  %conv = trunc i64 %shr to i32
+  ret i32 %conv
+}
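
For reference, below is a minimal standalone C++ sketch (not part of the patch, and not LLVM/APInt code) of the constant arithmetic the new combine performs for the constants used in the tests above (AddConst = 3940649673949184 = 0x000E000000000000, SrlConst = 48). It shows how the 64-bit immediate becomes the 32-bit immediate -65522 (0xFFFF000E) seen in the addl/leal CHECK lines, and that the folded value agrees with the original in the low SrlConst - 32 = 16 bits, which is what the trunc-to-i16 / any_ext cleanup at the end of the combine keeps.

// Standalone illustration only; mirrors the APInt arithmetic in
// combinei64TruncSrlAdd with plain integers. Constants are taken from the
// tests above; everything else here is purely for demonstration.
#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  const uint64_t AddConst = 3940649673949184ULL; // 0x000E000000000000
  const unsigned SrlConst = 48;                  // shift amount in the tests

  // Precondition checked by the combine: the low SrlConst bits of AddConst
  // are zero, so the low half of X cannot carry into the shifted-out bits.
  assert(((AddConst >> SrlConst) << SrlConst) == AddConst);

  // NewAddConstVal = (~((~AddConst) >> SrlConst)) truncated to 32 bits.
  const uint32_t NewAddConst =
      static_cast<uint32_t>(~((~AddConst) >> SrlConst));
  std::printf("NewAddConst = 0x%08X (%d)\n", NewAddConst,
              static_cast<int32_t>(NewAddConst));
  // Prints 0xFFFF000E, i.e. the addl $-65522 in the CHECK lines above.

  // The folded form matches the original in the low SrlConst - 32 = 16 bits,
  // the bits preserved by the final trunc-to-i16 / any_ext cleanup.
  for (uint64_t X : {0x0ULL, 0xFFF2000000000000ULL, 0xFFF5123400000000ULL}) {
    uint32_t Orig = static_cast<uint32_t>((X + AddConst) >> SrlConst);
    uint32_t Folded = static_cast<uint32_t>(X >> SrlConst) + NewAddConst;
    assert((Orig & 0xFFFF) == (Folded & 0xFFFF));
  }
  return 0;
}

In the eq/ne tests the 32-bit add is folded further into the compare immediate (3 + 65522 = 65525, hence cmpl $65525), so no addl appears there at all.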