Skip to content

[X86] Fold some (truncate (srl (add X, C1), C2)) patterns to (add (truncate (srl X, C2)), C1') #126448

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Feb 21, 2025
38 changes: 38 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53600,6 +53600,41 @@ static SDValue combineLRINT_LLRINT(SDNode *N, SelectionDAG &DAG,
DAG.getUNDEF(SrcVT)));
}

// Attempt to fold some (truncate (srl (add X, C1), C2)) patterns to
// (add (truncate (srl X, C2)), C1'). C1' will be smaller than C1 so we are able
// to avoid generating code with MOVABS and large constants in certain cases.
//
// \p N is the i64 source operand of the truncate, \p VT is the truncated
// result type (only i32 is handled). Returns the replacement value, or an
// empty SDValue if the pattern does not match or the fold would be unsound.
//
// The fold requires:
//  * 32 < C2 < 64, so that the shifted value fits in (64 - C2) < 32 bits and
//    the truncate to i32 does not discard anything; and
//  * the low C2 bits of C1 are zero, so that the add cannot produce a carry
//    out of the bits that the shift throws away.
// Under those conditions
//   (X + C1) >> C2 == ((X >> C2) + (C1 >> C2)) mod 2^(64 - C2).
static SDValue combinei64TruncSrlAdd(SDValue N, EVT VT, SelectionDAG &DAG,
                                     const SDLoc &DL) {
  using namespace llvm::SDPatternMatch;

  SDValue AddLhs;
  APInt AddConst, SrlConst;
  if (VT != MVT::i32 ||
      !sd_match(N, m_AllOf(m_SpecificVT(MVT::i64),
                           m_Srl(m_OneUse(m_Add(m_Value(AddLhs),
                                                m_ConstInt(AddConst))),
                                 m_ConstInt(SrlConst)))))
    return SDValue();

  // A shift of exactly 32 would leave a full 32-bit value (nothing to mask,
  // and a 0-bit cleanup type is not a valid integer type); a shift of 64 or
  // more is out of range for i64. Reject both ends.
  if (SrlConst.ule(32) || SrlConst.uge(64))
    return SDValue();
  const unsigned ShiftAmt = SrlConst.getZExtValue();

  // Bail out if any of the low ShiftAmt bits of the constant are set: they
  // could carry into the bits that survive the shift.
  if (AddConst.lshr(ShiftAmt).shl(ShiftAmt) != AddConst)
    return SDValue();

  SDValue AddLHSSrl =
      DAG.getNode(ISD::SRL, DL, MVT::i64, AddLhs, N.getOperand(1));
  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, AddLHSSrl);

  // The narrowed constant is simply the surviving high bits of C1.
  APInt NewAddConstVal = AddConst.lshr(ShiftAmt).trunc(VT.getSizeInBits());
  SDValue NewAddConst = DAG.getConstant(NewAddConstVal, DL, VT);
  SDValue NewAddNode = DAG.getNode(ISD::ADD, DL, VT, Trunc, NewAddConst);

  // In the original i64 add, a carry out of the top bit is dropped. In the
  // narrowed i32 add the same carry lands in bit (64 - ShiftAmt), so it has to
  // be cleared explicitly. This must be a zero-extension: an any-extend would
  // leave those bits undefined and the masking would be folded away.
  EVT CleanUpVT = EVT::getIntegerVT(*DAG.getContext(), 64 - ShiftAmt);
  return DAG.getZeroExtendInReg(NewAddNode, DL, CleanUpVT);
}

/// Attempt to pre-truncate inputs to arithmetic ops if it will simplify
/// the codegen.
/// e.g. TRUNC( BINOP( X, Y ) ) --> BINOP( TRUNC( X ), TRUNC( Y ) )
Expand Down Expand Up @@ -53645,6 +53680,9 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
if (!Src.hasOneUse())
return SDValue();

if (SDValue R = combinei64TruncSrlAdd(Src, VT, DAG, DL))
return R;

// Only support vector truncation for now.
// TODO: i64 scalar math would benefit as well.
if (!VT.isVector())
Expand Down
125 changes: 125 additions & 0 deletions llvm/test/CodeGen/X86/combine-i64-trunc-srl-add.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefixes=X64

; Test for https://github.com/llvm/llvm-project/issues/123239

; trunc(lshr(add x, 0xE000000000000), 48) compared `ult 3`.
; The expected output shifts first and then uses a 32-bit add immediate, so no
; movabsq is needed for the 64-bit constant.
define i1 @test_ult_trunc_add(i64 %x) {
; X64-LABEL: test_ult_trunc_add:
; X64: # %bb.0:
; X64-NEXT: shrq $48, %rdi
; X64-NEXT: addl $-65522, %edi # imm = 0xFFFF000E
; X64-NEXT: cmpl $3, %edi
; X64-NEXT: setb %al
; X64-NEXT: retq
%add = add i64 %x, 3940649673949184
%shr = lshr i64 %add, 48
%conv = trunc i64 %shr to i32
%res = icmp ult i32 %conv, 3
ret i1 %res
}

; Same range check as test_ult_trunc_add, but written as a plain i64 compare
; against 844424930131968 (3 << 48) with no explicit lshr/trunc in the IR.
; The expected output is identical to test_ult_trunc_add.
define i1 @test_ult_add(i64 %x) {
; X64-LABEL: test_ult_add:
; X64: # %bb.0:
; X64-NEXT: shrq $48, %rdi
; X64-NEXT: addl $-65522, %edi # imm = 0xFFFF000E
; X64-NEXT: cmpl $3, %edi
; X64-NEXT: setb %al
; X64-NEXT: retq
%add = add i64 3940649673949184, %x
%cmp = icmp ult i64 %add, 844424930131968
ret i1 %cmp
}

; trunc(lshr(add x, 0xE000000000000), 48) compared `ugt 3`.
; The expected output shifts first, uses a 32-bit add immediate, and checks
; `ugt 3` as `>= 4` (cmpl $4 + setae).
define i1 @test_ugt_trunc_add(i64 %x) {
; X64-LABEL: test_ugt_trunc_add:
; X64: # %bb.0:
; X64-NEXT: shrq $48, %rdi
; X64-NEXT: addl $-65522, %edi # imm = 0xFFFF000E
; X64-NEXT: cmpl $4, %edi
; X64-NEXT: setae %al
; X64-NEXT: retq
%add = add i64 %x, 3940649673949184
%shr = lshr i64 %add, 48
%conv = trunc i64 %shr to i32
%res = icmp ugt i32 %conv, 3
ret i1 %res
}

; i64 `ugt` compare of (add x, 0xE000000000000) against 0x3000000000000.
; Not a negative test: per the review discussion the fold is applicable here
; too, but the selection DAG has a different shape, so it is left for a
; follow-up patch. The expected output therefore still materializes both
; 64-bit constants with movabsq.
define i1 @test_ugt_add(i64 %x) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are these negative tests? Add comments if they are.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, this transformation is also applicable to those tests. However, the selection DAG differs in those cases, so I figured expanding the transformation to include them should be a separate patch. If preferred, I can expand it now, though.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's ok to do in a separate patch.

; X64-LABEL: test_ugt_add:
; X64: # %bb.0:
; X64-NEXT: movabsq $3940649673949184, %rax # imm = 0xE000000000000
; X64-NEXT: addq %rdi, %rax
; X64-NEXT: movabsq $844424930131968, %rcx # imm = 0x3000000000000
; X64-NEXT: cmpq %rcx, %rax
; X64-NEXT: seta %al
; X64-NEXT: retq
%add = add i64 3940649673949184, %x
%cmp = icmp ugt i64 %add, 844424930131968
ret i1 %cmp
}

; trunc(lshr(add x, 0xE000000000000), 48) compared `eq 3`.
; The expected output contains no add at all: the shifted value is compared
; directly against 65525 (0xFFF5).
define i1 @test_eq_trunc_add(i64 %x) {
; X64-LABEL: test_eq_trunc_add:
; X64: # %bb.0:
; X64-NEXT: shrq $48, %rdi
; X64-NEXT: cmpl $65525, %edi # imm = 0xFFF5
; X64-NEXT: sete %al
; X64-NEXT: retq
%add = add i64 %x, 3940649673949184
%shr = lshr i64 %add, 48
%conv = trunc i64 %shr to i32
%res = icmp eq i32 %conv, 3
ret i1 %res
}

; i64 `eq` compare of (add x, 0xE000000000000) against 0x3000000000000.
; The expected output has the add folded into the compare constant, but that
; constant (0xFFF5000000000000) is still materialized with movabsq.
define i1 @test_eq_add(i64 %x) {
; X64-LABEL: test_eq_add:
; X64: # %bb.0:
; X64-NEXT: movabsq $-3096224743817216, %rax # imm = 0xFFF5000000000000
; X64-NEXT: cmpq %rax, %rdi
; X64-NEXT: sete %al
; X64-NEXT: retq
%add = add i64 3940649673949184, %x
%cmp = icmp eq i64 %add, 844424930131968
ret i1 %cmp
}

; trunc(lshr(add x, 0xE000000000000), 48) compared `ne 3`.
; The expected output contains no add at all: the shifted value is compared
; directly against 65525 (0xFFF5).
define i1 @test_ne_trunc_add(i64 %x) {
; X64-LABEL: test_ne_trunc_add:
; X64: # %bb.0:
; X64-NEXT: shrq $48, %rdi
; X64-NEXT: cmpl $65525, %edi # imm = 0xFFF5
; X64-NEXT: setne %al
; X64-NEXT: retq
%add = add i64 %x, 3940649673949184
%shr = lshr i64 %add, 48
%conv = trunc i64 %shr to i32
%res = icmp ne i32 %conv, 3
ret i1 %res
}

; i64 `ne` compare of (add x, 0xE000000000000) against 0x3000000000000.
; The expected output has the add folded into the compare constant, but that
; constant (0xFFF5000000000000) is still materialized with movabsq.
define i1 @test_ne_add(i64 %x) {
; X64-LABEL: test_ne_add:
; X64: # %bb.0:
; X64-NEXT: movabsq $-3096224743817216, %rax # imm = 0xFFF5000000000000
; X64-NEXT: cmpq %rax, %rdi
; X64-NEXT: setne %al
; X64-NEXT: retq
%add = add i64 3940649673949184, %x
%cmp = icmp ne i64 %add, 844424930131968
ret i1 %cmp
}

; Returns trunc(lshr(add x, 0xE000000000000), 48) directly, with no compare
; consuming it.
; NOTE(review): the expected output below computes (x >> 48) - 65522 with no
; masking. For x = 0 the IR yields 14, whereas shrq/leal yields 0xFFFF000E, so
; these CHECK lines appear to record a miscompile - confirm and regenerate
; once the fold masks the result to the low 16 bits.
define i32 @test_trunc_add(i64 %x) {
; X64-LABEL: test_trunc_add:
; X64: # %bb.0:
; X64-NEXT: shrq $48, %rdi
; X64-NEXT: leal -65522(%rdi), %eax
; X64-NEXT: retq
%add = add i64 %x, 3940649673949184
%shr = lshr i64 %add, 48
%conv = trunc i64 %shr to i32
ret i32 %conv
}
Loading