Skip to content

Commit 0a913b5

Browse files
authored
[X86] Fold some (truncate (srl (add X, C1), C2)) patterns to (add (truncate (srl X, C2)), C1') (#126448)
Addresses the poor codegen identified in #123239 and a few extra cases. This transformation is correct for `eq` (https://alive2.llvm.org/ce/z/qZhwtT), `ne` (https://alive2.llvm.org/ce/z/6gsmNz), `ult` (https://alive2.llvm.org/ce/z/xip_td) and `ugt` (https://alive2.llvm.org/ce/z/39XQkX). Fixes #123239
1 parent db98767 commit 0a913b5

File tree

2 files changed

+163
-0
lines changed

2 files changed

+163
-0
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53697,6 +53697,41 @@ static SDValue combineLRINT_LLRINT(SDNode *N, SelectionDAG &DAG,
5369753697
DAG.getUNDEF(SrcVT)));
5369853698
}
5369953699

53700+
// Attempt to fold some (truncate (srl (add X, C1), C2)) patterns to
53701+
// (add (truncate (srl X, C2)), C1'). C1' will be smaller than C1 so we are able
53702+
// to avoid generating code with MOVABS and large constants in certain cases.
53703+
static SDValue combinei64TruncSrlAdd(SDValue N, EVT VT, SelectionDAG &DAG,
53704+
const SDLoc &DL) {
53705+
using namespace llvm::SDPatternMatch;
53706+
53707+
SDValue AddLhs;
53708+
APInt AddConst, SrlConst;
53709+
if (VT != MVT::i32 ||
53710+
!sd_match(N, m_AllOf(m_SpecificVT(MVT::i64),
53711+
m_Srl(m_OneUse(m_Add(m_Value(AddLhs),
53712+
m_ConstInt(AddConst))),
53713+
m_ConstInt(SrlConst)))))
53714+
return SDValue();
53715+
53716+
if (SrlConst.ule(31) || AddConst.lshr(SrlConst).shl(SrlConst) != AddConst)
53717+
return SDValue();
53718+
53719+
SDValue AddLHSSrl =
53720+
DAG.getNode(ISD::SRL, DL, MVT::i64, AddLhs, N.getOperand(1));
53721+
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, AddLHSSrl);
53722+
53723+
APInt NewAddConstVal =
53724+
(~((~AddConst).lshr(SrlConst))).trunc(VT.getSizeInBits());
53725+
SDValue NewAddConst = DAG.getConstant(NewAddConstVal, DL, VT);
53726+
SDValue NewAddNode = DAG.getNode(ISD::ADD, DL, VT, Trunc, NewAddConst);
53727+
53728+
APInt CleanupSizeConstVal = (SrlConst - 32).zextOrTrunc(VT.getSizeInBits());
53729+
EVT CleanUpVT =
53730+
EVT::getIntegerVT(*DAG.getContext(), CleanupSizeConstVal.getZExtValue());
53731+
SDValue CleanUp = DAG.getAnyExtOrTrunc(NewAddNode, DL, CleanUpVT);
53732+
return DAG.getAnyExtOrTrunc(CleanUp, DL, VT);
53733+
}
53734+
5370053735
/// Attempt to pre-truncate inputs to arithmetic ops if it will simplify
5370153736
/// the codegen.
5370253737
/// e.g. TRUNC( BINOP( X, Y ) ) --> BINOP( TRUNC( X ), TRUNC( Y ) )
@@ -53742,6 +53777,9 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
5374253777
if (!Src.hasOneUse())
5374353778
return SDValue();
5374453779

53780+
if (SDValue R = combinei64TruncSrlAdd(Src, VT, DAG, DL))
53781+
return R;
53782+
5374553783
// Only support vector truncation for now.
5374653784
// TODO: i64 scalar math would benefit as well.
5374753785
if (!VT.isVector())
Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefixes=X64
3+
4+
; Test for https://github.com/llvm/llvm-project/issues/123239
5+
6+
; Explicit form of the pattern: add 0xE000000000000 (14 << 48), lshr by 48,
; trunc to i32, then an unsigned less-than-3 compare on the truncated value.
define i1 @test_ult_trunc_add(i64 %x) {
7+
; X64-LABEL: test_ult_trunc_add:
8+
; X64: # %bb.0:
9+
; X64-NEXT: shrq $48, %rdi
10+
; X64-NEXT: addl $-65522, %edi # imm = 0xFFFF000E
11+
; X64-NEXT: cmpl $3, %edi
12+
; X64-NEXT: setb %al
13+
; X64-NEXT: retq
14+
%add = add i64 %x, 3940649673949184
15+
%shr = lshr i64 %add, 48
16+
%conv = trunc i64 %shr to i32
17+
%res = icmp ult i32 %conv, 3
18+
ret i1 %res
19+
}
20+
21+
; Same add constant, but the IR compares the full i64 sum ult 0x3000000000000
; (3 << 48) with no explicit lshr/trunc. The expected assembly is identical to
; test_ult_trunc_add.
define i1 @test_ult_add(i64 %x) {
22+
; X64-LABEL: test_ult_add:
23+
; X64: # %bb.0:
24+
; X64-NEXT: shrq $48, %rdi
25+
; X64-NEXT: addl $-65522, %edi # imm = 0xFFFF000E
26+
; X64-NEXT: cmpl $3, %edi
27+
; X64-NEXT: setb %al
28+
; X64-NEXT: retq
29+
%add = add i64 3940649673949184, %x
30+
%cmp = icmp ult i64 %add, 844424930131968
31+
ret i1 %cmp
32+
}
33+
34+
; Explicit add / lshr 48 / trunc pattern followed by an unsigned
; greater-than-3 compare on the truncated i32 value.
define i1 @test_ugt_trunc_add(i64 %x) {
35+
; X64-LABEL: test_ugt_trunc_add:
36+
; X64: # %bb.0:
37+
; X64-NEXT: shrq $48, %rdi
38+
; X64-NEXT: addl $-65522, %edi # imm = 0xFFFF000E
39+
; X64-NEXT: cmpl $4, %edi
40+
; X64-NEXT: setae %al
41+
; X64-NEXT: retq
42+
%add = add i64 %x, 3940649673949184
43+
%shr = lshr i64 %add, 48
44+
%conv = trunc i64 %shr to i32
45+
%res = icmp ugt i32 %conv, 3
46+
ret i1 %res
47+
}
48+
49+
; i64 form of the ugt test: the full sum is compared ugt 0x3000000000000 with
; no explicit lshr/trunc. The expected assembly still materialises both 64-bit
; constants with movabsq.
define i1 @test_ugt_add(i64 %x) {
50+
; X64-LABEL: test_ugt_add:
51+
; X64: # %bb.0:
52+
; X64-NEXT: movabsq $3940649673949184, %rax # imm = 0xE000000000000
53+
; X64-NEXT: addq %rdi, %rax
54+
; X64-NEXT: movabsq $844424930131968, %rcx # imm = 0x3000000000000
55+
; X64-NEXT: cmpq %rcx, %rax
56+
; X64-NEXT: seta %al
57+
; X64-NEXT: retq
58+
%add = add i64 3940649673949184, %x
59+
%cmp = icmp ugt i64 %add, 844424930131968
60+
ret i1 %cmp
61+
}
62+
63+
; Explicit add / lshr 48 / trunc pattern followed by an equality compare with
; 3. The expected assembly contains no add: the shifted input is compared
; directly against 0xFFF5.
define i1 @test_eq_trunc_add(i64 %x) {
64+
; X64-LABEL: test_eq_trunc_add:
65+
; X64: # %bb.0:
66+
; X64-NEXT: shrq $48, %rdi
67+
; X64-NEXT: cmpl $65525, %edi # imm = 0xFFF5
68+
; X64-NEXT: sete %al
69+
; X64-NEXT: retq
70+
%add = add i64 %x, 3940649673949184
71+
%shr = lshr i64 %add, 48
72+
%conv = trunc i64 %shr to i32
73+
%res = icmp eq i32 %conv, 3
74+
ret i1 %res
75+
}
76+
77+
; i64 form of the eq test: the full sum is compared for equality with
; 0x3000000000000. The expected assembly compares %rdi against a single
; movabsq constant (0xFFF5000000000000).
define i1 @test_eq_add(i64 %x) {
78+
; X64-LABEL: test_eq_add:
79+
; X64: # %bb.0:
80+
; X64-NEXT: movabsq $-3096224743817216, %rax # imm = 0xFFF5000000000000
81+
; X64-NEXT: cmpq %rax, %rdi
82+
; X64-NEXT: sete %al
83+
; X64-NEXT: retq
84+
%add = add i64 3940649673949184, %x
85+
%cmp = icmp eq i64 %add, 844424930131968
86+
ret i1 %cmp
87+
}
88+
89+
; Explicit add / lshr 48 / trunc pattern followed by an inequality compare
; with 3. The expected assembly contains no add: the shifted input is compared
; directly against 0xFFF5.
define i1 @test_ne_trunc_add(i64 %x) {
90+
; X64-LABEL: test_ne_trunc_add:
91+
; X64: # %bb.0:
92+
; X64-NEXT: shrq $48, %rdi
93+
; X64-NEXT: cmpl $65525, %edi # imm = 0xFFF5
94+
; X64-NEXT: setne %al
95+
; X64-NEXT: retq
96+
%add = add i64 %x, 3940649673949184
97+
%shr = lshr i64 %add, 48
98+
%conv = trunc i64 %shr to i32
99+
%res = icmp ne i32 %conv, 3
100+
ret i1 %res
101+
}
102+
103+
; i64 form of the ne test: the full sum is compared for inequality with
; 0x3000000000000. The expected assembly compares %rdi against a single
; movabsq constant (0xFFF5000000000000).
define i1 @test_ne_add(i64 %x) {
104+
; X64-LABEL: test_ne_add:
105+
; X64: # %bb.0:
106+
; X64-NEXT: movabsq $-3096224743817216, %rax # imm = 0xFFF5000000000000
107+
; X64-NEXT: cmpq %rax, %rdi
108+
; X64-NEXT: setne %al
109+
; X64-NEXT: retq
110+
%add = add i64 3940649673949184, %x
111+
%cmp = icmp ne i64 %add, 844424930131968
112+
ret i1 %cmp
113+
}
114+
115+
; The add / lshr 48 / trunc pattern with no compare: the truncated i32 value
; is returned directly, so every bit of the result is observable by the caller.
define i32 @test_trunc_add(i64 %x) {
116+
; X64-LABEL: test_trunc_add:
117+
; X64: # %bb.0:
118+
; X64-NEXT: shrq $48, %rdi
119+
; X64-NEXT: leal -65522(%rdi), %eax
120+
; X64-NEXT: retq
121+
%add = add i64 %x, 3940649673949184
122+
%shr = lshr i64 %add, 48
123+
%conv = trunc i64 %shr to i32
124+
ret i32 %conv
125+
}

0 commit comments

Comments
 (0)