Skip to content

Commit 65c9153

Browse files
committed
[X86] combineBitcastvxi1 - don't prematurely create PACKSS nodes.
Similar to Issue #63710: by truncating the v8i16 result with a PACKSS node before type legalization, we fail to make use of various folds that rely on TRUNCATE nodes. This required tweaks to LowerTruncateVecPackWithSignBits so that it recognises when the truncation source has been widened, and so that it more closely matches combineVectorSignBitsTruncation with respect to truncating with PACKSS/PACKUS on AVX512 targets. This is one of the last stages before we can finally get rid of combineVectorSignBitsTruncation.
1 parent c6c5aad commit 65c9153

File tree

2 files changed

+26
-7
lines changed

2 files changed

+26
-7
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22945,6 +22945,26 @@ static SDValue LowerTruncateVecPackWithSignBits(MVT DstVT, SDValue In,
2294522945
(DstSVT == MVT::i8 || DstSVT == MVT::i16 || DstSVT == MVT::i32)))
2294622946
return SDValue();
2294722947

22948+
// Don't lower with PACK nodes on AVX512 targets if we'd need more than one.
22949+
if (Subtarget.hasAVX512() &&
22950+
SrcSVT.getSizeInBits() > (DstSVT.getSizeInBits() * 2))
22951+
return SDValue();
22952+
22953+
// If the upper half of the source is undef, then attempt to split and
22954+
// only truncate the lower half.
22955+
if (DstVT.getSizeInBits() >= 128) {
22956+
SmallVector<SDValue> LowerOps;
22957+
if (isUpperSubvectorUndef(In, LowerOps, DAG)) {
22958+
MVT DstHalfVT = DstVT.getHalfNumVectorElementsVT();
22959+
MVT SrcHalfVT = SrcVT.getHalfNumVectorElementsVT();
22960+
SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, SrcHalfVT, LowerOps);
22961+
if (SDValue Res = LowerTruncateVecPackWithSignBits(DstHalfVT, Lo, DL,
22962+
Subtarget, DAG))
22963+
return widenSubVector(Res, false, Subtarget, DAG, DL,
22964+
DstVT.getSizeInBits());
22965+
}
22966+
}
22967+
2294822968
unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
2294922969
unsigned NumPackedSignBits = std::min<unsigned>(DstSVT.getSizeInBits(), 16);
2295022970
unsigned NumPackedZeroBits = Subtarget.hasSSE41() ? NumPackedSignBits : 8;
@@ -45059,9 +45079,10 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, EVT VT, SDValue Src,
4505945079
if (SExtVT == MVT::v16i8 || SExtVT == MVT::v32i8 || SExtVT == MVT::v64i8) {
4506045080
V = getPMOVMSKB(DL, V, DAG, Subtarget);
4506145081
} else {
45062-
if (SExtVT == MVT::v8i16)
45063-
V = DAG.getNode(X86ISD::PACKSS, DL, MVT::v16i8, V,
45064-
DAG.getUNDEF(MVT::v8i16));
45082+
if (SExtVT == MVT::v8i16) {
45083+
V = widenSubVector(V, false, Subtarget, DAG, DL, 256);
45084+
V = DAG.getNode(ISD::TRUNCATE, DL, MVT::v16i8, V);
45085+
}
4506545086
V = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, V);
4506645087
}
4506745088

llvm/test/CodeGen/X86/vector-reduce-and-bool.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1193,19 +1193,17 @@ define i8 @icmp0_v8i1(<8 x i8>) nounwind {
11931193
; SSE2: # %bb.0:
11941194
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
11951195
; SSE2-NEXT: psllw $15, %xmm0
1196-
; SSE2-NEXT: psraw $15, %xmm0
11971196
; SSE2-NEXT: pmovmskb %xmm0, %eax
1198-
; SSE2-NEXT: testl %eax, %eax
1197+
; SSE2-NEXT: testl $43690, %eax # imm = 0xAAAA
11991198
; SSE2-NEXT: sete %al
12001199
; SSE2-NEXT: ret{{[l|q]}}
12011200
;
12021201
; SSE41-LABEL: icmp0_v8i1:
12031202
; SSE41: # %bb.0:
12041203
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
12051204
; SSE41-NEXT: psllw $15, %xmm0
1206-
; SSE41-NEXT: psraw $15, %xmm0
12071205
; SSE41-NEXT: pmovmskb %xmm0, %eax
1208-
; SSE41-NEXT: testl %eax, %eax
1206+
; SSE41-NEXT: testl $43690, %eax # imm = 0xAAAA
12091207
; SSE41-NEXT: sete %al
12101208
; SSE41-NEXT: retq
12111209
;

0 commit comments

Comments
 (0)