Skip to content

Commit 1a9fbf6

Browse files
committed
[X86] combineLoad - reuse an existing VBROADCAST_LOAD constant for a smaller vector load of the same constant
Extends the existing code that performed something similar for SUBV_BROADCAST_LOAD, but this is just for cases where AVX2 targets load full-width 128-bit constant vectors but broadcast the equivalent 256-bit constant vector. Fixes the AVX2 case for Issue #70947.
1 parent 9e618e5 commit 1a9fbf6

File tree

2 files changed

+39
-19
lines changed

2 files changed

+39
-19
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 33 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -49785,25 +49785,47 @@ static SDValue combineLoad(SDNode *N, SelectionDAG &DAG,
4978549785
}
4978649786
}
4978749787

49788-
// If we also broadcast this as a subvector to a wider type, then just extract
49789-
// the lowest subvector.
49788+
// If we also broadcast this to a wider type, then just extract the lowest
49789+
// subvector.
4979049790
if (Ext == ISD::NON_EXTLOAD && Subtarget.hasAVX() && Ld->isSimple() &&
4979149791
(RegVT.is128BitVector() || RegVT.is256BitVector())) {
4979249792
SDValue Ptr = Ld->getBasePtr();
4979349793
SDValue Chain = Ld->getChain();
49794-
for (SDNode *User : Ptr->uses()) {
49795-
if (User != N && User->getOpcode() == X86ISD::SUBV_BROADCAST_LOAD &&
49796-
cast<MemIntrinsicSDNode>(User)->getBasePtr() == Ptr &&
49794+
for (SDNode *User : Chain->uses()) {
49795+
if (User != N &&
49796+
(User->getOpcode() == X86ISD::SUBV_BROADCAST_LOAD ||
49797+
User->getOpcode() == X86ISD::VBROADCAST_LOAD) &&
4979749798
cast<MemIntrinsicSDNode>(User)->getChain() == Chain &&
49798-
cast<MemIntrinsicSDNode>(User)->getMemoryVT().getSizeInBits() ==
49799-
MemVT.getSizeInBits() &&
4980049799
!User->hasAnyUseOfValue(1) &&
4980149800
User->getValueSizeInBits(0).getFixedValue() >
4980249801
RegVT.getFixedSizeInBits()) {
49803-
SDValue Extract = extractSubVector(SDValue(User, 0), 0, DAG, SDLoc(N),
49804-
RegVT.getSizeInBits());
49805-
Extract = DAG.getBitcast(RegVT, Extract);
49806-
return DCI.CombineTo(N, Extract, SDValue(User, 1));
49802+
if (User->getOpcode() == X86ISD::SUBV_BROADCAST_LOAD &&
49803+
cast<MemIntrinsicSDNode>(User)->getBasePtr() == Ptr &&
49804+
cast<MemIntrinsicSDNode>(User)->getMemoryVT().getSizeInBits() ==
49805+
MemVT.getSizeInBits()) {
49806+
SDValue Extract = extractSubVector(SDValue(User, 0), 0, DAG, SDLoc(N),
49807+
RegVT.getSizeInBits());
49808+
Extract = DAG.getBitcast(RegVT, Extract);
49809+
return DCI.CombineTo(N, Extract, SDValue(User, 1));
49810+
}
49811+
if (User->getOpcode() == X86ISD::VBROADCAST_LOAD &&
49812+
getTargetConstantFromBasePtr(Ptr)) {
49813+
// See if we are loading a constant that has also been broadcast.
49814+
APInt Undefs, UserUndefs;
49815+
SmallVector<APInt> Bits, UserBits;
49816+
if (getTargetConstantBitsFromNode(SDValue(N, 0), 8, Undefs, Bits) &&
49817+
getTargetConstantBitsFromNode(SDValue(User, 0), 8, UserUndefs,
49818+
UserBits)) {
49819+
UserUndefs = UserUndefs.trunc(Undefs.getBitWidth());
49820+
UserBits.truncate(Bits.size());
49821+
if (Bits == UserBits && UserUndefs.isSubsetOf(Undefs)) {
49822+
SDValue Extract = extractSubVector(
49823+
SDValue(User, 0), 0, DAG, SDLoc(N), RegVT.getSizeInBits());
49824+
Extract = DAG.getBitcast(RegVT, Extract);
49825+
return DCI.CombineTo(N, Extract, SDValue(User, 1));
49826+
}
49827+
}
49828+
}
4980749829
}
4980849830
}
4980949831
}

llvm/test/CodeGen/X86/vec_fabs.ll

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -332,10 +332,9 @@ define void @PR70947(ptr %src, ptr %dst) {
332332
; X86-AVX2: # %bb.0:
333333
; X86-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
334334
; X86-AVX2-NEXT: movl {{[0-9]+}}(%esp), %ecx
335-
; X86-AVX2-NEXT: vmovups 32(%ecx), %xmm0
336-
; X86-AVX2-NEXT: vbroadcastsd {{.*#+}} ymm1 = [NaN,NaN,NaN,NaN]
337-
; X86-AVX2-NEXT: vandps (%ecx), %ymm1, %ymm1
338-
; X86-AVX2-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
335+
; X86-AVX2-NEXT: vbroadcastsd {{.*#+}} ymm0 = [NaN,NaN,NaN,NaN]
336+
; X86-AVX2-NEXT: vandps (%ecx), %ymm0, %ymm1
337+
; X86-AVX2-NEXT: vandps 32(%ecx), %xmm0, %xmm0
339338
; X86-AVX2-NEXT: vmovups %ymm1, (%eax)
340339
; X86-AVX2-NEXT: vmovups %xmm0, 16(%eax)
341340
; X86-AVX2-NEXT: vzeroupper
@@ -378,10 +377,9 @@ define void @PR70947(ptr %src, ptr %dst) {
378377
;
379378
; X64-AVX2-LABEL: PR70947:
380379
; X64-AVX2: # %bb.0:
381-
; X64-AVX2-NEXT: vmovups 32(%rdi), %xmm0
382-
; X64-AVX2-NEXT: vbroadcastsd {{.*#+}} ymm1 = [NaN,NaN,NaN,NaN]
383-
; X64-AVX2-NEXT: vandps (%rdi), %ymm1, %ymm1
384-
; X64-AVX2-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
380+
; X64-AVX2-NEXT: vbroadcastsd {{.*#+}} ymm0 = [NaN,NaN,NaN,NaN]
381+
; X64-AVX2-NEXT: vandps (%rdi), %ymm0, %ymm1
382+
; X64-AVX2-NEXT: vandps 32(%rdi), %xmm0, %xmm0
385383
; X64-AVX2-NEXT: vmovups %ymm1, (%rsi)
386384
; X64-AVX2-NEXT: vmovups %xmm0, 16(%rsi)
387385
; X64-AVX2-NEXT: vzeroupper

0 commit comments

Comments
 (0)