Skip to content

Commit cb5046d

Browse files
committed
[SLP] Do not ignore undefs when trying to replace with "poisonous" shuffles
Undefs need to be handled correctly when trying to replace them with potentially poisonous values in shuffles. Such elements should not be silently replaced by poison values; instead, a more complex analysis should be implemented to determine whether it is safe to do so. Fixes #113425
1 parent 0af6c30 commit cb5046d

File tree

8 files changed

+82
-64
lines changed

8 files changed

+82
-64
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 67 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -9076,14 +9076,14 @@ class BaseShuffleAnalysis {
90769076
continue;
90779077
ExtMask[Idx] = SV->getMaskValue(I);
90789078
}
9079-
bool IsOp1Undef =
9080-
isUndefVector(SV->getOperand(0),
9081-
buildUseMask(LocalVF, ExtMask, UseMask::FirstArg))
9082-
.all();
9083-
bool IsOp2Undef =
9084-
isUndefVector(SV->getOperand(1),
9085-
buildUseMask(LocalVF, ExtMask, UseMask::SecondArg))
9086-
.all();
9079+
bool IsOp1Undef = isUndefVector</*isPoisonOnly=*/true>(
9080+
SV->getOperand(0),
9081+
buildUseMask(LocalVF, ExtMask, UseMask::FirstArg))
9082+
.all();
9083+
bool IsOp2Undef = isUndefVector</*isPoisonOnly=*/true>(
9084+
SV->getOperand(1),
9085+
buildUseMask(LocalVF, ExtMask, UseMask::SecondArg))
9086+
.all();
90879087
if (!IsOp1Undef && !IsOp2Undef) {
90889088
// Update mask and mark undef elems.
90899089
for (int &I : Mask) {
@@ -13305,8 +13305,17 @@ Value *BoUpSLP::gather(ArrayRef<Value *> VL, Value *Root, Type *ScalarTy) {
1330513305
return Vec;
1330613306
};
1330713307
auto *VecTy = getWidenedType(ScalarTy, VL.size());
13308-
Value *Vec = Root ? Root : PoisonValue::get(VecTy);
13308+
Value *Vec = PoisonValue::get(VecTy);
1330913309
SmallVector<int> NonConsts;
13310+
SmallVector<int> Mask(VL.size());
13311+
std::iota(Mask.begin(), Mask.end(), 0);
13312+
Value *OriginalRoot = Root;
13313+
if (auto *SV = dyn_cast_or_null<ShuffleVectorInst>(Root);
13314+
SV && isa<PoisonValue>(SV->getOperand(1)) &&
13315+
SV->getOperand(0)->getType() == VecTy) {
13316+
Root = SV->getOperand(0);
13317+
Mask.assign(SV->getShuffleMask().begin(), SV->getShuffleMask().end());
13318+
}
1331013319
// Insert constant values at first.
1331113320
for (int I = 0, E = VL.size(); I < E; ++I) {
1331213321
if (PostponedIndices.contains(I))
@@ -13315,19 +13324,20 @@ Value *BoUpSLP::gather(ArrayRef<Value *> VL, Value *Root, Type *ScalarTy) {
1331513324
NonConsts.push_back(I);
1331613325
continue;
1331713326
}
13318-
if (Root) {
13319-
if (!isa<UndefValue>(VL[I])) {
13320-
NonConsts.push_back(I);
13321-
continue;
13322-
}
13323-
if (isa<PoisonValue>(VL[I]))
13324-
continue;
13325-
if (auto *SV = dyn_cast<ShuffleVectorInst>(Root)) {
13326-
if (SV->getMaskValue(I) == PoisonMaskElem)
13327-
continue;
13328-
}
13329-
}
13327+
if (isa<PoisonValue>(VL[I]))
13328+
continue;
1333013329
Vec = CreateInsertElement(Vec, VL[I], I, ScalarTy);
13330+
Mask[I] = I + E;
13331+
}
13332+
if (Root) {
13333+
if (isa<PoisonValue>(Vec)) {
13334+
Vec = OriginalRoot;
13335+
} else {
13336+
Vec = Builder.CreateShuffleVector(Root, Vec, Mask);
13337+
if (auto *OI = dyn_cast<Instruction>(OriginalRoot);
13338+
OI && OI->hasNUses(0))
13339+
eraseInstruction(OI);
13340+
}
1333113341
}
1333213342
// Insert non-constant values.
1333313343
for (int I : NonConsts)
@@ -14041,7 +14051,8 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Type *ScalarTy,
1404114051
if (!ReorderMask.empty())
1404214052
reorderScalars(GatheredScalars, ReorderMask);
1404314053
auto FindReusedSplat = [&](MutableArrayRef<int> Mask, unsigned InputVF,
14044-
unsigned I, unsigned SliceSize) {
14054+
unsigned I, unsigned SliceSize,
14055+
bool IsNotPoisonous) {
1404514056
if (!isSplat(E->Scalars) || none_of(E->Scalars, [](Value *V) {
1404614057
return isa<UndefValue>(V) && !isa<PoisonValue>(V);
1404714058
}))
@@ -14050,14 +14061,29 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Type *ScalarTy,
1405014061
unsigned EdgeIdx = E->UserTreeIndices.back().EdgeIdx;
1405114062
if (UserTE->getNumOperands() != 2)
1405214063
return false;
14053-
auto *It =
14054-
find_if(VectorizableTree, [=](const std::unique_ptr<TreeEntry> &TE) {
14055-
return find_if(TE->UserTreeIndices, [=](const EdgeInfo &EI) {
14056-
return EI.UserTE == UserTE && EI.EdgeIdx != EdgeIdx;
14057-
}) != TE->UserTreeIndices.end();
14058-
});
14059-
if (It == VectorizableTree.end())
14060-
return false;
14064+
if (!IsNotPoisonous) {
14065+
auto *It =
14066+
find_if(VectorizableTree, [=](const std::unique_ptr<TreeEntry> &TE) {
14067+
return find_if(TE->UserTreeIndices, [=](const EdgeInfo &EI) {
14068+
return EI.UserTE == UserTE && EI.EdgeIdx != EdgeIdx;
14069+
}) != TE->UserTreeIndices.end();
14070+
});
14071+
if (It == VectorizableTree.end())
14072+
return false;
14073+
SmallVector<Value *> GS((*It)->Scalars.begin(), (*It)->Scalars.end());
14074+
if (!(*It)->ReorderIndices.empty()) {
14075+
inversePermutation((*It)->ReorderIndices, ReorderMask);
14076+
reorderScalars(GS, ReorderMask);
14077+
}
14078+
if (!all_of(zip(GatheredScalars, GS), [&](const auto &P) {
14079+
Value *V0 = std::get<0>(P);
14080+
Value *V1 = std::get<1>(P);
14081+
return !isa<UndefValue>(V0) || isa<PoisonValue>(V0) ||
14082+
(isa<UndefValue>(V0) && !isa<PoisonValue>(V0) &&
14083+
is_contained(E->Scalars, V1));
14084+
}))
14085+
return false;
14086+
}
1406114087
int Idx;
1406214088
if ((Mask.size() < InputVF &&
1406314089
ShuffleVectorInst::isExtractSubvectorMask(Mask, InputVF, Idx) &&
@@ -14330,12 +14356,13 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Type *ScalarTy,
1433014356
isGuaranteedNotToBePoison(Vec1) && isGuaranteedNotToBePoison(Vec2);
1433114357
ShuffleBuilder.add(Vec1, Vec2, ExtractMask);
1433214358
} else if (Vec1) {
14359+
bool IsNotPoisonedVec = isGuaranteedNotToBePoison(Vec1);
1433314360
IsUsedInExpr &= FindReusedSplat(
1433414361
ExtractMask,
1433514362
cast<FixedVectorType>(Vec1->getType())->getNumElements(), 0,
14336-
ExtractMask.size());
14363+
ExtractMask.size(), IsNotPoisonedVec);
1433714364
ShuffleBuilder.add(Vec1, ExtractMask, /*ForExtracts=*/true);
14338-
IsNonPoisoned &= isGuaranteedNotToBePoison(Vec1);
14365+
IsNonPoisoned &= IsNotPoisonedVec;
1433914366
} else {
1434014367
IsUsedInExpr = false;
1434114368
ShuffleBuilder.add(PoisonValue::get(VecTy), ExtractMask,
@@ -14358,12 +14385,15 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Type *ScalarTy,
1435814385
VecMask.assign(VecMask.size(), PoisonMaskElem);
1435914386
copy(SubMask, std::next(VecMask.begin(), I * SliceSize));
1436014387
if (TEs.size() == 1) {
14361-
IsUsedInExpr &= FindReusedSplat(
14362-
VecMask, TEs.front()->getVectorFactor(), I, SliceSize);
14388+
bool IsNotPoisonedVec =
14389+
TEs.front()->VectorizedValue
14390+
? isGuaranteedNotToBePoison(TEs.front()->VectorizedValue)
14391+
: true;
14392+
IsUsedInExpr &=
14393+
FindReusedSplat(VecMask, TEs.front()->getVectorFactor(), I,
14394+
SliceSize, IsNotPoisonedVec);
1436314395
ShuffleBuilder.add(*TEs.front(), VecMask);
14364-
if (TEs.front()->VectorizedValue)
14365-
IsNonPoisoned &=
14366-
isGuaranteedNotToBePoison(TEs.front()->VectorizedValue);
14396+
IsNonPoisoned &= IsNotPoisonedVec;
1436714397
} else {
1436814398
IsUsedInExpr = false;
1436914399
ShuffleBuilder.add(*TEs.front(), *TEs.back(), VecMask);

llvm/test/Transforms/SLPVectorizer/RISCV/revec-getGatherCost.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,7 @@ define void @test1(<4 x float> %load6, <4 x float> %load7, <4 x float> %load8, <
1919
; CHECK-NEXT: [[VEXT309_I:%.*]] = shufflevector <4 x float> [[LOAD7]], <4 x float> [[LOAD8:%.*]], <4 x i32> <i32 2, i32 3, i32 4, i32 5>
2020
; CHECK-NEXT: [[TMP0:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> poison, <4 x float> [[VEXT165_I]], i64 0)
2121
; CHECK-NEXT: [[TMP1:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> [[TMP0]], <4 x float> [[VEXT309_I]], i64 4)
22-
; CHECK-NEXT: [[TMP2:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> poison, <4 x float> poison, i64 4)
23-
; CHECK-NEXT: [[TMP3:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> [[TMP2]], <4 x float> [[LOAD17:%.*]], i64 0)
22+
; CHECK-NEXT: [[TMP3:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> poison, <4 x float> [[LOAD17:%.*]], i64 0)
2423
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
2524
; CHECK-NEXT: [[TMP5:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> poison, <4 x float> [[FMULADD7:%.*]], i64 0)
2625
; CHECK-NEXT: [[TMP6:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> [[TMP5]], <4 x float> [[FMULADD16:%.*]], i64 4)
@@ -58,8 +57,7 @@ define void @test2(<8 x float> %load6, <8 x float> %load7, <8 x float> %load8, <
5857
; CHECK-NEXT: [[VEXT309_I:%.*]] = shufflevector <8 x float> [[LOAD7]], <8 x float> [[LOAD8:%.*]], <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
5958
; CHECK-NEXT: [[TMP0:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> poison, <8 x float> [[VEXT165_I]], i64 0)
6059
; CHECK-NEXT: [[TMP1:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> [[TMP0]], <8 x float> [[VEXT309_I]], i64 8)
61-
; CHECK-NEXT: [[TMP2:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> poison, <8 x float> poison, i64 8)
62-
; CHECK-NEXT: [[TMP3:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> [[TMP2]], <8 x float> [[LOAD17:%.*]], i64 0)
60+
; CHECK-NEXT: [[TMP3:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> poison, <8 x float> [[LOAD17:%.*]], i64 0)
6361
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x float> [[TMP3]], <16 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
6462
; CHECK-NEXT: [[TMP5:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> poison, <8 x float> [[FMULADD7:%.*]], i64 0)
6563
; CHECK-NEXT: [[TMP6:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> [[TMP5]], <8 x float> [[FMULADD16:%.*]], i64 8)

llvm/test/Transforms/SLPVectorizer/RISCV/revec.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,7 @@ define i32 @test() {
77
; CHECK-NEXT: br label [[IF_END_I87:%.*]]
88
; CHECK: if.end.i87:
99
; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> getelementptr (i32, <4 x ptr> <ptr inttoptr (i64 64036 to ptr), ptr inttoptr (i64 64036 to ptr), ptr inttoptr (i64 64064 to ptr), ptr inttoptr (i64 64064 to ptr)>, <4 x i64> <i64 0, i64 1, i64 0, i64 1>), i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison)
10-
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> poison, <2 x i32> poison, i64 0)
11-
; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> [[TMP1]], <2 x i32> zeroinitializer, i64 2)
10+
; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> poison, <2 x i32> zeroinitializer, i64 2)
1211
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
1312
; CHECK-NEXT: switch i32 0, label [[SW_BB509_I:%.*]] [
1413
; CHECK-NEXT: i32 1, label [[SW_BB509_I]]

llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,8 @@ define i32 @bar() local_unnamed_addr {
1616
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <16 x i32> [[TMP2]], i32 [[SUB86_1]], i32 7
1717
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <16 x i32> [[TMP3]], i32 [[ADD78_2]], i32 9
1818
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <16 x i32> [[TMP4]], <16 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 9, i32 11, i32 12, i32 13, i32 14, i32 15>
19-
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <16 x i32> [[TMP4]], <16 x i32> poison, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 7, i32 6, i32 5, i32 4, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
20-
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <16 x i32> [[TMP6]], i32 [[SUB102_3]], i32 12
19+
; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <16 x i32> [[TMP4]], <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 poison, i32 poison, i32 poison, i32 poison, i32 undef, i32 undef, i32 undef, i32 undef, i32 poison, i32 undef, i32 undef, i32 poison>, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 7, i32 6, i32 5, i32 4, i32 24, i32 25, i32 26, i32 27, i32 poison, i32 29, i32 30, i32 poison>
20+
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <16 x i32> [[TMP18]], i32 [[SUB102_3]], i32 12
2121
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <16 x i32> [[TMP7]], <16 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 12>
2222
; CHECK-NEXT: [[TMP9:%.*]] = add nsw <16 x i32> [[TMP5]], [[TMP8]]
2323
; CHECK-NEXT: [[TMP10:%.*]] = sub nsw <16 x i32> [[TMP5]], [[TMP8]]

llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ define void @fextr(ptr %ptr) {
88
; CHECK-NEXT: [[LD:%.*]] = load <8 x i16>, ptr undef, align 16
99
; CHECK-NEXT: br label [[T:%.*]]
1010
; CHECK: t:
11-
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <8 x i16> [[LD]], <8 x i16> poison, <8 x i32> zeroinitializer
11+
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <8 x i16> [[LD]], <8 x i16> <i16 poison, i16 undef, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison>, <8 x i32> <i32 0, i32 9, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1212
; CHECK-NEXT: [[TMP1:%.*]] = add <8 x i16> [[LD]], [[TMP0]]
1313
; CHECK-NEXT: store <8 x i16> [[TMP1]], ptr [[PTR:%.*]], align 2
1414
; CHECK-NEXT: ret void

llvm/test/Transforms/SLPVectorizer/insertelement-across-zero.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,11 @@ define void @test(i8 %0, i8 %1) {
88
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr getelementptr (i8, ptr null, i32 8), align 1
99
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i8> [[TMP2]], <8 x i8> poison, <16 x i32> <i32 7, i32 6, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 poison, i32 poison>
1010
; CHECK-NEXT: [[LUPTO132421:%.*]] = shufflevector <16 x i8> zeroinitializer, <16 x i8> [[TMP3]], <16 x i32> <i32 16, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 14, i32 15>
11-
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <16 x i8> [[LUPTO132421]], i8 [[TMP0]], i32 0
11+
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <16 x i8> [[LUPTO132421]], <16 x i8> <i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 0, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 23, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
12+
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <16 x i8> [[TMP6]], i8 [[TMP0]], i32 0
1213
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <16 x i8> [[TMP4]], i8 [[TMP1]], i32 1
13-
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <16 x i8> [[TMP5]], i8 0, i32 7
1414
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x i8> [[TMP2]], <8 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
15-
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <16 x i8> [[TMP6]], <16 x i8> [[TMP7]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 17, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
15+
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <16 x i8> [[TMP5]], <16 x i8> [[TMP7]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 17, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1616
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <16 x i8> [[TMP8]], <16 x i8> poison, <16 x i32> <i32 0, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 7, i32 0, i32 9, i32 0, i32 0, i32 0, i32 0, i32 0, i32 15>
1717
; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <16 x i8> zeroinitializer, [[TMP9]]
1818
; CHECK-NEXT: ret void

0 commit comments

Comments
 (0)