Skip to content

Commit 656f1d8

Browse files
committed
Revert "[SLP] Extend reordering data of tree entry to support PHI nodes"
This reverts commit 87a2086 as it has problems with scalable vectors and use-list orders. Test to follow.
1 parent ad980b5 commit 656f1d8

File tree

2 files changed

+51
-115
lines changed

2 files changed

+51
-115
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

+46-110
Original file line number | Diff line number | Diff line change
@@ -3795,49 +3795,6 @@ BoUpSLP::findPartiallyOrderedLoads(const BoUpSLP::TreeEntry &TE) {
37953795
return None;
37963796
}
37973797

3798-
/// Check if two insertelement instructions are from the same buildvector.
3799-
static bool areTwoInsertFromSameBuildVector(
3800-
InsertElementInst *VU, InsertElementInst *V,
3801-
function_ref<Value *(InsertElementInst *)> GetBaseOperand) {
3802-
// Instructions must be from the same basic blocks.
3803-
if (VU->getParent() != V->getParent())
3804-
return false;
3805-
// Checks if 2 insertelements are from the same buildvector.
3806-
if (VU->getType() != V->getType())
3807-
return false;
3808-
// Multiple used inserts are separate nodes.
3809-
if (!VU->hasOneUse() && !V->hasOneUse())
3810-
return false;
3811-
auto *IE1 = VU;
3812-
auto *IE2 = V;
3813-
unsigned Idx1 = *getInsertIndex(IE1);
3814-
unsigned Idx2 = *getInsertIndex(IE2);
3815-
// Go through the vector operand of insertelement instructions trying to find
3816-
// either VU as the original vector for IE2 or V as the original vector for
3817-
// IE1.
3818-
do {
3819-
if (IE2 == VU)
3820-
return VU->hasOneUse();
3821-
if (IE1 == V)
3822-
return V->hasOneUse();
3823-
if (IE1) {
3824-
if ((IE1 != VU && !IE1->hasOneUse()) ||
3825-
getInsertIndex(IE1).value_or(Idx2) == Idx2)
3826-
IE1 = nullptr;
3827-
else
3828-
IE1 = dyn_cast_or_null<InsertElementInst>(GetBaseOperand(IE1));
3829-
}
3830-
if (IE2) {
3831-
if ((IE2 != V && !IE2->hasOneUse()) ||
3832-
getInsertIndex(IE2).value_or(Idx1) == Idx1)
3833-
IE2 = nullptr;
3834-
else
3835-
IE2 = dyn_cast_or_null<InsertElementInst>(GetBaseOperand(IE2));
3836-
}
3837-
} while (IE1 || IE2);
3838-
return false;
3839-
}
3840-
38413798
Optional<BoUpSLP::OrdersType> BoUpSLP::getReorderingData(const TreeEntry &TE,
38423799
bool TopToBottom) {
38433800
// No need to reorder if need to shuffle reuses, still need to shuffle the
@@ -3901,58 +3858,6 @@ Optional<BoUpSLP::OrdersType> BoUpSLP::getReorderingData(const TreeEntry &TE,
39013858
(TopToBottom && isa<StoreInst, InsertElementInst>(TE.getMainOp()))) &&
39023859
!TE.isAltShuffle())
39033860
return TE.ReorderIndices;
3904-
if (TE.State == TreeEntry::Vectorize && TE.getOpcode() == Instruction::PHI) {
3905-
auto PHICompare = [](llvm::Value *V1, llvm::Value *V2) {
3906-
if (V1->user_empty() || V2->user_empty())
3907-
return false;
3908-
auto *FirstUserOfPhi1 = cast<Instruction>(*V1->user_begin());
3909-
auto *FirstUserOfPhi2 = cast<Instruction>(*V2->user_begin());
3910-
if (auto *IE1 = dyn_cast<InsertElementInst>(FirstUserOfPhi1))
3911-
if (auto *IE2 = dyn_cast<InsertElementInst>(FirstUserOfPhi2)) {
3912-
if (!areTwoInsertFromSameBuildVector(
3913-
IE1, IE2,
3914-
[](InsertElementInst *II) { return II->getOperand(0); }))
3915-
return false;
3916-
Optional<unsigned> Idx1 = getInsertIndex(IE1);
3917-
Optional<unsigned> Idx2 = getInsertIndex(IE2);
3918-
if (Idx1 == None || Idx2 == None)
3919-
return false;
3920-
return *Idx1 < *Idx2;
3921-
}
3922-
if (auto *EE1 = dyn_cast<ExtractElementInst>(FirstUserOfPhi1))
3923-
if (auto *EE2 = dyn_cast<ExtractElementInst>(FirstUserOfPhi2)) {
3924-
if (EE1->getOperand(0) != EE2->getOperand(0))
3925-
return false;
3926-
Optional<unsigned> Idx1 = getExtractIndex(EE1);
3927-
Optional<unsigned> Idx2 = getExtractIndex(EE2);
3928-
if (Idx1 == None || Idx2 == None)
3929-
return false;
3930-
return *Idx1 < *Idx2;
3931-
}
3932-
return false;
3933-
};
3934-
auto IsIdentityOrder = [](const OrdersType &Order) {
3935-
for (unsigned Idx : seq<unsigned>(0, Order.size()))
3936-
if (Idx != Order[Idx])
3937-
return false;
3938-
return true;
3939-
};
3940-
if (!TE.ReorderIndices.empty())
3941-
return TE.ReorderIndices;
3942-
DenseMap<Value *, unsigned> PhiToId;
3943-
SmallVector<Value *, 4> Phis;
3944-
OrdersType ResOrder(TE.Scalars.size());
3945-
for (unsigned Id = 0, Sz = TE.Scalars.size(); Id < Sz; ++Id) {
3946-
PhiToId[TE.Scalars[Id]] = Id;
3947-
Phis.push_back(TE.Scalars[Id]);
3948-
}
3949-
llvm::stable_sort(Phis, PHICompare);
3950-
for (unsigned Id = 0, Sz = Phis.size(); Id < Sz; ++Id)
3951-
ResOrder[Id] = PhiToId[Phis[Id]];
3952-
if (IsIdentityOrder(ResOrder))
3953-
return {};
3954-
return ResOrder;
3955-
}
39563861
if (TE.State == TreeEntry::NeedToGather) {
39573862
// TODO: add analysis of other gather nodes with extractelement
39583863
// instructions and other values/instructions, not only undefs.
@@ -4030,9 +3935,6 @@ void BoUpSLP::reorderTopToBottom() {
40303935
// their ordering.
40313936
DenseMap<const TreeEntry *, OrdersType> GathersToOrders;
40323937

4033-
// Phi nodes can have preferred ordering based on their result users
4034-
DenseMap<const TreeEntry *, OrdersType> PhisToOrders;
4035-
40363938
// AltShuffles can also have a preferred ordering that leads to fewer
40373939
// instructions, e.g., the addsub instruction in x86.
40383940
DenseMap<const TreeEntry *, OrdersType> AltShufflesToOrders;
@@ -4047,7 +3949,7 @@ void BoUpSLP::reorderTopToBottom() {
40473949
// extracts.
40483950
for_each(VectorizableTree, [this, &TTIRef, &VFToOrderedEntries,
40493951
&GathersToOrders, &ExternalUserReorderMap,
4050-
&AltShufflesToOrders, &PhisToOrders](
3952+
&AltShufflesToOrders](
40513953
const std::unique_ptr<TreeEntry> &TE) {
40523954
// Look for external users that will probably be vectorized.
40533955
SmallVector<OrdersType, 1> ExternalUserReorderIndices =
@@ -4104,9 +4006,6 @@ void BoUpSLP::reorderTopToBottom() {
41044006
VFToOrderedEntries[TE->getVectorFactor()].insert(TE.get());
41054007
if (TE->State != TreeEntry::Vectorize || !TE->ReuseShuffleIndices.empty())
41064008
GathersToOrders.try_emplace(TE.get(), *CurrentOrder);
4107-
if (TE->State == TreeEntry::Vectorize &&
4108-
TE->getOpcode() == Instruction::PHI)
4109-
PhisToOrders.try_emplace(TE.get(), *CurrentOrder);
41104009
}
41114010
});
41124011

@@ -4132,8 +4031,8 @@ void BoUpSLP::reorderTopToBottom() {
41324031
if (!OpTE->ReuseShuffleIndices.empty() && !GathersToOrders.count(OpTE))
41334032
continue;
41344033
// Count number of orders uses.
4135-
const auto &Order = [OpTE, &GathersToOrders, &AltShufflesToOrders,
4136-
&PhisToOrders]() -> const OrdersType & {
4034+
const auto &Order = [OpTE, &GathersToOrders,
4035+
&AltShufflesToOrders]() -> const OrdersType & {
41374036
if (OpTE->State == TreeEntry::NeedToGather ||
41384037
!OpTE->ReuseShuffleIndices.empty()) {
41394038
auto It = GathersToOrders.find(OpTE);
@@ -4145,12 +4044,6 @@ void BoUpSLP::reorderTopToBottom() {
41454044
if (It != AltShufflesToOrders.end())
41464045
return It->second;
41474046
}
4148-
if (OpTE->State == TreeEntry::Vectorize &&
4149-
isa<PHINode>(OpTE->getMainOp())) {
4150-
auto It = PhisToOrders.find(OpTE);
4151-
if (It != PhisToOrders.end())
4152-
return It->second;
4153-
}
41544047
return OpTE->ReorderIndices;
41554048
}();
41564049
// First consider the order of the external scalar users.
@@ -7245,6 +7138,49 @@ InstructionCost BoUpSLP::getSpillCost() const {
72457138
return Cost;
72467139
}
72477140

7141+
/// Check if two insertelement instructions are from the same buildvector.
7142+
static bool areTwoInsertFromSameBuildVector(
7143+
InsertElementInst *VU, InsertElementInst *V,
7144+
function_ref<Value *(InsertElementInst *)> GetBaseOperand) {
7145+
// Instructions must be from the same basic blocks.
7146+
if (VU->getParent() != V->getParent())
7147+
return false;
7148+
// Checks if 2 insertelements are from the same buildvector.
7149+
if (VU->getType() != V->getType())
7150+
return false;
7151+
// Multiple used inserts are separate nodes.
7152+
if (!VU->hasOneUse() && !V->hasOneUse())
7153+
return false;
7154+
auto *IE1 = VU;
7155+
auto *IE2 = V;
7156+
unsigned Idx1 = *getInsertIndex(IE1);
7157+
unsigned Idx2 = *getInsertIndex(IE2);
7158+
// Go through the vector operand of insertelement instructions trying to find
7159+
// either VU as the original vector for IE2 or V as the original vector for
7160+
// IE1.
7161+
do {
7162+
if (IE2 == VU)
7163+
return VU->hasOneUse();
7164+
if (IE1 == V)
7165+
return V->hasOneUse();
7166+
if (IE1) {
7167+
if ((IE1 != VU && !IE1->hasOneUse()) ||
7168+
getInsertIndex(IE1).value_or(Idx2) == Idx2)
7169+
IE1 = nullptr;
7170+
else
7171+
IE1 = dyn_cast_or_null<InsertElementInst>(GetBaseOperand(IE1));
7172+
}
7173+
if (IE2) {
7174+
if ((IE2 != V && !IE2->hasOneUse()) ||
7175+
getInsertIndex(IE2).value_or(Idx1) == Idx1)
7176+
IE2 = nullptr;
7177+
else
7178+
IE2 = dyn_cast_or_null<InsertElementInst>(GetBaseOperand(IE2));
7179+
}
7180+
} while (IE1 || IE2);
7181+
return false;
7182+
}
7183+
72487184
/// Checks if the \p IE1 instructions is followed by \p IE2 instruction in the
72497185
/// buildvector sequence.
72507186
static bool isFirstInsertElement(const InsertElementInst *IE1,

llvm/test/Transforms/SLPVectorizer/AMDGPU/phi-result-use-order.ll

+5-5
Original file line number | Diff line number | Diff line change
@@ -63,8 +63,8 @@ define <4 x half> @phis_reverse(i1 %cmp1, <4 x half> %in1, <4 x half> %in2) {
6363
; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x half> [[IN1]], i64 1
6464
; CHECK-NEXT: [[A2:%.*]] = extractelement <4 x half> [[IN1]], i64 2
6565
; CHECK-NEXT: [[A3:%.*]] = extractelement <4 x half> [[IN1]], i64 3
66-
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x half> poison, half [[A0]], i32 0
67-
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x half> [[TMP0]], half [[A1]], i32 1
66+
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x half> poison, half [[A1]], i32 0
67+
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x half> [[TMP0]], half [[A0]], i32 1
6868
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x half> poison, half [[A2]], i32 0
6969
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x half> [[TMP2]], half [[A3]], i32 1
7070
; CHECK-NEXT: br i1 [[CMP:%.*]], label [[BB1:%.*]], label [[BB0:%.*]]
@@ -73,15 +73,15 @@ define <4 x half> @phis_reverse(i1 %cmp1, <4 x half> %in1, <4 x half> %in2) {
7373
; CHECK-NEXT: [[B1:%.*]] = extractelement <4 x half> [[IN2]], i64 1
7474
; CHECK-NEXT: [[B2:%.*]] = extractelement <4 x half> [[IN2]], i64 2
7575
; CHECK-NEXT: [[B3:%.*]] = extractelement <4 x half> [[IN2]], i64 3
76-
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x half> poison, half [[B0]], i32 0
77-
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x half> [[TMP4]], half [[B1]], i32 1
76+
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x half> poison, half [[B1]], i32 0
77+
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x half> [[TMP4]], half [[B0]], i32 1
7878
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x half> poison, half [[B2]], i32 0
7979
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x half> [[TMP6]], half [[B3]], i32 1
8080
; CHECK-NEXT: br label [[BB1:%.*]]
8181
; CHECK: bb1:
8282
; CHECK-NEXT: [[TMP8:%.*]] = phi <2 x half> [ [[TMP1]], %entry ], [ [[TMP5]], %bb0 ]
8383
; CHECK-NEXT: [[TMP9:%.*]] = phi <2 x half> [ [[TMP3]], %entry ], [ [[TMP7]], %bb0 ]
84-
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x half> [[TMP8]], <2 x half> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
84+
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x half> [[TMP8]], <2 x half> poison, <4 x i32> <i32 1, i32 0, i32 undef, i32 undef>
8585
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x half> [[TMP9]], <2 x half> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
8686
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x half> [[TMP10]], <4 x half> [[TMP11]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
8787
; CHECK-NEXT: ret <4 x half> [[TMP12]]

0 commit comments

Comments
 (0)