Skip to content

Commit f6e01b9

Browse files
committed
[SLP] Do not trunc bv nodes if the user is vectorized and requires a wider type.
If at least one user of a gathered trunc'ed instruction is vectorized and requires a wider type than the trunc node, such gathers/buildvectors should not be demoted to a narrower bitwidth.
1 parent 7e37d02 commit f6e01b9

File tree

2 files changed

+18
-3
lines changed

2 files changed

+18
-3
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15529,7 +15529,23 @@ void BoUpSLP::computeMinimumValueSizes() {
1552915529
// Check if the root is trunc and the next node is gather/buildvector, then
1553015530
// keep trunc in scalars, which is free in most cases.
1553115531
if (E.isGather() && IsTruncRoot && E.UserTreeIndices.size() == 1 &&
15532-
E.Idx > (IsStoreOrInsertElt ? 2 : 1)) {
15532+
E.Idx > (IsStoreOrInsertElt ? 2 : 1) &&
15533+
all_of(E.Scalars, [&](Value *V) {
15534+
return V->hasOneUse() || isa<Constant>(V) ||
15535+
(!V->hasNUsesOrMore(UsesLimit) &&
15536+
none_of(V->users(), [&](User *U) {
15537+
const TreeEntry *TE = getTreeEntry(U);
15538+
const TreeEntry *UserTE = E.UserTreeIndices.back().UserTE;
15539+
if (TE == UserTE || !TE)
15540+
return false;
15541+
unsigned UserTESz = DL->getTypeSizeInBits(
15542+
UserTE->Scalars.front()->getType());
15543+
auto It = MinBWs.find(TE);
15544+
if (It != MinBWs.end() && It->second.first > UserTESz)
15545+
return true;
15546+
return DL->getTypeSizeInBits(U->getType()) > UserTESz;
15547+
}));
15548+
})) {
1553315549
ToDemote.push_back(E.Idx);
1553415550
const TreeEntry *UserTE = E.UserTreeIndices.back().UserTE;
1553515551
auto It = MinBWs.find(UserTE);

llvm/test/Transforms/SLPVectorizer/RISCV/trunc-bv-multi-uses.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,7 @@ define i32 @test(i64 %v1, i64 %v2) {
88
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i64> poison, i64 [[V1]], i32 0
99
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i64> [[TMP0]], i64 [[V2]], i32 1
1010
; CHECK-NEXT: [[TMP2:%.*]] = trunc <2 x i64> [[TMP1]] to <2 x i32>
11-
; CHECK-NEXT: [[TMP3:%.*]] = sext <2 x i32> [[TMP2]] to <2 x i64>
12-
; CHECK-NEXT: [[TMP4:%.*]] = lshr <2 x i64> [[TMP3]], <i64 32, i64 32>
11+
; CHECK-NEXT: [[TMP4:%.*]] = lshr <2 x i64> [[TMP1]], <i64 32, i64 32>
1312
; CHECK-NEXT: [[TMP5:%.*]] = trunc <2 x i64> [[TMP4]] to <2 x i32>
1413
; CHECK-NEXT: [[TMP6:%.*]] = add <2 x i32> [[TMP2]], [[TMP5]]
1514
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[TMP6]], i32 0

0 commit comments

Comments
 (0)