Skip to content

Commit 2f8f17c

Browse files
committed
[SLP]Fix PR58956: fix insertpoint for reduced buildvector graphs.
If the graph consists only of a buildvector node without a main operation, the insert point must be inherited from the reduction instruction. Otherwise, the compiler crashes while trying to insert instructions at the entry block.
1 parent 8fbb6f8 commit 2f8f17c

File tree

2 files changed

+46
-10
lines changed

2 files changed

+46
-10
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

+14-10
Original file line numberDiff line numberDiff line change
@@ -970,7 +970,8 @@ class BoUpSLP {
970970
/// Vectorize the tree but with the list of externally used values \p
971971
/// ExternallyUsedValues. Values in this MapVector can be replaced by the
972972
/// generated extractvalue instructions.
973-
Value *vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues);
973+
Value *vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues,
974+
Instruction *ReductionRoot = nullptr);
974975

975976
/// \returns the cost incurred by unwanted spills and fills, caused by
976977
/// holding live values over call sites.
@@ -9002,8 +9003,8 @@ struct ShuffledInsertData {
90029003
};
90039004
} // namespace
90049005

9005-
Value *
9006-
BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
9006+
Value *BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues,
9007+
Instruction *ReductionRoot) {
90079008
// All blocks must be scheduled before any instructions are inserted.
90089009
for (auto &BSIter : BlocksSchedules) {
90099010
scheduleBlock(BSIter.second.get());
@@ -9020,7 +9021,8 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
90209021
EntryToLastInstruction.try_emplace(E.get(), LastInst);
90219022
}
90229023

9023-
Builder.SetInsertPoint(&F->getEntryBlock().front());
9024+
Builder.SetInsertPoint(ReductionRoot ? ReductionRoot
9025+
: &F->getEntryBlock().front());
90249026
auto *VectorRoot = vectorizeTree(VectorizableTree[0].get());
90259027

90269028
// If the vectorized tree can be rewritten in a smaller type, we truncate the
@@ -11944,16 +11946,18 @@ class HorizontalReduction {
1194411946

1194511947
Builder.setFastMathFlags(RdxFMF);
1194611948

11947-
// Vectorize a tree.
11948-
Value *VectorizedRoot = V.vectorizeTree(LocalExternallyUsedValues);
11949-
1195011949
// Emit a reduction. If the root is a select (min/max idiom), the insert
1195111950
// point is the compare condition of that select.
1195211951
Instruction *RdxRootInst = cast<Instruction>(ReductionRoot);
11952+
Instruction *InsertPt = RdxRootInst;
1195311953
if (IsCmpSelMinMax)
11954-
Builder.SetInsertPoint(GetCmpForMinMaxReduction(RdxRootInst));
11955-
else
11956-
Builder.SetInsertPoint(RdxRootInst);
11954+
InsertPt = GetCmpForMinMaxReduction(RdxRootInst);
11955+
11956+
// Vectorize a tree.
11957+
Value *VectorizedRoot =
11958+
V.vectorizeTree(LocalExternallyUsedValues, InsertPt);
11959+
11960+
Builder.SetInsertPoint(InsertPt);
1195711961

1195811962
// To prevent poison from leaking across what used to be sequential,
1195911963
// safe, scalar boolean logic operations, the reduction operand must be
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
; RUN: opt -S -passes=slp-vectorizer < %s -mtriple=arm64-apple-macosx | FileCheck %s
3+
4+
define i8 @test() {
5+
; CHECK-LABEL: @test(
6+
; CHECK-NEXT: entry:
7+
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
8+
; CHECK: for.body:
9+
; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ [[TMP1:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
10+
; CHECK-NEXT: [[CALL278:%.*]] = call i32 @fn(i32 [[SUM]])
11+
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i32> poison, i32 [[CALL278]], i32 0
12+
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> poison, <8 x i32> zeroinitializer
13+
; CHECK-NEXT: [[TMP1]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[SHUFFLE]])
14+
; CHECK-NEXT: br label [[FOR_BODY]]
15+
;
16+
entry:
17+
br label %for.body
18+
19+
for.body:
20+
%sum = phi i32 [ %add285.19, %for.body ], [ 0, %entry ]
21+
%call278 = call i32 @fn(i32 %sum)
22+
%add285.13 = add i32 %call278, %call278
23+
%add285.14 = add i32 %add285.13, %call278
24+
%add285.15 = add i32 %add285.14, %call278
25+
%add285.16 = add i32 %add285.15, %call278
26+
%add285.17 = add i32 %add285.16, %call278
27+
%add285.18 = add i32 %add285.17, %call278
28+
%add285.19 = add i32 %add285.18, %call278
29+
br label %for.body
30+
}
31+
32+
declare i32 @fn(i32)

0 commit comments

Comments
 (0)