Skip to content

Commit 799fd3d

Browse files
[SLP] Support vectorization of graphs consisting only of small strided loads.
If the vectorizable graph contains only a strided-loads node, the compiler should still try to vectorize it. Reviewers: RKSimon, preames, topperc. Reviewed by: RKSimon. Pull request: #101659.
1 parent 5689ccc commit 799fd3d

File tree

2 files changed

+4
-47
lines changed

2 files changed

+4
-47
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10071,6 +10071,7 @@ bool BoUpSLP::isFullyVectorizableTinyTree(bool ForReduction) const {
1007110071
// We only handle trees of heights 1 and 2.
1007210072
if (VectorizableTree.size() == 1 &&
1007310073
(VectorizableTree[0]->State == TreeEntry::Vectorize ||
10074+
VectorizableTree[0]->State == TreeEntry::StridedVectorize ||
1007410075
(ForReduction &&
1007510076
AreVectorizableGathers(VectorizableTree[0].get(),
1007610077
VectorizableTree[0]->Scalars.size()) &&

llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll

Lines changed: 3 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -146,53 +146,9 @@ entry:
146146
define i64 @red_strided_ld_16xi64(ptr %ptr) {
147147
; CHECK-LABEL: @red_strided_ld_16xi64(
148148
; CHECK-NEXT: entry:
149-
; CHECK-NEXT: [[LD0:%.*]] = load i64, ptr [[PTR:%.*]], align 8
150-
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i64, ptr [[PTR]], i64 2
151-
; CHECK-NEXT: [[LD1:%.*]] = load i64, ptr [[GEP]], align 8
152-
; CHECK-NEXT: [[ADD_1:%.*]] = add nuw nsw i64 [[LD0]], [[LD1]]
153-
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i64, ptr [[PTR]], i64 4
154-
; CHECK-NEXT: [[LD2:%.*]] = load i64, ptr [[GEP_1]], align 8
155-
; CHECK-NEXT: [[ADD_2:%.*]] = add nuw nsw i64 [[ADD_1]], [[LD2]]
156-
; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i64, ptr [[PTR]], i64 6
157-
; CHECK-NEXT: [[LD3:%.*]] = load i64, ptr [[GEP_2]], align 8
158-
; CHECK-NEXT: [[ADD_3:%.*]] = add nuw nsw i64 [[ADD_2]], [[LD3]]
159-
; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i64, ptr [[PTR]], i64 8
160-
; CHECK-NEXT: [[LD4:%.*]] = load i64, ptr [[GEP_3]], align 8
161-
; CHECK-NEXT: [[ADD_4:%.*]] = add nuw nsw i64 [[ADD_3]], [[LD4]]
162-
; CHECK-NEXT: [[GEP_4:%.*]] = getelementptr inbounds i64, ptr [[PTR]], i64 10
163-
; CHECK-NEXT: [[LD5:%.*]] = load i64, ptr [[GEP_4]], align 8
164-
; CHECK-NEXT: [[ADD_5:%.*]] = add nuw nsw i64 [[ADD_4]], [[LD5]]
165-
; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr inbounds i64, ptr [[PTR]], i64 12
166-
; CHECK-NEXT: [[LD6:%.*]] = load i64, ptr [[GEP_5]], align 8
167-
; CHECK-NEXT: [[ADD_6:%.*]] = add nuw nsw i64 [[ADD_5]], [[LD6]]
168-
; CHECK-NEXT: [[GEP_6:%.*]] = getelementptr inbounds i64, ptr [[PTR]], i64 14
169-
; CHECK-NEXT: [[LD7:%.*]] = load i64, ptr [[GEP_6]], align 8
170-
; CHECK-NEXT: [[ADD_7:%.*]] = add nuw nsw i64 [[ADD_6]], [[LD7]]
171-
; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i64, ptr [[PTR]], i64 16
172-
; CHECK-NEXT: [[LD8:%.*]] = load i64, ptr [[GEP_7]], align 8
173-
; CHECK-NEXT: [[ADD_8:%.*]] = add nuw nsw i64 [[ADD_7]], [[LD8]]
174-
; CHECK-NEXT: [[GEP_8:%.*]] = getelementptr inbounds i64, ptr [[PTR]], i64 18
175-
; CHECK-NEXT: [[LD9:%.*]] = load i64, ptr [[GEP_8]], align 8
176-
; CHECK-NEXT: [[ADD_9:%.*]] = add nuw nsw i64 [[ADD_8]], [[LD9]]
177-
; CHECK-NEXT: [[GEP_9:%.*]] = getelementptr inbounds i64, ptr [[PTR]], i64 20
178-
; CHECK-NEXT: [[LD10:%.*]] = load i64, ptr [[GEP_9]], align 8
179-
; CHECK-NEXT: [[ADD_10:%.*]] = add nuw nsw i64 [[ADD_9]], [[LD10]]
180-
; CHECK-NEXT: [[GEP_10:%.*]] = getelementptr inbounds i64, ptr [[PTR]], i64 22
181-
; CHECK-NEXT: [[LD11:%.*]] = load i64, ptr [[GEP_10]], align 8
182-
; CHECK-NEXT: [[ADD_11:%.*]] = add nuw nsw i64 [[ADD_10]], [[LD11]]
183-
; CHECK-NEXT: [[GEP_11:%.*]] = getelementptr inbounds i64, ptr [[PTR]], i64 24
184-
; CHECK-NEXT: [[LD12:%.*]] = load i64, ptr [[GEP_11]], align 8
185-
; CHECK-NEXT: [[ADD_12:%.*]] = add nuw nsw i64 [[ADD_11]], [[LD12]]
186-
; CHECK-NEXT: [[GEP_12:%.*]] = getelementptr inbounds i64, ptr [[PTR]], i64 26
187-
; CHECK-NEXT: [[LD13:%.*]] = load i64, ptr [[GEP_12]], align 8
188-
; CHECK-NEXT: [[ADD_13:%.*]] = add nuw nsw i64 [[ADD_12]], [[LD13]]
189-
; CHECK-NEXT: [[GEP_13:%.*]] = getelementptr inbounds i64, ptr [[PTR]], i64 28
190-
; CHECK-NEXT: [[LD14:%.*]] = load i64, ptr [[GEP_13]], align 8
191-
; CHECK-NEXT: [[ADD_14:%.*]] = add nuw nsw i64 [[ADD_13]], [[LD14]]
192-
; CHECK-NEXT: [[GEP_14:%.*]] = getelementptr inbounds i64, ptr [[PTR]], i64 30
193-
; CHECK-NEXT: [[LD15:%.*]] = load i64, ptr [[GEP_14]], align 8
194-
; CHECK-NEXT: [[ADD_15:%.*]] = add nuw nsw i64 [[ADD_14]], [[LD15]]
195-
; CHECK-NEXT: ret i64 [[ADD_15]]
149+
; CHECK-NEXT: [[TMP0:%.*]] = call <16 x i64> @llvm.experimental.vp.strided.load.v16i64.p0.i64(ptr align 8 [[PTR:%.*]], i64 16, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 16)
150+
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> [[TMP0]])
151+
; CHECK-NEXT: ret i64 [[TMP1]]
196152
;
197153
entry:
198154
%ld0 = load i64, ptr %ptr

0 commit comments

Comments
 (0)