Skip to content

Commit 3e3942d

Browse files
committed
[SLP][REVEC] Make ExtractAndExtendIfNeeded support vector instructions.
1 parent 5da11f1 commit 3e3942d

File tree

2 files changed

+32
-1
lines changed

2 files changed

+32
-1
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13964,7 +13964,17 @@ Value *BoUpSLP::vectorizeTree(
13964 13964
CloneGEP->takeName(GEP);
13965 13965
Ex = CloneGEP;
13966 13966
} else {
13967-
Ex = Builder.CreateExtractElement(Vec, Lane);
13967+
if (auto *VecTy = dyn_cast<FixedVectorType>(Scalar->getType())) {
13968+
unsigned VecTyNumElements = VecTy->getNumElements();
13969+
// When REVEC is enabled, we need to extract a vector.
13970+
// Note: The element size of Scalar may be different from the
13971+
// element size of Vec.
13972+
Ex = Builder.CreateExtractVector(
13973+
FixedVectorType::get(Vec->getType()->getScalarType(),
13974+
VecTyNumElements),
13975+
Vec, Builder.getInt64(ExternalUse.Lane * VecTyNumElements));
13976+
} else
13977+
Ex = Builder.CreateExtractElement(Vec, Lane);
13968 13978
}
13969 13979
// If necessary, sign-extend or zero-extend ScalarRoot
13970 13980
// to the larger type.

llvm/test/Transforms/SLPVectorizer/revec.ll

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,27 @@ entry:
60 60
}
61 61

62 62
define void @test3(ptr %in, ptr %out) {
63+
; CHECK-LABEL: @test3(
64+
; CHECK-NEXT: entry:
65+
; CHECK-NEXT: [[TMP0:%.*]] = load <8 x float>, ptr [[IN:%.*]], align 4
66+
; CHECK-NEXT: [[TMP1:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> poison, <8 x float> poison, i64 8)
67+
; CHECK-NEXT: [[TMP2:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> [[TMP1]], <8 x float> [[TMP0]], i64 0)
68+
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x float> [[TMP2]], <16 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
69+
; CHECK-NEXT: [[TMP4:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> poison, <8 x float> zeroinitializer, i64 0)
70+
; CHECK-NEXT: [[TMP5:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> [[TMP4]], <8 x float> zeroinitializer, i64 8)
71+
; CHECK-NEXT: [[TMP6:%.*]] = fmul <16 x float> [[TMP3]], [[TMP5]]
72+
; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> poison, <8 x float> poison, i64 0)
73+
; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> [[TMP7]], <8 x float> zeroinitializer, i64 8)
74+
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <16 x float> [[TMP2]], <16 x float> [[TMP8]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
75+
; CHECK-NEXT: [[TMP10:%.*]] = fadd <16 x float> [[TMP9]], [[TMP6]]
76+
; CHECK-NEXT: [[TMP11:%.*]] = fcmp ogt <16 x float> [[TMP10]], [[TMP5]]
77+
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i1, ptr [[OUT:%.*]], i64 8
78+
; CHECK-NEXT: [[TMP13:%.*]] = call <8 x i1> @llvm.vector.extract.v8i1.v16i1(<16 x i1> [[TMP11]], i64 8)
79+
; CHECK-NEXT: store <8 x i1> [[TMP13]], ptr [[OUT]], align 1
80+
; CHECK-NEXT: [[TMP14:%.*]] = call <8 x i1> @llvm.vector.extract.v8i1.v16i1(<16 x i1> [[TMP11]], i64 0)
81+
; CHECK-NEXT: store <8 x i1> [[TMP14]], ptr [[TMP12]], align 1
82+
; CHECK-NEXT: ret void
83+
;
63 84
entry:
64 85
%0 = load <8 x float>, ptr %in, align 4
65 86
%1 = fmul <8 x float> %0, zeroinitializer

0 commit comments

Comments
 (0)