Skip to content

Commit c8750b6

Browse files
committed
[SLP][REVEC] Make ExtractAndExtendIfNeeded support vector instructions.
1 parent 201cf72 commit c8750b6

File tree

2 files changed: 32 additions and 1 deletion.

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 11 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -14061,7 +14061,17 @@ Value *BoUpSLP::vectorizeTree(
14061 14061
CloneGEP->takeName(GEP);
14062 14062
Ex = CloneGEP;
14063 14063
} else {
14064-
Ex = Builder.CreateExtractElement(Vec, Lane);
14064+
if (auto *VecTy = dyn_cast<FixedVectorType>(Scalar->getType())) {
14065+
unsigned VecTyNumElements = VecTy->getNumElements();
14066+
// When REVEC is enabled, we need to extract a vector.
14067+
// Note: The element size of Scalar may be different from the
14068+
// element size of Vec.
14069+
Ex = Builder.CreateExtractVector(
14070+
FixedVectorType::get(Vec->getType()->getScalarType(),
14071+
VecTyNumElements),
14072+
Vec, Builder.getInt64(ExternalUse.Lane * VecTyNumElements));
14073+
} else
14074+
Ex = Builder.CreateExtractElement(Vec, Lane);
14065 14075
}
14066 14076
// If necessary, sign-extend or zero-extend ScalarRoot
14067 14077
// to the larger type.

llvm/test/Transforms/SLPVectorizer/revec.ll

Lines changed: 21 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -90,6 +90,27 @@ entry:
90 90
}
91 91

92 92
define void @test4(ptr %in, ptr %out) {
93+
; CHECK-LABEL: @test4(
94+
; CHECK-NEXT: entry:
95+
; CHECK-NEXT: [[TMP0:%.*]] = load <8 x float>, ptr [[IN:%.*]], align 4
96+
; CHECK-NEXT: [[TMP1:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> poison, <8 x float> poison, i64 8)
97+
; CHECK-NEXT: [[TMP2:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> [[TMP1]], <8 x float> [[TMP0]], i64 0)
98+
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x float> [[TMP2]], <16 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
99+
; CHECK-NEXT: [[TMP4:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> poison, <8 x float> zeroinitializer, i64 0)
100+
; CHECK-NEXT: [[TMP5:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> [[TMP4]], <8 x float> zeroinitializer, i64 8)
101+
; CHECK-NEXT: [[TMP6:%.*]] = fmul <16 x float> [[TMP3]], [[TMP5]]
102+
; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> poison, <8 x float> poison, i64 0)
103+
; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> [[TMP7]], <8 x float> zeroinitializer, i64 8)
104+
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <16 x float> [[TMP2]], <16 x float> [[TMP8]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
105+
; CHECK-NEXT: [[TMP10:%.*]] = fadd <16 x float> [[TMP9]], [[TMP6]]
106+
; CHECK-NEXT: [[TMP11:%.*]] = fcmp ogt <16 x float> [[TMP10]], [[TMP5]]
107+
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i1, ptr [[OUT:%.*]], i64 8
108+
; CHECK-NEXT: [[TMP13:%.*]] = call <8 x i1> @llvm.vector.extract.v8i1.v16i1(<16 x i1> [[TMP11]], i64 8)
109+
; CHECK-NEXT: store <8 x i1> [[TMP13]], ptr [[OUT]], align 1
110+
; CHECK-NEXT: [[TMP14:%.*]] = call <8 x i1> @llvm.vector.extract.v8i1.v16i1(<16 x i1> [[TMP11]], i64 0)
111+
; CHECK-NEXT: store <8 x i1> [[TMP14]], ptr [[TMP12]], align 1
112+
; CHECK-NEXT: ret void
113+
;
93 114
entry:
94 115
%0 = load <8 x float>, ptr %in, align 4
95 116
%1 = fmul <8 x float> %0, zeroinitializer

0 commit comments

Comments
 (0)