Skip to content

Commit 2546ae4

Browse files
authored
[SLP][REVEC] Fix the case where the number of elements in the mask of a ShuffleVectorInst is not a power of 2. (#119689)
The following shufflevector instructions should not be vectorized when slp-vectorize-non-power-of-2 is enabled, because the number of elements in the source vector (8) is not a multiple of the shuffle mask size (3):
shufflevector <8 x float> %1, <8 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
shufflevector <8 x float> %1, <8 x float> poison, <3 x i32> <i32 4, i32 5, i32 6>
1 parent 03cbe42 commit 2546ae4

File tree

2 files changed

+100
-1
lines changed

2 files changed

+100
-1
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -344,6 +344,8 @@ static unsigned getShufflevectorNumGroups(ArrayRef<Value *> VL) {
344344
unsigned SVNumElements =
345345
cast<FixedVectorType>(SV->getOperand(0)->getType())->getNumElements();
346346
unsigned ShuffleMaskSize = SV->getShuffleMask().size();
347+
if (SVNumElements % ShuffleMaskSize != 0)
348+
return 0;
347349
unsigned GroupSize = SVNumElements / ShuffleMaskSize;
348350
if (GroupSize == 0 || (VL.size() % GroupSize) != 0)
349351
return 0;

llvm/test/Transforms/SLPVectorizer/RISCV/revec.ll

Lines changed: 98 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2-
; RUN: opt -mtriple=riscv64 -mcpu=sifive-x280 -passes=slp-vectorizer -S -slp-revec -slp-max-reg-size=1024 -slp-threshold=-100 %s | FileCheck %s
2+
; RUN: opt -mtriple=riscv64 -mcpu=sifive-x280 -passes=slp-vectorizer -S -slp-revec -slp-max-reg-size=1024 -slp-threshold=-100 %s | FileCheck --check-prefixes=CHECK,POWEROF2 %s
3+
; RUN: opt -mtriple=riscv64 -mcpu=sifive-x280 -passes=slp-vectorizer -S -slp-revec -slp-max-reg-size=1024 -slp-threshold=-100 -slp-vectorize-non-power-of-2 %s | FileCheck --check-prefixes=CHECK,NONPOWEROF2 %s
34

45
define i32 @test() {
56
; CHECK-LABEL: @test(
@@ -134,3 +135,99 @@ for.body:
134135
%6 = select <2 x i1> %4, <2 x float> %3, <2 x float> zeroinitializer
135136
br label %for.cond.cleanup
136137
}
138+
139+
define ptr @test4() {
140+
; POWEROF2-LABEL: @test4(
141+
; POWEROF2-NEXT: [[TMP1:%.*]] = fadd <8 x float> zeroinitializer, zeroinitializer
142+
; POWEROF2-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <2 x i32> <i32 1, i32 2>
143+
; POWEROF2-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <2 x i32> <i32 5, i32 6>
144+
; POWEROF2-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <2 x i32> <i32 4, i32 0>
145+
; POWEROF2-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> poison, <2 x float> [[TMP2]], i64 0)
146+
; POWEROF2-NEXT: [[TMP6:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> [[TMP5]], <2 x float> [[TMP3]], i64 2)
147+
; POWEROF2-NEXT: br label [[TMP8:%.*]]
148+
; POWEROF2: 7:
149+
; POWEROF2-NEXT: br label [[TMP8]]
150+
; POWEROF2: 8:
151+
; POWEROF2-NEXT: [[TMP9:%.*]] = phi <2 x float> [ poison, [[TMP7:%.*]] ], [ [[TMP4]], [[TMP0:%.*]] ]
152+
; POWEROF2-NEXT: [[TMP10:%.*]] = phi <4 x float> [ poison, [[TMP7]] ], [ [[TMP6]], [[TMP0]] ]
153+
; POWEROF2-NEXT: br label [[TMP11:%.*]]
154+
; POWEROF2: 11:
155+
; POWEROF2-NEXT: [[TMP12:%.*]] = call <2 x float> @llvm.vector.extract.v2f32.v4f32(<4 x float> [[TMP10]], i64 0)
156+
; POWEROF2-NEXT: [[TMP13:%.*]] = fmul <2 x float> [[TMP12]], zeroinitializer
157+
; POWEROF2-NEXT: [[TMP14:%.*]] = call <2 x float> @llvm.vector.extract.v2f32.v4f32(<4 x float> [[TMP10]], i64 2)
158+
; POWEROF2-NEXT: [[TMP15:%.*]] = fmul <2 x float> zeroinitializer, [[TMP14]]
159+
; POWEROF2-NEXT: [[TMP16:%.*]] = extractelement <2 x float> [[TMP9]], i32 1
160+
; POWEROF2-NEXT: [[TMP17:%.*]] = fmul float 0.000000e+00, [[TMP16]]
161+
; POWEROF2-NEXT: [[TMP18:%.*]] = extractelement <2 x float> [[TMP9]], i32 0
162+
; POWEROF2-NEXT: [[TMP19:%.*]] = fmul float [[TMP18]], 0.000000e+00
163+
; POWEROF2-NEXT: [[TMP20:%.*]] = extractelement <2 x float> [[TMP13]], i32 0
164+
; POWEROF2-NEXT: [[TMP21:%.*]] = fadd reassoc nsz float [[TMP20]], [[TMP17]]
165+
; POWEROF2-NEXT: [[TMP22:%.*]] = extractelement <2 x float> [[TMP15]], i32 0
166+
; POWEROF2-NEXT: [[TMP23:%.*]] = fadd reassoc nsz float [[TMP22]], [[TMP19]]
167+
; POWEROF2-NEXT: [[TMP24:%.*]] = extractelement <2 x float> [[TMP13]], i32 1
168+
; POWEROF2-NEXT: [[TMP25:%.*]] = fadd reassoc nsz float [[TMP21]], [[TMP24]]
169+
; POWEROF2-NEXT: [[TMP26:%.*]] = extractelement <2 x float> [[TMP15]], i32 1
170+
; POWEROF2-NEXT: [[TMP27:%.*]] = fadd reassoc nsz float [[TMP23]], [[TMP26]]
171+
; POWEROF2-NEXT: [[TMP28:%.*]] = tail call float @llvm.sqrt.f32(float [[TMP25]])
172+
; POWEROF2-NEXT: [[TMP29:%.*]] = tail call float @llvm.sqrt.f32(float [[TMP27]])
173+
; POWEROF2-NEXT: ret ptr null
174+
;
175+
; NONPOWEROF2-LABEL: @test4(
176+
; NONPOWEROF2-NEXT: [[TMP1:%.*]] = fadd <8 x float> zeroinitializer, zeroinitializer
177+
; NONPOWEROF2-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
178+
; NONPOWEROF2-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <3 x i32> <i32 4, i32 5, i32 6>
179+
; NONPOWEROF2-NEXT: [[TMP4:%.*]] = call <6 x float> @llvm.vector.insert.v6f32.v3f32(<6 x float> poison, <3 x float> [[TMP2]], i64 0)
180+
; NONPOWEROF2-NEXT: [[TMP5:%.*]] = call <6 x float> @llvm.vector.insert.v6f32.v3f32(<6 x float> [[TMP4]], <3 x float> [[TMP3]], i64 3)
181+
; NONPOWEROF2-NEXT: br label [[TMP7:%.*]]
182+
; NONPOWEROF2: 6:
183+
; NONPOWEROF2-NEXT: br label [[TMP7]]
184+
; NONPOWEROF2: 7:
185+
; NONPOWEROF2-NEXT: [[TMP8:%.*]] = phi <6 x float> [ poison, [[TMP6:%.*]] ], [ [[TMP5]], [[TMP0:%.*]] ]
186+
; NONPOWEROF2-NEXT: br label [[TMP9:%.*]]
187+
; NONPOWEROF2: 9:
188+
; NONPOWEROF2-NEXT: [[TMP10:%.*]] = call <3 x float> @llvm.vector.extract.v3f32.v6f32(<6 x float> [[TMP8]], i64 0)
189+
; NONPOWEROF2-NEXT: [[TMP11:%.*]] = fmul <3 x float> zeroinitializer, [[TMP10]]
190+
; NONPOWEROF2-NEXT: [[TMP12:%.*]] = call <3 x float> @llvm.vector.extract.v3f32.v6f32(<6 x float> [[TMP8]], i64 3)
191+
; NONPOWEROF2-NEXT: [[TMP13:%.*]] = fmul <3 x float> zeroinitializer, [[TMP12]]
192+
; NONPOWEROF2-NEXT: [[TMP14:%.*]] = call reassoc nsz float @llvm.vector.reduce.fadd.v3f32(float 0.000000e+00, <3 x float> [[TMP11]])
193+
; NONPOWEROF2-NEXT: [[TMP15:%.*]] = call reassoc nsz float @llvm.vector.reduce.fadd.v3f32(float 0.000000e+00, <3 x float> [[TMP13]])
194+
; NONPOWEROF2-NEXT: [[TMP16:%.*]] = tail call float @llvm.sqrt.f32(float [[TMP14]])
195+
; NONPOWEROF2-NEXT: [[TMP17:%.*]] = tail call float @llvm.sqrt.f32(float [[TMP15]])
196+
; NONPOWEROF2-NEXT: ret ptr null
197+
;
198+
%1 = fadd <8 x float> zeroinitializer, zeroinitializer
199+
%2 = extractelement <8 x float> %1, i64 0
200+
%3 = extractelement <8 x float> %1, i64 1
201+
%4 = extractelement <8 x float> %1, i64 2
202+
%5 = extractelement <8 x float> %1, i64 4
203+
%6 = extractelement <8 x float> %1, i64 5
204+
%7 = extractelement <8 x float> %1, i64 6
205+
br label %9
206+
207+
8:
208+
br label %9
209+
210+
9:
211+
%10 = phi float [ 0.000000e+00, %8 ], [ %7, %0 ]
212+
%11 = phi float [ 0.000000e+00, %8 ], [ %6, %0 ]
213+
%12 = phi float [ 0.000000e+00, %8 ], [ %5, %0 ]
214+
%13 = phi float [ 0.000000e+00, %8 ], [ %4, %0 ]
215+
%14 = phi float [ 0.000000e+00, %8 ], [ %3, %0 ]
216+
%15 = phi float [ 0.000000e+00, %8 ], [ %2, %0 ]
217+
br label %16
218+
219+
16:
220+
%17 = fmul float %14, 0.000000e+00
221+
%18 = fmul float 0.000000e+00, %11
222+
%19 = fmul float 0.000000e+00, %15
223+
%20 = fmul float %12, 0.000000e+00
224+
%21 = fadd reassoc nsz float %17, %19
225+
%22 = fadd reassoc nsz float %18, %20
226+
%23 = fmul float %13, 0.000000e+00
227+
%24 = fmul float %10, 0.000000e+00
228+
%25 = fadd reassoc nsz float %21, %23
229+
%26 = fadd reassoc nsz float %22, %24
230+
%27 = tail call float @llvm.sqrt.f32(float %25)
231+
%28 = tail call float @llvm.sqrt.f32(float %26)
232+
ret ptr null
233+
}

0 commit comments

Comments
 (0)