Skip to content

Commit cc54a0c

Browse files
authored
[VectorCombine] vectorizeLoadInsert - only fold when inserting into a poison vector (#119906)
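The fold replaces the insertelement with a shufflevector whose remaining lanes are poison, which is not a valid refinement when the base vector is undef, so the fold is now restricted to poison base vectors. The corresponding poison-base tests already exist in the sibling "-inseltpoison.ll" test files. Fixes #119900.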
1 parent 1911919 commit cc54a0c

File tree

3 files changed

+63
-69
lines changed

3 files changed

+63
-69
lines changed

llvm/lib/Transforms/Vectorize/VectorCombine.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -179,8 +179,8 @@ bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
179179
// Match insert into fixed vector of scalar value.
180180
// TODO: Handle non-zero insert index.
181181
Value *Scalar;
182-
if (!match(&I, m_InsertElt(m_Undef(), m_Value(Scalar), m_ZeroInt())) ||
183-
!Scalar->hasOneUse())
182+
if (!match(&I,
183+
m_InsertElt(m_Poison(), m_OneUse(m_Value(Scalar)), m_ZeroInt())))
184184
return false;
185185

186186
// Optionally match an extract from another vector.

llvm/test/Transforms/VectorCombine/AMDGPU/as-transition.ll

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -12,8 +12,8 @@ define protected amdgpu_kernel void @load_from_other_as(ptr nocapture nonnull %r
1212
; CHECK-NEXT: bb:
1313
; CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_HOGE:%.*]], align 4, addrspace(5)
1414
; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[A]] to ptr
15-
; CHECK-NEXT: [[TMP1:%.*]] = load <1 x float>, ptr [[TMP0]], align 4
16-
; CHECK-NEXT: [[E:%.*]] = shufflevector <1 x float> [[TMP1]], <1 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
15+
; CHECK-NEXT: [[D:%.*]] = load float, ptr [[TMP0]], align 4
16+
; CHECK-NEXT: [[E:%.*]] = insertelement <4 x float> undef, float [[D]], i32 0
1717
; CHECK-NEXT: store <4 x float> [[E]], ptr [[RESULTPTR:%.*]], align 16
1818
; CHECK-NEXT: ret void
1919
;

llvm/test/Transforms/VectorCombine/X86/load.ll

Lines changed: 59 additions & 65 deletions
Original file line number | Diff line number | Diff line change
@@ -158,8 +158,8 @@ define double @larger_fp_scalar_256bit_vec(ptr align 32 dereferenceable(32) %p)
158158

159159
define <4 x float> @load_f32_insert_v4f32(ptr align 16 dereferenceable(16) %p) nofree nosync {
160160
; CHECK-LABEL: @load_f32_insert_v4f32(
161-
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16
162-
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
161+
; CHECK-NEXT: [[S:%.*]] = load float, ptr [[P:%.*]], align 4
162+
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> undef, float [[S]], i32 0
163163
; CHECK-NEXT: ret <4 x float> [[R]]
164164
;
165165
%s = load float, ptr %p, align 4
@@ -169,8 +169,8 @@ define <4 x float> @load_f32_insert_v4f32(ptr align 16 dereferenceable(16) %p) n
169169

170170
define <4 x float> @casted_load_f32_insert_v4f32(ptr align 4 dereferenceable(16) %p) nofree nosync {
171171
; CHECK-LABEL: @casted_load_f32_insert_v4f32(
172-
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 4
173-
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
172+
; CHECK-NEXT: [[S:%.*]] = load float, ptr [[P:%.*]], align 4
173+
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> undef, float [[S]], i32 0
174174
; CHECK-NEXT: ret <4 x float> [[R]]
175175
;
176176
%s = load float, ptr %p, align 4
@@ -182,8 +182,8 @@ define <4 x float> @casted_load_f32_insert_v4f32(ptr align 4 dereferenceable(16)
182182

183183
define <4 x i32> @load_i32_insert_v4i32(ptr align 16 dereferenceable(16) %p) nofree nosync {
184184
; CHECK-LABEL: @load_i32_insert_v4i32(
185-
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 16
186-
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
185+
; CHECK-NEXT: [[S:%.*]] = load i32, ptr [[P:%.*]], align 4
186+
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i32> undef, i32 [[S]], i32 0
187187
; CHECK-NEXT: ret <4 x i32> [[R]]
188188
;
189189
%s = load i32, ptr %p, align 4
@@ -195,8 +195,8 @@ define <4 x i32> @load_i32_insert_v4i32(ptr align 16 dereferenceable(16) %p) nof
195195

196196
define <4 x i32> @casted_load_i32_insert_v4i32(ptr align 4 dereferenceable(16) %p) nofree nosync {
197197
; CHECK-LABEL: @casted_load_i32_insert_v4i32(
198-
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 4
199-
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
198+
; CHECK-NEXT: [[S:%.*]] = load i32, ptr [[P:%.*]], align 4
199+
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i32> undef, i32 [[S]], i32 0
200200
; CHECK-NEXT: ret <4 x i32> [[R]]
201201
;
202202
%s = load i32, ptr %p, align 4
@@ -208,8 +208,8 @@ define <4 x i32> @casted_load_i32_insert_v4i32(ptr align 4 dereferenceable(16) %
208208

209209
define <4 x float> @gep00_load_f32_insert_v4f32(ptr align 16 dereferenceable(16) %p) nofree nosync {
210210
; CHECK-LABEL: @gep00_load_f32_insert_v4f32(
211-
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16
212-
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
211+
; CHECK-NEXT: [[S:%.*]] = load float, ptr [[P:%.*]], align 16
212+
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> undef, float [[S]], i64 0
213213
; CHECK-NEXT: ret <4 x float> [[R]]
214214
;
215215
%s = load float, ptr %p, align 16
@@ -221,8 +221,8 @@ define <4 x float> @gep00_load_f32_insert_v4f32(ptr align 16 dereferenceable(16)
221221

222222
define <4 x float> @gep00_load_f32_insert_v4f32_addrspace(ptr addrspace(44) align 16 dereferenceable(16) %p) nofree nosync {
223223
; CHECK-LABEL: @gep00_load_f32_insert_v4f32_addrspace(
224-
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr addrspace(44) [[P:%.*]], align 16
225-
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
224+
; CHECK-NEXT: [[S:%.*]] = load float, ptr addrspace(44) [[P:%.*]], align 16
225+
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> undef, float [[S]], i64 0
226226
; CHECK-NEXT: ret <4 x float> [[R]]
227227
;
228228
%s = load float, ptr addrspace(44) %p, align 16
@@ -235,8 +235,8 @@ define <4 x float> @gep00_load_f32_insert_v4f32_addrspace(ptr addrspace(44) alig
235235
define <8 x i16> @gep01_load_i16_insert_v8i16(ptr align 16 dereferenceable(18) %p) nofree nosync {
236236
; CHECK-LABEL: @gep01_load_i16_insert_v8i16(
237237
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 0, i64 1
238-
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[GEP]], align 2
239-
; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
238+
; CHECK-NEXT: [[S:%.*]] = load i16, ptr [[GEP]], align 2
239+
; CHECK-NEXT: [[R:%.*]] = insertelement <8 x i16> undef, i16 [[S]], i64 0
240240
; CHECK-NEXT: ret <8 x i16> [[R]]
241241
;
242242
%gep = getelementptr inbounds <8 x i16>, ptr %p, i64 0, i64 1
@@ -248,16 +248,11 @@ define <8 x i16> @gep01_load_i16_insert_v8i16(ptr align 16 dereferenceable(18) %
248248
; Can't safely load the offset vector, but can load+shuffle if it is profitable.
249249

250250
define <8 x i16> @gep01_load_i16_insert_v8i16_deref(ptr align 16 dereferenceable(17) %p) nofree nosync {
251-
; SSE2-LABEL: @gep01_load_i16_insert_v8i16_deref(
252-
; SSE2-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 0, i64 1
253-
; SSE2-NEXT: [[S:%.*]] = load i16, ptr [[GEP]], align 2
254-
; SSE2-NEXT: [[R:%.*]] = insertelement <8 x i16> undef, i16 [[S]], i64 0
255-
; SSE2-NEXT: ret <8 x i16> [[R]]
256-
;
257-
; AVX2-LABEL: @gep01_load_i16_insert_v8i16_deref(
258-
; AVX2-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[P:%.*]], align 16
259-
; AVX2-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
260-
; AVX2-NEXT: ret <8 x i16> [[R]]
251+
; CHECK-LABEL: @gep01_load_i16_insert_v8i16_deref(
252+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 0, i64 1
253+
; CHECK-NEXT: [[S:%.*]] = load i16, ptr [[GEP]], align 2
254+
; CHECK-NEXT: [[R:%.*]] = insertelement <8 x i16> undef, i16 [[S]], i64 0
255+
; CHECK-NEXT: ret <8 x i16> [[R]]
261256
;
262257
%gep = getelementptr inbounds <8 x i16>, ptr %p, i64 0, i64 1
263258
%s = load i16, ptr %gep, align 2
@@ -268,16 +263,11 @@ define <8 x i16> @gep01_load_i16_insert_v8i16_deref(ptr align 16 dereferenceable
268263
; Verify that alignment of the new load is not over-specified.
269264

270265
define <8 x i16> @gep01_load_i16_insert_v8i16_deref_minalign(ptr align 2 dereferenceable(16) %p) nofree nosync {
271-
; SSE2-LABEL: @gep01_load_i16_insert_v8i16_deref_minalign(
272-
; SSE2-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 0, i64 1
273-
; SSE2-NEXT: [[S:%.*]] = load i16, ptr [[GEP]], align 8
274-
; SSE2-NEXT: [[R:%.*]] = insertelement <8 x i16> undef, i16 [[S]], i64 0
275-
; SSE2-NEXT: ret <8 x i16> [[R]]
276-
;
277-
; AVX2-LABEL: @gep01_load_i16_insert_v8i16_deref_minalign(
278-
; AVX2-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[P:%.*]], align 2
279-
; AVX2-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
280-
; AVX2-NEXT: ret <8 x i16> [[R]]
266+
; CHECK-LABEL: @gep01_load_i16_insert_v8i16_deref_minalign(
267+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 0, i64 1
268+
; CHECK-NEXT: [[S:%.*]] = load i16, ptr [[GEP]], align 8
269+
; CHECK-NEXT: [[R:%.*]] = insertelement <8 x i16> undef, i16 [[S]], i64 0
270+
; CHECK-NEXT: ret <8 x i16> [[R]]
281271
;
282272
%gep = getelementptr inbounds <8 x i16>, ptr %p, i64 0, i64 1
283273
%s = load i16, ptr %gep, align 8
@@ -304,8 +294,9 @@ define <4 x i32> @gep01_bitcast_load_i32_insert_v4i32(ptr align 1 dereferenceabl
304294

305295
define <4 x i32> @gep012_bitcast_load_i32_insert_v4i32(ptr align 1 dereferenceable(20) %p) nofree nosync {
306296
; CHECK-LABEL: @gep012_bitcast_load_i32_insert_v4i32(
307-
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 1
308-
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 3, i32 poison, i32 poison, i32 poison>
297+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <16 x i8>, ptr [[P:%.*]], i64 0, i64 12
298+
; CHECK-NEXT: [[S:%.*]] = load i32, ptr [[GEP]], align 1
299+
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i32> undef, i32 [[S]], i64 0
309300
; CHECK-NEXT: ret <4 x i32> [[R]]
310301
;
311302
%gep = getelementptr inbounds <16 x i8>, ptr %p, i64 0, i64 12
@@ -336,8 +327,8 @@ define <4 x i32> @gep013_bitcast_load_i32_insert_v4i32(ptr align 1 dereferenceab
336327
define <8 x i16> @gep10_load_i16_insert_v8i16(ptr align 16 dereferenceable(32) %p) nofree nosync {
337328
; CHECK-LABEL: @gep10_load_i16_insert_v8i16(
338329
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 1, i64 0
339-
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[GEP]], align 16
340-
; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
330+
; CHECK-NEXT: [[S:%.*]] = load i16, ptr [[GEP]], align 16
331+
; CHECK-NEXT: [[R:%.*]] = insertelement <8 x i16> undef, i16 [[S]], i64 0
341332
; CHECK-NEXT: ret <8 x i16> [[R]]
342333
;
343334
%gep = getelementptr inbounds <8 x i16>, ptr %p, i64 1, i64 0
@@ -439,8 +430,8 @@ define <4 x float> @load_f32_insert_v4f32_volatile(ptr align 16 dereferenceable(
439430

440431
define <4 x float> @load_f32_insert_v4f32_align(ptr align 1 dereferenceable(16) %p) nofree nosync {
441432
; CHECK-LABEL: @load_f32_insert_v4f32_align(
442-
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 4
443-
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
433+
; CHECK-NEXT: [[S:%.*]] = load float, ptr [[P:%.*]], align 4
434+
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> undef, float [[S]], i32 0
444435
; CHECK-NEXT: ret <4 x float> [[R]]
445436
;
446437
%s = load float, ptr %p, align 4
@@ -463,8 +454,8 @@ define <4 x float> @load_f32_insert_v4f32_deref(ptr align 4 dereferenceable(15)
463454

464455
define <8 x i32> @load_i32_insert_v8i32(ptr align 16 dereferenceable(16) %p) nofree nosync {
465456
; CHECK-LABEL: @load_i32_insert_v8i32(
466-
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 16
467-
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
457+
; CHECK-NEXT: [[S:%.*]] = load i32, ptr [[P:%.*]], align 4
458+
; CHECK-NEXT: [[R:%.*]] = insertelement <8 x i32> undef, i32 [[S]], i32 0
468459
; CHECK-NEXT: ret <8 x i32> [[R]]
469460
;
470461
%s = load i32, ptr %p, align 4
@@ -474,8 +465,8 @@ define <8 x i32> @load_i32_insert_v8i32(ptr align 16 dereferenceable(16) %p) nof
474465

475466
define <8 x i32> @casted_load_i32_insert_v8i32(ptr align 4 dereferenceable(16) %p) nofree nosync {
476467
; CHECK-LABEL: @casted_load_i32_insert_v8i32(
477-
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 4
478-
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
468+
; CHECK-NEXT: [[S:%.*]] = load i32, ptr [[P:%.*]], align 4
469+
; CHECK-NEXT: [[R:%.*]] = insertelement <8 x i32> undef, i32 [[S]], i32 0
479470
; CHECK-NEXT: ret <8 x i32> [[R]]
480471
;
481472
%s = load i32, ptr %p, align 4
@@ -485,8 +476,8 @@ define <8 x i32> @casted_load_i32_insert_v8i32(ptr align 4 dereferenceable(16) %
485476

486477
define <16 x float> @load_f32_insert_v16f32(ptr align 16 dereferenceable(16) %p) nofree nosync {
487478
; CHECK-LABEL: @load_f32_insert_v16f32(
488-
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16
489-
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <16 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
479+
; CHECK-NEXT: [[S:%.*]] = load float, ptr [[P:%.*]], align 4
480+
; CHECK-NEXT: [[R:%.*]] = insertelement <16 x float> undef, float [[S]], i32 0
490481
; CHECK-NEXT: ret <16 x float> [[R]]
491482
;
492483
%s = load float, ptr %p, align 4
@@ -496,8 +487,8 @@ define <16 x float> @load_f32_insert_v16f32(ptr align 16 dereferenceable(16) %p)
496487

497488
define <2 x float> @load_f32_insert_v2f32(ptr align 16 dereferenceable(16) %p) nofree nosync {
498489
; CHECK-LABEL: @load_f32_insert_v2f32(
499-
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16
500-
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <2 x i32> <i32 0, i32 poison>
490+
; CHECK-NEXT: [[S:%.*]] = load float, ptr [[P:%.*]], align 4
491+
; CHECK-NEXT: [[R:%.*]] = insertelement <2 x float> undef, float [[S]], i32 0
501492
; CHECK-NEXT: ret <2 x float> [[R]]
502493
;
503494
%s = load float, ptr %p, align 4
@@ -549,8 +540,9 @@ define void @PR47558_multiple_use_load(ptr nocapture nonnull %resultptr, ptr noc
549540

550541
define <4 x float> @load_v2f32_extract_insert_v4f32(ptr align 16 dereferenceable(16) %p) nofree nosync {
551542
; CHECK-LABEL: @load_v2f32_extract_insert_v4f32(
552-
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16
553-
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
543+
; CHECK-NEXT: [[L:%.*]] = load <2 x float>, ptr [[P:%.*]], align 4
544+
; CHECK-NEXT: [[S:%.*]] = extractelement <2 x float> [[L]], i32 0
545+
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> undef, float [[S]], i32 0
554546
; CHECK-NEXT: ret <4 x float> [[R]]
555547
;
556548
%l = load <2 x float>, ptr %p, align 4
@@ -560,10 +552,17 @@ define <4 x float> @load_v2f32_extract_insert_v4f32(ptr align 16 dereferenceable
560552
}
561553

562554
define <4 x float> @load_v8f32_extract_insert_v4f32(ptr align 16 dereferenceable(16) %p) nofree nosync {
563-
; CHECK-LABEL: @load_v8f32_extract_insert_v4f32(
564-
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16
565-
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
566-
; CHECK-NEXT: ret <4 x float> [[R]]
555+
; SSE2-LABEL: @load_v8f32_extract_insert_v4f32(
556+
; SSE2-NEXT: [[TMP1:%.*]] = getelementptr inbounds <8 x float>, ptr [[P:%.*]], i32 0, i32 0
557+
; SSE2-NEXT: [[S:%.*]] = load float, ptr [[TMP1]], align 4
558+
; SSE2-NEXT: [[R:%.*]] = insertelement <4 x float> undef, float [[S]], i32 0
559+
; SSE2-NEXT: ret <4 x float> [[R]]
560+
;
561+
; AVX2-LABEL: @load_v8f32_extract_insert_v4f32(
562+
; AVX2-NEXT: [[L:%.*]] = load <8 x float>, ptr [[P:%.*]], align 4
563+
; AVX2-NEXT: [[S:%.*]] = extractelement <8 x float> [[L]], i32 0
564+
; AVX2-NEXT: [[R:%.*]] = insertelement <4 x float> undef, float [[S]], i32 0
565+
; AVX2-NEXT: ret <4 x float> [[R]]
567566
;
568567
%l = load <8 x float>, ptr %p, align 4
569568
%s = extractelement <8 x float> %l, i32 0
@@ -589,17 +588,12 @@ define <8 x i32> @load_v1i32_extract_insert_v8i32_extra_use(ptr align 16 derefer
589588
; Can't safely load the offset vector, but can load+shuffle if it is profitable.
590589

591590
define <8 x i16> @gep1_load_v2i16_extract_insert_v8i16(ptr align 1 dereferenceable(16) %p) nofree nosync {
592-
; SSE2-LABEL: @gep1_load_v2i16_extract_insert_v8i16(
593-
; SSE2-NEXT: [[GEP:%.*]] = getelementptr inbounds <2 x i16>, ptr [[P:%.*]], i64 1
594-
; SSE2-NEXT: [[TMP1:%.*]] = getelementptr inbounds <2 x i16>, ptr [[GEP]], i32 0, i32 0
595-
; SSE2-NEXT: [[S:%.*]] = load i16, ptr [[TMP1]], align 8
596-
; SSE2-NEXT: [[R:%.*]] = insertelement <8 x i16> undef, i16 [[S]], i64 0
597-
; SSE2-NEXT: ret <8 x i16> [[R]]
598-
;
599-
; AVX2-LABEL: @gep1_load_v2i16_extract_insert_v8i16(
600-
; AVX2-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[P:%.*]], align 4
601-
; AVX2-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
602-
; AVX2-NEXT: ret <8 x i16> [[R]]
591+
; CHECK-LABEL: @gep1_load_v2i16_extract_insert_v8i16(
592+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <2 x i16>, ptr [[P:%.*]], i64 1
593+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <2 x i16>, ptr [[GEP]], i32 0, i32 0
594+
; CHECK-NEXT: [[S:%.*]] = load i16, ptr [[TMP1]], align 8
595+
; CHECK-NEXT: [[R:%.*]] = insertelement <8 x i16> undef, i16 [[S]], i64 0
596+
; CHECK-NEXT: ret <8 x i16> [[R]]
603597
;
604598
%gep = getelementptr inbounds <2 x i16>, ptr %p, i64 1
605599
%l = load <2 x i16>, ptr %gep, align 8

0 commit comments

Comments
 (0)