@@ -158,8 +158,8 @@ define double @larger_fp_scalar_256bit_vec(ptr align 32 dereferenceable(32) %p)
158
158
159
159
define <4 x float > @load_f32_insert_v4f32 (ptr align 16 dereferenceable (16 ) %p ) nofree nosync {
160
160
; CHECK-LABEL: @load_f32_insert_v4f32(
161
- ; CHECK-NEXT: [[TMP1 :%.*]] = load <4 x float> , ptr [[P:%.*]], align 16
162
- ; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1 ]], <4 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
161
+ ; CHECK-NEXT: [[S :%.*]] = load float, ptr [[P:%.*]], align 4
162
+ ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> undef, float [[S ]], i32 0
163
163
; CHECK-NEXT: ret <4 x float> [[R]]
164
164
;
165
165
%s = load float , ptr %p , align 4
@@ -169,8 +169,8 @@ define <4 x float> @load_f32_insert_v4f32(ptr align 16 dereferenceable(16) %p) n
169
169
170
170
define <4 x float > @casted_load_f32_insert_v4f32 (ptr align 4 dereferenceable (16 ) %p ) nofree nosync {
171
171
; CHECK-LABEL: @casted_load_f32_insert_v4f32(
172
- ; CHECK-NEXT: [[TMP1 :%.*]] = load <4 x float> , ptr [[P:%.*]], align 4
173
- ; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1 ]], <4 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
172
+ ; CHECK-NEXT: [[S :%.*]] = load float, ptr [[P:%.*]], align 4
173
+ ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> undef, float [[S ]], i32 0
174
174
; CHECK-NEXT: ret <4 x float> [[R]]
175
175
;
176
176
%s = load float , ptr %p , align 4
@@ -182,8 +182,8 @@ define <4 x float> @casted_load_f32_insert_v4f32(ptr align 4 dereferenceable(16)
182
182
183
183
define <4 x i32 > @load_i32_insert_v4i32 (ptr align 16 dereferenceable (16 ) %p ) nofree nosync {
184
184
; CHECK-LABEL: @load_i32_insert_v4i32(
185
- ; CHECK-NEXT: [[TMP1 :%.*]] = load <4 x i32> , ptr [[P:%.*]], align 16
186
- ; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1 ]], <4 x i32> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
185
+ ; CHECK-NEXT: [[S :%.*]] = load i32, ptr [[P:%.*]], align 4
186
+ ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i32> undef, i32 [[S ]], i32 0
187
187
; CHECK-NEXT: ret <4 x i32> [[R]]
188
188
;
189
189
%s = load i32 , ptr %p , align 4
@@ -195,8 +195,8 @@ define <4 x i32> @load_i32_insert_v4i32(ptr align 16 dereferenceable(16) %p) nof
195
195
196
196
define <4 x i32 > @casted_load_i32_insert_v4i32 (ptr align 4 dereferenceable (16 ) %p ) nofree nosync {
197
197
; CHECK-LABEL: @casted_load_i32_insert_v4i32(
198
- ; CHECK-NEXT: [[TMP1 :%.*]] = load <4 x i32> , ptr [[P:%.*]], align 4
199
- ; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1 ]], <4 x i32> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
198
+ ; CHECK-NEXT: [[S :%.*]] = load i32, ptr [[P:%.*]], align 4
199
+ ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i32> undef, i32 [[S ]], i32 0
200
200
; CHECK-NEXT: ret <4 x i32> [[R]]
201
201
;
202
202
%s = load i32 , ptr %p , align 4
@@ -208,8 +208,8 @@ define <4 x i32> @casted_load_i32_insert_v4i32(ptr align 4 dereferenceable(16) %
208
208
209
209
define <4 x float > @gep00_load_f32_insert_v4f32 (ptr align 16 dereferenceable (16 ) %p ) nofree nosync {
210
210
; CHECK-LABEL: @gep00_load_f32_insert_v4f32(
211
- ; CHECK-NEXT: [[TMP1 :%.*]] = load <4 x float> , ptr [[P:%.*]], align 16
212
- ; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1 ]], <4 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
211
+ ; CHECK-NEXT: [[S :%.*]] = load float, ptr [[P:%.*]], align 16
212
+ ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> undef, float [[S ]], i64 0
213
213
; CHECK-NEXT: ret <4 x float> [[R]]
214
214
;
215
215
%s = load float , ptr %p , align 16
@@ -221,8 +221,8 @@ define <4 x float> @gep00_load_f32_insert_v4f32(ptr align 16 dereferenceable(16)
221
221
222
222
define <4 x float > @gep00_load_f32_insert_v4f32_addrspace (ptr addrspace (44 ) align 16 dereferenceable (16 ) %p ) nofree nosync {
223
223
; CHECK-LABEL: @gep00_load_f32_insert_v4f32_addrspace(
224
- ; CHECK-NEXT: [[TMP1 :%.*]] = load <4 x float> , ptr addrspace(44) [[P:%.*]], align 16
225
- ; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1 ]], <4 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
224
+ ; CHECK-NEXT: [[S :%.*]] = load float, ptr addrspace(44) [[P:%.*]], align 16
225
+ ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> undef, float [[S ]], i64 0
226
226
; CHECK-NEXT: ret <4 x float> [[R]]
227
227
;
228
228
%s = load float , ptr addrspace (44 ) %p , align 16
@@ -235,8 +235,8 @@ define <4 x float> @gep00_load_f32_insert_v4f32_addrspace(ptr addrspace(44) alig
235
235
define <8 x i16 > @gep01_load_i16_insert_v8i16 (ptr align 16 dereferenceable (18 ) %p ) nofree nosync {
236
236
; CHECK-LABEL: @gep01_load_i16_insert_v8i16(
237
237
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 0, i64 1
238
- ; CHECK-NEXT: [[TMP1 :%.*]] = load <8 x i16> , ptr [[GEP]], align 2
239
- ; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1 ]], <8 x i16> poison, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
238
+ ; CHECK-NEXT: [[S :%.*]] = load i16, ptr [[GEP]], align 2
239
+ ; CHECK-NEXT: [[R:%.*]] = insertelement <8 x i16> undef, i16 [[S ]], i64 0
240
240
; CHECK-NEXT: ret <8 x i16> [[R]]
241
241
;
242
242
%gep = getelementptr inbounds <8 x i16 >, ptr %p , i64 0 , i64 1
@@ -248,16 +248,11 @@ define <8 x i16> @gep01_load_i16_insert_v8i16(ptr align 16 dereferenceable(18) %
248
248
; Can't safely load the offset vector, but can load+shuffle if it is profitable.
249
249
250
250
define <8 x i16 > @gep01_load_i16_insert_v8i16_deref (ptr align 16 dereferenceable (17 ) %p ) nofree nosync {
251
- ; SSE2-LABEL: @gep01_load_i16_insert_v8i16_deref(
252
- ; SSE2-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 0, i64 1
253
- ; SSE2-NEXT: [[S:%.*]] = load i16, ptr [[GEP]], align 2
254
- ; SSE2-NEXT: [[R:%.*]] = insertelement <8 x i16> undef, i16 [[S]], i64 0
255
- ; SSE2-NEXT: ret <8 x i16> [[R]]
256
- ;
257
- ; AVX2-LABEL: @gep01_load_i16_insert_v8i16_deref(
258
- ; AVX2-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[P:%.*]], align 16
259
- ; AVX2-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
260
- ; AVX2-NEXT: ret <8 x i16> [[R]]
251
+ ; CHECK-LABEL: @gep01_load_i16_insert_v8i16_deref(
252
+ ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 0, i64 1
253
+ ; CHECK-NEXT: [[S:%.*]] = load i16, ptr [[GEP]], align 2
254
+ ; CHECK-NEXT: [[R:%.*]] = insertelement <8 x i16> undef, i16 [[S]], i64 0
255
+ ; CHECK-NEXT: ret <8 x i16> [[R]]
261
256
;
262
257
%gep = getelementptr inbounds <8 x i16 >, ptr %p , i64 0 , i64 1
263
258
%s = load i16 , ptr %gep , align 2
@@ -268,16 +263,11 @@ define <8 x i16> @gep01_load_i16_insert_v8i16_deref(ptr align 16 dereferenceable
268
263
; Verify that alignment of the new load is not over-specified.
269
264
270
265
define <8 x i16 > @gep01_load_i16_insert_v8i16_deref_minalign (ptr align 2 dereferenceable (16 ) %p ) nofree nosync {
271
- ; SSE2-LABEL: @gep01_load_i16_insert_v8i16_deref_minalign(
272
- ; SSE2-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 0, i64 1
273
- ; SSE2-NEXT: [[S:%.*]] = load i16, ptr [[GEP]], align 8
274
- ; SSE2-NEXT: [[R:%.*]] = insertelement <8 x i16> undef, i16 [[S]], i64 0
275
- ; SSE2-NEXT: ret <8 x i16> [[R]]
276
- ;
277
- ; AVX2-LABEL: @gep01_load_i16_insert_v8i16_deref_minalign(
278
- ; AVX2-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[P:%.*]], align 2
279
- ; AVX2-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
280
- ; AVX2-NEXT: ret <8 x i16> [[R]]
266
+ ; CHECK-LABEL: @gep01_load_i16_insert_v8i16_deref_minalign(
267
+ ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 0, i64 1
268
+ ; CHECK-NEXT: [[S:%.*]] = load i16, ptr [[GEP]], align 8
269
+ ; CHECK-NEXT: [[R:%.*]] = insertelement <8 x i16> undef, i16 [[S]], i64 0
270
+ ; CHECK-NEXT: ret <8 x i16> [[R]]
281
271
;
282
272
%gep = getelementptr inbounds <8 x i16 >, ptr %p , i64 0 , i64 1
283
273
%s = load i16 , ptr %gep , align 8
@@ -304,8 +294,9 @@ define <4 x i32> @gep01_bitcast_load_i32_insert_v4i32(ptr align 1 dereferenceabl
304
294
305
295
define <4 x i32 > @gep012_bitcast_load_i32_insert_v4i32 (ptr align 1 dereferenceable (20 ) %p ) nofree nosync {
306
296
; CHECK-LABEL: @gep012_bitcast_load_i32_insert_v4i32(
307
- ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 1
308
- ; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 3, i32 poison, i32 poison, i32 poison>
297
+ ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <16 x i8>, ptr [[P:%.*]], i64 0, i64 12
298
+ ; CHECK-NEXT: [[S:%.*]] = load i32, ptr [[GEP]], align 1
299
+ ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i32> undef, i32 [[S]], i64 0
309
300
; CHECK-NEXT: ret <4 x i32> [[R]]
310
301
;
311
302
%gep = getelementptr inbounds <16 x i8 >, ptr %p , i64 0 , i64 12
@@ -336,8 +327,8 @@ define <4 x i32> @gep013_bitcast_load_i32_insert_v4i32(ptr align 1 dereferenceab
336
327
define <8 x i16 > @gep10_load_i16_insert_v8i16 (ptr align 16 dereferenceable (32 ) %p ) nofree nosync {
337
328
; CHECK-LABEL: @gep10_load_i16_insert_v8i16(
338
329
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 1, i64 0
339
- ; CHECK-NEXT: [[TMP1 :%.*]] = load <8 x i16> , ptr [[GEP]], align 16
340
- ; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1 ]], <8 x i16> poison, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
330
+ ; CHECK-NEXT: [[S :%.*]] = load i16, ptr [[GEP]], align 16
331
+ ; CHECK-NEXT: [[R:%.*]] = insertelement <8 x i16> undef, i16 [[S ]], i64 0
341
332
; CHECK-NEXT: ret <8 x i16> [[R]]
342
333
;
343
334
%gep = getelementptr inbounds <8 x i16 >, ptr %p , i64 1 , i64 0
@@ -439,8 +430,8 @@ define <4 x float> @load_f32_insert_v4f32_volatile(ptr align 16 dereferenceable(
439
430
440
431
define <4 x float > @load_f32_insert_v4f32_align (ptr align 1 dereferenceable (16 ) %p ) nofree nosync {
441
432
; CHECK-LABEL: @load_f32_insert_v4f32_align(
442
- ; CHECK-NEXT: [[TMP1 :%.*]] = load <4 x float> , ptr [[P:%.*]], align 4
443
- ; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1 ]], <4 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
433
+ ; CHECK-NEXT: [[S :%.*]] = load float, ptr [[P:%.*]], align 4
434
+ ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> undef, float [[S ]], i32 0
444
435
; CHECK-NEXT: ret <4 x float> [[R]]
445
436
;
446
437
%s = load float , ptr %p , align 4
@@ -463,8 +454,8 @@ define <4 x float> @load_f32_insert_v4f32_deref(ptr align 4 dereferenceable(15)
463
454
464
455
define <8 x i32 > @load_i32_insert_v8i32 (ptr align 16 dereferenceable (16 ) %p ) nofree nosync {
465
456
; CHECK-LABEL: @load_i32_insert_v8i32(
466
- ; CHECK-NEXT: [[TMP1 :%.*]] = load <4 x i32> , ptr [[P:%.*]], align 16
467
- ; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, < 8 x i32> <i32 0 , i32 poison , i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
457
+ ; CHECK-NEXT: [[S :%.*]] = load i32, ptr [[P:%.*]], align 4
458
+ ; CHECK-NEXT: [[R:%.*]] = insertelement < 8 x i32> undef , i32 [[S]] , i32 0
468
459
; CHECK-NEXT: ret <8 x i32> [[R]]
469
460
;
470
461
%s = load i32 , ptr %p , align 4
@@ -474,8 +465,8 @@ define <8 x i32> @load_i32_insert_v8i32(ptr align 16 dereferenceable(16) %p) nof
474
465
475
466
define <8 x i32 > @casted_load_i32_insert_v8i32 (ptr align 4 dereferenceable (16 ) %p ) nofree nosync {
476
467
; CHECK-LABEL: @casted_load_i32_insert_v8i32(
477
- ; CHECK-NEXT: [[TMP1 :%.*]] = load <4 x i32> , ptr [[P:%.*]], align 4
478
- ; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, < 8 x i32> <i32 0 , i32 poison , i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
468
+ ; CHECK-NEXT: [[S :%.*]] = load i32, ptr [[P:%.*]], align 4
469
+ ; CHECK-NEXT: [[R:%.*]] = insertelement < 8 x i32> undef , i32 [[S]] , i32 0
479
470
; CHECK-NEXT: ret <8 x i32> [[R]]
480
471
;
481
472
%s = load i32 , ptr %p , align 4
@@ -485,8 +476,8 @@ define <8 x i32> @casted_load_i32_insert_v8i32(ptr align 4 dereferenceable(16) %
485
476
486
477
define <16 x float > @load_f32_insert_v16f32 (ptr align 16 dereferenceable (16 ) %p ) nofree nosync {
487
478
; CHECK-LABEL: @load_f32_insert_v16f32(
488
- ; CHECK-NEXT: [[TMP1 :%.*]] = load <4 x float> , ptr [[P:%.*]], align 16
489
- ; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1 ]], <4 x float> poison, <16 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
479
+ ; CHECK-NEXT: [[S :%.*]] = load float, ptr [[P:%.*]], align 4
480
+ ; CHECK-NEXT: [[R:%.*]] = insertelement <16 x float> undef, float [[S ]], i32 0
490
481
; CHECK-NEXT: ret <16 x float> [[R]]
491
482
;
492
483
%s = load float , ptr %p , align 4
@@ -496,8 +487,8 @@ define <16 x float> @load_f32_insert_v16f32(ptr align 16 dereferenceable(16) %p)
496
487
497
488
define <2 x float > @load_f32_insert_v2f32 (ptr align 16 dereferenceable (16 ) %p ) nofree nosync {
498
489
; CHECK-LABEL: @load_f32_insert_v2f32(
499
- ; CHECK-NEXT: [[TMP1 :%.*]] = load <4 x float> , ptr [[P:%.*]], align 16
500
- ; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1 ]], <4 x float> poison, <2 x i32> <i32 0, i32 poison>
490
+ ; CHECK-NEXT: [[S :%.*]] = load float, ptr [[P:%.*]], align 4
491
+ ; CHECK-NEXT: [[R:%.*]] = insertelement <2 x float> undef, float [[S ]], i32 0
501
492
; CHECK-NEXT: ret <2 x float> [[R]]
502
493
;
503
494
%s = load float , ptr %p , align 4
@@ -549,8 +540,9 @@ define void @PR47558_multiple_use_load(ptr nocapture nonnull %resultptr, ptr noc
549
540
550
541
define <4 x float > @load_v2f32_extract_insert_v4f32 (ptr align 16 dereferenceable (16 ) %p ) nofree nosync {
551
542
; CHECK-LABEL: @load_v2f32_extract_insert_v4f32(
552
- ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16
553
- ; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
543
+ ; CHECK-NEXT: [[L:%.*]] = load <2 x float>, ptr [[P:%.*]], align 4
544
+ ; CHECK-NEXT: [[S:%.*]] = extractelement <2 x float> [[L]], i32 0
545
+ ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> undef, float [[S]], i32 0
554
546
; CHECK-NEXT: ret <4 x float> [[R]]
555
547
;
556
548
%l = load <2 x float >, ptr %p , align 4
@@ -560,10 +552,17 @@ define <4 x float> @load_v2f32_extract_insert_v4f32(ptr align 16 dereferenceable
560
552
}
561
553
562
554
define <4 x float > @load_v8f32_extract_insert_v4f32 (ptr align 16 dereferenceable (16 ) %p ) nofree nosync {
563
- ; CHECK-LABEL: @load_v8f32_extract_insert_v4f32(
564
- ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16
565
- ; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
566
- ; CHECK-NEXT: ret <4 x float> [[R]]
555
+ ; SSE2-LABEL: @load_v8f32_extract_insert_v4f32(
556
+ ; SSE2-NEXT: [[TMP1:%.*]] = getelementptr inbounds <8 x float>, ptr [[P:%.*]], i32 0, i32 0
557
+ ; SSE2-NEXT: [[S:%.*]] = load float, ptr [[TMP1]], align 4
558
+ ; SSE2-NEXT: [[R:%.*]] = insertelement <4 x float> undef, float [[S]], i32 0
559
+ ; SSE2-NEXT: ret <4 x float> [[R]]
560
+ ;
561
+ ; AVX2-LABEL: @load_v8f32_extract_insert_v4f32(
562
+ ; AVX2-NEXT: [[L:%.*]] = load <8 x float>, ptr [[P:%.*]], align 4
563
+ ; AVX2-NEXT: [[S:%.*]] = extractelement <8 x float> [[L]], i32 0
564
+ ; AVX2-NEXT: [[R:%.*]] = insertelement <4 x float> undef, float [[S]], i32 0
565
+ ; AVX2-NEXT: ret <4 x float> [[R]]
567
566
;
568
567
%l = load <8 x float >, ptr %p , align 4
569
568
%s = extractelement <8 x float > %l , i32 0
@@ -589,17 +588,12 @@ define <8 x i32> @load_v1i32_extract_insert_v8i32_extra_use(ptr align 16 derefer
589
588
; Can't safely load the offset vector, but can load+shuffle if it is profitable.
590
589
591
590
define <8 x i16 > @gep1_load_v2i16_extract_insert_v8i16 (ptr align 1 dereferenceable (16 ) %p ) nofree nosync {
592
- ; SSE2-LABEL: @gep1_load_v2i16_extract_insert_v8i16(
593
- ; SSE2-NEXT: [[GEP:%.*]] = getelementptr inbounds <2 x i16>, ptr [[P:%.*]], i64 1
594
- ; SSE2-NEXT: [[TMP1:%.*]] = getelementptr inbounds <2 x i16>, ptr [[GEP]], i32 0, i32 0
595
- ; SSE2-NEXT: [[S:%.*]] = load i16, ptr [[TMP1]], align 8
596
- ; SSE2-NEXT: [[R:%.*]] = insertelement <8 x i16> undef, i16 [[S]], i64 0
597
- ; SSE2-NEXT: ret <8 x i16> [[R]]
598
- ;
599
- ; AVX2-LABEL: @gep1_load_v2i16_extract_insert_v8i16(
600
- ; AVX2-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[P:%.*]], align 4
601
- ; AVX2-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
602
- ; AVX2-NEXT: ret <8 x i16> [[R]]
591
+ ; CHECK-LABEL: @gep1_load_v2i16_extract_insert_v8i16(
592
+ ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <2 x i16>, ptr [[P:%.*]], i64 1
593
+ ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <2 x i16>, ptr [[GEP]], i32 0, i32 0
594
+ ; CHECK-NEXT: [[S:%.*]] = load i16, ptr [[TMP1]], align 8
595
+ ; CHECK-NEXT: [[R:%.*]] = insertelement <8 x i16> undef, i16 [[S]], i64 0
596
+ ; CHECK-NEXT: ret <8 x i16> [[R]]
603
597
;
604
598
%gep = getelementptr inbounds <2 x i16 >, ptr %p , i64 1
605
599
%l = load <2 x i16 >, ptr %gep , align 8
0 commit comments