@@ -19,14 +19,24 @@ define void @test_2xi64_unary_op_load_interleave_group(ptr noalias %data, ptr no
19
19
; VF2-NEXT: [[TMP1:%.*]] = shl nsw i64 [[TMP0]], 1
20
20
; VF2-NEXT: [[TMP12:%.*]] = shl nsw i64 [[TMP10]], 1
21
21
; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds double, ptr [[DATA]], i64 [[TMP1]]
22
- ; VF2-NEXT: [[TMP5:%.*]] = getelementptr inbounds double, ptr [[DATA]], i64 [[TMP12]]
23
- ; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP2]], align 8
24
- ; VF2-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x double>, ptr [[TMP5]], align 8
25
- ; VF2-NEXT: [[TMP9:%.*]] = fneg <2 x double> [[WIDE_LOAD]]
26
- ; VF2-NEXT: [[TMP11:%.*]] = fneg <2 x double> [[WIDE_LOAD1]]
27
- ; VF2-NEXT: store <2 x double> [[TMP9]], ptr [[TMP2]], align 8
28
- ; VF2-NEXT: store <2 x double> [[TMP11]], ptr [[TMP5]], align 8
29
- ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
22
+ ; VF2-NEXT: [[TMP13:%.*]] = getelementptr inbounds double, ptr [[DATA]], i64 [[TMP12]]
23
+ ; VF2-NEXT: [[WIDE_VEC:%.*]] = load <4 x double>, ptr [[TMP2]], align 8
24
+ ; VF2-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <4 x double> [[WIDE_VEC]], <4 x double> poison, <2 x i32> <i32 0, i32 2>
25
+ ; VF2-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <4 x double> [[WIDE_VEC]], <4 x double> poison, <2 x i32> <i32 1, i32 3>
26
+ ; VF2-NEXT: [[WIDE_VEC2:%.*]] = load <4 x double>, ptr [[TMP13]], align 8
27
+ ; VF2-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <4 x double> [[WIDE_VEC2]], <4 x double> poison, <2 x i32> <i32 0, i32 2>
28
+ ; VF2-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <4 x double> [[WIDE_VEC2]], <4 x double> poison, <2 x i32> <i32 1, i32 3>
29
+ ; VF2-NEXT: [[TMP3:%.*]] = fneg <2 x double> [[STRIDED_VEC]]
30
+ ; VF2-NEXT: [[TMP14:%.*]] = fneg <2 x double> [[STRIDED_VEC3]]
31
+ ; VF2-NEXT: [[TMP4:%.*]] = fneg <2 x double> [[STRIDED_VEC1]]
32
+ ; VF2-NEXT: [[TMP9:%.*]] = fneg <2 x double> [[STRIDED_VEC4]]
33
+ ; VF2-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
34
+ ; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x double> [[TMP5]], <4 x double> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
35
+ ; VF2-NEXT: store <4 x double> [[INTERLEAVED_VEC]], ptr [[TMP2]], align 8
36
+ ; VF2-NEXT: [[TMP11:%.*]] = shufflevector <2 x double> [[TMP14]], <2 x double> [[TMP9]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
37
+ ; VF2-NEXT: [[INTERLEAVED_VEC5:%.*]] = shufflevector <4 x double> [[TMP11]], <4 x double> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
38
+ ; VF2-NEXT: store <4 x double> [[INTERLEAVED_VEC5]], ptr [[TMP13]], align 8
39
+ ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
30
40
; VF2-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
31
41
; VF2-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
32
42
; VF2: [[MIDDLE_BLOCK]]:
@@ -190,15 +200,18 @@ define void @test_2xi64(ptr noalias %data, ptr noalias %factor) {
190
200
; VF2-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
191
201
; VF2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[TMP0]]
192
202
; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0
193
- ; VF2-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8
194
- ; VF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP3]], i64 0
195
- ; VF2-NEXT: [[WIDE_LOAD:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
203
+ ; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP2]], align 8
196
204
; VF2-NEXT: [[TMP6:%.*]] = shl nsw i64 [[TMP0]], 1
197
205
; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP6]]
198
- ; VF2-NEXT: [[STRIDED_VEC1:%.*]] = load <2 x i64>, ptr [[TMP7]], align 8
199
- ; VF2-NEXT: [[TMP8:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[STRIDED_VEC1]]
200
- ; VF2-NEXT: store <2 x i64> [[TMP8]], ptr [[TMP7]], align 8
201
- ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 1
206
+ ; VF2-NEXT: [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP7]], align 8
207
+ ; VF2-NEXT: [[TMP11:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 0, i32 2>
208
+ ; VF2-NEXT: [[TMP23:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 1, i32 3>
209
+ ; VF2-NEXT: [[TMP12:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[TMP11]]
210
+ ; VF2-NEXT: [[TMP24:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[TMP23]]
211
+ ; VF2-NEXT: [[TMP8:%.*]] = shufflevector <2 x i64> [[TMP12]], <2 x i64> [[TMP24]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
212
+ ; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP8]], <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
213
+ ; VF2-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP7]], align 8
214
+ ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
202
215
; VF2-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
203
216
; VF2-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
204
217
; VF2: [[MIDDLE_BLOCK]]:
@@ -1001,30 +1014,28 @@ define void @test_2xi64_sub_of_wide_loads(ptr noalias %data, ptr noalias %A, ptr
1001
1014
; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]]
1002
1015
; VF2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0
1003
1016
; VF2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 2
1004
- ; VF2-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP3]], align 8
1005
- ; VF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP5]], i64 0
1006
- ; VF2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
1007
- ; VF2-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8
1008
- ; VF2-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <2 x i64> poison, i64 [[TMP6]], i64 0
1009
- ; VF2-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT3]], <2 x i64> poison, <2 x i32> zeroinitializer
1017
+ ; VF2-NEXT: [[BROADCAST_SPLAT:%.*]] = load <2 x i64>, ptr [[TMP3]], align 8
1018
+ ; VF2-NEXT: [[BROADCAST_SPLAT4:%.*]] = load <2 x i64>, ptr [[TMP4]], align 8
1010
1019
; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
1011
1020
; VF2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0
1012
1021
; VF2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 2
1013
- ; VF2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 8
1014
- ; VF2-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[TMP10]], i64 0
1015
- ; VF2-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT1]], <2 x i64> poison, <2 x i32> zeroinitializer
1016
- ; VF2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 8
1017
- ; VF2-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <2 x i64> poison, i64 [[TMP11]], i64 0
1018
- ; VF2-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT5]], <2 x i64> poison, <2 x i32> zeroinitializer
1022
+ ; VF2-NEXT: [[BROADCAST_SPLAT2:%.*]] = load <2 x i64>, ptr [[TMP8]], align 8
1023
+ ; VF2-NEXT: [[BROADCAST_SPLAT6:%.*]] = load <2 x i64>, ptr [[TMP9]], align 8
1019
1024
; VF2-NEXT: [[TMP12:%.*]] = sub <2 x i64> [[BROADCAST_SPLAT]], [[BROADCAST_SPLAT2]]
1020
1025
; VF2-NEXT: [[TMP13:%.*]] = sub <2 x i64> [[BROADCAST_SPLAT4]], [[BROADCAST_SPLAT6]]
1021
1026
; VF2-NEXT: [[TMP19:%.*]] = shl nsw i64 [[TMP0]], 1
1022
1027
; VF2-NEXT: [[TMP20:%.*]] = shl nsw i64 [[TMP1]], 1
1023
1028
; VF2-NEXT: [[DATA_0:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP19]]
1024
1029
; VF2-NEXT: [[DATA_1:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP20]]
1025
- ; VF2-NEXT: store <2 x i64> [[TMP12]], ptr [[DATA_0]], align 8
1026
- ; VF2-NEXT: store <2 x i64> [[TMP13]], ptr [[DATA_1]], align 8
1027
- ; VF2-NEXT: [[IV_NEXT]] = add nuw i64 [[INDEX]], 2
1030
+ ; VF2-NEXT: [[TMP14:%.*]] = sub <2 x i64> [[BROADCAST_SPLAT]], [[BROADCAST_SPLAT2]]
1031
+ ; VF2-NEXT: [[TMP15:%.*]] = sub <2 x i64> [[BROADCAST_SPLAT4]], [[BROADCAST_SPLAT6]]
1032
+ ; VF2-NEXT: [[TMP16:%.*]] = shufflevector <2 x i64> [[TMP12]], <2 x i64> [[TMP14]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1033
+ ; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP16]], <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
1034
+ ; VF2-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[DATA_0]], align 8
1035
+ ; VF2-NEXT: [[TMP17:%.*]] = shufflevector <2 x i64> [[TMP13]], <2 x i64> [[TMP15]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1036
+ ; VF2-NEXT: [[INTERLEAVED_VEC4:%.*]] = shufflevector <4 x i64> [[TMP17]], <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
1037
+ ; VF2-NEXT: store <4 x i64> [[INTERLEAVED_VEC4]], ptr [[DATA_1]], align 8
1038
+ ; VF2-NEXT: [[IV_NEXT]] = add nuw i64 [[INDEX]], 4
1028
1039
; VF2-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 100
1029
1040
; VF2-NEXT: br i1 [[EC]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
1030
1041
; VF2: [[MIDDLE_BLOCK]]:
0 commit comments