Skip to content

Commit d37d057

Browse files
authored
[SLP][AArch64] Fix test failure for PR #106507 (#108442)
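This patch updates the failing test's CHECK lines: the start value of the fast `llvm.vector.reduce.fadd` calls changes from -0.0 (`0xH8000` / `-0.000000e+00`) to +0.0 (`0xH0000` / `0.000000e+00`), and a trailing `;` line is added after each CHECK block.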
1 parent 87e1104 commit d37d057

File tree

1 file changed

+36
-9
lines changed

1 file changed

+36
-9
lines changed

llvm/test/Transforms/SLPVectorizer/AArch64/reduce-fadd.ll

Lines changed: 36 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ define half @reduce_fast_half2(<2 x half> %vec2) {
1212
; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x half> [[VEC2]], i64 1
1313
; CHECK-NEXT: [[ADD1:%.*]] = fadd fast half [[ELT1]], [[ELT0]]
1414
; CHECK-NEXT: ret half [[ADD1]]
15+
;
1516
entry:
1617
%elt0 = extractelement <2 x half> %vec2, i64 0
1718
%elt1 = extractelement <2 x half> %vec2, i64 1
@@ -27,6 +28,7 @@ define half @reduce_half2(<2 x half> %vec2) {
2728
; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x half> [[VEC2]], i64 1
2829
; CHECK-NEXT: [[ADD1:%.*]] = fadd half [[ELT1]], [[ELT0]]
2930
; CHECK-NEXT: ret half [[ADD1]]
31+
;
3032
entry:
3133
%elt0 = extractelement <2 x half> %vec2, i64 0
3234
%elt1 = extractelement <2 x half> %vec2, i64 1
@@ -38,8 +40,9 @@ define half @reduce_fast_half4(<4 x half> %vec4) {
3840
; CHECK-LABEL: define half @reduce_fast_half4(
3941
; CHECK-SAME: <4 x half> [[VEC4:%.*]]) #[[ATTR0]] {
4042
; CHECK-NEXT: [[ENTRY:.*:]]
41-
; CHECK-NEXT: [[TMP0:%.*]] = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH8000, <4 x half> [[VEC4]])
43+
; CHECK-NEXT: [[TMP0:%.*]] = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> [[VEC4]])
4244
; CHECK-NEXT: ret half [[TMP0]]
45+
;
4346
entry:
4447
%elt0 = extractelement <4 x half> %vec4, i64 0
4548
%elt1 = extractelement <4 x half> %vec4, i64 1
@@ -63,6 +66,7 @@ define half @reduce_half4(<4 x half> %vec4) {
6366
; CHECK-NEXT: [[ADD2:%.*]] = fadd half [[ELT2]], [[ADD1]]
6467
; CHECK-NEXT: [[ADD3:%.*]] = fadd half [[ELT3]], [[ADD2]]
6568
; CHECK-NEXT: ret half [[ADD3]]
69+
;
6670
entry:
6771
%elt0 = extractelement <4 x half> %vec4, i64 0
6872
%elt1 = extractelement <4 x half> %vec4, i64 1
@@ -83,12 +87,13 @@ define half @reduce_fast_half8(<8 x half> %vec8) {
8387
; CHECK-NEXT: [[ELT6:%.*]] = extractelement <8 x half> [[VEC8]], i64 6
8488
; CHECK-NEXT: [[ELT7:%.*]] = extractelement <8 x half> [[VEC8]], i64 7
8589
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <8 x half> [[VEC8]], <8 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
86-
; CHECK-NEXT: [[TMP1:%.*]] = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH8000, <4 x half> [[TMP0]])
90+
; CHECK-NEXT: [[TMP1:%.*]] = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> [[TMP0]])
8791
; CHECK-NEXT: [[OP_RDX:%.*]] = fadd fast half [[TMP1]], [[ELT4]]
8892
; CHECK-NEXT: [[OP_RDX1:%.*]] = fadd fast half [[ELT5]], [[ELT6]]
8993
; CHECK-NEXT: [[OP_RDX2:%.*]] = fadd fast half [[OP_RDX]], [[OP_RDX1]]
9094
; CHECK-NEXT: [[OP_RDX3:%.*]] = fadd fast half [[OP_RDX2]], [[ELT7]]
9195
; CHECK-NEXT: ret half [[OP_RDX3]]
96+
;
9297
entry:
9398
%elt0 = extractelement <8 x half> %vec8, i64 0
9499
%elt1 = extractelement <8 x half> %vec8, i64 1
@@ -128,6 +133,7 @@ define half @reduce_half8(<8 x half> %vec8) {
128133
; CHECK-NEXT: [[ADD6:%.*]] = fadd half [[ELT6]], [[ADD5]]
129134
; CHECK-NEXT: [[ADD7:%.*]] = fadd half [[ELT7]], [[ADD6]]
130135
; CHECK-NEXT: ret half [[ADD7]]
136+
;
131137
entry:
132138
%elt0 = extractelement <8 x half> %vec8, i64 0
133139
%elt1 = extractelement <8 x half> %vec8, i64 1
@@ -151,8 +157,9 @@ define half @reduce_fast_half16(<16 x half> %vec16) {
151157
; NOFP16-LABEL: define half @reduce_fast_half16(
152158
; NOFP16-SAME: <16 x half> [[VEC16:%.*]]) #[[ATTR0]] {
153159
; NOFP16-NEXT: [[ENTRY:.*:]]
154-
; NOFP16-NEXT: [[TMP0:%.*]] = call fast half @llvm.vector.reduce.fadd.v16f16(half 0xH8000, <16 x half> [[VEC16]])
160+
; NOFP16-NEXT: [[TMP0:%.*]] = call fast half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> [[VEC16]])
155161
; NOFP16-NEXT: ret half [[TMP0]]
162+
;
156163
; FP16-LABEL: define half @reduce_fast_half16(
157164
; FP16-SAME: <16 x half> [[VEC16:%.*]]) #[[ATTR0]] {
158165
; FP16-NEXT: [[ENTRY:.*:]]
@@ -165,9 +172,9 @@ define half @reduce_fast_half16(<16 x half> %vec16) {
165172
; FP16-NEXT: [[ELT14:%.*]] = extractelement <16 x half> [[VEC16]], i64 14
166173
; FP16-NEXT: [[ELT15:%.*]] = extractelement <16 x half> [[VEC16]], i64 15
167174
; FP16-NEXT: [[TMP0:%.*]] = shufflevector <16 x half> [[VEC16]], <16 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
168-
; FP16-NEXT: [[TMP1:%.*]] = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH8000, <4 x half> [[TMP0]])
175+
; FP16-NEXT: [[TMP1:%.*]] = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> [[TMP0]])
169176
; FP16-NEXT: [[TMP2:%.*]] = shufflevector <16 x half> [[VEC16]], <16 x half> poison, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
170-
; FP16-NEXT: [[TMP3:%.*]] = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH8000, <4 x half> [[TMP2]])
177+
; FP16-NEXT: [[TMP3:%.*]] = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> [[TMP2]])
171178
; FP16-NEXT: [[OP_RDX:%.*]] = fadd fast half [[TMP1]], [[TMP3]]
172179
; FP16-NEXT: [[OP_RDX1:%.*]] = fadd fast half [[OP_RDX]], [[ELT4]]
173180
; FP16-NEXT: [[OP_RDX2:%.*]] = fadd fast half [[ELT5]], [[ELT6]]
@@ -178,6 +185,7 @@ define half @reduce_fast_half16(<16 x half> %vec16) {
178185
; FP16-NEXT: [[OP_RDX7:%.*]] = fadd fast half [[OP_RDX5]], [[OP_RDX6]]
179186
; FP16-NEXT: [[OP_RDX8:%.*]] = fadd fast half [[OP_RDX7]], [[ELT15]]
180187
; FP16-NEXT: ret half [[OP_RDX8]]
188+
;
181189
entry:
182190
%elt0 = extractelement <16 x half> %vec16, i64 0
183191
%elt1 = extractelement <16 x half> %vec16, i64 1
@@ -249,6 +257,7 @@ define half @reduce_half16(<16 x half> %vec16) {
249257
; CHECK-NEXT: [[ADD14:%.*]] = fadd half [[ELT14]], [[ADD13]]
250258
; CHECK-NEXT: [[ADD15:%.*]] = fadd half [[ELT15]], [[ADD14]]
251259
; CHECK-NEXT: ret half [[ADD15]]
260+
;
252261
entry:
253262
%elt0 = extractelement <16 x half> %vec16, i64 0
254263
%elt1 = extractelement <16 x half> %vec16, i64 1
@@ -292,6 +301,7 @@ define float @reduce_fast_float2(<2 x float> %vec2) {
292301
; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x float> [[VEC2]], i64 1
293302
; CHECK-NEXT: [[ADD1:%.*]] = fadd fast float [[ELT1]], [[ELT0]]
294303
; CHECK-NEXT: ret float [[ADD1]]
304+
;
295305
entry:
296306
%elt0 = extractelement <2 x float> %vec2, i64 0
297307
%elt1 = extractelement <2 x float> %vec2, i64 1
@@ -307,6 +317,7 @@ define float @reduce_float2(<2 x float> %vec2) {
307317
; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x float> [[VEC2]], i64 1
308318
; CHECK-NEXT: [[ADD1:%.*]] = fadd float [[ELT1]], [[ELT0]]
309319
; CHECK-NEXT: ret float [[ADD1]]
320+
;
310321
entry:
311322
%elt0 = extractelement <2 x float> %vec2, i64 0
312323
%elt1 = extractelement <2 x float> %vec2, i64 1
@@ -318,8 +329,9 @@ define float @reduce_fast_float4(<4 x float> %vec4) {
318329
; CHECK-LABEL: define float @reduce_fast_float4(
319330
; CHECK-SAME: <4 x float> [[VEC4:%.*]]) #[[ATTR0]] {
320331
; CHECK-NEXT: [[ENTRY:.*:]]
321-
; CHECK-NEXT: [[TMP0:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[VEC4]])
332+
; CHECK-NEXT: [[TMP0:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[VEC4]])
322333
; CHECK-NEXT: ret float [[TMP0]]
334+
;
323335
entry:
324336
%elt0 = extractelement <4 x float> %vec4, i64 0
325337
%elt1 = extractelement <4 x float> %vec4, i64 1
@@ -343,6 +355,7 @@ define float @reduce_float4(<4 x float> %vec4) {
343355
; CHECK-NEXT: [[ADD2:%.*]] = fadd float [[ELT2]], [[ADD1]]
344356
; CHECK-NEXT: [[ADD3:%.*]] = fadd float [[ELT3]], [[ADD2]]
345357
; CHECK-NEXT: ret float [[ADD3]]
358+
;
346359
entry:
347360
%elt0 = extractelement <4 x float> %vec4, i64 0
348361
%elt1 = extractelement <4 x float> %vec4, i64 1
@@ -358,8 +371,9 @@ define float @reduce_fast_float8(<8 x float> %vec8) {
358371
; CHECK-LABEL: define float @reduce_fast_float8(
359372
; CHECK-SAME: <8 x float> [[VEC8:%.*]]) #[[ATTR0]] {
360373
; CHECK-NEXT: [[ENTRY:.*:]]
361-
; CHECK-NEXT: [[TMP0:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[VEC8]])
374+
; CHECK-NEXT: [[TMP0:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> [[VEC8]])
362375
; CHECK-NEXT: ret float [[TMP0]]
376+
;
363377
entry:
364378
%elt0 = extractelement <8 x float> %vec8, i64 0
365379
%elt1 = extractelement <8 x float> %vec8, i64 1
@@ -399,6 +413,7 @@ define float @reduce_float8(<8 x float> %vec8) {
399413
; CHECK-NEXT: [[ADD6:%.*]] = fadd float [[ELT6]], [[ADD5]]
400414
; CHECK-NEXT: [[ADD7:%.*]] = fadd float [[ELT7]], [[ADD6]]
401415
; CHECK-NEXT: ret float [[ADD7]]
416+
;
402417
entry:
403418
%elt0 = extractelement <8 x float> %vec8, i64 0
404419
%elt1 = extractelement <8 x float> %vec8, i64 1
@@ -426,6 +441,7 @@ define double @reduce_fast_double2(<2 x double> %vec2) {
426441
; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x double> [[VEC2]], i64 1
427442
; CHECK-NEXT: [[ADD1:%.*]] = fadd fast double [[ELT1]], [[ELT0]]
428443
; CHECK-NEXT: ret double [[ADD1]]
444+
;
429445
entry:
430446
%elt0 = extractelement <2 x double> %vec2, i64 0
431447
%elt1 = extractelement <2 x double> %vec2, i64 1
@@ -441,6 +457,7 @@ define double @reduce_double2(<2 x double> %vec2) {
441457
; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x double> [[VEC2]], i64 1
442458
; CHECK-NEXT: [[ADD1:%.*]] = fadd double [[ELT1]], [[ELT0]]
443459
; CHECK-NEXT: ret double [[ADD1]]
460+
;
444461
entry:
445462
%elt0 = extractelement <2 x double> %vec2, i64 0
446463
%elt1 = extractelement <2 x double> %vec2, i64 1
@@ -452,8 +469,9 @@ define double @reduce_fast_double4(<4 x double> %vec4) {
452469
; CHECK-LABEL: define double @reduce_fast_double4(
453470
; CHECK-SAME: <4 x double> [[VEC4:%.*]]) #[[ATTR0]] {
454471
; CHECK-NEXT: [[ENTRY:.*:]]
455-
; CHECK-NEXT: [[TMP0:%.*]] = call fast double @llvm.vector.reduce.fadd.v4f64(double -0.000000e+00, <4 x double> [[VEC4]])
472+
; CHECK-NEXT: [[TMP0:%.*]] = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> [[VEC4]])
456473
; CHECK-NEXT: ret double [[TMP0]]
474+
;
457475
entry:
458476
%elt0 = extractelement <4 x double> %vec4, i64 0
459477
%elt1 = extractelement <4 x double> %vec4, i64 1
@@ -477,6 +495,7 @@ define double @reduce_double4(<4 x double> %vec4) {
477495
; CHECK-NEXT: [[ADD2:%.*]] = fadd double [[ELT2]], [[ADD1]]
478496
; CHECK-NEXT: [[ADD3:%.*]] = fadd double [[ELT3]], [[ADD2]]
479497
; CHECK-NEXT: ret double [[ADD3]]
498+
;
480499
entry:
481500
%elt0 = extractelement <4 x double> %vec4, i64 0
482501
%elt1 = extractelement <4 x double> %vec4, i64 1
@@ -507,6 +526,7 @@ define float @reduce_fast_float_case1(ptr %a) {
507526
; CHECK-NEXT: [[LOAD4:%.*]] = load float, ptr [[GEP4]], align 4
508527
; CHECK-NEXT: [[ADD4:%.*]] = fadd fast float [[LOAD4]], [[ADD3]]
509528
; CHECK-NEXT: ret float [[ADD4]]
529+
;
510530
entry:
511531
%load = load float, ptr %a
512532
%gep = getelementptr inbounds i8, ptr %a, i64 4
@@ -543,6 +563,7 @@ define float @reduce_float_case1(ptr %a) {
543563
; CHECK-NEXT: [[LOAD4:%.*]] = load float, ptr [[GEP4]], align 4
544564
; CHECK-NEXT: [[ADD4:%.*]] = fadd float [[LOAD4]], [[ADD3]]
545565
; CHECK-NEXT: ret float [[ADD4]]
566+
;
546567
entry:
547568
%load = load float, ptr %a
548569
%gep = getelementptr inbounds i8, ptr %a, i64 4
@@ -584,6 +605,7 @@ define float @reduce_fast_float_case2(ptr %a, ptr %b) {
584605
; CHECK-NEXT: [[RED2:%.*]] = fadd fast float [[ADD2]], [[RED1]]
585606
; CHECK-NEXT: [[RED3:%.*]] = fadd fast float [[ADD3]], [[RED2]]
586607
; CHECK-NEXT: ret float [[RED3]]
608+
;
587609
entry:
588610
%gepa1 = getelementptr inbounds float, ptr %a, i32 1
589611
%gepa2 = getelementptr inbounds float, ptr %a, i32 2
@@ -633,6 +655,7 @@ define float @reduce_float_case2(ptr %a, ptr %b) {
633655
; CHECK-NEXT: [[RED2:%.*]] = fadd float [[ADD2]], [[RED1]]
634656
; CHECK-NEXT: [[RED3:%.*]] = fadd float [[ADD3]], [[RED2]]
635657
; CHECK-NEXT: ret float [[RED3]]
658+
;
636659
entry:
637660
%gepa1 = getelementptr inbounds float, ptr %a, i32 1
638661
%gepa2 = getelementptr inbounds float, ptr %a, i32 2
@@ -694,6 +717,7 @@ define float @reduce_fast_float_case3(ptr %a) {
694717
; CHECK-NEXT: [[ADD6:%.*]] = fadd fast float [[ADD5]], [[LOG6]]
695718
; CHECK-NEXT: [[ADD7:%.*]] = fadd fast float [[ADD6]], [[LOG7]]
696719
; CHECK-NEXT: ret float [[ADD7]]
720+
;
697721
entry:
698722
%gep1 = getelementptr inbounds float, ptr %a, i32 1
699723
%gep2 = getelementptr inbounds float, ptr %a, i32 2
@@ -764,6 +788,7 @@ define float @reduce_float_case3(ptr %a) {
764788
; CHECK-NEXT: [[ADD6:%.*]] = fadd float [[ADD5]], [[LOG6]]
765789
; CHECK-NEXT: [[ADD7:%.*]] = fadd float [[ADD6]], [[LOG7]]
766790
; CHECK-NEXT: ret float [[ADD7]]
791+
;
767792
entry:
768793
%gep1 = getelementptr inbounds float, ptr %a, i32 1
769794
%gep2 = getelementptr inbounds float, ptr %a, i32 2
@@ -802,8 +827,9 @@ define half @reduce_unordered_fast_half4(<4 x half> %vec4) {
802827
; CHECK-LABEL: define half @reduce_unordered_fast_half4(
803828
; CHECK-SAME: <4 x half> [[VEC4:%.*]]) #[[ATTR0]] {
804829
; CHECK-NEXT: [[ENTRY:.*:]]
805-
; CHECK-NEXT: [[TMP0:%.*]] = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH8000, <4 x half> [[VEC4]])
830+
; CHECK-NEXT: [[TMP0:%.*]] = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> [[VEC4]])
806831
; CHECK-NEXT: ret half [[TMP0]]
832+
;
807833
entry:
808834
%elt0 = extractelement <4 x half> %vec4, i64 0
809835
%elt1 = extractelement <4 x half> %vec4, i64 1
@@ -826,6 +852,7 @@ define half @reduce_unordered_half4(<4 x half> %vec4) {
826852
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x half> [[TMP2]], i32 1
827853
; CHECK-NEXT: [[ADD3:%.*]] = fadd half [[TMP3]], [[TMP4]]
828854
; CHECK-NEXT: ret half [[ADD3]]
855+
;
829856
entry:
830857
%elt0 = extractelement <4 x half> %vec4, i64 0
831858
%elt1 = extractelement <4 x half> %vec4, i64 1

0 commit comments

Comments
 (0)