@@ -12,6 +12,7 @@ define half @reduce_fast_half2(<2 x half> %vec2) {
12
12
; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x half> [[VEC2]], i64 1
13
13
; CHECK-NEXT: [[ADD1:%.*]] = fadd fast half [[ELT1]], [[ELT0]]
14
14
; CHECK-NEXT: ret half [[ADD1]]
15
+ ;
15
16
entry:
16
17
%elt0 = extractelement <2 x half > %vec2 , i64 0
17
18
%elt1 = extractelement <2 x half > %vec2 , i64 1
@@ -27,6 +28,7 @@ define half @reduce_half2(<2 x half> %vec2) {
27
28
; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x half> [[VEC2]], i64 1
28
29
; CHECK-NEXT: [[ADD1:%.*]] = fadd half [[ELT1]], [[ELT0]]
29
30
; CHECK-NEXT: ret half [[ADD1]]
31
+ ;
30
32
entry:
31
33
%elt0 = extractelement <2 x half > %vec2 , i64 0
32
34
%elt1 = extractelement <2 x half > %vec2 , i64 1
@@ -38,8 +40,9 @@ define half @reduce_fast_half4(<4 x half> %vec4) {
38
40
; CHECK-LABEL: define half @reduce_fast_half4(
39
41
; CHECK-SAME: <4 x half> [[VEC4:%.*]]) #[[ATTR0]] {
40
42
; CHECK-NEXT: [[ENTRY:.*:]]
41
- ; CHECK-NEXT: [[TMP0:%.*]] = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH8000 , <4 x half> [[VEC4]])
43
+ ; CHECK-NEXT: [[TMP0:%.*]] = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000 , <4 x half> [[VEC4]])
42
44
; CHECK-NEXT: ret half [[TMP0]]
45
+ ;
43
46
entry:
44
47
%elt0 = extractelement <4 x half > %vec4 , i64 0
45
48
%elt1 = extractelement <4 x half > %vec4 , i64 1
@@ -63,6 +66,7 @@ define half @reduce_half4(<4 x half> %vec4) {
63
66
; CHECK-NEXT: [[ADD2:%.*]] = fadd half [[ELT2]], [[ADD1]]
64
67
; CHECK-NEXT: [[ADD3:%.*]] = fadd half [[ELT3]], [[ADD2]]
65
68
; CHECK-NEXT: ret half [[ADD3]]
69
+ ;
66
70
entry:
67
71
%elt0 = extractelement <4 x half > %vec4 , i64 0
68
72
%elt1 = extractelement <4 x half > %vec4 , i64 1
@@ -83,12 +87,13 @@ define half @reduce_fast_half8(<8 x half> %vec8) {
83
87
; CHECK-NEXT: [[ELT6:%.*]] = extractelement <8 x half> [[VEC8]], i64 6
84
88
; CHECK-NEXT: [[ELT7:%.*]] = extractelement <8 x half> [[VEC8]], i64 7
85
89
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <8 x half> [[VEC8]], <8 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
86
- ; CHECK-NEXT: [[TMP1:%.*]] = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH8000 , <4 x half> [[TMP0]])
90
+ ; CHECK-NEXT: [[TMP1:%.*]] = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000 , <4 x half> [[TMP0]])
87
91
; CHECK-NEXT: [[OP_RDX:%.*]] = fadd fast half [[TMP1]], [[ELT4]]
88
92
; CHECK-NEXT: [[OP_RDX1:%.*]] = fadd fast half [[ELT5]], [[ELT6]]
89
93
; CHECK-NEXT: [[OP_RDX2:%.*]] = fadd fast half [[OP_RDX]], [[OP_RDX1]]
90
94
; CHECK-NEXT: [[OP_RDX3:%.*]] = fadd fast half [[OP_RDX2]], [[ELT7]]
91
95
; CHECK-NEXT: ret half [[OP_RDX3]]
96
+ ;
92
97
entry:
93
98
%elt0 = extractelement <8 x half > %vec8 , i64 0
94
99
%elt1 = extractelement <8 x half > %vec8 , i64 1
@@ -128,6 +133,7 @@ define half @reduce_half8(<8 x half> %vec8) {
128
133
; CHECK-NEXT: [[ADD6:%.*]] = fadd half [[ELT6]], [[ADD5]]
129
134
; CHECK-NEXT: [[ADD7:%.*]] = fadd half [[ELT7]], [[ADD6]]
130
135
; CHECK-NEXT: ret half [[ADD7]]
136
+ ;
131
137
entry:
132
138
%elt0 = extractelement <8 x half > %vec8 , i64 0
133
139
%elt1 = extractelement <8 x half > %vec8 , i64 1
@@ -151,8 +157,9 @@ define half @reduce_fast_half16(<16 x half> %vec16) {
151
157
; NOFP16-LABEL: define half @reduce_fast_half16(
152
158
; NOFP16-SAME: <16 x half> [[VEC16:%.*]]) #[[ATTR0]] {
153
159
; NOFP16-NEXT: [[ENTRY:.*:]]
154
- ; NOFP16-NEXT: [[TMP0:%.*]] = call fast half @llvm.vector.reduce.fadd.v16f16(half 0xH8000 , <16 x half> [[VEC16]])
160
+ ; NOFP16-NEXT: [[TMP0:%.*]] = call fast half @llvm.vector.reduce.fadd.v16f16(half 0xH0000 , <16 x half> [[VEC16]])
155
161
; NOFP16-NEXT: ret half [[TMP0]]
162
+ ;
156
163
; FP16-LABEL: define half @reduce_fast_half16(
157
164
; FP16-SAME: <16 x half> [[VEC16:%.*]]) #[[ATTR0]] {
158
165
; FP16-NEXT: [[ENTRY:.*:]]
@@ -165,9 +172,9 @@ define half @reduce_fast_half16(<16 x half> %vec16) {
165
172
; FP16-NEXT: [[ELT14:%.*]] = extractelement <16 x half> [[VEC16]], i64 14
166
173
; FP16-NEXT: [[ELT15:%.*]] = extractelement <16 x half> [[VEC16]], i64 15
167
174
; FP16-NEXT: [[TMP0:%.*]] = shufflevector <16 x half> [[VEC16]], <16 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
168
- ; FP16-NEXT: [[TMP1:%.*]] = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH8000 , <4 x half> [[TMP0]])
175
+ ; FP16-NEXT: [[TMP1:%.*]] = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000 , <4 x half> [[TMP0]])
169
176
; FP16-NEXT: [[TMP2:%.*]] = shufflevector <16 x half> [[VEC16]], <16 x half> poison, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
170
- ; FP16-NEXT: [[TMP3:%.*]] = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH8000 , <4 x half> [[TMP2]])
177
+ ; FP16-NEXT: [[TMP3:%.*]] = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000 , <4 x half> [[TMP2]])
171
178
; FP16-NEXT: [[OP_RDX:%.*]] = fadd fast half [[TMP1]], [[TMP3]]
172
179
; FP16-NEXT: [[OP_RDX1:%.*]] = fadd fast half [[OP_RDX]], [[ELT4]]
173
180
; FP16-NEXT: [[OP_RDX2:%.*]] = fadd fast half [[ELT5]], [[ELT6]]
@@ -178,6 +185,7 @@ define half @reduce_fast_half16(<16 x half> %vec16) {
178
185
; FP16-NEXT: [[OP_RDX7:%.*]] = fadd fast half [[OP_RDX5]], [[OP_RDX6]]
179
186
; FP16-NEXT: [[OP_RDX8:%.*]] = fadd fast half [[OP_RDX7]], [[ELT15]]
180
187
; FP16-NEXT: ret half [[OP_RDX8]]
188
+ ;
181
189
entry:
182
190
%elt0 = extractelement <16 x half > %vec16 , i64 0
183
191
%elt1 = extractelement <16 x half > %vec16 , i64 1
@@ -249,6 +257,7 @@ define half @reduce_half16(<16 x half> %vec16) {
249
257
; CHECK-NEXT: [[ADD14:%.*]] = fadd half [[ELT14]], [[ADD13]]
250
258
; CHECK-NEXT: [[ADD15:%.*]] = fadd half [[ELT15]], [[ADD14]]
251
259
; CHECK-NEXT: ret half [[ADD15]]
260
+ ;
252
261
entry:
253
262
%elt0 = extractelement <16 x half > %vec16 , i64 0
254
263
%elt1 = extractelement <16 x half > %vec16 , i64 1
@@ -292,6 +301,7 @@ define float @reduce_fast_float2(<2 x float> %vec2) {
292
301
; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x float> [[VEC2]], i64 1
293
302
; CHECK-NEXT: [[ADD1:%.*]] = fadd fast float [[ELT1]], [[ELT0]]
294
303
; CHECK-NEXT: ret float [[ADD1]]
304
+ ;
295
305
entry:
296
306
%elt0 = extractelement <2 x float > %vec2 , i64 0
297
307
%elt1 = extractelement <2 x float > %vec2 , i64 1
@@ -307,6 +317,7 @@ define float @reduce_float2(<2 x float> %vec2) {
307
317
; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x float> [[VEC2]], i64 1
308
318
; CHECK-NEXT: [[ADD1:%.*]] = fadd float [[ELT1]], [[ELT0]]
309
319
; CHECK-NEXT: ret float [[ADD1]]
320
+ ;
310
321
entry:
311
322
%elt0 = extractelement <2 x float > %vec2 , i64 0
312
323
%elt1 = extractelement <2 x float > %vec2 , i64 1
@@ -318,8 +329,9 @@ define float @reduce_fast_float4(<4 x float> %vec4) {
318
329
; CHECK-LABEL: define float @reduce_fast_float4(
319
330
; CHECK-SAME: <4 x float> [[VEC4:%.*]]) #[[ATTR0]] {
320
331
; CHECK-NEXT: [[ENTRY:.*:]]
321
- ; CHECK-NEXT: [[TMP0:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float - 0.000000e+00, <4 x float> [[VEC4]])
332
+ ; CHECK-NEXT: [[TMP0:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[VEC4]])
322
333
; CHECK-NEXT: ret float [[TMP0]]
334
+ ;
323
335
entry:
324
336
%elt0 = extractelement <4 x float > %vec4 , i64 0
325
337
%elt1 = extractelement <4 x float > %vec4 , i64 1
@@ -343,6 +355,7 @@ define float @reduce_float4(<4 x float> %vec4) {
343
355
; CHECK-NEXT: [[ADD2:%.*]] = fadd float [[ELT2]], [[ADD1]]
344
356
; CHECK-NEXT: [[ADD3:%.*]] = fadd float [[ELT3]], [[ADD2]]
345
357
; CHECK-NEXT: ret float [[ADD3]]
358
+ ;
346
359
entry:
347
360
%elt0 = extractelement <4 x float > %vec4 , i64 0
348
361
%elt1 = extractelement <4 x float > %vec4 , i64 1
@@ -358,8 +371,9 @@ define float @reduce_fast_float8(<8 x float> %vec8) {
358
371
; CHECK-LABEL: define float @reduce_fast_float8(
359
372
; CHECK-SAME: <8 x float> [[VEC8:%.*]]) #[[ATTR0]] {
360
373
; CHECK-NEXT: [[ENTRY:.*:]]
361
- ; CHECK-NEXT: [[TMP0:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float - 0.000000e+00, <8 x float> [[VEC8]])
374
+ ; CHECK-NEXT: [[TMP0:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> [[VEC8]])
362
375
; CHECK-NEXT: ret float [[TMP0]]
376
+ ;
363
377
entry:
364
378
%elt0 = extractelement <8 x float > %vec8 , i64 0
365
379
%elt1 = extractelement <8 x float > %vec8 , i64 1
@@ -399,6 +413,7 @@ define float @reduce_float8(<8 x float> %vec8) {
399
413
; CHECK-NEXT: [[ADD6:%.*]] = fadd float [[ELT6]], [[ADD5]]
400
414
; CHECK-NEXT: [[ADD7:%.*]] = fadd float [[ELT7]], [[ADD6]]
401
415
; CHECK-NEXT: ret float [[ADD7]]
416
+ ;
402
417
entry:
403
418
%elt0 = extractelement <8 x float > %vec8 , i64 0
404
419
%elt1 = extractelement <8 x float > %vec8 , i64 1
@@ -426,6 +441,7 @@ define double @reduce_fast_double2(<2 x double> %vec2) {
426
441
; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x double> [[VEC2]], i64 1
427
442
; CHECK-NEXT: [[ADD1:%.*]] = fadd fast double [[ELT1]], [[ELT0]]
428
443
; CHECK-NEXT: ret double [[ADD1]]
444
+ ;
429
445
entry:
430
446
%elt0 = extractelement <2 x double > %vec2 , i64 0
431
447
%elt1 = extractelement <2 x double > %vec2 , i64 1
@@ -441,6 +457,7 @@ define double @reduce_double2(<2 x double> %vec2) {
441
457
; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x double> [[VEC2]], i64 1
442
458
; CHECK-NEXT: [[ADD1:%.*]] = fadd double [[ELT1]], [[ELT0]]
443
459
; CHECK-NEXT: ret double [[ADD1]]
460
+ ;
444
461
entry:
445
462
%elt0 = extractelement <2 x double > %vec2 , i64 0
446
463
%elt1 = extractelement <2 x double > %vec2 , i64 1
@@ -452,8 +469,9 @@ define double @reduce_fast_double4(<4 x double> %vec4) {
452
469
; CHECK-LABEL: define double @reduce_fast_double4(
453
470
; CHECK-SAME: <4 x double> [[VEC4:%.*]]) #[[ATTR0]] {
454
471
; CHECK-NEXT: [[ENTRY:.*:]]
455
- ; CHECK-NEXT: [[TMP0:%.*]] = call fast double @llvm.vector.reduce.fadd.v4f64(double - 0.000000e+00, <4 x double> [[VEC4]])
472
+ ; CHECK-NEXT: [[TMP0:%.*]] = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> [[VEC4]])
456
473
; CHECK-NEXT: ret double [[TMP0]]
474
+ ;
457
475
entry:
458
476
%elt0 = extractelement <4 x double > %vec4 , i64 0
459
477
%elt1 = extractelement <4 x double > %vec4 , i64 1
@@ -477,6 +495,7 @@ define double @reduce_double4(<4 x double> %vec4) {
477
495
; CHECK-NEXT: [[ADD2:%.*]] = fadd double [[ELT2]], [[ADD1]]
478
496
; CHECK-NEXT: [[ADD3:%.*]] = fadd double [[ELT3]], [[ADD2]]
479
497
; CHECK-NEXT: ret double [[ADD3]]
498
+ ;
480
499
entry:
481
500
%elt0 = extractelement <4 x double > %vec4 , i64 0
482
501
%elt1 = extractelement <4 x double > %vec4 , i64 1
@@ -507,6 +526,7 @@ define float @reduce_fast_float_case1(ptr %a) {
507
526
; CHECK-NEXT: [[LOAD4:%.*]] = load float, ptr [[GEP4]], align 4
508
527
; CHECK-NEXT: [[ADD4:%.*]] = fadd fast float [[LOAD4]], [[ADD3]]
509
528
; CHECK-NEXT: ret float [[ADD4]]
529
+ ;
510
530
entry:
511
531
%load = load float , ptr %a
512
532
%gep = getelementptr inbounds i8 , ptr %a , i64 4
@@ -543,6 +563,7 @@ define float @reduce_float_case1(ptr %a) {
543
563
; CHECK-NEXT: [[LOAD4:%.*]] = load float, ptr [[GEP4]], align 4
544
564
; CHECK-NEXT: [[ADD4:%.*]] = fadd float [[LOAD4]], [[ADD3]]
545
565
; CHECK-NEXT: ret float [[ADD4]]
566
+ ;
546
567
entry:
547
568
%load = load float , ptr %a
548
569
%gep = getelementptr inbounds i8 , ptr %a , i64 4
@@ -584,6 +605,7 @@ define float @reduce_fast_float_case2(ptr %a, ptr %b) {
584
605
; CHECK-NEXT: [[RED2:%.*]] = fadd fast float [[ADD2]], [[RED1]]
585
606
; CHECK-NEXT: [[RED3:%.*]] = fadd fast float [[ADD3]], [[RED2]]
586
607
; CHECK-NEXT: ret float [[RED3]]
608
+ ;
587
609
entry:
588
610
%gepa1 = getelementptr inbounds float , ptr %a , i32 1
589
611
%gepa2 = getelementptr inbounds float , ptr %a , i32 2
@@ -633,6 +655,7 @@ define float @reduce_float_case2(ptr %a, ptr %b) {
633
655
; CHECK-NEXT: [[RED2:%.*]] = fadd float [[ADD2]], [[RED1]]
634
656
; CHECK-NEXT: [[RED3:%.*]] = fadd float [[ADD3]], [[RED2]]
635
657
; CHECK-NEXT: ret float [[RED3]]
658
+ ;
636
659
entry:
637
660
%gepa1 = getelementptr inbounds float , ptr %a , i32 1
638
661
%gepa2 = getelementptr inbounds float , ptr %a , i32 2
@@ -694,6 +717,7 @@ define float @reduce_fast_float_case3(ptr %a) {
694
717
; CHECK-NEXT: [[ADD6:%.*]] = fadd fast float [[ADD5]], [[LOG6]]
695
718
; CHECK-NEXT: [[ADD7:%.*]] = fadd fast float [[ADD6]], [[LOG7]]
696
719
; CHECK-NEXT: ret float [[ADD7]]
720
+ ;
697
721
entry:
698
722
%gep1 = getelementptr inbounds float , ptr %a , i32 1
699
723
%gep2 = getelementptr inbounds float , ptr %a , i32 2
@@ -764,6 +788,7 @@ define float @reduce_float_case3(ptr %a) {
764
788
; CHECK-NEXT: [[ADD6:%.*]] = fadd float [[ADD5]], [[LOG6]]
765
789
; CHECK-NEXT: [[ADD7:%.*]] = fadd float [[ADD6]], [[LOG7]]
766
790
; CHECK-NEXT: ret float [[ADD7]]
791
+ ;
767
792
entry:
768
793
%gep1 = getelementptr inbounds float , ptr %a , i32 1
769
794
%gep2 = getelementptr inbounds float , ptr %a , i32 2
@@ -802,8 +827,9 @@ define half @reduce_unordered_fast_half4(<4 x half> %vec4) {
802
827
; CHECK-LABEL: define half @reduce_unordered_fast_half4(
803
828
; CHECK-SAME: <4 x half> [[VEC4:%.*]]) #[[ATTR0]] {
804
829
; CHECK-NEXT: [[ENTRY:.*:]]
805
- ; CHECK-NEXT: [[TMP0:%.*]] = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH8000 , <4 x half> [[VEC4]])
830
+ ; CHECK-NEXT: [[TMP0:%.*]] = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000 , <4 x half> [[VEC4]])
806
831
; CHECK-NEXT: ret half [[TMP0]]
832
+ ;
807
833
entry:
808
834
%elt0 = extractelement <4 x half > %vec4 , i64 0
809
835
%elt1 = extractelement <4 x half > %vec4 , i64 1
@@ -826,6 +852,7 @@ define half @reduce_unordered_half4(<4 x half> %vec4) {
826
852
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x half> [[TMP2]], i32 1
827
853
; CHECK-NEXT: [[ADD3:%.*]] = fadd half [[TMP3]], [[TMP4]]
828
854
; CHECK-NEXT: ret half [[ADD3]]
855
+ ;
829
856
entry:
830
857
%elt0 = extractelement <4 x half > %vec4 , i64 0
831
858
%elt1 = extractelement <4 x half > %vec4 , i64 1
0 commit comments