@@ -21,7 +21,9 @@ declare float @tanf(float) #0
21
21
declare double @llvm.tan.f64 (double ) #0
22
22
declare float @llvm.tan.f32 (float ) #0
23
23
24
+ declare double @acos (double ) #0
24
25
declare float @acosf (float ) #0
26
+ declare double @llvm.acos.f64 (double ) #0
25
27
declare float @llvm.acos.f32 (float ) #0
26
28
27
29
declare double @asin (double ) #0
@@ -34,12 +36,19 @@ declare float @atanf(float) #0
34
36
declare double @llvm.atan.f64 (double ) #0
35
37
declare float @llvm.atan.f32 (float ) #0
36
38
39
+ declare double @sinh (double ) #0
40
+ declare float @sinhf (float ) #0
41
+ declare double @llvm.sinh.f64 (double ) #0
42
+ declare float @llvm.sinh.f32 (float ) #0
43
+
37
44
declare double @cosh (double ) #0
38
45
declare float @coshf (float ) #0
39
46
declare double @llvm.cosh.f64 (double ) #0
40
47
declare float @llvm.cosh.f32 (float ) #0
41
48
49
+ declare double @tanh (double ) #0
42
50
declare float @tanhf (float ) #0
51
+ declare double @llvm.tanh.f64 (double ) #0
43
52
declare float @llvm.tanh.f32 (float ) #0
44
53
45
54
declare double @pow (double , double ) #0
@@ -387,8 +396,35 @@ for.end:
387
396
ret void
388
397
}
389
398
399
; Scalar libm call @acos on f64: the CHECK-NOT lines assert the vectorizer
; does NOT widen it to the llvm.acos vector intrinsic at any tested VF
; (presumably because no AMD vector-math f64 acos mapping exists — confirm
; against the run lines / veclib tables outside this chunk).
define void @acos_f64(ptr nocapture %varray) {
; CHECK-LABEL: @acos_f64(
; CHECK-VF2-NOT:[[TMP5:%.*]] = call <2 x double> @llvm.acos.v2f64(<2 x double> [[TMP4:%.*]])
; CHECK-VF4-NOT:[[TMP5:%.*]] = call <4 x double> @llvm.acos.v4f64(<4 x double> [[TMP4:%.*]])
; CHECK-VF8-NOT:[[TMP5:%.*]] = call <8 x double> @llvm.acos.v8f64(<8 x double> [[TMP4:%.*]])
; CHECK-VF16-NOT:[[TMP5:%.*]] = call <16 x double> @llvm.acos.v16f64(<16 x double> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
  br label %for.body

for.body:
  ; iv counts 0..999; each iteration stores acos(double(iv)) to varray[iv]
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @acos(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}
424
+
390
425
define void @acos_f32 (ptr nocapture %varray ) {
391
426
; CHECK-LABEL: @acos_f32(
427
+ ; CHECK-VF2-NOT:[[TMP5:%.*]] = call <2 x float> @llvm.acos.v2f32(<2 x float> [[TMP4:%.*]])
392
428
; CHECK-VF4: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_acosf(<4 x float> [[TMP4:%.*]])
393
429
; CHECK-VF8: [[TMP5:%.*]] = call <8 x float> @amd_vrs8_acosf(<8 x float> [[TMP4:%.*]])
394
430
; CHECK-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_acosf(<16 x float> [[TMP4:%.*]])
@@ -412,6 +448,32 @@ for.end:
412
448
ret void
413
449
}
414
450
451
; llvm.acos.f64 intrinsic in a vectorizable loop: the positive CHECK lines
; assert the call IS widened to the corresponding llvm.acos vector intrinsic
; at every tested VF.
define void @acos_f64_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @acos_f64_intrinsic(
; CHECK-VF2: [[TMP5:%.*]] = call <2 x double> @llvm.acos.v2f64(<2 x double> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x double> @llvm.acos.v4f64(<4 x double> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x double> @llvm.acos.v8f64(<8 x double> [[TMP4:%.*]])
; CHECK-VF16: [[TMP5:%.*]] = call <16 x double> @llvm.acos.v16f64(<16 x double> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
  br label %for.body

for.body:
  ; varray[iv] = llvm.acos(double(iv)) for iv in 0..999
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @llvm.acos.f64(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}
476
+
415
477
define void @acos_f32_intrinsic (ptr nocapture %varray ) {
416
478
; CHECK-LABEL: @acos_f32_intrinsic(
417
479
; CHECK-VF2: [[TMP5:%.*]] = call <2 x float> @llvm.acos.v2f32(<2 x float> [[TMP4:%.*]])
@@ -440,7 +502,10 @@ for.end:
440
502
441
503
define void @asin_f64 (ptr nocapture %varray ) {
442
504
; CHECK-LABEL: @asin_f64(
505
+ ; CHECK-VF2-NOT:[[TMP5:%.*]] = call <2 x double> @llvm.asin.v2f64(<2 x double> [[TMP4:%.*]])
506
+ ; CHECK-VF4-NOT:[[TMP5:%.*]] = call <4 x double> @llvm.asin.v4f64(<4 x double> [[TMP4:%.*]])
443
507
; CHECK-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_asin(<8 x double> [[TMP4:%.*]])
508
+ ; CHECK-VF16-NOT:[[TMP5:%.*]] = call <16 x double> @llvm.asin.v16f64(<16 x double> [[TMP4:%.*]])
444
509
; CHECK: ret void
445
510
;
446
511
entry:
@@ -463,6 +528,7 @@ for.end:
463
528
464
529
define void @asin_f32 (ptr nocapture %varray ) {
465
530
; CHECK-LABEL: @asin_f32(
531
+ ; CHECK-VF2-NOT:[[TMP5:%.*]] = call <2 x float> @llvm.asin.v2f32(<2 x float> [[TMP4:%.*]])
466
532
; CHECK-VF4: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_asinf(<4 x float> [[TMP4:%.*]])
467
533
; CHECK-VF8: [[TMP5:%.*]] = call <8 x float> @amd_vrs8_asinf(<8 x float> [[TMP4:%.*]])
468
534
; CHECK-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_asinf(<16 x float> [[TMP4:%.*]])
@@ -543,6 +609,7 @@ define void @atan_f64(ptr nocapture %varray) {
543
609
; CHECK-VF2: [[TMP5:%.*]] = call <2 x double> @amd_vrd2_atan(<2 x double> [[TMP4:%.*]])
544
610
; CHECK-VF4: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_atan(<4 x double> [[TMP4:%.*]])
545
611
; CHECK-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_atan(<8 x double> [[TMP4:%.*]])
612
+ ; CHECK-VF16-NOT:[[TMP5:%.*]] = call <16 x double> @llvm.atan.v16f64(<16 x double> [[TMP4:%.*]])
546
613
; CHECK: ret void
547
614
;
548
615
entry:
@@ -565,6 +632,7 @@ for.end:
565
632
566
633
define void @atan_f32 (ptr nocapture %varray ) {
567
634
; CHECK-LABEL: @atan_f32(
635
+ ; CHECK-VF2-NOT:[[TMP5:%.*]] = call <2 x float> @llvm.atan.v2f32(<2 x float> [[TMP4:%.*]])
568
636
; CHECK-VF4: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_atanf(<4 x float> [[TMP4:%.*]])
569
637
; CHECK-VF8: [[TMP5:%.*]] = call <8 x float> @amd_vrs8_atanf(<8 x float> [[TMP4:%.*]])
570
638
; CHECK-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_atanf(<16 x float> [[TMP4:%.*]])
@@ -640,9 +708,116 @@ for.end:
640
708
ret void
641
709
}
642
710
711
; Scalar libm call @sinh on f64: the CHECK-NOT lines assert no widening to
; the llvm.sinh vector intrinsic at any tested VF.
define void @sinh_f64(ptr nocapture %varray) {
; CHECK-LABEL: @sinh_f64(
; CHECK-VF2-NOT:[[TMP5:%.*]] = call <2 x double> @llvm.sinh.v2f64(<2 x double> [[TMP4:%.*]])
; CHECK-VF4-NOT:[[TMP5:%.*]] = call <4 x double> @llvm.sinh.v4f64(<4 x double> [[TMP4:%.*]])
; CHECK-VF8-NOT:[[TMP5:%.*]] = call <8 x double> @llvm.sinh.v8f64(<8 x double> [[TMP4:%.*]])
; CHECK-VF16-NOT:[[TMP5:%.*]] = call <16 x double> @llvm.sinh.v16f64(<16 x double> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
  br label %for.body

for.body:
  ; varray[iv] = sinh(double(iv)) for iv in 0..999
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @sinh(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}
736
+
737
; Scalar libm call @sinhf on f32: the CHECK-NOT lines assert no widening to
; the llvm.sinh vector intrinsic at any tested VF.
define void @sinh_f32(ptr nocapture %varray) {
; CHECK-LABEL: @sinh_f32(
; CHECK-VF2-NOT:[[TMP5:%.*]] = call <2 x float> @llvm.sinh.v2f32(<2 x float> [[TMP4:%.*]])
; CHECK-VF4-NOT:[[TMP5:%.*]] = call <4 x float> @llvm.sinh.v4f32(<4 x float> [[TMP4:%.*]])
; CHECK-VF8-NOT:[[TMP5:%.*]] = call <8 x float> @llvm.sinh.v8f32(<8 x float> [[TMP4:%.*]])
; CHECK-VF16-NOT:[[TMP5:%.*]] = call <16 x float> @llvm.sinh.v16f32(<16 x float> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
  br label %for.body

for.body:
  ; varray[iv] = sinhf(float(iv)) for iv in 0..999
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @sinhf(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}
762
+
763
; llvm.sinh.f64 intrinsic in a vectorizable loop: the positive CHECK lines
; assert widening to the llvm.sinh vector intrinsic at every tested VF.
define void @sinh_f64_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @sinh_f64_intrinsic(
; CHECK-VF2: [[TMP5:%.*]] = call <2 x double> @llvm.sinh.v2f64(<2 x double> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x double> @llvm.sinh.v4f64(<4 x double> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x double> @llvm.sinh.v8f64(<8 x double> [[TMP4:%.*]])
; CHECK-VF16: [[TMP5:%.*]] = call <16 x double> @llvm.sinh.v16f64(<16 x double> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
  br label %for.body

for.body:
  ; varray[iv] = llvm.sinh(double(iv)) for iv in 0..999
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @llvm.sinh.f64(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}
788
+
789
; llvm.sinh.f32 intrinsic in a vectorizable loop: the positive CHECK lines
; assert widening to the llvm.sinh vector intrinsic at every tested VF.
define void @sinh_f32_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @sinh_f32_intrinsic(
; CHECK-VF2: [[TMP5:%.*]] = call <2 x float> @llvm.sinh.v2f32(<2 x float> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x float> @llvm.sinh.v4f32(<4 x float> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x float> @llvm.sinh.v8f32(<8 x float> [[TMP4:%.*]])
; CHECK-VF16: [[TMP5:%.*]] = call <16 x float> @llvm.sinh.v16f32(<16 x float> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
  br label %for.body

for.body:
  ; varray[iv] = llvm.sinh(float(iv)) for iv in 0..999
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @llvm.sinh.f32(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}
814
+
643
815
define void @cosh_f64 (ptr nocapture %varray ) {
644
816
; CHECK-LABEL: @cosh_f64(
645
817
; CHECK-VF2: [[TMP5:%.*]] = call <2 x double> @amd_vrd2_cosh(<2 x double> [[TMP4:%.*]])
818
+ ; CHECK-VF4-NOT:[[TMP5:%.*]] = call <4 x double> @llvm.cosh.v4f64(<4 x double> [[TMP4:%.*]])
819
+ ; CHECK-VF8-NOT:[[TMP5:%.*]] = call <8 x double> @llvm.cosh.v8f64(<8 x double> [[TMP4:%.*]])
820
+ ; CHECK-VF16-NOT:[[TMP5:%.*]] = call <16 x double> @llvm.cosh.v16f64(<16 x double> [[TMP4:%.*]])
646
821
; CHECK: ret void
647
822
;
648
823
entry:
@@ -665,8 +840,10 @@ for.end:
665
840
666
841
define void @cosh_f32 (ptr nocapture %varray ) {
667
842
; CHECK-LABEL: @cosh_f32(
843
+ ; CHECK-VF2-NOT:[[TMP5:%.*]] = call <2 x float> @llvm.cosh.v2f32(<2 x float> [[TMP4:%.*]])
668
844
; CHECK-VF4: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_coshf(<4 x float> [[TMP4:%.*]])
669
845
; CHECK-VF8: [[TMP5:%.*]] = call <8 x float> @amd_vrs8_coshf(<8 x float> [[TMP4:%.*]])
846
+ ; CHECK-VF16-NOT:[[TMP5:%.*]] = call <16 x float> @llvm.cosh.v16f32(<16 x float> [[TMP4:%.*]])
670
847
; CHECK: ret void
671
848
;
672
849
entry:
@@ -739,8 +916,35 @@ for.end:
739
916
ret void
740
917
}
741
918
919
; Scalar libm call @tanh on f64: the CHECK-NOT lines assert no widening to
; the llvm.tanh vector intrinsic at any tested VF.
define void @tanh_f64(ptr nocapture %varray) {
; CHECK-LABEL: @tanh_f64(
; CHECK-VF2-NOT:[[TMP5:%.*]] = call <2 x double> @llvm.tanh.v2f64(<2 x double> [[TMP4:%.*]])
; CHECK-VF4-NOT:[[TMP5:%.*]] = call <4 x double> @llvm.tanh.v4f64(<4 x double> [[TMP4:%.*]])
; CHECK-VF8-NOT:[[TMP5:%.*]] = call <8 x double> @llvm.tanh.v8f64(<8 x double> [[TMP4:%.*]])
; CHECK-VF16-NOT:[[TMP5:%.*]] = call <16 x double> @llvm.tanh.v16f64(<16 x double> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
  br label %for.body

for.body:
  ; varray[iv] = tanh(double(iv)) for iv in 0..999
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @tanh(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}
944
+
742
945
define void @tanh_f32 (ptr nocapture %varray ) {
743
946
; CHECK-LABEL: @tanh_f32(
947
+ ; CHECK-VF2-NOT:[[TMP5:%.*]] = call <2 x float> @llvm.tanh.v2f32(<2 x float> [[TMP4:%.*]])
744
948
; CHECK-VF4: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_tanhf(<4 x float> [[TMP4:%.*]])
745
949
; CHECK-VF8: [[TMP5:%.*]] = call <8 x float> @amd_vrs8_tanhf(<8 x float> [[TMP4:%.*]])
746
950
; CHECK-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_tanhf(<16 x float> [[TMP4:%.*]])
@@ -764,6 +968,32 @@ for.end:
764
968
ret void
765
969
}
766
970
971
; llvm.tanh.f64 intrinsic in a vectorizable loop: the positive CHECK lines
; assert widening to the llvm.tanh vector intrinsic at every tested VF.
define void @tanh_f64_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @tanh_f64_intrinsic(
; CHECK-VF2: [[TMP5:%.*]] = call <2 x double> @llvm.tanh.v2f64(<2 x double> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x double> @llvm.tanh.v4f64(<4 x double> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x double> @llvm.tanh.v8f64(<8 x double> [[TMP4:%.*]])
; CHECK-VF16: [[TMP5:%.*]] = call <16 x double> @llvm.tanh.v16f64(<16 x double> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
  br label %for.body

for.body:
  ; varray[iv] = llvm.tanh(double(iv)) for iv in 0..999
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @llvm.tanh.f64(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}
996
+
767
997
define void @tanh_f32_intrinsic (ptr nocapture %varray ) {
768
998
; CHECK-LABEL: @tanh_f32_intrinsic(
769
999
; CHECK-VF2: [[TMP5:%.*]] = call <2 x float> @llvm.tanh.v2f32(<2 x float> [[TMP4:%.*]])
0 commit comments