Skip to content

Commit 4d412be

Browse files
committed
[LoopVectorize][X86] amdlibm-calls.ll - add missing sinh and f64 test coverage to all functions
Shows failure to vectorise acos/asin/atan and cosh/sinh/tanh libcalls if they don't have a corresponding veclib mapping
1 parent 0f7400c commit 4d412be

File tree

1 file changed

+230
-0
lines changed

1 file changed

+230
-0
lines changed

llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls.ll

Lines changed: 230 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,9 @@ declare float @tanf(float) #0
2121
declare double @llvm.tan.f64(double) #0
2222
declare float @llvm.tan.f32(float) #0
2323

24+
declare double @acos(double) #0
2425
declare float @acosf(float) #0
26+
declare double @llvm.acos.f64(double) #0
2527
declare float @llvm.acos.f32(float) #0
2628

2729
declare double @asin(double) #0
@@ -34,12 +36,19 @@ declare float @atanf(float) #0
3436
declare double @llvm.atan.f64(double) #0
3537
declare float @llvm.atan.f32(float) #0
3638

39+
declare double @sinh(double) #0
40+
declare float @sinhf(float) #0
41+
declare double @llvm.sinh.f64(double) #0
42+
declare float @llvm.sinh.f32(float) #0
43+
3744
declare double @cosh(double) #0
3845
declare float @coshf(float) #0
3946
declare double @llvm.cosh.f64(double) #0
4047
declare float @llvm.cosh.f32(float) #0
4148

49+
declare double @tanh(double) #0
4250
declare float @tanhf(float) #0
51+
declare double @llvm.tanh.f64(double) #0
4352
declare float @llvm.tanh.f32(float) #0
4453

4554
declare double @pow(double, double) #0
@@ -387,8 +396,35 @@ for.end:
387396
ret void
388397
}
389398

399+
; Test loop: for each index in [0, 1000) store acos(index) into %varray, using
; the scalar libcall @acos on a double argument.
; The VF2/VF4/VF8/VF16-prefixed negative directives below require that no
; vector @llvm.acos.v*f64 intrinsic call is found between the function label
; match and the following positive "ret void" match.
; NOTE(review): presumably this records that the @acos libcall is left
; unvectorised when no veclib mapping exists for it -- confirm against the RUN
; lines, which are outside this view.
define void @acos_f64(ptr nocapture %varray) {
400+
; CHECK-LABEL: @acos_f64(
401+
; CHECK-VF2-NOT:[[TMP5:%.*]] = call <2 x double> @llvm.acos.v2f64(<2 x double> [[TMP4:%.*]])
402+
; CHECK-VF4-NOT:[[TMP5:%.*]] = call <4 x double> @llvm.acos.v4f64(<4 x double> [[TMP4:%.*]])
403+
; CHECK-VF8-NOT:[[TMP5:%.*]] = call <8 x double> @llvm.acos.v8f64(<8 x double> [[TMP4:%.*]])
404+
; CHECK-VF16-NOT:[[TMP5:%.*]] = call <16 x double> @llvm.acos.v16f64(<16 x double> [[TMP4:%.*]])
405+
; CHECK: ret void
406+
;
407+
entry:
408+
br label %for.body
409+
410+
for.body:
411+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
412+
%tmp = trunc i64 %iv to i32
413+
%conv = sitofp i32 %tmp to double
414+
; The call argument is the induction variable, truncated to i32 and converted to double.
%call = tail call double @acos(double %conv)
415+
%arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
416+
; NOTE(review): align 4 is smaller than the 8-byte size of double; this is
; valid IR, but confirm the under-alignment is intentional.
store double %call, ptr %arrayidx, align 4
417+
%iv.next = add nuw nsw i64 %iv, 1
418+
; Exit once the incremented index reaches 1000, i.e. after 1000 iterations.
%exitcond = icmp eq i64 %iv.next, 1000
419+
br i1 %exitcond, label %for.end, label %for.body
420+
421+
for.end:
422+
ret void
423+
}
424+
390425
define void @acos_f32(ptr nocapture %varray) {
391426
; CHECK-LABEL: @acos_f32(
427+
; CHECK-VF2-NOT:[[TMP5:%.*]] = call <2 x float> @llvm.acos.v2f32(<2 x float> [[TMP4:%.*]])
392428
; CHECK-VF4: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_acosf(<4 x float> [[TMP4:%.*]])
393429
; CHECK-VF8: [[TMP5:%.*]] = call <8 x float> @amd_vrs8_acosf(<8 x float> [[TMP4:%.*]])
394430
; CHECK-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_acosf(<16 x float> [[TMP4:%.*]])
@@ -412,6 +448,32 @@ for.end:
412448
ret void
413449
}
414450

451+
; Test loop: for each index in [0, 1000) store acos(index) into %varray, using
; the scalar intrinsic @llvm.acos.f64 rather than the libcall.
; The VF2/VF4/VF8/VF16-prefixed positive directives below expect a call to the
; widened intrinsic @llvm.acos.v<N>f64 for the matching vector width, followed
; by a "ret void".
define void @acos_f64_intrinsic(ptr nocapture %varray) {
452+
; CHECK-LABEL: @acos_f64_intrinsic(
453+
; CHECK-VF2: [[TMP5:%.*]] = call <2 x double> @llvm.acos.v2f64(<2 x double> [[TMP4:%.*]])
454+
; CHECK-VF4: [[TMP5:%.*]] = call <4 x double> @llvm.acos.v4f64(<4 x double> [[TMP4:%.*]])
455+
; CHECK-VF8: [[TMP5:%.*]] = call <8 x double> @llvm.acos.v8f64(<8 x double> [[TMP4:%.*]])
456+
; CHECK-VF16: [[TMP5:%.*]] = call <16 x double> @llvm.acos.v16f64(<16 x double> [[TMP4:%.*]])
457+
; CHECK: ret void
458+
;
459+
entry:
460+
br label %for.body
461+
462+
for.body:
463+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
464+
%tmp = trunc i64 %iv to i32
465+
%conv = sitofp i32 %tmp to double
466+
; The call argument is the induction variable, truncated to i32 and converted to double.
%call = tail call double @llvm.acos.f64(double %conv)
467+
%arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
468+
; NOTE(review): align 4 is smaller than the 8-byte size of double; this is
; valid IR, but confirm the under-alignment is intentional.
store double %call, ptr %arrayidx, align 4
469+
%iv.next = add nuw nsw i64 %iv, 1
470+
; Exit once the incremented index reaches 1000, i.e. after 1000 iterations.
%exitcond = icmp eq i64 %iv.next, 1000
471+
br i1 %exitcond, label %for.end, label %for.body
472+
473+
for.end:
474+
ret void
475+
}
476+
415477
define void @acos_f32_intrinsic(ptr nocapture %varray) {
416478
; CHECK-LABEL: @acos_f32_intrinsic(
417479
; CHECK-VF2: [[TMP5:%.*]] = call <2 x float> @llvm.acos.v2f32(<2 x float> [[TMP4:%.*]])
@@ -440,7 +502,10 @@ for.end:
440502

441503
define void @asin_f64(ptr nocapture %varray) {
442504
; CHECK-LABEL: @asin_f64(
505+
; CHECK-VF2-NOT:[[TMP5:%.*]] = call <2 x double> @llvm.asin.v2f64(<2 x double> [[TMP4:%.*]])
506+
; CHECK-VF4-NOT:[[TMP5:%.*]] = call <4 x double> @llvm.asin.v4f64(<4 x double> [[TMP4:%.*]])
443507
; CHECK-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_asin(<8 x double> [[TMP4:%.*]])
508+
; CHECK-VF16-NOT:[[TMP5:%.*]] = call <16 x double> @llvm.asin.v16f64(<16 x double> [[TMP4:%.*]])
444509
; CHECK: ret void
445510
;
446511
entry:
@@ -463,6 +528,7 @@ for.end:
463528

464529
define void @asin_f32(ptr nocapture %varray) {
465530
; CHECK-LABEL: @asin_f32(
531+
; CHECK-VF2-NOT:[[TMP5:%.*]] = call <2 x float> @llvm.asin.v2f32(<2 x float> [[TMP4:%.*]])
466532
; CHECK-VF4: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_asinf(<4 x float> [[TMP4:%.*]])
467533
; CHECK-VF8: [[TMP5:%.*]] = call <8 x float> @amd_vrs8_asinf(<8 x float> [[TMP4:%.*]])
468534
; CHECK-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_asinf(<16 x float> [[TMP4:%.*]])
@@ -543,6 +609,7 @@ define void @atan_f64(ptr nocapture %varray) {
543609
; CHECK-VF2: [[TMP5:%.*]] = call <2 x double> @amd_vrd2_atan(<2 x double> [[TMP4:%.*]])
544610
; CHECK-VF4: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_atan(<4 x double> [[TMP4:%.*]])
545611
; CHECK-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_atan(<8 x double> [[TMP4:%.*]])
612+
; CHECK-VF16-NOT:[[TMP5:%.*]] = call <16 x double> @llvm.atan.v16f64(<16 x double> [[TMP4:%.*]])
546613
; CHECK: ret void
547614
;
548615
entry:
@@ -565,6 +632,7 @@ for.end:
565632

566633
define void @atan_f32(ptr nocapture %varray) {
567634
; CHECK-LABEL: @atan_f32(
635+
; CHECK-VF2-NOT:[[TMP5:%.*]] = call <2 x float> @llvm.atan.v2f32(<2 x float> [[TMP4:%.*]])
568636
; CHECK-VF4: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_atanf(<4 x float> [[TMP4:%.*]])
569637
; CHECK-VF8: [[TMP5:%.*]] = call <8 x float> @amd_vrs8_atanf(<8 x float> [[TMP4:%.*]])
570638
; CHECK-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_atanf(<16 x float> [[TMP4:%.*]])
@@ -640,9 +708,116 @@ for.end:
640708
ret void
641709
}
642710

711+
; Test loop: for each index in [0, 1000) store sinh(index) into %varray, using
; the scalar libcall @sinh on a double argument.
; The VF2/VF4/VF8/VF16-prefixed negative directives below require that no
; vector @llvm.sinh.v*f64 intrinsic call is found between the function label
; match and the following positive "ret void" match.
; NOTE(review): presumably this records that the @sinh libcall is left
; unvectorised when no veclib mapping exists for it -- confirm against the RUN
; lines, which are outside this view.
define void @sinh_f64(ptr nocapture %varray) {
712+
; CHECK-LABEL: @sinh_f64(
713+
; CHECK-VF2-NOT:[[TMP5:%.*]] = call <2 x double> @llvm.sinh.v2f64(<2 x double> [[TMP4:%.*]])
714+
; CHECK-VF4-NOT:[[TMP5:%.*]] = call <4 x double> @llvm.sinh.v4f64(<4 x double> [[TMP4:%.*]])
715+
; CHECK-VF8-NOT:[[TMP5:%.*]] = call <8 x double> @llvm.sinh.v8f64(<8 x double> [[TMP4:%.*]])
716+
; CHECK-VF16-NOT:[[TMP5:%.*]] = call <16 x double> @llvm.sinh.v16f64(<16 x double> [[TMP4:%.*]])
717+
; CHECK: ret void
718+
;
719+
entry:
720+
br label %for.body
721+
722+
for.body:
723+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
724+
%tmp = trunc i64 %iv to i32
725+
%conv = sitofp i32 %tmp to double
726+
; The call argument is the induction variable, truncated to i32 and converted to double.
%call = tail call double @sinh(double %conv)
727+
%arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
728+
; NOTE(review): align 4 is smaller than the 8-byte size of double; this is
; valid IR, but confirm the under-alignment is intentional.
store double %call, ptr %arrayidx, align 4
729+
%iv.next = add nuw nsw i64 %iv, 1
730+
; Exit once the incremented index reaches 1000, i.e. after 1000 iterations.
%exitcond = icmp eq i64 %iv.next, 1000
731+
br i1 %exitcond, label %for.end, label %for.body
732+
733+
for.end:
734+
ret void
735+
}
736+
737+
; Test loop: for each index in [0, 1000) store sinhf(index) into %varray, using
; the scalar libcall @sinhf on a float argument.
; The VF2/VF4/VF8/VF16-prefixed negative directives below require that no
; vector @llvm.sinh.v*f32 intrinsic call is found between the function label
; match and the following positive "ret void" match.
; NOTE(review): presumably this records that the @sinhf libcall is left
; unvectorised when no veclib mapping exists for it -- confirm against the RUN
; lines, which are outside this view.
define void @sinh_f32(ptr nocapture %varray) {
738+
; CHECK-LABEL: @sinh_f32(
739+
; CHECK-VF2-NOT:[[TMP5:%.*]] = call <2 x float> @llvm.sinh.v2f32(<2 x float> [[TMP4:%.*]])
740+
; CHECK-VF4-NOT:[[TMP5:%.*]] = call <4 x float> @llvm.sinh.v4f32(<4 x float> [[TMP4:%.*]])
741+
; CHECK-VF8-NOT:[[TMP5:%.*]] = call <8 x float> @llvm.sinh.v8f32(<8 x float> [[TMP4:%.*]])
742+
; CHECK-VF16-NOT:[[TMP5:%.*]] = call <16 x float> @llvm.sinh.v16f32(<16 x float> [[TMP4:%.*]])
743+
; CHECK: ret void
744+
;
745+
entry:
746+
br label %for.body
747+
748+
for.body:
749+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
750+
%tmp = trunc i64 %iv to i32
751+
%conv = sitofp i32 %tmp to float
752+
; The call argument is the induction variable, truncated to i32 and converted to float.
%call = tail call float @sinhf(float %conv)
753+
%arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
754+
store float %call, ptr %arrayidx, align 4
755+
%iv.next = add nuw nsw i64 %iv, 1
756+
; Exit once the incremented index reaches 1000, i.e. after 1000 iterations.
%exitcond = icmp eq i64 %iv.next, 1000
757+
br i1 %exitcond, label %for.end, label %for.body
758+
759+
for.end:
760+
ret void
761+
}
762+
763+
; Test loop: for each index in [0, 1000) store sinh(index) into %varray, using
; the scalar intrinsic @llvm.sinh.f64 rather than the libcall.
; The VF2/VF4/VF8/VF16-prefixed positive directives below expect a call to the
; widened intrinsic @llvm.sinh.v<N>f64 for the matching vector width, followed
; by a "ret void".
define void @sinh_f64_intrinsic(ptr nocapture %varray) {
764+
; CHECK-LABEL: @sinh_f64_intrinsic(
765+
; CHECK-VF2: [[TMP5:%.*]] = call <2 x double> @llvm.sinh.v2f64(<2 x double> [[TMP4:%.*]])
766+
; CHECK-VF4: [[TMP5:%.*]] = call <4 x double> @llvm.sinh.v4f64(<4 x double> [[TMP4:%.*]])
767+
; CHECK-VF8: [[TMP5:%.*]] = call <8 x double> @llvm.sinh.v8f64(<8 x double> [[TMP4:%.*]])
768+
; CHECK-VF16: [[TMP5:%.*]] = call <16 x double> @llvm.sinh.v16f64(<16 x double> [[TMP4:%.*]])
769+
; CHECK: ret void
770+
;
771+
entry:
772+
br label %for.body
773+
774+
for.body:
775+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
776+
%tmp = trunc i64 %iv to i32
777+
%conv = sitofp i32 %tmp to double
778+
; The call argument is the induction variable, truncated to i32 and converted to double.
%call = tail call double @llvm.sinh.f64(double %conv)
779+
%arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
780+
; NOTE(review): align 4 is smaller than the 8-byte size of double; this is
; valid IR, but confirm the under-alignment is intentional.
store double %call, ptr %arrayidx, align 4
781+
%iv.next = add nuw nsw i64 %iv, 1
782+
; Exit once the incremented index reaches 1000, i.e. after 1000 iterations.
%exitcond = icmp eq i64 %iv.next, 1000
783+
br i1 %exitcond, label %for.end, label %for.body
784+
785+
for.end:
786+
ret void
787+
}
788+
789+
; Test loop: for each index in [0, 1000) store sinh(index) into %varray, using
; the scalar intrinsic @llvm.sinh.f32 rather than the libcall.
; The VF2/VF4/VF8/VF16-prefixed positive directives below expect a call to the
; widened intrinsic @llvm.sinh.v<N>f32 for the matching vector width, followed
; by a "ret void".
define void @sinh_f32_intrinsic(ptr nocapture %varray) {
790+
; CHECK-LABEL: @sinh_f32_intrinsic(
791+
; CHECK-VF2: [[TMP5:%.*]] = call <2 x float> @llvm.sinh.v2f32(<2 x float> [[TMP4:%.*]])
792+
; CHECK-VF4: [[TMP5:%.*]] = call <4 x float> @llvm.sinh.v4f32(<4 x float> [[TMP4:%.*]])
793+
; CHECK-VF8: [[TMP5:%.*]] = call <8 x float> @llvm.sinh.v8f32(<8 x float> [[TMP4:%.*]])
794+
; CHECK-VF16: [[TMP5:%.*]] = call <16 x float> @llvm.sinh.v16f32(<16 x float> [[TMP4:%.*]])
795+
; CHECK: ret void
796+
;
797+
entry:
798+
br label %for.body
799+
800+
for.body:
801+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
802+
%tmp = trunc i64 %iv to i32
803+
%conv = sitofp i32 %tmp to float
804+
; The call argument is the induction variable, truncated to i32 and converted to float.
%call = tail call float @llvm.sinh.f32(float %conv)
805+
%arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
806+
store float %call, ptr %arrayidx, align 4
807+
%iv.next = add nuw nsw i64 %iv, 1
808+
; Exit once the incremented index reaches 1000, i.e. after 1000 iterations.
%exitcond = icmp eq i64 %iv.next, 1000
809+
br i1 %exitcond, label %for.end, label %for.body
810+
811+
for.end:
812+
ret void
813+
}
814+
643815
define void @cosh_f64(ptr nocapture %varray) {
644816
; CHECK-LABEL: @cosh_f64(
645817
; CHECK-VF2: [[TMP5:%.*]] = call <2 x double> @amd_vrd2_cosh(<2 x double> [[TMP4:%.*]])
818+
; CHECK-VF4-NOT:[[TMP5:%.*]] = call <4 x double> @llvm.cosh.v4f64(<4 x double> [[TMP4:%.*]])
819+
; CHECK-VF8-NOT:[[TMP5:%.*]] = call <8 x double> @llvm.cosh.v8f64(<8 x double> [[TMP4:%.*]])
820+
; CHECK-VF16-NOT:[[TMP5:%.*]] = call <16 x double> @llvm.cosh.v16f64(<16 x double> [[TMP4:%.*]])
646821
; CHECK: ret void
647822
;
648823
entry:
@@ -665,8 +840,10 @@ for.end:
665840

666841
define void @cosh_f32(ptr nocapture %varray) {
667842
; CHECK-LABEL: @cosh_f32(
843+
; CHECK-VF2-NOT:[[TMP5:%.*]] = call <2 x float> @llvm.cosh.v2f32(<2 x float> [[TMP4:%.*]])
668844
; CHECK-VF4: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_coshf(<4 x float> [[TMP4:%.*]])
669845
; CHECK-VF8: [[TMP5:%.*]] = call <8 x float> @amd_vrs8_coshf(<8 x float> [[TMP4:%.*]])
846+
; CHECK-VF16-NOT:[[TMP5:%.*]] = call <16 x float> @llvm.cosh.v16f32(<16 x float> [[TMP4:%.*]])
670847
; CHECK: ret void
671848
;
672849
entry:
@@ -739,8 +916,35 @@ for.end:
739916
ret void
740917
}
741918

919+
; Test loop: for each index in [0, 1000) store tanh(index) into %varray, using
; the scalar libcall @tanh on a double argument.
; The VF2/VF4/VF8/VF16-prefixed negative directives below require that no
; vector @llvm.tanh.v*f64 intrinsic call is found between the function label
; match and the following positive "ret void" match.
; NOTE(review): presumably this records that the @tanh libcall is left
; unvectorised when no veclib mapping exists for it -- confirm against the RUN
; lines, which are outside this view.
define void @tanh_f64(ptr nocapture %varray) {
920+
; CHECK-LABEL: @tanh_f64(
921+
; CHECK-VF2-NOT:[[TMP5:%.*]] = call <2 x double> @llvm.tanh.v2f64(<2 x double> [[TMP4:%.*]])
922+
; CHECK-VF4-NOT:[[TMP5:%.*]] = call <4 x double> @llvm.tanh.v4f64(<4 x double> [[TMP4:%.*]])
923+
; CHECK-VF8-NOT:[[TMP5:%.*]] = call <8 x double> @llvm.tanh.v8f64(<8 x double> [[TMP4:%.*]])
924+
; CHECK-VF16-NOT:[[TMP5:%.*]] = call <16 x double> @llvm.tanh.v16f64(<16 x double> [[TMP4:%.*]])
925+
; CHECK: ret void
926+
;
927+
entry:
928+
br label %for.body
929+
930+
for.body:
931+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
932+
%tmp = trunc i64 %iv to i32
933+
%conv = sitofp i32 %tmp to double
934+
; The call argument is the induction variable, truncated to i32 and converted to double.
%call = tail call double @tanh(double %conv)
935+
%arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
936+
; NOTE(review): align 4 is smaller than the 8-byte size of double; this is
; valid IR, but confirm the under-alignment is intentional.
store double %call, ptr %arrayidx, align 4
937+
%iv.next = add nuw nsw i64 %iv, 1
938+
; Exit once the incremented index reaches 1000, i.e. after 1000 iterations.
%exitcond = icmp eq i64 %iv.next, 1000
939+
br i1 %exitcond, label %for.end, label %for.body
940+
941+
for.end:
942+
ret void
943+
}
944+
742945
define void @tanh_f32(ptr nocapture %varray) {
743946
; CHECK-LABEL: @tanh_f32(
947+
; CHECK-VF2-NOT:[[TMP5:%.*]] = call <2 x float> @llvm.tanh.v2f32(<2 x float> [[TMP4:%.*]])
744948
; CHECK-VF4: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_tanhf(<4 x float> [[TMP4:%.*]])
745949
; CHECK-VF8: [[TMP5:%.*]] = call <8 x float> @amd_vrs8_tanhf(<8 x float> [[TMP4:%.*]])
746950
; CHECK-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_tanhf(<16 x float> [[TMP4:%.*]])
@@ -764,6 +968,32 @@ for.end:
764968
ret void
765969
}
766970

971+
; Test loop: for each index in [0, 1000) store tanh(index) into %varray, using
; the scalar intrinsic @llvm.tanh.f64 rather than the libcall.
; The VF2/VF4/VF8/VF16-prefixed positive directives below expect a call to the
; widened intrinsic @llvm.tanh.v<N>f64 for the matching vector width, followed
; by a "ret void".
define void @tanh_f64_intrinsic(ptr nocapture %varray) {
972+
; CHECK-LABEL: @tanh_f64_intrinsic(
973+
; CHECK-VF2: [[TMP5:%.*]] = call <2 x double> @llvm.tanh.v2f64(<2 x double> [[TMP4:%.*]])
974+
; CHECK-VF4: [[TMP5:%.*]] = call <4 x double> @llvm.tanh.v4f64(<4 x double> [[TMP4:%.*]])
975+
; CHECK-VF8: [[TMP5:%.*]] = call <8 x double> @llvm.tanh.v8f64(<8 x double> [[TMP4:%.*]])
976+
; CHECK-VF16: [[TMP5:%.*]] = call <16 x double> @llvm.tanh.v16f64(<16 x double> [[TMP4:%.*]])
977+
; CHECK: ret void
978+
;
979+
entry:
980+
br label %for.body
981+
982+
for.body:
983+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
984+
%tmp = trunc i64 %iv to i32
985+
%conv = sitofp i32 %tmp to double
986+
; The call argument is the induction variable, truncated to i32 and converted to double.
%call = tail call double @llvm.tanh.f64(double %conv)
987+
%arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
988+
; NOTE(review): align 4 is smaller than the 8-byte size of double; this is
; valid IR, but confirm the under-alignment is intentional.
store double %call, ptr %arrayidx, align 4
989+
%iv.next = add nuw nsw i64 %iv, 1
990+
; Exit once the incremented index reaches 1000, i.e. after 1000 iterations.
%exitcond = icmp eq i64 %iv.next, 1000
991+
br i1 %exitcond, label %for.end, label %for.body
992+
993+
for.end:
994+
ret void
995+
}
996+
767997
define void @tanh_f32_intrinsic(ptr nocapture %varray) {
768998
; CHECK-LABEL: @tanh_f32_intrinsic(
769999
; CHECK-VF2: [[TMP5:%.*]] = call <2 x float> @llvm.tanh.v2f32(<2 x float> [[TMP4:%.*]])

0 commit comments

Comments
 (0)