@@ -801,6 +801,106 @@ entry:
801
801
%vecins.3 = insertelement <4 x float > %vecins.2 , float %4 , i32 3
802
802
ret <4 x float > %vecins.3
803
803
}
804
; Declaration of the scalar @atan2f callee (two float operands, float result,
; marked readonly nounwind willreturn) that @atan2_4x invokes once per lane.
+ declare float @atan2f (float ,float ) readonly nounwind willreturn
805
; Test function: loads two <4 x float> vectors from %a and %b, then for each of
; the four lanes extracts one element from each vector, calls the scalar
; @atan2f on the pair, and inserts the result into the returned vector.
; The first group of expectations below requires the four scalar calls to be
; replaced by a single call to @vatan2f on the whole vectors; the second group
; requires the per-lane scalar sequence to be left as written.
; NOTE(review): the RUN lines that select each expectation group are outside
; this view; the second prefix name suggests a run without the vector
; library enabled -- confirm against the top of the file.
+ define <4 x float > @atan2_4x (ptr %a , ptr %b ) {
806
+ ; CHECK-LABEL: @atan2_4x(
807
+ ; CHECK-NEXT: entry:
808
+ ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
809
+ ; CHECK-NEXT: [[BB:%.*]] = load <4 x float>, ptr [[B:%.*]], align 16
810
+ ; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @vatan2f(<4 x float> [[TMP0]], <4 x float> [[BB]])
811
+ ; CHECK-NEXT: ret <4 x float> [[TMP1]]
812
+ ;
813
+ ; NOACCELERATE-LABEL: @atan2_4x(
814
+ ; NOACCELERATE-NEXT: entry:
815
+ ; NOACCELERATE-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
816
+ ; NOACCELERATE-NEXT: [[BB:%.*]] = load <4 x float>, ptr [[B:%.*]], align 16
817
+ ; NOACCELERATE-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
818
+ ; NOACCELERATE-NEXT: [[VECEXTB:%.*]] = extractelement <4 x float> [[BB]], i32 0
819
+ ; NOACCELERATE-NEXT: [[TMP1:%.*]] = tail call fast float @atan2f(float [[VECEXT]], float [[VECEXTB]])
820
+ ; NOACCELERATE-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
821
+ ; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
822
+ ; NOACCELERATE-NEXT: [[VECEXTB_1:%.*]] = extractelement <4 x float> [[BB]], i32 1
823
+ ; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @atan2f(float [[VECEXT_1]], float [[VECEXTB_1]])
824
+ ; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
825
+ ; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
826
+ ; NOACCELERATE-NEXT: [[VECEXTB_2:%.*]] = extractelement <4 x float> [[BB]], i32 2
827
+ ; NOACCELERATE-NEXT: [[TMP3:%.*]] = tail call fast float @atan2f(float [[VECEXT_2]], float [[VECEXTB_2]])
828
+ ; NOACCELERATE-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
829
+ ; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
830
+ ; NOACCELERATE-NEXT: [[VECEXTB_3:%.*]] = extractelement <4 x float> [[BB]], i32 3
831
+ ; NOACCELERATE-NEXT: [[TMP4:%.*]] = tail call fast float @atan2f(float [[VECEXT_3]], float [[VECEXTB_3]])
832
+ ; NOACCELERATE-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
833
+ ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_3]]
834
+ ;
835
; Input IR: both vectors are loaded once, then lanes 0 through 3 are processed
; in order as extract / extract / scalar call / insert, starting from poison.
+ entry:
836
+ %0 = load <4 x float >, ptr %a , align 16
837
+ %bb = load <4 x float >, ptr %b , align 16
838
+ %vecext = extractelement <4 x float > %0 , i32 0
839
+ %vecextb = extractelement <4 x float > %bb , i32 0
840
+ %1 = tail call fast float @atan2f (float %vecext , float %vecextb )
841
+ %vecins = insertelement <4 x float > poison, float %1 , i32 0
842
+ %vecext.1 = extractelement <4 x float > %0 , i32 1
843
+ %vecextb.1 = extractelement <4 x float > %bb , i32 1
844
+ %2 = tail call fast float @atan2f (float %vecext.1 , float %vecextb.1 )
845
+ %vecins.1 = insertelement <4 x float > %vecins , float %2 , i32 1
846
+ %vecext.2 = extractelement <4 x float > %0 , i32 2
847
+ %vecextb.2 = extractelement <4 x float > %bb , i32 2
848
+ %3 = tail call fast float @atan2f (float %vecext.2 , float %vecextb.2 )
849
+ %vecins.2 = insertelement <4 x float > %vecins.1 , float %3 , i32 2
850
+ %vecext.3 = extractelement <4 x float > %0 , i32 3
851
+ %vecextb.3 = extractelement <4 x float > %bb , i32 3
852
+ %4 = tail call fast float @atan2f (float %vecext.3 , float %vecextb.3 )
853
+ %vecins.3 = insertelement <4 x float > %vecins.2 , float %4 , i32 3
854
+ ret <4 x float > %vecins.3
855
+ }
856
; Test function: loads two <4 x float> vectors from %a and %b and, for each of
; the four lanes, calls the @llvm.atan2.f32 intrinsic on the extracted pair of
; elements, inserting each result into the returned vector.
; The first group of expectations below requires the four intrinsic calls to
; be replaced by a single call to @vatan2f on the whole vectors. The second
; group expects lanes 0 and 1 to remain scalar intrinsic calls, while lanes 2
; and 3 are combined into one @llvm.atan2.v2f32 call whose two results are
; shuffled into lanes 2 and 3 of the returned vector.
; NOTE(review): the RUN lines that select each expectation group, and any
; declaration of the intrinsic, are outside this view -- confirm in the file.
+ define <4 x float > @int_atan2_4x (ptr %a , ptr %b ) {
857
+ ; CHECK-LABEL: @int_atan2_4x(
858
+ ; CHECK-NEXT: entry:
859
+ ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
860
+ ; CHECK-NEXT: [[BB:%.*]] = load <4 x float>, ptr [[B:%.*]], align 16
861
+ ; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @vatan2f(<4 x float> [[TMP0]], <4 x float> [[BB]])
862
+ ; CHECK-NEXT: ret <4 x float> [[TMP1]]
863
+ ;
864
+ ; NOACCELERATE-LABEL: @int_atan2_4x(
865
+ ; NOACCELERATE-NEXT: entry:
866
+ ; NOACCELERATE-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
867
+ ; NOACCELERATE-NEXT: [[BB:%.*]] = load <4 x float>, ptr [[B:%.*]], align 16
868
+ ; NOACCELERATE-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
869
+ ; NOACCELERATE-NEXT: [[VECEXTB:%.*]] = extractelement <4 x float> [[BB]], i32 0
870
+ ; NOACCELERATE-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.atan2.f32(float [[VECEXT]], float [[VECEXTB]])
871
+ ; NOACCELERATE-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
872
+ ; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
873
+ ; NOACCELERATE-NEXT: [[VECEXTB_1:%.*]] = extractelement <4 x float> [[BB]], i32 1
874
+ ; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.atan2.f32(float [[VECEXT_1]], float [[VECEXTB_1]])
875
+ ; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
876
+ ; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
877
+ ; NOACCELERATE-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[BB]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
878
+ ; NOACCELERATE-NEXT: [[TMP5:%.*]] = call fast <2 x float> @llvm.atan2.v2f32(<2 x float> [[TMP3]], <2 x float> [[TMP4]])
879
+ ; NOACCELERATE-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
880
+ ; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
881
+ ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]]
882
+ ;
883
; Input IR: both vectors are loaded once, then lanes 0 through 3 are processed
; in order as extract / extract / intrinsic call / insert, starting from poison.
+ entry:
884
+ %0 = load <4 x float >, ptr %a , align 16
885
+ %bb = load <4 x float >, ptr %b , align 16
886
+ %vecext = extractelement <4 x float > %0 , i32 0
887
+ %vecextb = extractelement <4 x float > %bb , i32 0
888
+ %1 = tail call fast float @llvm.atan2.f32 (float %vecext , float %vecextb )
889
+ %vecins = insertelement <4 x float > poison, float %1 , i32 0
890
+ %vecext.1 = extractelement <4 x float > %0 , i32 1
891
+ %vecextb.1 = extractelement <4 x float > %bb , i32 1
892
+ %2 = tail call fast float @llvm.atan2.f32 (float %vecext.1 , float %vecextb.1 )
893
+ %vecins.1 = insertelement <4 x float > %vecins , float %2 , i32 1
894
+ %vecext.2 = extractelement <4 x float > %0 , i32 2
895
+ %vecextb.2 = extractelement <4 x float > %bb , i32 2
896
+ %3 = tail call fast float @llvm.atan2.f32 (float %vecext.2 , float %vecextb.2 )
897
+ %vecins.2 = insertelement <4 x float > %vecins.1 , float %3 , i32 2
898
+ %vecext.3 = extractelement <4 x float > %0 , i32 3
899
+ %vecextb.3 = extractelement <4 x float > %bb , i32 3
900
+ %4 = tail call fast float @llvm.atan2.f32 (float %vecext.3 , float %vecextb.3 )
901
+ %vecins.3 = insertelement <4 x float > %vecins.2 , float %4 , i32 3
902
+ ret <4 x float > %vecins.3
903
+ }
804
904
declare float @sinhf (float ) readonly nounwind willreturn
805
905
define <4 x float > @sinh_4x (ptr %a ) {
806
906
; CHECK-LABEL: @sinh_4x(
0 commit comments