@@ -801,6 +801,103 @@ entry:
801
801
%vecins.3 = insertelement <4 x float > %vecins.2 , float %4 , i32 3
802
802
ret <4 x float > %vecins.3
803
803
}
804
; Declaration of the scalar two-operand routine @atan2f (float, float) -> float
; that @atan2_4x calls once per vector lane.
+ declare float @atan2f (float ,float ) readonly nounwind willreturn
805
; Test: four scalar @atan2f calls, one per lane of two <4 x float> loads.
; The input IR loads a vector from %a and one from %b, extracts lanes 0..3 of
; each, calls @atan2f with the 'fast' flags on every lane pair, and rebuilds a
; <4 x float> result with insertelement.
; Expected output under the CHECK prefix: one <4 x float> call to @vatan2f on
; the two loaded vectors.
; Expected output under the NOACCELERATE prefix: lanes 0 and 1 remain scalar
; @atan2f calls, lanes 2 and 3 become one @llvm.atan2.v2f32 call, and the
; pieces are merged with shufflevector.
; NOTE(review): the RUN lines are outside this view; presumably CHECK is the
; run with the Accelerate vector library enabled and NOACCELERATE the run
; without it - confirm against the file header.
+ define <4 x float > @atan2_4x (ptr %a , ptr %b ) {
806
+ ; CHECK-LABEL: @atan2_4x(
807
+ ; CHECK-NEXT: entry:
808
+ ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
809
+ ; CHECK-NEXT: [[BB:%.*]] = load <4 x float>, ptr [[B:%.*]], align 16
810
+ ; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @vatan2f(<4 x float> [[TMP0]], <4 x float> [[BB]])
811
+ ; CHECK-NEXT: ret <4 x float> [[TMP1]]
812
+ ;
813
+ ; NOACCELERATE-LABEL: @atan2_4x(
814
+ ; NOACCELERATE-NEXT: entry:
815
+ ; NOACCELERATE-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
816
+ ; NOACCELERATE-NEXT: [[BB:%.*]] = load <4 x float>, ptr [[B:%.*]], align 16
817
+ ; NOACCELERATE-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
818
+ ; NOACCELERATE-NEXT: [[VECEXTB:%.*]] = extractelement <4 x float> [[BB]], i32 0
819
+ ; NOACCELERATE-NEXT: [[TMP1:%.*]] = tail call fast float @atan2f(float [[VECEXT]], float [[VECEXTB]])
820
+ ; NOACCELERATE-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
821
+ ; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
822
+ ; NOACCELERATE-NEXT: [[VECEXTB_1:%.*]] = extractelement <4 x float> [[BB]], i32 1
823
+ ; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @atan2f(float [[VECEXT_1]], float [[VECEXTB_1]])
824
+ ; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
825
+ ; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
826
+ ; NOACCELERATE-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[BB]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
827
+ ; NOACCELERATE-NEXT: [[TMP5:%.*]] = call fast <2 x float> @llvm.atan2.v2f32(<2 x float> [[TMP3]], <2 x float> [[TMP4]])
828
+ ; NOACCELERATE-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
829
+ ; NOACCELERATE-NEXT: [[VECINS_3:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
830
+ ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_3]]
831
+ ;
; Input IR: the same extract / call / insert sequence repeated for lanes 0..3.
832
+ entry:
833
+ %0 = load <4 x float >, ptr %a , align 16
834
+ %bb = load <4 x float >, ptr %b , align 16
835
+ %vecext = extractelement <4 x float > %0 , i32 0
836
+ %vecextb = extractelement <4 x float > %bb , i32 0
837
+ %1 = tail call fast float @atan2f (float %vecext , float %vecextb )
838
+ %vecins = insertelement <4 x float > poison, float %1 , i32 0
839
+ %vecext.1 = extractelement <4 x float > %0 , i32 1
840
+ %vecextb.1 = extractelement <4 x float > %bb , i32 1
841
+ %2 = tail call fast float @atan2f (float %vecext.1 , float %vecextb.1 )
842
+ %vecins.1 = insertelement <4 x float > %vecins , float %2 , i32 1
843
+ %vecext.2 = extractelement <4 x float > %0 , i32 2
844
+ %vecextb.2 = extractelement <4 x float > %bb , i32 2
845
+ %3 = tail call fast float @atan2f (float %vecext.2 , float %vecextb.2 )
846
+ %vecins.2 = insertelement <4 x float > %vecins.1 , float %3 , i32 2
847
+ %vecext.3 = extractelement <4 x float > %0 , i32 3
848
+ %vecextb.3 = extractelement <4 x float > %bb , i32 3
849
+ %4 = tail call fast float @atan2f (float %vecext.3 , float %vecextb.3 )
850
+ %vecins.3 = insertelement <4 x float > %vecins.2 , float %4 , i32 3
851
+ ret <4 x float > %vecins.3
852
+ }
853
; Test: four scalar @llvm.atan2.f32 intrinsic calls, one per lane of two
; <4 x float> loads.
; The input IR loads a vector from %a and one from %b, extracts lanes 0..3 of
; each, calls @llvm.atan2.f32 with the 'fast' flags on every lane pair, and
; rebuilds a <4 x float> result with insertelement.
; Expected output under the CHECK prefix: one <4 x float> call to @vatan2f on
; the two loaded vectors.
; Expected output under the NOACCELERATE prefix: lanes 0 and 1 remain scalar
; @llvm.atan2.f32 calls, lanes 2 and 3 become one @llvm.atan2.v2f32 call, and
; the pieces are merged with shufflevector.
; NOTE(review): the RUN lines are outside this view; presumably CHECK is the
; run with the Accelerate vector library enabled and NOACCELERATE the run
; without it - confirm against the file header.
+ define <4 x float > @int_atan2_4x (ptr %a , ptr %b ) {
854
+ ; CHECK-LABEL: @int_atan2_4x(
855
+ ; CHECK-NEXT: entry:
856
+ ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
857
+ ; CHECK-NEXT: [[BB:%.*]] = load <4 x float>, ptr [[B:%.*]], align 16
858
+ ; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @vatan2f(<4 x float> [[TMP0]], <4 x float> [[BB]])
859
+ ; CHECK-NEXT: ret <4 x float> [[TMP1]]
860
+ ;
861
+ ; NOACCELERATE-LABEL: @int_atan2_4x(
862
+ ; NOACCELERATE-NEXT: entry:
863
+ ; NOACCELERATE-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
864
+ ; NOACCELERATE-NEXT: [[BB:%.*]] = load <4 x float>, ptr [[B:%.*]], align 16
865
+ ; NOACCELERATE-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
866
+ ; NOACCELERATE-NEXT: [[VECEXTB:%.*]] = extractelement <4 x float> [[BB]], i32 0
867
+ ; NOACCELERATE-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.atan2.f32(float [[VECEXT]], float [[VECEXTB]])
868
+ ; NOACCELERATE-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
869
+ ; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
870
+ ; NOACCELERATE-NEXT: [[VECEXTB_1:%.*]] = extractelement <4 x float> [[BB]], i32 1
871
+ ; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.atan2.f32(float [[VECEXT_1]], float [[VECEXTB_1]])
872
+ ; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
873
+ ; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
874
+ ; NOACCELERATE-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[BB]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
875
+ ; NOACCELERATE-NEXT: [[TMP5:%.*]] = call fast <2 x float> @llvm.atan2.v2f32(<2 x float> [[TMP3]], <2 x float> [[TMP4]])
876
+ ; NOACCELERATE-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
877
+ ; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
878
+ ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]]
879
+ ;
; Input IR: the same extract / call / insert sequence repeated for lanes 0..3.
880
+ entry:
881
+ %0 = load <4 x float >, ptr %a , align 16
882
+ %bb = load <4 x float >, ptr %b , align 16
883
+ %vecext = extractelement <4 x float > %0 , i32 0
884
+ %vecextb = extractelement <4 x float > %bb , i32 0
885
+ %1 = tail call fast float @llvm.atan2.f32 (float %vecext , float %vecextb )
886
+ %vecins = insertelement <4 x float > poison, float %1 , i32 0
887
+ %vecext.1 = extractelement <4 x float > %0 , i32 1
888
+ %vecextb.1 = extractelement <4 x float > %bb , i32 1
889
+ %2 = tail call fast float @llvm.atan2.f32 (float %vecext.1 , float %vecextb.1 )
890
+ %vecins.1 = insertelement <4 x float > %vecins , float %2 , i32 1
891
+ %vecext.2 = extractelement <4 x float > %0 , i32 2
892
+ %vecextb.2 = extractelement <4 x float > %bb , i32 2
893
+ %3 = tail call fast float @llvm.atan2.f32 (float %vecext.2 , float %vecextb.2 )
894
+ %vecins.2 = insertelement <4 x float > %vecins.1 , float %3 , i32 2
895
+ %vecext.3 = extractelement <4 x float > %0 , i32 3
896
+ %vecextb.3 = extractelement <4 x float > %bb , i32 3
897
+ %4 = tail call fast float @llvm.atan2.f32 (float %vecext.3 , float %vecextb.3 )
898
+ %vecins.3 = insertelement <4 x float > %vecins.2 , float %4 , i32 3
899
+ ret <4 x float > %vecins.3
900
+ }
804
901
declare float @sinhf (float ) readonly nounwind willreturn
805
902
define <4 x float > @sinh_4x (ptr %a ) {
806
903
; CHECK-LABEL: @sinh_4x(
0 commit comments