Skip to content

Commit f45c1e3

Browse files
committed
[aarch64] atan2 intrinsic lowering (p5)
This change is part of this proposal: https://discourse.llvm.org/t/rfc-all-the-math-intrinsics/78294 - `VecFuncs.def`: define the intrinsic-to-SLEEF/ArmPL mapping - `LegalizerHelper.cpp`: add the missing fewerElementsVector handling for the new atan2 intrinsic - `AArch64ISelLowering.cpp`: add AArch64 specializations for lowering, such as for NEON instructions - `AArch64LegalizerInfo.cpp`: legalize atan2. Part 5 of "Implement the atan2 HLSL Function" (llvm#70096).
1 parent 6d8d7a3 commit f45c1e3

12 files changed

+418
-19
lines changed

llvm/include/llvm/Analysis/VecFuncs.def

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,9 @@ TLI_DEFINE_VECFUNC("llvm.atan.f64", "_simd_atan_d2", FIXED(2), "_ZGV_LLVM_N2v")
9292
TLI_DEFINE_VECFUNC("atanf", "_simd_atan_f4", FIXED(4), "_ZGV_LLVM_N4v")
9393
TLI_DEFINE_VECFUNC("llvm.atan.f32", "_simd_atan_f4", FIXED(4), "_ZGV_LLVM_N4v")
9494
TLI_DEFINE_VECFUNC("atan2", "_simd_atan2_d2", FIXED(2), "_ZGV_LLVM_N2vv")
95+
TLI_DEFINE_VECFUNC("llvm.atan2.f64", "_simd_atan2_d2", FIXED(2), "_ZGV_LLVM_N2vv")
9596
TLI_DEFINE_VECFUNC("atan2f", "_simd_atan2_f4", FIXED(4), "_ZGV_LLVM_N4vv")
97+
TLI_DEFINE_VECFUNC("llvm.atan2.f32", "_simd_atan2_f4", FIXED(4), "_ZGV_LLVM_N4vv")
9698

9799
TLI_DEFINE_VECFUNC("cos", "_simd_cos_d2", FIXED(2), "_ZGV_LLVM_N2v")
98100
TLI_DEFINE_VECFUNC("llvm.cos.f64", "_simd_cos_d2", FIXED(2), "_ZGV_LLVM_N2v")
@@ -531,6 +533,7 @@ TLI_DEFINE_VECFUNC("atan", "_ZGVnN2v_atan", FIXED(2), "_ZGV_LLVM_N2v")
531533
TLI_DEFINE_VECFUNC("llvm.atan.f64", "_ZGVnN2v_atan", FIXED(2), "_ZGV_LLVM_N2v")
532534

533535
TLI_DEFINE_VECFUNC("atan2", "_ZGVnN2vv_atan2", FIXED(2), "_ZGV_LLVM_N2vv")
536+
TLI_DEFINE_VECFUNC("llvm.atan2.f64", "_ZGVnN2vv_atan2", FIXED(2), "_ZGV_LLVM_N2vv")
534537

535538
TLI_DEFINE_VECFUNC("atanh", "_ZGVnN2v_atanh", FIXED(2), "_ZGV_LLVM_N2v")
536539

@@ -635,6 +638,7 @@ TLI_DEFINE_VECFUNC("atanf", "_ZGVnN4v_atanf", FIXED(4), "_ZGV_LLVM_N4v")
635638
TLI_DEFINE_VECFUNC("llvm.atan.f32", "_ZGVnN4v_atanf", FIXED(4), "_ZGV_LLVM_N4v")
636639

637640
TLI_DEFINE_VECFUNC("atan2f", "_ZGVnN4vv_atan2f", FIXED(4), "_ZGV_LLVM_N4vv")
641+
TLI_DEFINE_VECFUNC("llvm.atan2.f32", "_ZGVnN4vv_atan2f", FIXED(4), "_ZGV_LLVM_N4vv")
638642

639643
TLI_DEFINE_VECFUNC("atanhf", "_ZGVnN4v_atanhf", FIXED(4), "_ZGV_LLVM_N4v")
640644

@@ -748,6 +752,8 @@ TLI_DEFINE_VECFUNC("llvm.atan.f32", "_ZGVsMxv_atanf", SCALABLE(4), MASKED, "_ZGV
748752

749753
TLI_DEFINE_VECFUNC("atan2", "_ZGVsMxvv_atan2", SCALABLE(2), MASKED, "_ZGVsMxvv")
750754
TLI_DEFINE_VECFUNC("atan2f", "_ZGVsMxvv_atan2f", SCALABLE(4), MASKED, "_ZGVsMxvv")
755+
TLI_DEFINE_VECFUNC("llvm.atan2.f64", "_ZGVsMxvv_atan2", SCALABLE(2), MASKED, "_ZGVsMxvv")
756+
TLI_DEFINE_VECFUNC("llvm.atan2.f32", "_ZGVsMxvv_atan2f", SCALABLE(4), MASKED, "_ZGVsMxvv")
751757

752758
TLI_DEFINE_VECFUNC("atanh", "_ZGVsMxv_atanh", SCALABLE(2), MASKED, "_ZGVsMxv")
753759
TLI_DEFINE_VECFUNC("atanhf", "_ZGVsMxv_atanhf", SCALABLE(4), MASKED, "_ZGVsMxv")
@@ -933,6 +939,11 @@ TLI_DEFINE_VECFUNC("atan2f", "armpl_vatan2q_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N
933939
TLI_DEFINE_VECFUNC("atan2", "armpl_svatan2_f64_x", SCALABLE(2), MASKED, "_ZGVsMxvv")
934940
TLI_DEFINE_VECFUNC("atan2f", "armpl_svatan2_f32_x", SCALABLE(4), MASKED, "_ZGVsMxvv")
935941

942+
TLI_DEFINE_VECFUNC("llvm.atan2.f64", "armpl_vatan2q_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2vv")
943+
TLI_DEFINE_VECFUNC("llvm.atan2.f32", "armpl_vatan2q_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4vv")
944+
TLI_DEFINE_VECFUNC("llvm.atan2.f64", "armpl_svatan2_f64_x", SCALABLE(2), MASKED, "_ZGVsMxvv")
945+
TLI_DEFINE_VECFUNC("llvm.atan2.f32", "armpl_svatan2_f32_x", SCALABLE(4), MASKED, "_ZGVsMxvv")
946+
936947
TLI_DEFINE_VECFUNC("atanh", "armpl_vatanhq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
937948
TLI_DEFINE_VECFUNC("atanhf", "armpl_vatanhq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
938949
TLI_DEFINE_VECFUNC("atanh", "armpl_svatanh_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv")

llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5141,6 +5141,7 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
51415141
case G_FACOS:
51425142
case G_FASIN:
51435143
case G_FATAN:
5144+
case G_FATAN2:
51445145
case G_FCOSH:
51455146
case G_FSINH:
51465147
case G_FTANH:

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 16 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -734,18 +734,19 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
734734
setOperationAction(ISD::FCOPYSIGN, MVT::bf16, Promote);
735735
}
736736

737-
for (auto Op : {ISD::FREM, ISD::FPOW, ISD::FPOWI,
738-
ISD::FCOS, ISD::FSIN, ISD::FSINCOS,
739-
ISD::FACOS, ISD::FASIN, ISD::FATAN,
740-
ISD::FCOSH, ISD::FSINH, ISD::FTANH,
741-
ISD::FTAN, ISD::FEXP, ISD::FEXP2,
742-
ISD::FEXP10, ISD::FLOG, ISD::FLOG2,
743-
ISD::FLOG10, ISD::STRICT_FREM, ISD::STRICT_FPOW,
744-
ISD::STRICT_FPOWI, ISD::STRICT_FCOS, ISD::STRICT_FSIN,
745-
ISD::STRICT_FACOS, ISD::STRICT_FASIN, ISD::STRICT_FATAN,
746-
ISD::STRICT_FCOSH, ISD::STRICT_FSINH, ISD::STRICT_FTANH,
747-
ISD::STRICT_FEXP, ISD::STRICT_FEXP2, ISD::STRICT_FLOG,
748-
ISD::STRICT_FLOG2, ISD::STRICT_FLOG10, ISD::STRICT_FTAN}) {
737+
for (auto Op : {ISD::FREM, ISD::FPOW, ISD::FPOWI,
738+
ISD::FCOS, ISD::FSIN, ISD::FSINCOS,
739+
ISD::FACOS, ISD::FASIN, ISD::FATAN,
740+
ISD::FATAN2, ISD::FCOSH, ISD::FSINH,
741+
ISD::FTANH, ISD::FTAN, ISD::FEXP,
742+
ISD::FEXP2, ISD::FEXP10, ISD::FLOG,
743+
ISD::FLOG2, ISD::FLOG10, ISD::STRICT_FREM,
744+
ISD::STRICT_FPOW, ISD::STRICT_FPOWI, ISD::STRICT_FCOS,
745+
ISD::STRICT_FSIN, ISD::STRICT_FACOS, ISD::STRICT_FASIN,
746+
ISD::STRICT_FATAN, ISD::STRICT_FATAN2, ISD::STRICT_FCOSH,
747+
ISD::STRICT_FSINH, ISD::STRICT_FTANH, ISD::STRICT_FEXP,
748+
ISD::STRICT_FEXP2, ISD::STRICT_FLOG, ISD::STRICT_FLOG2,
749+
ISD::STRICT_FLOG10, ISD::STRICT_FTAN}) {
749750
setOperationAction(Op, MVT::f16, Promote);
750751
setOperationAction(Op, MVT::v4f16, Expand);
751752
setOperationAction(Op, MVT::v8f16, Expand);
@@ -1190,7 +1191,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
11901191
// silliness like this:
11911192
// clang-format off
11921193
for (auto Op :
1193-
{ISD::SELECT, ISD::SELECT_CC,
1194+
{ISD::SELECT, ISD::SELECT_CC, ISD::FATAN2,
11941195
ISD::BR_CC, ISD::FADD, ISD::FSUB,
11951196
ISD::FMUL, ISD::FDIV, ISD::FMA,
11961197
ISD::FNEG, ISD::FABS, ISD::FCEIL,
@@ -1649,6 +1650,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
16491650
setOperationAction(ISD::FACOS, VT, Expand);
16501651
setOperationAction(ISD::FASIN, VT, Expand);
16511652
setOperationAction(ISD::FATAN, VT, Expand);
1653+
setOperationAction(ISD::FATAN2, VT, Expand);
16521654
setOperationAction(ISD::FCOSH, VT, Expand);
16531655
setOperationAction(ISD::FSINH, VT, Expand);
16541656
setOperationAction(ISD::FTANH, VT, Expand);
@@ -1904,6 +1906,7 @@ void AArch64TargetLowering::addTypeForNEON(MVT VT) {
19041906
setOperationAction(ISD::FASIN, VT, Expand);
19051907
setOperationAction(ISD::FACOS, VT, Expand);
19061908
setOperationAction(ISD::FATAN, VT, Expand);
1909+
setOperationAction(ISD::FATAN2, VT, Expand);
19071910
setOperationAction(ISD::FSINH, VT, Expand);
19081911
setOperationAction(ISD::FCOSH, VT, Expand);
19091912
setOperationAction(ISD::FTANH, VT, Expand);

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -269,9 +269,10 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
269269
.libcallFor({{s64, s128}})
270270
.minScalarOrElt(1, MinFPScalar);
271271

272-
getActionDefinitionsBuilder(
273-
{G_FCOS, G_FSIN, G_FPOW, G_FLOG, G_FLOG2, G_FLOG10, G_FTAN, G_FEXP,
274-
G_FEXP2, G_FEXP10, G_FACOS, G_FASIN, G_FATAN, G_FCOSH, G_FSINH, G_FTANH})
272+
getActionDefinitionsBuilder({G_FCOS, G_FSIN, G_FPOW, G_FLOG, G_FLOG2,
273+
G_FLOG10, G_FTAN, G_FEXP, G_FEXP2, G_FEXP10,
274+
G_FACOS, G_FASIN, G_FATAN, G_FATAN2, G_FCOSH,
275+
G_FSINH, G_FTANH})
275276
// We need a call for these, so we always need to scalarize.
276277
.scalarize(0)
277278
// Regardless of FP16 support, widen 16-bit elements to 32-bits.

llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2345,6 +2345,14 @@ define float @test_atan_f32(float %x) {
23452345
ret float %y
23462346
}
23472347

2348+
declare float @llvm.atan2.f32(float, float)
2349+
define float @test_atan2_f32(float %x, float %y) {
2350+
; CHECK-LABEL: name: test_atan2_f32
2351+
; CHECK: %{{[0-9]+}}:_(s32) = G_FATAN2 %{{[0-9]+}}
2352+
%z = call float @llvm.atan2.f32(float %x, float %y)
2353+
ret float %z
2354+
}
2355+
23482356
declare float @llvm.cosh.f32(float)
23492357
define float @test_cosh_f32(float %x) {
23502358
; CHECK-LABEL: name: test_cosh_f32
Lines changed: 255 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,255 @@
1+
# RUN: llc -verify-machineinstrs -mtriple aarch64--- \
2+
# RUN: -run-pass=legalizer -mattr=+fullfp16 -global-isel %s -o - \
3+
# RUN: | FileCheck %s
4+
...
5+
---
6+
name: test_v4f16.atan2
7+
alignment: 4
8+
tracksRegLiveness: true
9+
registers:
10+
- { id: 0, class: _ }
11+
- { id: 1, class: _ }
12+
body: |
13+
bb.0:
14+
liveins: $d0, $d1
15+
; CHECK-LABEL: name: test_v4f16.atan2
16+
; CHECK: [[V1:%[0-9]+]]:_(s16), [[V2:%[0-9]+]]:_(s16), [[V3:%[0-9]+]]:_(s16), [[V4:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES %{{[0-9]+}}(<4 x s16>)
17+
; CHECK: [[V5:%[0-9]+]]:_(s16), [[V6:%[0-9]+]]:_(s16), [[V7:%[0-9]+]]:_(s16), [[V8:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES %{{[0-9]+}}(<4 x s16>)
18+
19+
; CHECK-DAG: [[V1_S32:%[0-9]+]]:_(s32) = G_FPEXT [[V1]](s16)
20+
; CHECK-DAG: [[V5_S32:%[0-9]+]]:_(s32) = G_FPEXT [[V5]](s16)
21+
; CHECK-NEXT: ADJCALLSTACKDOWN
22+
; CHECK-NEXT: $s0 = COPY [[V1_S32]](s32)
23+
; CHECK-NEXT: $s1 = COPY [[V5_S32]](s32)
24+
; CHECK-NEXT: BL &atan2f
25+
; CHECK-NEXT: ADJCALLSTACKUP
26+
; CHECK-NEXT: [[ELT1_S32:%[0-9]+]]:_(s32) = COPY $s0
27+
; CHECK-NEXT: [[ELT1:%[0-9]+]]:_(s16) = G_FPTRUNC [[ELT1_S32]](s32)
28+
29+
; CHECK-DAG: [[V2_S32:%[0-9]+]]:_(s32) = G_FPEXT [[V2]](s16)
30+
; CHECK-DAG: [[V6_S32:%[0-9]+]]:_(s32) = G_FPEXT [[V6]](s16)
31+
; CHECK-NEXT: ADJCALLSTACKDOWN
32+
; CHECK-NEXT: $s0 = COPY [[V2_S32]](s32)
33+
; CHECK-NEXT: $s1 = COPY [[V6_S32]](s32)
34+
; CHECK-NEXT: BL &atan2f
35+
; CHECK-NEXT: ADJCALLSTACKUP
36+
; CHECK-NEXT: [[ELT2_S32:%[0-9]+]]:_(s32) = COPY $s0
37+
; CHECK-NEXT: [[ELT2:%[0-9]+]]:_(s16) = G_FPTRUNC [[ELT2_S32]](s32)
38+
39+
; CHECK-DAG: [[V3_S32:%[0-9]+]]:_(s32) = G_FPEXT [[V3]](s16)
40+
; CHECK-DAG: [[V7_S32:%[0-9]+]]:_(s32) = G_FPEXT [[V7]](s16)
41+
; CHECK-NEXT: ADJCALLSTACKDOWN
42+
; CHECK-NEXT: $s0 = COPY [[V3_S32]](s32)
43+
; CHECK-NEXT: $s1 = COPY [[V7_S32]](s32)
44+
; CHECK-NEXT: BL &atan2f
45+
; CHECK-NEXT: ADJCALLSTACKUP
46+
; CHECK-NEXT: [[ELT3_S32:%[0-9]+]]:_(s32) = COPY $s0
47+
; CHECK-NEXT: [[ELT3:%[0-9]+]]:_(s16) = G_FPTRUNC [[ELT3_S32]](s32)
48+
49+
; CHECK-DAG: [[V4_S32:%[0-9]+]]:_(s32) = G_FPEXT [[V4]](s16)
50+
; CHECK-DAG: [[V8_S32:%[0-9]+]]:_(s32) = G_FPEXT [[V8]](s16)
51+
; CHECK-NEXT: ADJCALLSTACKDOWN
52+
; CHECK-NEXT: $s0 = COPY [[V4_S32]](s32)
53+
; CHECK-NEXT: $s1 = COPY [[V8_S32]](s32)
54+
; CHECK-NEXT: BL &atan2f
55+
; CHECK-NEXT: ADJCALLSTACKUP
56+
; CHECK-NEXT: [[ELT4_S32:%[0-9]+]]:_(s32) = COPY $s0
57+
; CHECK-NEXT: [[ELT4:%[0-9]+]]:_(s16) = G_FPTRUNC [[ELT4_S32]](s32)
58+
59+
; CHECK-DAG: %{{[0-9]+}}:_(<4 x s16>) = G_BUILD_VECTOR [[ELT1]](s16), [[ELT2]](s16), [[ELT3]](s16), [[ELT4]](s16)
60+
61+
%0:_(<4 x s16>) = COPY $d0
62+
%1:_(<4 x s16>) = COPY $d1
63+
%2:_(<4 x s16>) = G_FATAN2 %0, %1
64+
$d0 = COPY %2(<4 x s16>)
65+
RET_ReallyLR implicit $d0
66+
67+
...
68+
---
69+
name: test_v8f16.atan2
70+
alignment: 4
71+
tracksRegLiveness: true
72+
registers:
73+
- { id: 0, class: _ }
74+
- { id: 1, class: _ }
75+
body: |
76+
bb.0:
77+
liveins: $q0, $q1
78+
79+
; CHECK-LABEL: name: test_v8f16.atan2
80+
81+
; This is big, so let's just check for the 8 calls to atan2f, the
82+
; G_UNMERGE_VALUES, and the G_BUILD_VECTOR. The other instructions ought
83+
; to be covered by the other tests.
84+
85+
; CHECK: G_UNMERGE_VALUES
86+
; CHECK: BL &atan2f
87+
; CHECK: BL &atan2f
88+
; CHECK: BL &atan2f
89+
; CHECK: BL &atan2f
90+
; CHECK: BL &atan2f
91+
; CHECK: BL &atan2f
92+
; CHECK: BL &atan2f
93+
; CHECK: BL &atan2f
94+
; CHECK: G_BUILD_VECTOR
95+
96+
%0:_(<8 x s16>) = COPY $q0
97+
%1:_(<8 x s16>) = COPY $q1
98+
%2:_(<8 x s16>) = G_FATAN2 %0, %1
99+
$q0 = COPY %2(<8 x s16>)
100+
RET_ReallyLR implicit $q0
101+
102+
...
103+
---
104+
name: test_v2f32.atan2
105+
alignment: 4
106+
tracksRegLiveness: true
107+
registers:
108+
- { id: 0, class: _ }
109+
- { id: 1, class: _ }
110+
body: |
111+
bb.0:
112+
liveins: $d0, $d1
113+
114+
; CHECK-LABEL: name: test_v2f32.atan2
115+
; CHECK: [[V1:%[0-9]+]]:_(s32), [[V2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES %{{[0-9]+}}(<2 x s32>)
116+
; CHECK: [[V3:%[0-9]+]]:_(s32), [[V4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES %{{[0-9]+}}(<2 x s32>)
117+
118+
; CHECK-NEXT: ADJCALLSTACKDOWN
119+
; CHECK-DAG: $s0 = COPY [[V1]](s32)
120+
; CHECK-DAG: $s1 = COPY [[V3]](s32)
121+
; CHECK: BL &atan2f
122+
; CHECK: ADJCALLSTACKUP
123+
; CHECK: [[ELT1:%[0-9]+]]:_(s32) = COPY $s0
124+
125+
; CHECK-NEXT: ADJCALLSTACKDOWN
126+
; CHECK-DAG: $s0 = COPY [[V2]](s32)
127+
; CHECK-DAG: $s1 = COPY [[V4]](s32)
128+
; CHECK: BL &atan2f
129+
; CHECK: ADJCALLSTACKUP
130+
; CHECK: [[ELT2:%[0-9]+]]:_(s32) = COPY $s0
131+
132+
; CHECK: %2:_(<2 x s32>) = G_BUILD_VECTOR [[ELT1]](s32), [[ELT2]](s32)
133+
134+
%0:_(<2 x s32>) = COPY $d0
135+
%1:_(<2 x s32>) = COPY $d1
136+
%2:_(<2 x s32>) = G_FATAN2 %0, %1
137+
$d0 = COPY %2(<2 x s32>)
138+
RET_ReallyLR implicit $d0
139+
140+
...
141+
---
142+
name: test_v4f32.atan2
143+
alignment: 4
144+
tracksRegLiveness: true
145+
registers:
146+
- { id: 0, class: _ }
147+
- { id: 1, class: _ }
148+
body: |
149+
bb.0:
150+
liveins: $q0, $q1
151+
; CHECK-LABEL: name: test_v4f32.atan2
152+
; CHECK: [[V1:%[0-9]+]]:_(s32), [[V2:%[0-9]+]]:_(s32), [[V3:%[0-9]+]]:_(s32), [[V4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES %{{[0-9]+}}(<4 x s32>)
153+
; CHECK: [[V5:%[0-9]+]]:_(s32), [[V6:%[0-9]+]]:_(s32), [[V7:%[0-9]+]]:_(s32), [[V8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES %{{[0-9]+}}(<4 x s32>)
154+
155+
; CHECK-NEXT: ADJCALLSTACKDOWN
156+
; CHECK-DAG: $s0 = COPY [[V1]](s32)
157+
; CHECK-DAG: $s1 = COPY [[V5]](s32)
158+
; CHECK: BL &atan2f
159+
; CHECK: ADJCALLSTACKUP
160+
; CHECK-NEXT: [[ELT1:%[0-9]+]]:_(s32) = COPY $s0
161+
162+
; CHECK-NEXT: ADJCALLSTACKDOWN
163+
; CHECK-DAG: $s0 = COPY [[V2]](s32)
164+
; CHECK-DAG: $s1 = COPY [[V6]](s32)
165+
; CHECK: BL &atan2f
166+
; CHECK: ADJCALLSTACKUP
167+
; CHECK-NEXT: [[ELT2:%[0-9]+]]:_(s32) = COPY $s0
168+
169+
; CHECK-NEXT: ADJCALLSTACKDOWN
170+
; CHECK-DAG: $s0 = COPY [[V3]](s32)
171+
; CHECK-DAG: $s1 = COPY [[V7]](s32)
172+
; CHECK: BL &atan2f
173+
; CHECK: ADJCALLSTACKUP
174+
; CHECK-NEXT: [[ELT3:%[0-9]+]]:_(s32) = COPY $s0
175+
176+
; CHECK-NEXT: ADJCALLSTACKDOWN
177+
; CHECK-DAG: $s0 = COPY [[V4]](s32)
178+
; CHECK-DAG: $s1 = COPY [[V8]](s32)
179+
; CHECK: BL &atan2f
180+
; CHECK: ADJCALLSTACKUP
181+
; CHECK-NEXT: [[ELT4:%[0-9]+]]:_(s32) = COPY $s0
182+
183+
; CHECK: %2:_(<4 x s32>) = G_BUILD_VECTOR [[ELT1]](s32), [[ELT2]](s32), [[ELT3]](s32), [[ELT4]](s32)
184+
185+
%0:_(<4 x s32>) = COPY $q0
186+
%1:_(<4 x s32>) = COPY $q1
187+
%2:_(<4 x s32>) = G_FATAN2 %0, %1
188+
$q0 = COPY %2(<4 x s32>)
189+
RET_ReallyLR implicit $q0
190+
191+
...
192+
---
193+
name: test_v2f64.atan2
194+
alignment: 4
195+
tracksRegLiveness: true
196+
registers:
197+
- { id: 0, class: _ }
198+
- { id: 1, class: _ }
199+
body: |
200+
bb.0:
201+
liveins: $q0, $q1
202+
203+
; CHECK-LABEL: name: test_v2f64.atan2
204+
; CHECK: [[V1:%[0-9]+]]:_(s64), [[V2:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES %{{[0-9]+}}(<2 x s64>)
205+
; CHECK: [[V3:%[0-9]+]]:_(s64), [[V4:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES %{{[0-9]+}}(<2 x s64>)
206+
207+
; CHECK-NEXT: ADJCALLSTACKDOWN
208+
; CHECK-DAG: $d0 = COPY [[V1]](s64)
209+
; CHECK-DAG: $d1 = COPY [[V3]](s64)
210+
; CHECK: BL &atan2
211+
; CHECK: ADJCALLSTACKUP
212+
; CHECK-NEXT: [[ELT1:%[0-9]+]]:_(s64) = COPY $d0
213+
214+
; CHECK-NEXT: ADJCALLSTACKDOWN
215+
; CHECK-DAG: $d0 = COPY [[V2]](s64)
216+
; CHECK-DAG: $d1 = COPY [[V4]](s64)
217+
; CHECK: BL &atan2
218+
; CHECK: ADJCALLSTACKUP
219+
; CHECK-NEXT: [[ELT2:%[0-9]+]]:_(s64) = COPY $d0
220+
221+
; CHECK: %2:_(<2 x s64>) = G_BUILD_VECTOR [[ELT1]](s64), [[ELT2]](s64)
222+
223+
%0:_(<2 x s64>) = COPY $q0
224+
%1:_(<2 x s64>) = COPY $q1
225+
%2:_(<2 x s64>) = G_FATAN2 %0, %1
226+
$q0 = COPY %2(<2 x s64>)
227+
RET_ReallyLR implicit $q0
228+
229+
...
230+
---
231+
name: test_atan2_half
232+
alignment: 4
233+
tracksRegLiveness: true
234+
registers:
235+
- { id: 0, class: _ }
236+
- { id: 1, class: _ }
237+
body: |
238+
bb.0:
239+
liveins: $h0, $h1
240+
; CHECK-LABEL: name: test_atan2_half
241+
; CHECK: [[REG1:%[0-9]+]]:_(s32) = G_FPEXT %0(s16)
242+
; CHECK: [[REG2:%[0-9]+]]:_(s32) = G_FPEXT %1(s16)
243+
; CHECK-NEXT: ADJCALLSTACKDOWN
244+
; CHECK-NEXT: $s0 = COPY [[REG1]](s32)
245+
; CHECK-NEXT: $s1 = COPY [[REG2]](s32)
246+
; CHECK-NEXT: BL &atan2f
247+
; CHECK: ADJCALLSTACKUP
248+
; CHECK-NEXT: [[REG2:%[0-9]+]]:_(s32) = COPY $s0
249+
; CHECK-NEXT: [[RES:%[0-9]+]]:_(s16) = G_FPTRUNC [[REG2]](s32)
250+
251+
%0:_(s16) = COPY $h0
252+
%1:_(s16) = COPY $h1
253+
%2:_(s16) = G_FATAN2 %0, %1
254+
$h0 = COPY %2(s16)
255+
RET_ReallyLR implicit $h0

0 commit comments

Comments
 (0)