Skip to content

Commit 378fe2f

Browse files
authored
[X86][LoopVectorize] Add support for arc and hyperbolic trig functions (#99383)
This change is part 2 (x86 loop vectorization) of #96222. It also adds veclib call loop vectorization, hence the test cases in `llvm/test/Transforms/LoopVectorize/X86/veclib-calls.ll`. Finally, the last PR missed tests for `llvm/test/CodeGen/X86/fp-strict-libcalls-msvc32.ll` and `llvm/test/CodeGen/X86/vec-libcalls.ll`, so those are added as well.

No evidence was found for arc and hyperbolic trig glibc vector math functions (see https://github.com/lattera/glibc/blob/master/sysdeps/x86/fpu/bits/math-vector.h), so there are no new `_ZGVbN2v_*` and `_ZGVdN4v_*` mappings, and therefore no new tests in `llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls-VF2-VF8.ll`.

There are also no new SVML mappings and no new tests in `llvm/test/Transforms/LoopVectorize/X86/svml-calls.ll`. There was not enough evidence that SVML arc and hyperbolic trig vector implementations exist. Documentation was scarce, so the test cases in [numpy](https://github.com/numpy/SVML/blob/32bf2a98420762a63ab418aaa0a7d6e17eb9627a/linux/avx512/svml_z0_acos_d_la.s#L8) were consulted instead. Someone with more experience with SVML should investigate.

## Note
AMD libm doesn't have a vector hyperbolic sine API, which is why you might notice there are no tests for `sinh`.

## History
This change is part of the #87367 investigation into supporting IEEE math operations as intrinsics, which was discussed in this RFC: https://discourse.llvm.org/t/rfc-all-the-math-intrinsics/78294

This change adds loop vectorization for `acos`, `asin`, `atan`, `cosh`, `sinh`, and `tanh`.

resolves #70079 resolves #70080 resolves #70081 resolves #70083 resolves #70084 resolves #95966
1 parent ea7cc12 commit 378fe2f

File tree

5 files changed

+1863
-0
lines changed

5 files changed

+1863
-0
lines changed

llvm/include/llvm/Analysis/VecFuncs.def

+27
Original file line numberDiff line numberDiff line change
@@ -51,13 +51,19 @@ TLI_DEFINE_VECFUNC("llvm.cos.f32", "vcosf", FIXED(4), "_ZGV_LLVM_N4v")
5151
TLI_DEFINE_VECFUNC("tanf", "vtanf", FIXED(4), "_ZGV_LLVM_N4v")
5252
TLI_DEFINE_VECFUNC("llvm.tan.f32", "vtanf", FIXED(4), "_ZGV_LLVM_N4v")
5353
TLI_DEFINE_VECFUNC("asinf", "vasinf", FIXED(4), "_ZGV_LLVM_N4v")
54+
TLI_DEFINE_VECFUNC("llvm.asin.f32", "vasinf", FIXED(4), "_ZGV_LLVM_N4v")
5455
TLI_DEFINE_VECFUNC("acosf", "vacosf", FIXED(4), "_ZGV_LLVM_N4v")
56+
TLI_DEFINE_VECFUNC("llvm.acos.f32", "vacosf", FIXED(4), "_ZGV_LLVM_N4v")
5557
TLI_DEFINE_VECFUNC("atanf", "vatanf", FIXED(4), "_ZGV_LLVM_N4v")
58+
TLI_DEFINE_VECFUNC("llvm.atan.f32", "vatanf", FIXED(4), "_ZGV_LLVM_N4v")
5659

5760
// Hyperbolic Functions
5861
TLI_DEFINE_VECFUNC("sinhf", "vsinhf", FIXED(4), "_ZGV_LLVM_N4v")
62+
TLI_DEFINE_VECFUNC("llvm.sinh.f32", "vsinhf", FIXED(4), "_ZGV_LLVM_N4v")
5963
TLI_DEFINE_VECFUNC("coshf", "vcoshf", FIXED(4), "_ZGV_LLVM_N4v")
64+
TLI_DEFINE_VECFUNC("llvm.cosh.f32", "vcoshf", FIXED(4), "_ZGV_LLVM_N4v")
6065
TLI_DEFINE_VECFUNC("tanhf", "vtanhf", FIXED(4), "_ZGV_LLVM_N4v")
66+
TLI_DEFINE_VECFUNC("llvm.tanh.f32", "vtanhf", FIXED(4), "_ZGV_LLVM_N4v")
6167
TLI_DEFINE_VECFUNC("asinhf", "vasinhf", FIXED(4), "_ZGV_LLVM_N4v")
6268
TLI_DEFINE_VECFUNC("acoshf", "vacoshf", FIXED(4), "_ZGV_LLVM_N4v")
6369
TLI_DEFINE_VECFUNC("atanhf", "vatanhf", FIXED(4), "_ZGV_LLVM_N4v")
@@ -1358,22 +1364,43 @@ TLI_DEFINE_VECFUNC("asinf", "amd_vrs4_asinf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
13581364
TLI_DEFINE_VECFUNC("asinf", "amd_vrs8_asinf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
13591365
TLI_DEFINE_VECFUNC("asinf", "amd_vrs16_asinf", FIXED(16), NOMASK, "_ZGV_LLVM_N16v")
13601366

1367+
TLI_DEFINE_VECFUNC("llvm.asin.f64", "amd_vrd8_asin", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
1368+
TLI_DEFINE_VECFUNC("llvm.asin.f32", "amd_vrs4_asinf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
1369+
TLI_DEFINE_VECFUNC("llvm.asin.f32", "amd_vrs8_asinf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
1370+
TLI_DEFINE_VECFUNC("llvm.asin.f32", "amd_vrs16_asinf", FIXED(16), NOMASK, "_ZGV_LLVM_N16v")
1371+
13611372
TLI_DEFINE_VECFUNC("acosf", "amd_vrs4_acosf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
13621373
TLI_DEFINE_VECFUNC("acosf", "amd_vrs8_acosf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
13631374

1375+
TLI_DEFINE_VECFUNC("llvm.acos.f32", "amd_vrs8_acosf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
1376+
TLI_DEFINE_VECFUNC("llvm.acos.f32", "amd_vrs4_acosf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
1377+
13641378
TLI_DEFINE_VECFUNC("atan", "amd_vrd2_atan", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
13651379
TLI_DEFINE_VECFUNC("atan", "amd_vrd4_atan", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
13661380
TLI_DEFINE_VECFUNC("atan", "amd_vrd8_atan", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
13671381
TLI_DEFINE_VECFUNC("atanf", "amd_vrs4_atanf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
13681382
TLI_DEFINE_VECFUNC("atanf", "amd_vrs8_atanf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
13691383
TLI_DEFINE_VECFUNC("atanf", "amd_vrs16_atanf", FIXED(16), NOMASK, "_ZGV_LLVM_N16v")
13701384

1385+
TLI_DEFINE_VECFUNC("llvm.atan.f64", "amd_vrd2_atan", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
1386+
TLI_DEFINE_VECFUNC("llvm.atan.f64", "amd_vrd4_atan", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
1387+
TLI_DEFINE_VECFUNC("llvm.atan.f64", "amd_vrd8_atan", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
1388+
TLI_DEFINE_VECFUNC("llvm.atan.f32", "amd_vrs4_atanf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
1389+
TLI_DEFINE_VECFUNC("llvm.atan.f32", "amd_vrs8_atanf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
1390+
TLI_DEFINE_VECFUNC("llvm.atan.f32", "amd_vrs16_atanf", FIXED(16), NOMASK, "_ZGV_LLVM_N16v")
1391+
13711392
TLI_DEFINE_VECFUNC("coshf", "amd_vrs4_coshf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
13721393
TLI_DEFINE_VECFUNC("coshf", "amd_vrs8_coshf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
13731394

1395+
TLI_DEFINE_VECFUNC("llvm.cosh.f32", "amd_vrs4_coshf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
1396+
TLI_DEFINE_VECFUNC("llvm.cosh.f32", "amd_vrs8_coshf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
1397+
13741398
TLI_DEFINE_VECFUNC("tanhf", "amd_vrs4_tanhf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
13751399
TLI_DEFINE_VECFUNC("tanhf", "amd_vrs8_tanhf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
13761400

1401+
TLI_DEFINE_VECFUNC("llvm.tanh.f32", "amd_vrs4_tanhf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
1402+
TLI_DEFINE_VECFUNC("llvm.tanh.f32", "amd_vrs8_tanhf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
1403+
13771404
TLI_DEFINE_VECFUNC("cbrt", "amd_vrd2_cbrt", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
13781405
TLI_DEFINE_VECFUNC("cbrtf", "amd_vrs4_cbrtf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
13791406

llvm/test/CodeGen/X86/fp-strict-libcalls-msvc32.ll

+90
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,90 @@ define float @tan(float %x) #0 {
177177
ret float %result
178178
}
179179

180+
define float @acos(float %x) #0 {
181+
; CHECK-LABEL: acos:
182+
; CHECK: # %bb.0:
183+
; CHECK-NEXT: pushl %eax
184+
; CHECK-NEXT: flds {{[0-9]+}}(%esp)
185+
; CHECK-NEXT: fstps (%esp)
186+
; CHECK-NEXT: wait
187+
; CHECK-NEXT: calll _acosf
188+
; CHECK-NEXT: popl %eax
189+
; CHECK-NEXT: retl
190+
%result = call float @llvm.experimental.constrained.acos.f32(float %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
191+
ret float %result
192+
}
193+
194+
define float @asin(float %x) #0 {
195+
; CHECK-LABEL: asin:
196+
; CHECK: # %bb.0:
197+
; CHECK-NEXT: pushl %eax
198+
; CHECK-NEXT: flds {{[0-9]+}}(%esp)
199+
; CHECK-NEXT: fstps (%esp)
200+
; CHECK-NEXT: wait
201+
; CHECK-NEXT: calll _asinf
202+
; CHECK-NEXT: popl %eax
203+
; CHECK-NEXT: retl
204+
%result = call float @llvm.experimental.constrained.asin.f32(float %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
205+
ret float %result
206+
}
207+
208+
define float @atan(float %x) #0 {
209+
; CHECK-LABEL: atan:
210+
; CHECK: # %bb.0:
211+
; CHECK-NEXT: pushl %eax
212+
; CHECK-NEXT: flds {{[0-9]+}}(%esp)
213+
; CHECK-NEXT: fstps (%esp)
214+
; CHECK-NEXT: wait
215+
; CHECK-NEXT: calll _atanf
216+
; CHECK-NEXT: popl %eax
217+
; CHECK-NEXT: retl
218+
%result = call float @llvm.experimental.constrained.atan.f32(float %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
219+
ret float %result
220+
}
221+
222+
define float @cosh(float %x) #0 {
223+
; CHECK-LABEL: cosh:
224+
; CHECK: # %bb.0:
225+
; CHECK-NEXT: pushl %eax
226+
; CHECK-NEXT: flds {{[0-9]+}}(%esp)
227+
; CHECK-NEXT: fstps (%esp)
228+
; CHECK-NEXT: wait
229+
; CHECK-NEXT: calll _coshf
230+
; CHECK-NEXT: popl %eax
231+
; CHECK-NEXT: retl
232+
%result = call float @llvm.experimental.constrained.cosh.f32(float %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
233+
ret float %result
234+
}
235+
236+
define float @sinh(float %x) #0 {
237+
; CHECK-LABEL: sinh:
238+
; CHECK: # %bb.0:
239+
; CHECK-NEXT: pushl %eax
240+
; CHECK-NEXT: flds {{[0-9]+}}(%esp)
241+
; CHECK-NEXT: fstps (%esp)
242+
; CHECK-NEXT: wait
243+
; CHECK-NEXT: calll _sinhf
244+
; CHECK-NEXT: popl %eax
245+
; CHECK-NEXT: retl
246+
%result = call float @llvm.experimental.constrained.sinh.f32(float %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
247+
ret float %result
248+
}
249+
250+
define float @tanh(float %x) #0 {
251+
; CHECK-LABEL: tanh:
252+
; CHECK: # %bb.0:
253+
; CHECK-NEXT: pushl %eax
254+
; CHECK-NEXT: flds {{[0-9]+}}(%esp)
255+
; CHECK-NEXT: fstps (%esp)
256+
; CHECK-NEXT: wait
257+
; CHECK-NEXT: calll _tanhf
258+
; CHECK-NEXT: popl %eax
259+
; CHECK-NEXT: retl
260+
%result = call float @llvm.experimental.constrained.tanh.f32(float %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
261+
ret float %result
262+
}
263+
180264
attributes #0 = { strictfp }
181265

182266
declare float @llvm.experimental.constrained.ceil.f32(float, metadata)
@@ -189,3 +273,9 @@ declare float @llvm.experimental.constrained.log10.f32(float, metadata, metadata
189273
declare float @llvm.experimental.constrained.pow.f32(float, float, metadata, metadata)
190274
declare float @llvm.experimental.constrained.sin.f32(float, metadata, metadata)
191275
declare float @llvm.experimental.constrained.tan.f32(float, metadata, metadata)
276+
declare float @llvm.experimental.constrained.acos.f32(float, metadata, metadata)
277+
declare float @llvm.experimental.constrained.asin.f32(float, metadata, metadata)
278+
declare float @llvm.experimental.constrained.atan.f32(float, metadata, metadata)
279+
declare float @llvm.experimental.constrained.cosh.f32(float, metadata, metadata)
280+
declare float @llvm.experimental.constrained.sinh.f32(float, metadata, metadata)
281+
declare float @llvm.experimental.constrained.tanh.f32(float, metadata, metadata)

0 commit comments

Comments
 (0)