Skip to content

Commit 3e9a7a2

Browse files
committed
Add missing veclib tests
1 parent 750b661 commit 3e9a7a2

File tree

5 files changed: 195 additions and 3 deletions.

llvm/test/CodeGen/AArch64/replace-with-veclib-sleef-scalable.ll

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
target triple = "aarch64-unknown-linux-gnu"
55

66
;.
7-
; CHECK: @llvm.compiler.used = appending global [32 x ptr] [ptr @_ZGVsMxv_cos, ptr @_ZGVsMxv_cosf, ptr @_ZGVsMxv_exp, ptr @_ZGVsMxv_expf, ptr @_ZGVsMxv_exp10, ptr @_ZGVsMxv_exp10f, ptr @_ZGVsMxv_exp2, ptr @_ZGVsMxv_exp2f, ptr @_ZGVsMxv_log, ptr @_ZGVsMxv_logf, ptr @_ZGVsMxv_log10, ptr @_ZGVsMxv_log10f, ptr @_ZGVsMxv_log2, ptr @_ZGVsMxv_log2f, ptr @_ZGVsMxvv_pow, ptr @_ZGVsMxvv_powf, ptr @_ZGVsMxv_sin, ptr @_ZGVsMxv_sinf, ptr @_ZGVsMxv_tan, ptr @_ZGVsMxv_tanf, ptr @_ZGVsMxv_acos, ptr @_ZGVsMxv_acosf, ptr @_ZGVsMxv_asin, ptr @_ZGVsMxv_asinf, ptr @_ZGVsMxv_atan, ptr @_ZGVsMxv_atanf, ptr @_ZGVsMxv_cosh, ptr @_ZGVsMxv_coshf, ptr @_ZGVsMxv_sinh, ptr @_ZGVsMxv_sinhf, ptr @_ZGVsMxv_tanh, ptr @_ZGVsMxv_tanhf], section "llvm.metadata"
7+
; CHECK: @llvm.compiler.used = appending global [34 x ptr] [ptr @_ZGVsMxv_cos, ptr @_ZGVsMxv_cosf, ptr @_ZGVsMxv_exp, ptr @_ZGVsMxv_expf, ptr @_ZGVsMxv_exp10, ptr @_ZGVsMxv_exp10f, ptr @_ZGVsMxv_exp2, ptr @_ZGVsMxv_exp2f, ptr @_ZGVsMxv_log, ptr @_ZGVsMxv_logf, ptr @_ZGVsMxv_log10, ptr @_ZGVsMxv_log10f, ptr @_ZGVsMxv_log2, ptr @_ZGVsMxv_log2f, ptr @_ZGVsMxvv_pow, ptr @_ZGVsMxvv_powf, ptr @_ZGVsMxv_sin, ptr @_ZGVsMxv_sinf, ptr @_ZGVsMxv_tan, ptr @_ZGVsMxv_tanf, ptr @_ZGVsMxv_acos, ptr @_ZGVsMxv_acosf, ptr @_ZGVsMxv_asin, ptr @_ZGVsMxv_asinf, ptr @_ZGVsMxv_atan, ptr @_ZGVsMxv_atanf, ptr @_ZGVsMxvv_atan2, ptr @_ZGVsMxvv_atan2f, ptr @_ZGVsMxv_cosh, ptr @_ZGVsMxv_coshf, ptr @_ZGVsMxv_sinh, ptr @_ZGVsMxv_sinhf, ptr @_ZGVsMxv_tanh, ptr @_ZGVsMxv_tanhf], section "llvm.metadata"
88
;.
99
define <vscale x 2 x double> @llvm_ceil_vscale_f64(<vscale x 2 x double> %in) {
1010
; CHECK-LABEL: @llvm_ceil_vscale_f64(
@@ -438,6 +438,24 @@ define <vscale x 4 x float> @llvm_atan_vscale_f32(<vscale x 4 x float> %in) {
438438
ret <vscale x 4 x float> %1
439439
}
440440

441+
; Two-operand atan2 on <vscale x 2 x double>. The expected output replaces the
; fast-math call to @llvm.atan2.nxv2f64 below with a call to @_ZGVsMxvv_atan2
; that keeps both operands and appends a splat-of-true <vscale x 2 x i1> mask.
define <vscale x 2 x double> @llvm_atan2_vscale_f64(<vscale x 2 x double> %x, <vscale x 2 x double> %y) {
442+
; CHECK-LABEL: @llvm_atan2_vscale_f64(
443+
; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxvv_atan2(<vscale x 2 x double> [[INX:%.*]], <vscale x 2 x double> [[INY:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
444+
; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
445+
;
446+
%1 = call fast <vscale x 2 x double> @llvm.atan2.nxv2f64(<vscale x 2 x double> %x, <vscale x 2 x double> %y)
447+
ret <vscale x 2 x double> %1
448+
}
449+
450+
; Two-operand atan2 on <vscale x 4 x float>. The expected output replaces the
; fast-math call to @llvm.atan2.nxv4f32 below with a call to @_ZGVsMxvv_atan2f
; that keeps both operands and appends a splat-of-true <vscale x 4 x i1> mask.
define <vscale x 4 x float> @llvm_atan2_vscale_f32(<vscale x 4 x float> %x, <vscale x 4 x float> %y) {
451+
; CHECK-LABEL: @llvm_atan2_vscale_f32(
452+
; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxvv_atan2f(<vscale x 4 x float> [[INX:%.*]], <vscale x 4 x float> [[INY:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
453+
; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
454+
;
455+
%1 = call fast <vscale x 4 x float> @llvm.atan2.nxv4f32(<vscale x 4 x float> %x, <vscale x 4 x float> %y)
456+
ret <vscale x 4 x float> %1
457+
}
458+
441459
define <vscale x 2 x double> @llvm_cosh_vscale_f64(<vscale x 2 x double> %in) {
442460
; CHECK-LABEL: @llvm_cosh_vscale_f64(
443461
; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_cosh(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))

llvm/test/CodeGen/AArch64/replace-with-veclib-sleef.ll

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
target triple = "aarch64-unknown-linux-gnu"
55

66
;.
7-
; CHECK: @llvm.compiler.used = appending global [32 x ptr] [ptr @_ZGVnN2v_cos, ptr @_ZGVnN4v_cosf, ptr @_ZGVnN2v_exp, ptr @_ZGVnN4v_expf, ptr @_ZGVnN2v_exp10, ptr @_ZGVnN4v_exp10f, ptr @_ZGVnN2v_exp2, ptr @_ZGVnN4v_exp2f, ptr @_ZGVnN2v_log, ptr @_ZGVnN4v_logf, ptr @_ZGVnN2v_log10, ptr @_ZGVnN4v_log10f, ptr @_ZGVnN2v_log2, ptr @_ZGVnN4v_log2f, ptr @_ZGVnN2vv_pow, ptr @_ZGVnN4vv_powf, ptr @_ZGVnN2v_sin, ptr @_ZGVnN4v_sinf, ptr @_ZGVnN2v_tan, ptr @_ZGVnN4v_tanf, ptr @_ZGVnN2v_acos, ptr @_ZGVnN4v_acosf, ptr @_ZGVnN2v_asin, ptr @_ZGVnN4v_asinf, ptr @_ZGVnN2v_atan, ptr @_ZGVnN4v_atanf, ptr @_ZGVnN2v_cosh, ptr @_ZGVnN4v_coshf, ptr @_ZGVnN2v_sinh, ptr @_ZGVnN4v_sinhf, ptr @_ZGVnN2v_tanh, ptr @_ZGVnN4v_tanhf], section "llvm.metadata"
7+
; CHECK: @llvm.compiler.used = appending global [34 x ptr] [ptr @_ZGVnN2v_cos, ptr @_ZGVnN4v_cosf, ptr @_ZGVnN2v_exp, ptr @_ZGVnN4v_expf, ptr @_ZGVnN2v_exp10, ptr @_ZGVnN4v_exp10f, ptr @_ZGVnN2v_exp2, ptr @_ZGVnN4v_exp2f, ptr @_ZGVnN2v_log, ptr @_ZGVnN4v_logf, ptr @_ZGVnN2v_log10, ptr @_ZGVnN4v_log10f, ptr @_ZGVnN2v_log2, ptr @_ZGVnN4v_log2f, ptr @_ZGVnN2vv_pow, ptr @_ZGVnN4vv_powf, ptr @_ZGVnN2v_sin, ptr @_ZGVnN4v_sinf, ptr @_ZGVnN2v_tan, ptr @_ZGVnN4v_tanf, ptr @_ZGVnN2v_acos, ptr @_ZGVnN4v_acosf, ptr @_ZGVnN2v_asin, ptr @_ZGVnN4v_asinf, ptr @_ZGVnN2v_atan, ptr @_ZGVnN4v_atanf, ptr @_ZGVnN2vv_atan2, ptr @_ZGVnN4vv_atan2f, ptr @_ZGVnN2v_cosh, ptr @_ZGVnN4v_coshf, ptr @_ZGVnN2v_sinh, ptr @_ZGVnN4v_sinhf, ptr @_ZGVnN2v_tanh, ptr @_ZGVnN4v_tanhf], section "llvm.metadata"
88
;.
99
define <2 x double> @llvm_ceil_f64(<2 x double> %in) {
1010
; CHECK-LABEL: @llvm_ceil_f64(
@@ -438,6 +438,24 @@ define <4 x float> @llvm_atan_f32(<4 x float> %in) {
438438
ret <4 x float> %1
439439
}
440440

441+
; Two-operand atan2 on fixed-width <2 x double>. The expected output replaces
; the fast-math call to @llvm.atan2.v2f64 below with a call to @_ZGVnN2vv_atan2
; taking the same two operands (no mask operand in this variant).
define <2 x double> @llvm_atan2_f64(<2 x double> %x, <2 x double> %y) {
442+
; CHECK-LABEL: @llvm_atan2_f64(
443+
; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2vv_atan2(<2 x double> [[INX:%.*]], <2 x double> [[INY:%.*]])
444+
; CHECK-NEXT: ret <2 x double> [[TMP1]]
445+
;
446+
%1 = call fast <2 x double> @llvm.atan2.v2f64(<2 x double> %x, <2 x double> %y)
447+
ret <2 x double> %1
448+
}
449+
450+
; Two-operand atan2 on fixed-width <4 x float>. The expected output replaces
; the fast-math call to @llvm.atan2.v4f32 below with a call to @_ZGVnN4vv_atan2f
; taking the same two operands (no mask operand in this variant).
define <4 x float> @llvm_atan2_f32(<4 x float> %x, <4 x float> %y) {
451+
; CHECK-LABEL: @llvm_atan2_f32(
452+
; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4vv_atan2f(<4 x float> [[INX:%.*]], <4 x float> [[INY:%.*]])
453+
; CHECK-NEXT: ret <4 x float> [[TMP1]]
454+
;
455+
%1 = call fast <4 x float> @llvm.atan2.v4f32(<4 x float> %x, <4 x float> %y)
456+
ret <4 x float> %1
457+
}
458+
441459
define <2 x double> @llvm_cosh_f64(<2 x double> %in) {
442460
; CHECK-LABEL: @llvm_cosh_f64(
443461
; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2v_cosh(<2 x double> [[IN:%.*]])

llvm/test/CodeGen/AArch64/vec-libcalls.ll

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ declare <3 x float> @llvm.tan.v3f32(<3 x float>)
2424
declare <3 x float> @llvm.asin.v3f32(<3 x float>)
2525
declare <3 x float> @llvm.acos.v3f32(<3 x float>)
2626
declare <3 x float> @llvm.atan.v3f32(<3 x float>)
27+
declare <3 x float> @llvm.atan2.v3f32(<3 x float>, <3 x float>)
2728
declare <3 x float> @llvm.sinh.v3f32(<3 x float>)
2829
declare <3 x float> @llvm.cosh.v3f32(<3 x float>)
2930
declare <3 x float> @llvm.tanh.v3f32(<3 x float>)
@@ -428,6 +429,40 @@ define <3 x float> @atan_v3f32(<3 x float> %x) nounwind {
428429
ret <3 x float> %r
429430
}
430431

432+
; atan2 on <3 x float>. The expected assembly contains three scalar
; `bl atan2f` calls, taking lane 1, then lane 0, then lane 2 of each operand.
; Both input vectors (q0, q1) are spilled to the stack before the first call
; and reloaded before each later one; partial results are merged back into a
; single vector with lane inserts before returning.
define <3 x float> @atan2_v3f32(<3 x float> %x, <3 x float> %y) nounwind {
433+
; CHECK-LABEL: atan2_v3f32:
434+
; CHECK: // %bb.0:
435+
; CHECK-NEXT: sub sp, sp, #64
436+
; CHECK-NEXT: stp q0, q1, [sp, #16] // 32-byte Folded Spill
437+
; CHECK-NEXT: mov s0, v0.s[1]
438+
; CHECK-NEXT: mov s1, v1.s[1]
439+
; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
440+
; CHECK-NEXT: bl atan2f
441+
; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
442+
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
443+
; CHECK-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload
444+
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
445+
; CHECK-NEXT: // kill: def $s1 killed $s1 killed $q1
446+
; CHECK-NEXT: bl atan2f
447+
; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload
448+
; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
449+
; CHECK-NEXT: mov v0.s[1], v1.s[0]
450+
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
451+
; CHECK-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload
452+
; CHECK-NEXT: mov s0, v0.s[2]
453+
; CHECK-NEXT: mov s1, v1.s[2]
454+
; CHECK-NEXT: bl atan2f
455+
; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload
456+
; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
457+
; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
458+
; CHECK-NEXT: mov v1.s[2], v0.s[0]
459+
; CHECK-NEXT: mov v0.16b, v1.16b
460+
; CHECK-NEXT: add sp, sp, #64
461+
; CHECK-NEXT: ret
462+
%r = call <3 x float> @llvm.atan2.v3f32(<3 x float> %x, <3 x float> %y)
463+
ret <3 x float> %r
464+
}
465+
431466
define <3 x float> @sinh_v3f32(<3 x float> %x) nounwind {
432467
; CHECK-LABEL: sinh_v3f32:
433468
; CHECK: // %bb.0:

llvm/test/Transforms/LoopVectorize/AArch64/veclib-calls-libsystem-darwin.ll

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -579,6 +579,54 @@ for.end:
579579
ret void
580580
}
581581

582+
declare float @llvm.atan2.f32(float, float) nounwind readnone
583+
; Scalar loop over %n floats: loads y[iv], calls @llvm.atan2.f32 with the
; loaded value as BOTH arguments, and stores the result to x[iv]. The expected
; output contains a <4 x float> call to @_simd_atan2_f4.
; NOTE(review): because both operands are the same value, this test would not
; catch swapped arguments in the vector call.
define void @atan2_v4f32_intrinsic(i64 %n, ptr noalias %y, ptr noalias %x) {
584+
; CHECK-LABEL: @atan2_v4f32_intrinsic(
585+
; CHECK: call <4 x float> @_simd_atan2_f4(<4 x float>
586+
; CHECK: ret void
587+
588+
entry:
589+
br label %for.body
590+
591+
for.body:
592+
%iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
593+
%gep.y = getelementptr inbounds float, ptr %y, i64 %iv
594+
%lv = load float, ptr %gep.y, align 4
595+
%call = tail call float @llvm.atan2.f32(float %lv, float %lv)
596+
%gep.x = getelementptr inbounds float, ptr %x, i64 %iv
597+
store float %call, ptr %gep.x, align 4
598+
%iv.next = add i64 %iv, 1
599+
%exitcond = icmp eq i64 %iv.next, %n
600+
br i1 %exitcond, label %for.end, label %for.body
601+
602+
for.end:
603+
ret void
604+
}
605+
606+
declare double @llvm.atan2.f64(double, double) nounwind readnone
607+
; Scalar loop over %n doubles: loads y[iv], calls @llvm.atan2.f64 with the
; loaded value as BOTH arguments, and stores the result to x[iv]. The expected
; output contains a <2 x double> call to @_simd_atan2_d2.
; The double load and store are annotated `align 4`, i.e. less than the
; natural 8-byte alignment of double.
define void @atan2_v2f64_intrinsic(i64 %n, ptr noalias %y, ptr noalias %x) {
608+
; CHECK-LABEL: @atan2_v2f64_intrinsic(
609+
; CHECK: call <2 x double> @_simd_atan2_d2(<2 x double>
610+
; CHECK: ret void
611+
612+
entry:
613+
br label %for.body
614+
615+
for.body:
616+
%iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
617+
%gep.y = getelementptr inbounds double, ptr %y, i64 %iv
618+
%lv = load double, ptr %gep.y, align 4
619+
%call = tail call double @llvm.atan2.f64(double %lv, double %lv)
620+
%gep.x = getelementptr inbounds double, ptr %x, i64 %iv
621+
store double %call, ptr %gep.x, align 4
622+
%iv.next = add i64 %iv, 1
623+
%exitcond = icmp eq i64 %iv.next, %n
624+
br i1 %exitcond, label %for.end, label %for.body
625+
626+
for.end:
627+
ret void
628+
}
629+
582630
declare float @llvm.cosh.f32(float) nounwind readnone
583631
define void @cosh_v4f32_intrinsic(i64 %n, ptr noalias %y, ptr noalias %x) {
584632
; CHECK-LABEL: @cosh_v4f32_intrinsic(

llvm/test/Transforms/LoopVectorize/AArch64/veclib-intrinsic-calls.ll

Lines changed: 74 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --filter "call.*(acos|asin|atan|cos|cosh|exp|log|sin|sinh|pow|ceil|copysign|fabs|floor|fma|m..num|nearbyint|rint|round|sqrt|tan|tanh|trunc)" --version 2
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --filter "call.*(acos|asin|atan|atan2|cos|cosh|exp|log|sin|sinh|pow|ceil|copysign|fabs|floor|fma|m..num|nearbyint|rint|round|sqrt|tan|tanh|trunc)" --version 2
22

33
; RUN: opt -mattr=+neon -vector-library=sleefgnuabi -passes=inject-tli-mappings,loop-vectorize,simplifycfg -force-vector-interleave=1 -S < %s | FileCheck %s --check-prefix=SLEEF-NEON
44
; RUN: opt -mattr=+sve -vector-library=sleefgnuabi -passes=inject-tli-mappings,loop-vectorize,simplifycfg -force-vector-interleave=1 -prefer-predicate-over-epilogue=predicate-dont-vectorize -S < %s | FileCheck %s --check-prefix=SLEEF-SVE
@@ -231,6 +231,79 @@ define void @atan_f32(ptr noalias %in.ptr, ptr %out.ptr) {
231231
ret void
232232
}
233233

234+
declare double @llvm.atan2.f64(double, double)
235+
declare float @llvm.atan2.f32(float, float)
236+
237+
; 1000-iteration loop: loads a double from %in.ptr, calls @llvm.atan2.f64 with
; the loaded value as both arguments, and stores the result to %out.ptr.
; Expected vector calls, one per check prefix below:
;   SLEEF NEON  -> @_ZGVnN2vv_atan2       (<2 x double>)
;   SLEEF SVE   -> @_ZGVsMxvv_atan2       (<vscale x 2 x double>, plus lane mask)
;   ArmPL NEON  -> @armpl_vatan2q_f64     (<2 x double>)
;   ArmPL SVE   -> @armpl_svatan2_f64_x   (<vscale x 2 x double>, plus lane mask)
define void @atan2_f64(ptr noalias %in.ptr, ptr %out.ptr) {
238+
; SLEEF-NEON-LABEL: define void @atan2_f64
239+
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
240+
; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2vv_atan2(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
241+
;
242+
; SLEEF-SVE-LABEL: define void @atan2_f64
243+
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
244+
; SLEEF-SVE: [[TMP13:%.*]] = call <vscale x 2 x double> @_ZGVsMxvv_atan2(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
245+
;
246+
; ARMPL-NEON-LABEL: define void @atan2_f64
247+
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
248+
; ARMPL-NEON: [[TMP3:%.*]] = call <2 x double> @armpl_vatan2q_f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
249+
;
250+
; ARMPL-SVE-LABEL: define void @atan2_f64
251+
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
252+
; ARMPL-SVE: [[TMP13:%.*]] = call <vscale x 2 x double> @armpl_svatan2_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
253+
;
254+
entry:
255+
br label %for.body
256+
257+
for.body:
258+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
259+
%in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
260+
%in = load double, ptr %in.gep, align 8
261+
%call = tail call double @llvm.atan2.f64(double %in, double %in)
262+
%out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
263+
store double %call, ptr %out.gep, align 8
264+
%iv.next = add nuw nsw i64 %iv, 1
265+
%exitcond = icmp eq i64 %iv.next, 1000
266+
br i1 %exitcond, label %for.end, label %for.body
267+
268+
for.end:
269+
ret void
270+
}
271+
272+
; 1000-iteration loop: loads a float from %in.ptr, calls @llvm.atan2.f32 with
; the loaded value as both arguments, and stores the result to %out.ptr.
; Expected vector calls, one per check prefix below:
;   SLEEF NEON  -> @_ZGVnN4vv_atan2f      (<4 x float>)
;   SLEEF SVE   -> @_ZGVsMxvv_atan2f      (<vscale x 4 x float>, plus lane mask)
;   ArmPL NEON  -> @armpl_vatan2q_f32     (<4 x float>)
;   ArmPL SVE   -> @armpl_svatan2_f32_x   (<vscale x 4 x float>, plus lane mask)
; The float load uses `align 4`, matching the store: the GEP strides by 4
; bytes, so an 8-byte alignment claim cannot hold on every iteration.
define void @atan2_f32(ptr noalias %in.ptr, ptr %out.ptr) {
273+
; SLEEF-NEON-LABEL: define void @atan2_f32
274+
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
275+
; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4vv_atan2f(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
276+
;
277+
; SLEEF-SVE-LABEL: define void @atan2_f32
278+
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
279+
; SLEEF-SVE: [[TMP13:%.*]] = call <vscale x 4 x float> @_ZGVsMxvv_atan2f(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
280+
;
281+
; ARMPL-NEON-LABEL: define void @atan2_f32
282+
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
283+
; ARMPL-NEON: [[TMP3:%.*]] = call <4 x float> @armpl_vatan2q_f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
284+
;
285+
; ARMPL-SVE-LABEL: define void @atan2_f32
286+
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
287+
; ARMPL-SVE: [[TMP13:%.*]] = call <vscale x 4 x float> @armpl_svatan2_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
288+
;
289+
entry:
290+
br label %for.body
291+
292+
for.body:
293+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
294+
%in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
295+
%in = load float, ptr %in.gep, align 4
296+
%call = tail call float @llvm.atan2.f32(float %in, float %in)
297+
%out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
298+
store float %call, ptr %out.gep, align 4
299+
%iv.next = add nuw nsw i64 %iv, 1
300+
%exitcond = icmp eq i64 %iv.next, 1000
301+
br i1 %exitcond, label %for.end, label %for.body
302+
303+
for.end:
304+
ret void
305+
}
306+
234307
declare double @llvm.ceil.f64(double)
235308
declare float @llvm.ceil.f32(float)
236309

0 commit comments

Comments
 (0)