Skip to content

Commit 4e45379

Browse files
committed
[CodeGen] Support SLPVectorizer cases of tan across all backends
Add a default f16 type promotion
1 parent: f2d215f · commit: 4e45379

File tree

10 files changed: 235 additions and 19 deletions.

llvm/include/llvm/Analysis/TargetLibraryInfo.h

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -415,10 +415,12 @@ class TargetLibraryInfo {
415415
return false;
416416
switch (F) {
417417
default: break;
418+
// clang-format off
418419
case LibFunc_copysign: case LibFunc_copysignf: case LibFunc_copysignl:
419420
case LibFunc_fabs: case LibFunc_fabsf: case LibFunc_fabsl:
420421
case LibFunc_sin: case LibFunc_sinf: case LibFunc_sinl:
421422
case LibFunc_cos: case LibFunc_cosf: case LibFunc_cosl:
423+
case LibFunc_tan: case LibFunc_tanf: case LibFunc_tanl:
422424
case LibFunc_sqrt: case LibFunc_sqrtf: case LibFunc_sqrtl:
423425
case LibFunc_sqrt_finite: case LibFunc_sqrtf_finite:
424426
case LibFunc_sqrtl_finite:
@@ -437,6 +439,7 @@ class TargetLibraryInfo {
437439
case LibFunc_memcmp: case LibFunc_bcmp: case LibFunc_strcmp:
438440
case LibFunc_strcpy: case LibFunc_stpcpy: case LibFunc_strlen:
439441
case LibFunc_strnlen: case LibFunc_memchr: case LibFunc_mempcpy:
442+
// clang-format on
440443
return true;
441444
}
442445
return false;

llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

Lines changed: 7 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -156,14 +156,17 @@ class TargetTransformInfoImplBase {
156156
StringRef Name = F->getName();
157157

158158
// These will all likely lower to a single selection DAG node.
159+
// clang-format off
159160
if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" ||
160-
Name == "fabs" || Name == "fabsf" || Name == "fabsl" || Name == "sin" ||
161+
Name == "fabs" || Name == "fabsf" || Name == "fabsl" ||
161162
Name == "fmin" || Name == "fminf" || Name == "fminl" ||
162163
Name == "fmax" || Name == "fmaxf" || Name == "fmaxl" ||
163-
Name == "sinf" || Name == "sinl" || Name == "cos" || Name == "cosf" ||
164-
Name == "cosl" || Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl")
164+
Name == "sin" || Name == "sinf" || Name == "sinl" ||
165+
Name == "cos" || Name == "cosf" || Name == "cosl" ||
166+
Name == "tan" || Name == "tanf" || Name == "tanl" ||
167+
Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl")
165168
return false;
166-
169+
// clang-format on
167170
// These are all likely to be optimized into something smaller.
168171
if (Name == "pow" || Name == "powf" || Name == "powl" || Name == "exp2" ||
169172
Name == "exp2l" || Name == "exp2f" || Name == "floor" ||

llvm/lib/Analysis/ValueTracking.cpp

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3993,6 +3993,10 @@ Intrinsic::ID llvm::getIntrinsicForCallSite(const CallBase &CB,
39933993
case LibFunc_cosf:
39943994
case LibFunc_cosl:
39953995
return Intrinsic::cos;
3996+
case LibFunc_tan:
3997+
case LibFunc_tanf:
3998+
case LibFunc_tanl:
3999+
return Intrinsic::tan;
39964000
case LibFunc_exp:
39974001
case LibFunc_expf:
39984002
case LibFunc_expl:

llvm/lib/CodeGen/TargetLoweringBase.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -961,7 +961,7 @@ void TargetLoweringBase::initActions() {
961961
setOperationAction(
962962
{ISD::FCOPYSIGN, ISD::SIGN_EXTEND_INREG, ISD::ANY_EXTEND_VECTOR_INREG,
963963
ISD::SIGN_EXTEND_VECTOR_INREG, ISD::ZERO_EXTEND_VECTOR_INREG,
964-
ISD::SPLAT_VECTOR, ISD::LRINT, ISD::LLRINT},
964+
ISD::SPLAT_VECTOR, ISD::LRINT, ISD::LLRINT, ISD::FTAN},
965965
VT, Expand);
966966

967967
// Constrained floating-point operations default to expand.
@@ -1020,6 +1020,7 @@ void TargetLoweringBase::initActions() {
10201020
ISD::FTAN},
10211021
{MVT::f32, MVT::f64, MVT::f128}, Expand);
10221022

1023+
setOperationAction(ISD::FTAN, MVT::f16, Promote);
10231024
// Default ISD::TRAP to expand (which turns it into abort).
10241025
setOperationAction(ISD::TRAP, MVT::Other, Expand);
10251026

llvm/test/CodeGen/RISCV/half-intrinsics.ll

Lines changed: 120 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2862,3 +2862,123 @@ define i1 @isnan_d_fpclass(half %x) {
28622862
%1 = call i1 @llvm.is.fpclass.f16(half %x, i32 3) ; nan
28632863
ret i1 %1
28642864
}
2865+
2866+
declare half @llvm.tan.f16(half)
2867+
2868+
define half @tan_f16(half %a) nounwind {
2869+
; RV32IZFH-LABEL: tan_f16:
2870+
; RV32IZFH: # %bb.0:
2871+
; RV32IZFH-NEXT: addi sp, sp, -16
2872+
; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
2873+
; RV32IZFH-NEXT: fcvt.s.h fa0, fa0
2874+
; RV32IZFH-NEXT: call tanf
2875+
; RV32IZFH-NEXT: fcvt.h.s fa0, fa0
2876+
; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
2877+
; RV32IZFH-NEXT: addi sp, sp, 16
2878+
; RV32IZFH-NEXT: ret
2879+
;
2880+
; RV64IZFH-LABEL: tan_f16:
2881+
; RV64IZFH: # %bb.0:
2882+
; RV64IZFH-NEXT: addi sp, sp, -16
2883+
; RV64IZFH-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
2884+
; RV64IZFH-NEXT: fcvt.s.h fa0, fa0
2885+
; RV64IZFH-NEXT: call tanf
2886+
; RV64IZFH-NEXT: fcvt.h.s fa0, fa0
2887+
; RV64IZFH-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
2888+
; RV64IZFH-NEXT: addi sp, sp, 16
2889+
; RV64IZFH-NEXT: ret
2890+
;
2891+
; RV32IZHINX-LABEL: tan_f16:
2892+
; RV32IZHINX: # %bb.0:
2893+
; RV32IZHINX-NEXT: addi sp, sp, -16
2894+
; RV32IZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
2895+
; RV32IZHINX-NEXT: fcvt.s.h a0, a0
2896+
; RV32IZHINX-NEXT: call tanf
2897+
; RV32IZHINX-NEXT: fcvt.h.s a0, a0
2898+
; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
2899+
; RV32IZHINX-NEXT: addi sp, sp, 16
2900+
; RV32IZHINX-NEXT: ret
2901+
;
2902+
; RV64IZHINX-LABEL: tan_f16:
2903+
; RV64IZHINX: # %bb.0:
2904+
; RV64IZHINX-NEXT: addi sp, sp, -16
2905+
; RV64IZHINX-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
2906+
; RV64IZHINX-NEXT: fcvt.s.h a0, a0
2907+
; RV64IZHINX-NEXT: call tanf
2908+
; RV64IZHINX-NEXT: fcvt.h.s a0, a0
2909+
; RV64IZHINX-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
2910+
; RV64IZHINX-NEXT: addi sp, sp, 16
2911+
; RV64IZHINX-NEXT: ret
2912+
;
2913+
; RV32I-LABEL: tan_f16:
2914+
; RV32I: # %bb.0:
2915+
; RV32I-NEXT: addi sp, sp, -16
2916+
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
2917+
; RV32I-NEXT: slli a0, a0, 16
2918+
; RV32I-NEXT: srli a0, a0, 16
2919+
; RV32I-NEXT: call __extendhfsf2
2920+
; RV32I-NEXT: call tanf
2921+
; RV32I-NEXT: call __truncsfhf2
2922+
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
2923+
; RV32I-NEXT: addi sp, sp, 16
2924+
; RV32I-NEXT: ret
2925+
;
2926+
; RV64I-LABEL: tan_f16:
2927+
; RV64I: # %bb.0:
2928+
; RV64I-NEXT: addi sp, sp, -16
2929+
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
2930+
; RV64I-NEXT: slli a0, a0, 48
2931+
; RV64I-NEXT: srli a0, a0, 48
2932+
; RV64I-NEXT: call __extendhfsf2
2933+
; RV64I-NEXT: call tanf
2934+
; RV64I-NEXT: call __truncsfhf2
2935+
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
2936+
; RV64I-NEXT: addi sp, sp, 16
2937+
; RV64I-NEXT: ret
2938+
;
2939+
; RV32IZFHMIN-LABEL: tan_f16:
2940+
; RV32IZFHMIN: # %bb.0:
2941+
; RV32IZFHMIN-NEXT: addi sp, sp, -16
2942+
; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
2943+
; RV32IZFHMIN-NEXT: fcvt.s.h fa0, fa0
2944+
; RV32IZFHMIN-NEXT: call tanf
2945+
; RV32IZFHMIN-NEXT: fcvt.h.s fa0, fa0
2946+
; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
2947+
; RV32IZFHMIN-NEXT: addi sp, sp, 16
2948+
; RV32IZFHMIN-NEXT: ret
2949+
;
2950+
; RV64IZFHMIN-LABEL: tan_f16:
2951+
; RV64IZFHMIN: # %bb.0:
2952+
; RV64IZFHMIN-NEXT: addi sp, sp, -16
2953+
; RV64IZFHMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
2954+
; RV64IZFHMIN-NEXT: fcvt.s.h fa0, fa0
2955+
; RV64IZFHMIN-NEXT: call tanf
2956+
; RV64IZFHMIN-NEXT: fcvt.h.s fa0, fa0
2957+
; RV64IZFHMIN-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
2958+
; RV64IZFHMIN-NEXT: addi sp, sp, 16
2959+
; RV64IZFHMIN-NEXT: ret
2960+
;
2961+
; RV32IZHINXMIN-LABEL: tan_f16:
2962+
; RV32IZHINXMIN: # %bb.0:
2963+
; RV32IZHINXMIN-NEXT: addi sp, sp, -16
2964+
; RV32IZHINXMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
2965+
; RV32IZHINXMIN-NEXT: fcvt.s.h a0, a0
2966+
; RV32IZHINXMIN-NEXT: call tanf
2967+
; RV32IZHINXMIN-NEXT: fcvt.h.s a0, a0
2968+
; RV32IZHINXMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
2969+
; RV32IZHINXMIN-NEXT: addi sp, sp, 16
2970+
; RV32IZHINXMIN-NEXT: ret
2971+
;
2972+
; RV64IZHINXMIN-LABEL: tan_f16:
2973+
; RV64IZHINXMIN: # %bb.0:
2974+
; RV64IZHINXMIN-NEXT: addi sp, sp, -16
2975+
; RV64IZHINXMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
2976+
; RV64IZHINXMIN-NEXT: fcvt.s.h a0, a0
2977+
; RV64IZHINXMIN-NEXT: call tanf
2978+
; RV64IZHINXMIN-NEXT: fcvt.h.s a0, a0
2979+
; RV64IZHINXMIN-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
2980+
; RV64IZHINXMIN-NEXT: addi sp, sp, 16
2981+
; RV64IZHINXMIN-NEXT: ret
2982+
%1 = call half @llvm.tan.f16(half %a)
2983+
ret half %1
2984+
}

llvm/test/CodeGen/WebAssembly/simd-unsupported.ll

Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -377,6 +377,14 @@ define <4 x float> @cos_v4f32(<4 x float> %x) {
377377
ret <4 x float> %v
378378
}
379379

380+
; CHECK-LABEL: tan_v4f32:
381+
; CHECK: call $push[[L:[0-9]+]]=, tanf
382+
declare <4 x float> @llvm.tan.v4f32(<4 x float>)
383+
define <4 x float> @tan_v4f32(<4 x float> %x) {
384+
%v = call <4 x float> @llvm.tan.v4f32(<4 x float> %x)
385+
ret <4 x float> %v
386+
}
387+
380388
; CHECK-LABEL: powi_v4f32:
381389
; CHECK: call $push[[L:[0-9]+]]=, __powisf2
382390
declare <4 x float> @llvm.powi.v4f32.i32(<4 x float>, i32)
@@ -469,6 +477,14 @@ define <2 x double> @cos_v2f64(<2 x double> %x) {
469477
ret <2 x double> %v
470478
}
471479

480+
; CHECK-LABEL: tan_v2f64:
481+
; CHECK: call $push[[L:[0-9]+]]=, tan
482+
declare <2 x double> @llvm.tan.v2f64(<2 x double>)
483+
define <2 x double> @tan_v2f64(<2 x double> %x) {
484+
%v = call <2 x double> @llvm.tan.v2f64(<2 x double> %x)
485+
ret <2 x double> %v
486+
}
487+
472488
; CHECK-LABEL: powi_v2f64:
473489
; CHECK: call $push[[L:[0-9]+]]=, __powidf2
474490
declare <2 x double> @llvm.powi.v2f64.i32(<2 x double>, i32)

llvm/test/Transforms/LoopVectorize/intrinsic.ll

Lines changed: 54 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -162,6 +162,60 @@ for.end: ; preds = %for.body, %entry
162162

163163
declare double @llvm.cos.f64(double)
164164

165+
define void @tan_f32(i32 %n, ptr %y, ptr %x) {
166+
; CHECK-LABEL: @tan_f32(
167+
; CHECK: llvm.tan.v4f32
168+
; CHECK: ret void
169+
;
170+
entry:
171+
%cmp6 = icmp sgt i32 %n, 0
172+
br i1 %cmp6, label %for.body, label %for.end
173+
174+
for.body: ; preds = %entry, %for.body
175+
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
176+
%arrayidx = getelementptr inbounds float, ptr %y, i64 %indvars.iv
177+
%0 = load float, ptr %arrayidx, align 4
178+
%call = tail call float @llvm.tan.f32(float %0)
179+
%arrayidx2 = getelementptr inbounds float, ptr %x, i64 %indvars.iv
180+
store float %call, ptr %arrayidx2, align 4
181+
%indvars.iv.next = add i64 %indvars.iv, 1
182+
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
183+
%exitcond = icmp eq i32 %lftr.wideiv, %n
184+
br i1 %exitcond, label %for.end, label %for.body
185+
186+
for.end: ; preds = %for.body, %entry
187+
ret void
188+
}
189+
190+
declare float @llvm.tan.f32(float)
191+
192+
define void @tan_f64(i32 %n, ptr %y, ptr %x) {
193+
; CHECK-LABEL: @tan_f64(
194+
; CHECK: llvm.tan.v4f64
195+
; CHECK: ret void
196+
;
197+
entry:
198+
%cmp6 = icmp sgt i32 %n, 0
199+
br i1 %cmp6, label %for.body, label %for.end
200+
201+
for.body: ; preds = %entry, %for.body
202+
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
203+
%arrayidx = getelementptr inbounds double, ptr %y, i64 %indvars.iv
204+
%0 = load double, ptr %arrayidx, align 8
205+
%call = tail call double @llvm.tan.f64(double %0)
206+
%arrayidx2 = getelementptr inbounds double, ptr %x, i64 %indvars.iv
207+
store double %call, ptr %arrayidx2, align 8
208+
%indvars.iv.next = add i64 %indvars.iv, 1
209+
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
210+
%exitcond = icmp eq i32 %lftr.wideiv, %n
211+
br i1 %exitcond, label %for.end, label %for.body
212+
213+
for.end: ; preds = %for.body, %entry
214+
ret void
215+
}
216+
217+
declare double @llvm.tan.f64(double)
218+
165219
define void @exp_f32(i32 %n, ptr %y, ptr %x) {
166220
; CHECK-LABEL: @exp_f32(
167221
; CHECK: llvm.exp.v4f32

llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions-inseltpoison.ll

Lines changed: 5 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -548,13 +548,11 @@ define <4 x float> @tan_4x(ptr %a) {
548548
; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
549549
; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @tanf(float [[VECEXT_1]])
550550
; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
551-
; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
552-
; NOACCELERATE-NEXT: [[TMP3:%.*]] = tail call fast float @tanf(float [[VECEXT_2]])
553-
; NOACCELERATE-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
554-
; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
555-
; NOACCELERATE-NEXT: [[TMP4:%.*]] = tail call fast float @tanf(float [[VECEXT_3]])
556-
; NOACCELERATE-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
557-
; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_3]]
551+
; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
552+
; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.tan.v2f32(<2 x float> [[TMP3]])
553+
; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
554+
; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
555+
; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]]
558556
;
559557
entry:
560558
%0 = load <4 x float>, ptr %a, align 16

llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions.ll

Lines changed: 5 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -548,13 +548,11 @@ define <4 x float> @tan_4x(ptr %a) {
548548
; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
549549
; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @tanf(float [[VECEXT_1]])
550550
; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
551-
; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
552-
; NOACCELERATE-NEXT: [[TMP3:%.*]] = tail call fast float @tanf(float [[VECEXT_2]])
553-
; NOACCELERATE-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
554-
; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
555-
; NOACCELERATE-NEXT: [[TMP4:%.*]] = tail call fast float @tanf(float [[VECEXT_3]])
556-
; NOACCELERATE-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
557-
; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_3]]
551+
; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
552+
; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.tan.v2f32(<2 x float> [[TMP3]])
553+
; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
554+
; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
555+
; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]]
558556
;
559557
entry:
560558
%0 = load <4 x float>, ptr %a, align 16

llvm/test/Transforms/SLPVectorizer/X86/call.ll

Lines changed: 19 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@ target triple = "x86_64-apple-macosx10.8.0"
66

77
declare double @sin(double) nounwind willreturn
88
declare double @cos(double) nounwind willreturn
9+
declare double @tan(double) nounwind willreturn
910
declare double @pow(double, double) nounwind willreturn
1011
declare double @exp2(double) nounwind willreturn
1112
declare double @sqrt(double) nounwind willreturn
@@ -48,6 +49,24 @@ define void @cos_libm(ptr %a, ptr %b) {
4849
ret void
4950
}
5051

52+
define void @tan_libm(ptr %a, ptr %b) {
53+
; CHECK-LABEL: @tan_libm(
54+
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[A:%.*]], align 8
55+
; CHECK-NEXT: [[TMP3:%.*]] = call <2 x double> @llvm.tan.v2f64(<2 x double> [[TMP2]])
56+
; CHECK-NEXT: store <2 x double> [[TMP3]], ptr [[B:%.*]], align 8
57+
; CHECK-NEXT: ret void
58+
;
59+
%a0 = load double, ptr %a, align 8
60+
%idx1 = getelementptr inbounds double, ptr %a, i64 1
61+
%a1 = load double, ptr %idx1, align 8
62+
%tan1 = tail call double @tan(double %a0) nounwind readnone
63+
%tan2 = tail call double @tan(double %a1) nounwind readnone
64+
store double %tan1, ptr %b, align 8
65+
%idx2 = getelementptr inbounds double, ptr %b, i64 1
66+
store double %tan2, ptr %idx2, align 8
67+
ret void
68+
}
69+
5170
define void @pow_libm(ptr %a, ptr %b) {
5271
; CHECK-LABEL: @pow_libm(
5372
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[A:%.*]], align 8

0 commit comments

Comments
 (0)