Skip to content

Commit 5a2bfb2

Browse files
tex3dDanielCChen
authored andcommitted
Fix scalar overload name constructed by ReplaceWithVeclib.cpp (llvm#111095)
ReplaceWithVeclib.cpp would construct overload name using all the arguments in the intrinsic, but overloads should only be constructed from arguments for which isVectorIntrinsicWithOverloadTypeAtArg returns true, including the return type first (index -1). Additionally, - skip when `Intrinsic::not_intrinsic`, otherwise `isVectorIntrinsicWithOverloadTypeAtArg` asserts for some IntrinsicCalls. Unblocks translation for pow and atan2 intrinsics. Fixes llvm#111093
1 parent 4684f5c commit 5a2bfb2

File tree

4 files changed

+31
-23
lines changed

4 files changed

+31
-23
lines changed

llvm/lib/CodeGen/ReplaceWithVeclib.cpp

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -100,20 +100,34 @@ static void replaceWithTLIFunction(IntrinsicInst *II, VFInfo &Info,
100100
static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI,
101101
IntrinsicInst *II) {
102102
assert(II != nullptr && "Intrinsic cannot be null");
103+
Intrinsic::ID IID = II->getIntrinsicID();
104+
Type *RetTy = II->getType();
105+
Type *ScalarRetTy = RetTy->getScalarType();
103106
// At the moment VFABI assumes the return type is always widened unless it is
104107
// a void type.
105-
auto *VTy = dyn_cast<VectorType>(II->getType());
108+
auto *VTy = dyn_cast<VectorType>(RetTy);
106109
ElementCount EC(VTy ? VTy->getElementCount() : ElementCount::getFixed(0));
110+
111+
// OloadTys collects types used in scalar intrinsic overload name.
112+
SmallVector<Type *, 3> OloadTys;
113+
if (!RetTy->isVoidTy() && isVectorIntrinsicWithOverloadTypeAtArg(IID, -1))
114+
OloadTys.push_back(ScalarRetTy);
115+
107116
// Compute the argument types of the corresponding scalar call and check that
108117
// all vector operands match the previously found EC.
109118
SmallVector<Type *, 8> ScalarArgTypes;
110-
Intrinsic::ID IID = II->getIntrinsicID();
111119
for (auto Arg : enumerate(II->args())) {
112120
auto *ArgTy = Arg.value()->getType();
121+
bool IsOloadTy = isVectorIntrinsicWithOverloadTypeAtArg(IID, Arg.index());
113122
if (isVectorIntrinsicWithScalarOpAtArg(IID, Arg.index())) {
114123
ScalarArgTypes.push_back(ArgTy);
124+
if (IsOloadTy)
125+
OloadTys.push_back(ArgTy);
115126
} else if (auto *VectorArgTy = dyn_cast<VectorType>(ArgTy)) {
116-
ScalarArgTypes.push_back(VectorArgTy->getElementType());
127+
auto *ScalarArgTy = VectorArgTy->getElementType();
128+
ScalarArgTypes.push_back(ScalarArgTy);
129+
if (IsOloadTy)
130+
OloadTys.push_back(ScalarArgTy);
117131
// When return type is void, set EC to the first vector argument, and
118132
// disallow vector arguments with different ECs.
119133
if (EC.isZero())
@@ -129,7 +143,7 @@ static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI,
129143
// using scalar argument types.
130144
std::string ScalarName =
131145
Intrinsic::isOverloaded(IID)
132-
? Intrinsic::getName(IID, ScalarArgTypes, II->getModule())
146+
? Intrinsic::getName(IID, OloadTys, II->getModule())
133147
: Intrinsic::getName(IID).str();
134148

135149
// Try to find the mapping for the scalar version of this intrinsic and the
@@ -146,7 +160,6 @@ static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI,
146160

147161
// Replace the call to the intrinsic with a call to the vector library
148162
// function.
149-
Type *ScalarRetTy = II->getType()->getScalarType();
150163
FunctionType *ScalarFTy =
151164
FunctionType::get(ScalarRetTy, ScalarArgTypes, /*isVarArg*/ false);
152165
const std::string MangledName = VD->getVectorFunctionABIVariantString();
@@ -193,6 +206,8 @@ static bool runImpl(const TargetLibraryInfo &TLI, Function &F) {
193206
for (auto &I : instructions(F)) {
194207
// Process only intrinsic calls that return void or a vector.
195208
if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
209+
if (II->getIntrinsicID() == Intrinsic::not_intrinsic)
210+
continue;
196211
if (!II->getType()->isVectorTy() && !II->getType()->isVoidTy())
197212
continue;
198213

llvm/test/CodeGen/AArch64/replace-with-veclib-armpl.ll

Lines changed: 5 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ declare <vscale x 2 x double> @llvm.cos.nxv2f64(<vscale x 2 x double>)
1515
declare <vscale x 4 x float> @llvm.cos.nxv4f32(<vscale x 4 x float>)
1616

1717
;.
18-
; CHECK: @llvm.compiler.used = appending global [60 x ptr] [ptr @armpl_vcosq_f64, ptr @armpl_vcosq_f32, ptr @armpl_svcos_f64_x, ptr @armpl_svcos_f32_x, ptr @armpl_vexpq_f64, ptr @armpl_vexpq_f32, ptr @armpl_svexp_f64_x, ptr @armpl_svexp_f32_x, ptr @armpl_vexp10q_f64, ptr @armpl_vexp10q_f32, ptr @armpl_svexp10_f64_x, ptr @armpl_svexp10_f32_x, ptr @armpl_vexp2q_f64, ptr @armpl_vexp2q_f32, ptr @armpl_svexp2_f64_x, ptr @armpl_svexp2_f32_x, ptr @armpl_vlogq_f64, ptr @armpl_vlogq_f32, ptr @armpl_svlog_f64_x, ptr @armpl_svlog_f32_x, ptr @armpl_vlog10q_f64, ptr @armpl_vlog10q_f32, ptr @armpl_svlog10_f64_x, ptr @armpl_svlog10_f32_x, ptr @armpl_vlog2q_f64, ptr @armpl_vlog2q_f32, ptr @armpl_svlog2_f64_x, ptr @armpl_svlog2_f32_x, ptr @armpl_vsinq_f64, ptr @armpl_vsinq_f32, ptr @armpl_svsin_f64_x, ptr @armpl_svsin_f32_x, ptr @armpl_vtanq_f64, ptr @armpl_vtanq_f32, ptr @armpl_svtan_f64_x, ptr @armpl_svtan_f32_x, ptr @armpl_vacosq_f64, ptr @armpl_vacosq_f32, ptr @armpl_svacos_f64_x, ptr @armpl_svacos_f32_x, ptr @armpl_vasinq_f64, ptr @armpl_vasinq_f32, ptr @armpl_svasin_f64_x, ptr @armpl_svasin_f32_x, ptr @armpl_vatanq_f64, ptr @armpl_vatanq_f32, ptr @armpl_svatan_f64_x, ptr @armpl_svatan_f32_x, ptr @armpl_vcoshq_f64, ptr @armpl_vcoshq_f32, ptr @armpl_svcosh_f64_x, ptr @armpl_svcosh_f32_x, ptr @armpl_vsinhq_f64, ptr @armpl_vsinhq_f32, ptr @armpl_svsinh_f64_x, ptr @armpl_svsinh_f32_x, ptr @armpl_vtanhq_f64, ptr @armpl_vtanhq_f32, ptr @armpl_svtanh_f64_x, ptr @armpl_svtanh_f32_x], section "llvm.metadata"
18+
; CHECK: @llvm.compiler.used = appending global [64 x ptr] [ptr @armpl_vcosq_f64, ptr @armpl_vcosq_f32, ptr @armpl_svcos_f64_x, ptr @armpl_svcos_f32_x, ptr @armpl_vexpq_f64, ptr @armpl_vexpq_f32, ptr @armpl_svexp_f64_x, ptr @armpl_svexp_f32_x, ptr @armpl_vexp10q_f64, ptr @armpl_vexp10q_f32, ptr @armpl_svexp10_f64_x, ptr @armpl_svexp10_f32_x, ptr @armpl_vexp2q_f64, ptr @armpl_vexp2q_f32, ptr @armpl_svexp2_f64_x, ptr @armpl_svexp2_f32_x, ptr @armpl_vlogq_f64, ptr @armpl_vlogq_f32, ptr @armpl_svlog_f64_x, ptr @armpl_svlog_f32_x, ptr @armpl_vlog10q_f64, ptr @armpl_vlog10q_f32, ptr @armpl_svlog10_f64_x, ptr @armpl_svlog10_f32_x, ptr @armpl_vlog2q_f64, ptr @armpl_vlog2q_f32, ptr @armpl_svlog2_f64_x, ptr @armpl_svlog2_f32_x, ptr @armpl_vpowq_f64, ptr @armpl_vpowq_f32, ptr @armpl_svpow_f64_x, ptr @armpl_svpow_f32_x, ptr @armpl_vsinq_f64, ptr @armpl_vsinq_f32, ptr @armpl_svsin_f64_x, ptr @armpl_svsin_f32_x, ptr @armpl_vtanq_f64, ptr @armpl_vtanq_f32, ptr @armpl_svtan_f64_x, ptr @armpl_svtan_f32_x, ptr @armpl_vacosq_f64, ptr @armpl_vacosq_f32, ptr @armpl_svacos_f64_x, ptr @armpl_svacos_f32_x, ptr @armpl_vasinq_f64, ptr @armpl_vasinq_f32, ptr @armpl_svasin_f64_x, ptr @armpl_svasin_f32_x, ptr @armpl_vatanq_f64, ptr @armpl_vatanq_f32, ptr @armpl_svatan_f64_x, ptr @armpl_svatan_f32_x, ptr @armpl_vcoshq_f64, ptr @armpl_vcoshq_f32, ptr @armpl_svcosh_f64_x, ptr @armpl_svcosh_f32_x, ptr @armpl_vsinhq_f64, ptr @armpl_vsinhq_f32, ptr @armpl_svsinh_f64_x, ptr @armpl_svsinh_f32_x, ptr @armpl_vtanhq_f64, ptr @armpl_vtanhq_f32, ptr @armpl_svtanh_f64_x, ptr @armpl_svtanh_f32_x], section "llvm.metadata"
1919

2020
;.
2121
define <2 x double> @llvm_cos_f64(<2 x double> %in) {
@@ -333,17 +333,10 @@ declare <4 x float> @llvm.pow.v4f32(<4 x float>, <4 x float>)
333333
declare <vscale x 2 x double> @llvm.pow.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
334334
declare <vscale x 4 x float> @llvm.pow.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
335335

336-
;
337-
; There is a bug in the replace-with-veclib pass, and for intrinsics which take
338-
; more than one arguments, but has just one overloaded type, it incorrectly
339-
; reconstructs the scalar name, for pow specifically it is searching for:
340-
; llvm.pow.f64.f64 and llvm.pow.f32.f32
341-
;
342-
343336
define <2 x double> @llvm_pow_f64(<2 x double> %in, <2 x double> %power) {
344337
; CHECK-LABEL: define <2 x double> @llvm_pow_f64
345338
; CHECK-SAME: (<2 x double> [[IN:%.*]], <2 x double> [[POWER:%.*]]) {
346-
; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @llvm.pow.v2f64(<2 x double> [[IN]], <2 x double> [[POWER]])
339+
; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @armpl_vpowq_f64(<2 x double> [[IN]], <2 x double> [[POWER]])
347340
; CHECK-NEXT: ret <2 x double> [[TMP1]]
348341
;
349342
%1 = call fast <2 x double> @llvm.pow.v2f64(<2 x double> %in, <2 x double> %power)
@@ -353,7 +346,7 @@ define <2 x double> @llvm_pow_f64(<2 x double> %in, <2 x double> %power) {
353346
define <4 x float> @llvm_pow_f32(<4 x float> %in, <4 x float> %power) {
354347
; CHECK-LABEL: define <4 x float> @llvm_pow_f32
355348
; CHECK-SAME: (<4 x float> [[IN:%.*]], <4 x float> [[POWER:%.*]]) {
356-
; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.pow.v4f32(<4 x float> [[IN]], <4 x float> [[POWER]])
349+
; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @armpl_vpowq_f32(<4 x float> [[IN]], <4 x float> [[POWER]])
357350
; CHECK-NEXT: ret <4 x float> [[TMP1]]
358351
;
359352
%1 = call fast <4 x float> @llvm.pow.v4f32(<4 x float> %in, <4 x float> %power)
@@ -363,7 +356,7 @@ define <4 x float> @llvm_pow_f32(<4 x float> %in, <4 x float> %power) {
363356
define <vscale x 2 x double> @llvm_pow_vscale_f64(<vscale x 2 x double> %in, <vscale x 2 x double> %power) #0 {
364357
; CHECK-LABEL: define <vscale x 2 x double> @llvm_pow_vscale_f64
365358
; CHECK-SAME: (<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x double> [[POWER:%.*]]) #[[ATTR1]] {
366-
; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.pow.nxv2f64(<vscale x 2 x double> [[IN]], <vscale x 2 x double> [[POWER]])
359+
; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svpow_f64_x(<vscale x 2 x double> [[IN]], <vscale x 2 x double> [[POWER]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
367360
; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
368361
;
369362
%1 = call fast <vscale x 2 x double> @llvm.pow.nxv2f64(<vscale x 2 x double> %in, <vscale x 2 x double> %power)
@@ -373,7 +366,7 @@ define <vscale x 2 x double> @llvm_pow_vscale_f64(<vscale x 2 x double> %in, <vs
373366
define <vscale x 4 x float> @llvm_pow_vscale_f32(<vscale x 4 x float> %in, <vscale x 4 x float> %power) #0 {
374367
; CHECK-LABEL: define <vscale x 4 x float> @llvm_pow_vscale_f32
375368
; CHECK-SAME: (<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x float> [[POWER:%.*]]) #[[ATTR1]] {
376-
; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @llvm.pow.nxv4f32(<vscale x 4 x float> [[IN]], <vscale x 4 x float> [[POWER]])
369+
; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svpow_f32_x(<vscale x 4 x float> [[IN]], <vscale x 4 x float> [[POWER]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
377370
; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
378371
;
379372
%1 = call fast <vscale x 4 x float> @llvm.pow.nxv4f32(<vscale x 4 x float> %in, <vscale x 4 x float> %power)

llvm/test/CodeGen/AArch64/replace-with-veclib-sleef-scalable.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
target triple = "aarch64-unknown-linux-gnu"
55

66
;.
7-
; CHECK: @llvm.compiler.used = appending global [30 x ptr] [ptr @_ZGVsMxv_cos, ptr @_ZGVsMxv_cosf, ptr @_ZGVsMxv_exp, ptr @_ZGVsMxv_expf, ptr @_ZGVsMxv_exp10, ptr @_ZGVsMxv_exp10f, ptr @_ZGVsMxv_exp2, ptr @_ZGVsMxv_exp2f, ptr @_ZGVsMxv_log, ptr @_ZGVsMxv_logf, ptr @_ZGVsMxv_log10, ptr @_ZGVsMxv_log10f, ptr @_ZGVsMxv_log2, ptr @_ZGVsMxv_log2f, ptr @_ZGVsMxv_sin, ptr @_ZGVsMxv_sinf, ptr @_ZGVsMxv_tan, ptr @_ZGVsMxv_tanf, ptr @_ZGVsMxv_acos, ptr @_ZGVsMxv_acosf, ptr @_ZGVsMxv_asin, ptr @_ZGVsMxv_asinf, ptr @_ZGVsMxv_atan, ptr @_ZGVsMxv_atanf, ptr @_ZGVsMxv_cosh, ptr @_ZGVsMxv_coshf, ptr @_ZGVsMxv_sinh, ptr @_ZGVsMxv_sinhf, ptr @_ZGVsMxv_tanh, ptr @_ZGVsMxv_tanhf], section "llvm.metadata"
7+
; CHECK: @llvm.compiler.used = appending global [32 x ptr] [ptr @_ZGVsMxv_cos, ptr @_ZGVsMxv_cosf, ptr @_ZGVsMxv_exp, ptr @_ZGVsMxv_expf, ptr @_ZGVsMxv_exp10, ptr @_ZGVsMxv_exp10f, ptr @_ZGVsMxv_exp2, ptr @_ZGVsMxv_exp2f, ptr @_ZGVsMxv_log, ptr @_ZGVsMxv_logf, ptr @_ZGVsMxv_log10, ptr @_ZGVsMxv_log10f, ptr @_ZGVsMxv_log2, ptr @_ZGVsMxv_log2f, ptr @_ZGVsMxvv_pow, ptr @_ZGVsMxvv_powf, ptr @_ZGVsMxv_sin, ptr @_ZGVsMxv_sinf, ptr @_ZGVsMxv_tan, ptr @_ZGVsMxv_tanf, ptr @_ZGVsMxv_acos, ptr @_ZGVsMxv_acosf, ptr @_ZGVsMxv_asin, ptr @_ZGVsMxv_asinf, ptr @_ZGVsMxv_atan, ptr @_ZGVsMxv_atanf, ptr @_ZGVsMxv_cosh, ptr @_ZGVsMxv_coshf, ptr @_ZGVsMxv_sinh, ptr @_ZGVsMxv_sinhf, ptr @_ZGVsMxv_tanh, ptr @_ZGVsMxv_tanhf], section "llvm.metadata"
88
;.
99
define <vscale x 2 x double> @llvm_ceil_vscale_f64(<vscale x 2 x double> %in) {
1010
; CHECK-LABEL: @llvm_ceil_vscale_f64(
@@ -278,7 +278,7 @@ define <vscale x 4 x float> @llvm_nearbyint_vscale_f32(<vscale x 4 x float> %in)
278278

279279
define <vscale x 2 x double> @llvm_pow_vscale_f64(<vscale x 2 x double> %in, <vscale x 2 x double> %pow) {
280280
; CHECK-LABEL: @llvm_pow_vscale_f64(
281-
; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.pow.nxv2f64(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x double> [[POW:%.*]])
281+
; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxvv_pow(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x double> [[POW:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
282282
; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
283283
;
284284
%1 = call fast <vscale x 2 x double> @llvm.pow.nxv2f64(<vscale x 2 x double> %in, <vscale x 2 x double> %pow)
@@ -287,7 +287,7 @@ define <vscale x 2 x double> @llvm_pow_vscale_f64(<vscale x 2 x double> %in, <vs
287287

288288
define <vscale x 4 x float> @llvm_pow_vscale_f32(<vscale x 4 x float> %in, <vscale x 4 x float> %pow) {
289289
; CHECK-LABEL: @llvm_pow_vscale_f32(
290-
; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @llvm.pow.nxv4f32(<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x float> [[POW:%.*]])
290+
; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxvv_powf(<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x float> [[POW:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
291291
; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
292292
;
293293
%1 = call fast <vscale x 4 x float> @llvm.pow.nxv4f32(<vscale x 4 x float> %in, <vscale x 4 x float> %pow)

llvm/test/CodeGen/AArch64/replace-with-veclib-sleef.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
target triple = "aarch64-unknown-linux-gnu"
55

66
;.
7-
; CHECK: @llvm.compiler.used = appending global [30 x ptr] [ptr @_ZGVnN2v_cos, ptr @_ZGVnN4v_cosf, ptr @_ZGVnN2v_exp, ptr @_ZGVnN4v_expf, ptr @_ZGVnN2v_exp10, ptr @_ZGVnN4v_exp10f, ptr @_ZGVnN2v_exp2, ptr @_ZGVnN4v_exp2f, ptr @_ZGVnN2v_log, ptr @_ZGVnN4v_logf, ptr @_ZGVnN2v_log10, ptr @_ZGVnN4v_log10f, ptr @_ZGVnN2v_log2, ptr @_ZGVnN4v_log2f, ptr @_ZGVnN2v_sin, ptr @_ZGVnN4v_sinf, ptr @_ZGVnN2v_tan, ptr @_ZGVnN4v_tanf, ptr @_ZGVnN2v_acos, ptr @_ZGVnN4v_acosf, ptr @_ZGVnN2v_asin, ptr @_ZGVnN4v_asinf, ptr @_ZGVnN2v_atan, ptr @_ZGVnN4v_atanf, ptr @_ZGVnN2v_cosh, ptr @_ZGVnN4v_coshf, ptr @_ZGVnN2v_sinh, ptr @_ZGVnN4v_sinhf, ptr @_ZGVnN2v_tanh, ptr @_ZGVnN4v_tanhf], section "llvm.metadata"
7+
; CHECK: @llvm.compiler.used = appending global [32 x ptr] [ptr @_ZGVnN2v_cos, ptr @_ZGVnN4v_cosf, ptr @_ZGVnN2v_exp, ptr @_ZGVnN4v_expf, ptr @_ZGVnN2v_exp10, ptr @_ZGVnN4v_exp10f, ptr @_ZGVnN2v_exp2, ptr @_ZGVnN4v_exp2f, ptr @_ZGVnN2v_log, ptr @_ZGVnN4v_logf, ptr @_ZGVnN2v_log10, ptr @_ZGVnN4v_log10f, ptr @_ZGVnN2v_log2, ptr @_ZGVnN4v_log2f, ptr @_ZGVnN2vv_pow, ptr @_ZGVnN4vv_powf, ptr @_ZGVnN2v_sin, ptr @_ZGVnN4v_sinf, ptr @_ZGVnN2v_tan, ptr @_ZGVnN4v_tanf, ptr @_ZGVnN2v_acos, ptr @_ZGVnN4v_acosf, ptr @_ZGVnN2v_asin, ptr @_ZGVnN4v_asinf, ptr @_ZGVnN2v_atan, ptr @_ZGVnN4v_atanf, ptr @_ZGVnN2v_cosh, ptr @_ZGVnN4v_coshf, ptr @_ZGVnN2v_sinh, ptr @_ZGVnN4v_sinhf, ptr @_ZGVnN2v_tanh, ptr @_ZGVnN4v_tanhf], section "llvm.metadata"
88
;.
99
define <2 x double> @llvm_ceil_f64(<2 x double> %in) {
1010
; CHECK-LABEL: @llvm_ceil_f64(
@@ -278,7 +278,7 @@ define <4 x float> @llvm_nearbyint_f32(<4 x float> %in) {
278278

279279
define <2 x double> @llvm_pow_f64(<2 x double> %in, <2 x double> %pow) {
280280
; CHECK-LABEL: @llvm_pow_f64(
281-
; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @llvm.pow.v2f64(<2 x double> [[IN:%.*]], <2 x double> [[POW:%.*]])
281+
; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2vv_pow(<2 x double> [[IN:%.*]], <2 x double> [[POW:%.*]])
282282
; CHECK-NEXT: ret <2 x double> [[TMP1]]
283283
;
284284
%1 = call fast <2 x double> @llvm.pow.v2f64(<2 x double> %in, <2 x double> %pow)
@@ -287,7 +287,7 @@ define <2 x double> @llvm_pow_f64(<2 x double> %in, <2 x double> %pow) {
287287

288288
define <4 x float> @llvm_pow_f32(<4 x float> %in, <4 x float> %pow) {
289289
; CHECK-LABEL: @llvm_pow_f32(
290-
; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.pow.v4f32(<4 x float> [[IN:%.*]], <4 x float> [[POW:%.*]])
290+
; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4vv_powf(<4 x float> [[IN:%.*]], <4 x float> [[POW:%.*]])
291291
; CHECK-NEXT: ret <4 x float> [[TMP1]]
292292
;
293293
%1 = call fast <4 x float> @llvm.pow.v4f32(<4 x float> %in, <4 x float> %pow)

0 commit comments

Comments
 (0)