diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 3c178aa789970..64c61dd8c0558 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -15760,7 +15760,8 @@ Syntax: """"""" This is an overloaded intrinsic. You can use ``llvm.lrint`` on any -floating-point type. Not all targets support all types however. +floating-point type or vector of floating-point type. Not all targets +support all types however. :: @@ -15804,7 +15805,8 @@ Syntax: """"""" This is an overloaded intrinsic. You can use ``llvm.llrint`` on any -floating-point type. Not all targets support all types however. +floating-point type or vector of floating-point type. Not all targets +support all types however. :: diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index 3dd16dafe3c42..eb7511770619f 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -1847,6 +1847,12 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { case Intrinsic::rint: ISD = ISD::FRINT; break; + case Intrinsic::lrint: + ISD = ISD::LRINT; + break; + case Intrinsic::llrint: + ISD = ISD::LLRINT; + break; case Intrinsic::round: ISD = ISD::FROUND; break; diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 2dfdddad3cc38..1add486255b89 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -505,6 +505,7 @@ namespace { SDValue visitUINT_TO_FP(SDNode *N); SDValue visitFP_TO_SINT(SDNode *N); SDValue visitFP_TO_UINT(SDNode *N); + SDValue visitXRINT(SDNode *N); SDValue visitFP_ROUND(SDNode *N); SDValue visitFP_EXTEND(SDNode *N); SDValue visitFNEG(SDNode *N); @@ -1911,6 +1912,7 @@ void DAGCombiner::Run(CombineLevel AtLevel) { } SDValue DAGCombiner::visit(SDNode *N) { + // clang-format off switch (N->getOpcode()) { default: break; case ISD::TokenFactor: return visitTokenFactor(N); @@ -2011,6 +2013,8 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::UINT_TO_FP: return visitUINT_TO_FP(N); case ISD::FP_TO_SINT: return visitFP_TO_SINT(N); case ISD::FP_TO_UINT: return visitFP_TO_UINT(N); + case ISD::LRINT: + case ISD::LLRINT: return visitXRINT(N); case ISD::FP_ROUND: return visitFP_ROUND(N); case ISD::FP_EXTEND: return visitFP_EXTEND(N); case ISD::FNEG: return visitFNEG(N); @@ -2065,6 +2069,7 @@ SDValue DAGCombiner::visit(SDNode *N) { #include "llvm/IR/VPIntrinsics.def" return visitVPOp(N); } + // clang-format on return SDValue(); } @@ -17480,6 +17485,21 @@ SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) { return FoldIntToFPToInt(N, DAG); } +SDValue DAGCombiner::visitXRINT(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + // fold (lrint|llrint undef) -> undef + if (N0.isUndef()) + return DAG.getUNDEF(VT); + + // fold (lrint|llrint c1fp) -> c1 + if (DAG.isConstantFPBuildVectorOrConstantFP(N0)) + return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0); + + return SDValue(); +} + SDValue DAGCombiner::visitFP_ROUND(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 95f1812178035..1c429546a8a73 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -2198,6 +2198,7 @@ bool DAGTypeLegalizer::PromoteFloatOperand(SDNode *N, unsigned OpNo) { // to use the promoted float operand. 
Nodes that produce at least one // promotion-requiring floating point result have their operands legalized as // a part of PromoteFloatResult. + // clang-format off switch (N->getOpcode()) { default: #ifndef NDEBUG @@ -2209,7 +2210,9 @@ bool DAGTypeLegalizer::PromoteFloatOperand(SDNode *N, unsigned OpNo) { case ISD::BITCAST: R = PromoteFloatOp_BITCAST(N, OpNo); break; case ISD::FCOPYSIGN: R = PromoteFloatOp_FCOPYSIGN(N, OpNo); break; case ISD::FP_TO_SINT: - case ISD::FP_TO_UINT: R = PromoteFloatOp_FP_TO_XINT(N, OpNo); break; + case ISD::FP_TO_UINT: + case ISD::LRINT: + case ISD::LLRINT: R = PromoteFloatOp_UnaryOp(N, OpNo); break; case ISD::FP_TO_SINT_SAT: case ISD::FP_TO_UINT_SAT: R = PromoteFloatOp_FP_TO_XINT_SAT(N, OpNo); break; @@ -2218,6 +2221,7 @@ bool DAGTypeLegalizer::PromoteFloatOperand(SDNode *N, unsigned OpNo) { case ISD::SETCC: R = PromoteFloatOp_SETCC(N, OpNo); break; case ISD::STORE: R = PromoteFloatOp_STORE(N, OpNo); break; } + // clang-format on if (R.getNode()) ReplaceValueWith(SDValue(N, 0), R); @@ -2251,7 +2255,7 @@ SDValue DAGTypeLegalizer::PromoteFloatOp_FCOPYSIGN(SDNode *N, unsigned OpNo) { } // Convert the promoted float value to the desired integer type -SDValue DAGTypeLegalizer::PromoteFloatOp_FP_TO_XINT(SDNode *N, unsigned OpNo) { +SDValue DAGTypeLegalizer::PromoteFloatOp_UnaryOp(SDNode *N, unsigned OpNo) { SDValue Op = GetPromotedFloat(N->getOperand(0)); return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), Op); } diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index c802604a3470e..33726267750ce 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -711,7 +711,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue PromoteFloatOp_BITCAST(SDNode *N, unsigned OpNo); SDValue PromoteFloatOp_FCOPYSIGN(SDNode *N, unsigned OpNo); SDValue PromoteFloatOp_FP_EXTEND(SDNode *N, unsigned OpNo); - SDValue PromoteFloatOp_FP_TO_XINT(SDNode *N, unsigned OpNo); + SDValue PromoteFloatOp_UnaryOp(SDNode *N, unsigned OpNo); SDValue PromoteFloatOp_FP_TO_XINT_SAT(SDNode *N, unsigned OpNo); SDValue PromoteFloatOp_STORE(SDNode *N, unsigned OpNo); SDValue PromoteFloatOp_SELECT_CC(SDNode *N, unsigned OpNo); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index ddc3b94e9c29a..b3f1e5d5a8182 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -402,6 +402,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::FCEIL: case ISD::FTRUNC: case ISD::FRINT: + case ISD::LRINT: + case ISD::LLRINT: case ISD::FNEARBYINT: case ISD::FROUND: case ISD::FROUNDEVEN: diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 1bb6fbbf064b9..2c5343c3c4b16 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -101,6 +101,8 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: case ISD::FRINT: + case ISD::LRINT: + case ISD::LLRINT: case ISD::FROUND: case ISD::FROUNDEVEN: case ISD::FSIN: @@ -681,6 +683,8 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) { case ISD::FP_TO_UINT: case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: + case ISD::LRINT: + case ISD::LLRINT: Res = ScalarizeVecOp_UnaryOp(N); break; 
case ISD::STRICT_SINT_TO_FP: @@ -1097,6 +1101,8 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::VP_FP_TO_UINT: case ISD::FRINT: case ISD::VP_FRINT: + case ISD::LRINT: + case ISD::LLRINT: case ISD::FROUND: case ISD::VP_FROUND: case ISD::FROUNDEVEN: @@ -2974,6 +2980,8 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { case ISD::ZERO_EXTEND: case ISD::ANY_EXTEND: case ISD::FTRUNC: + case ISD::LRINT: + case ISD::LLRINT: Res = SplitVecOp_UnaryOp(N); break; case ISD::FLDEXP: @@ -4209,6 +4217,8 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::FLOG2: case ISD::FNEARBYINT: case ISD::FRINT: + case ISD::LRINT: + case ISD::LLRINT: case ISD::FROUND: case ISD::FROUNDEVEN: case ISD::FSIN: @@ -5958,7 +5968,11 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) { case ISD::STRICT_FSETCCS: Res = WidenVecOp_STRICT_FSETCC(N); break; case ISD::VSELECT: Res = WidenVecOp_VSELECT(N); break; case ISD::FLDEXP: - case ISD::FCOPYSIGN: Res = WidenVecOp_UnrollVectorOp(N); break; + case ISD::FCOPYSIGN: + case ISD::LRINT: + case ISD::LLRINT: + Res = WidenVecOp_UnrollVectorOp(N); + break; case ISD::IS_FPCLASS: Res = WidenVecOp_IS_FPCLASS(N); break; case ISD::ANY_EXTEND: diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 3c131d9247d72..d243b858f4e2f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -5135,6 +5135,8 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const case ISD::FROUND: case ISD::FROUNDEVEN: case ISD::FRINT: + case ISD::LRINT: + case ISD::LLRINT: case ISD::FNEARBYINT: case ISD::FLDEXP: { if (SNaN) diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index 3e4bff5ddce12..99eadf4bb9d57 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -873,13 +873,13 @@ void TargetLoweringBase::initActions() { // These operations default to expand for vector types. if (VT.isVector()) - setOperationAction({ISD::FCOPYSIGN, ISD::SIGN_EXTEND_INREG, - ISD::ANY_EXTEND_VECTOR_INREG, - ISD::SIGN_EXTEND_VECTOR_INREG, - ISD::ZERO_EXTEND_VECTOR_INREG, ISD::SPLAT_VECTOR}, - VT, Expand); + setOperationAction( + {ISD::FCOPYSIGN, ISD::SIGN_EXTEND_INREG, ISD::ANY_EXTEND_VECTOR_INREG, + ISD::SIGN_EXTEND_VECTOR_INREG, ISD::ZERO_EXTEND_VECTOR_INREG, + ISD::SPLAT_VECTOR, ISD::LRINT, ISD::LLRINT}, + VT, Expand); - // Constrained floating-point operations default to expand. + // Constrained floating-point operations default to expand. 
#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ setOperationAction(ISD::STRICT_##DAGN, VT, Expand); #include "llvm/IR/ConstrainedOps.def" diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 3c4efd7e359c5..6d7d4b758ad3f 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -5669,10 +5669,28 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) { } break; } - case Intrinsic::lround: - case Intrinsic::llround: case Intrinsic::lrint: case Intrinsic::llrint: { + Type *ValTy = Call.getArgOperand(0)->getType(); + Type *ResultTy = Call.getType(); + Check( + ValTy->isFPOrFPVectorTy() && ResultTy->isIntOrIntVectorTy(), + "llvm.lrint, llvm.llrint: argument must be floating-point or vector " + "of floating-points, and result must be integer or vector of integers", + &Call); + Check(ValTy->isVectorTy() == ResultTy->isVectorTy(), + "llvm.lrint, llvm.llrint: argument and result disagree on vector use", + &Call); + if (ValTy->isVectorTy()) { + Check(cast(ValTy)->getElementCount() == + cast(ResultTy)->getElementCount(), + "llvm.lrint, llvm.llrint: argument must be same length as result", + &Call); + } + break; + } + case Intrinsic::lround: + case Intrinsic::llround: { Type *ValTy = Call.getArgOperand(0)->getType(); Type *ResultTy = Call.getType(); Check(!ValTy->isVectorTy() && !ResultTy->isVectorTy(), diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index e8f001e491cdc..fcc262e246cb7 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -731,7 +731,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, VT, Custom); setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT, Custom); - + setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom); setOperationAction( {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT}, VT, Legal); @@ -2950,6 +2950,31 @@ lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, DAG.getTargetConstant(FRM, DL, Subtarget.getXLenVT())); } +// Expand vector LRINT and LLRINT by converting to the integer domain. 
+static SDValue lowerVectorXRINT(SDValue Op, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { + MVT VT = Op.getSimpleValueType(); + assert(VT.isVector() && "Unexpected type"); + + SDLoc DL(Op); + SDValue Src = Op.getOperand(0); + MVT ContainerVT = VT; + + if (VT.isFixedLengthVector()) { + ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); + Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget); + } + + auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); + SDValue Truncated = + DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, ContainerVT, Src, Mask, VL); + + if (!VT.isFixedLengthVector()) + return Truncated; + + return convertFromScalableVector(VT, Truncated, DAG, Subtarget); +} + static SDValue getVSlidedown(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL, EVT VT, SDValue Merge, SDValue Op, @@ -5978,6 +6003,9 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, case ISD::FROUND: case ISD::FROUNDEVEN: return lowerFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget); + case ISD::LRINT: + case ISD::LLRINT: + return lowerVectorXRINT(Op, DAG, Subtarget); case ISD::VECREDUCE_ADD: case ISD::VECREDUCE_UMAX: case ISD::VECREDUCE_SMAX: diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index e947d4d1e8acd..09f9b6035aeb0 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -668,6 +668,31 @@ static const CostTblEntry VectorIntrinsicCostTable[]{ {Intrinsic::rint, MVT::nxv2f64, 7}, {Intrinsic::rint, MVT::nxv4f64, 7}, {Intrinsic::rint, MVT::nxv8f64, 7}, + {Intrinsic::lrint, MVT::v2i32, 1}, + {Intrinsic::lrint, MVT::v4i32, 1}, + {Intrinsic::lrint, MVT::v8i32, 1}, + {Intrinsic::lrint, MVT::v16i32, 1}, + {Intrinsic::lrint, MVT::nxv1i32, 1}, + {Intrinsic::lrint, MVT::nxv2i32, 1}, + {Intrinsic::lrint, MVT::nxv4i32, 1}, + {Intrinsic::lrint, MVT::nxv8i32, 1}, + {Intrinsic::lrint, MVT::nxv16i32, 1}, + {Intrinsic::lrint, MVT::v2i64, 1}, + {Intrinsic::lrint, MVT::v4i64, 1}, + {Intrinsic::lrint, MVT::v8i64, 1}, + {Intrinsic::lrint, MVT::v16i64, 1}, + {Intrinsic::lrint, MVT::nxv1i64, 1}, + {Intrinsic::lrint, MVT::nxv2i64, 1}, + {Intrinsic::lrint, MVT::nxv4i64, 1}, + {Intrinsic::lrint, MVT::nxv8i64, 1}, + {Intrinsic::llrint, MVT::v2i64, 1}, + {Intrinsic::llrint, MVT::v4i64, 1}, + {Intrinsic::llrint, MVT::v8i64, 1}, + {Intrinsic::llrint, MVT::v16i64, 1}, + {Intrinsic::llrint, MVT::nxv1i64, 1}, + {Intrinsic::llrint, MVT::nxv2i64, 1}, + {Intrinsic::llrint, MVT::nxv4i64, 1}, + {Intrinsic::llrint, MVT::nxv8i64, 1}, {Intrinsic::nearbyint, MVT::v2f32, 9}, {Intrinsic::nearbyint, MVT::v4f32, 9}, {Intrinsic::nearbyint, MVT::v8f32, 9}, @@ -1051,6 +1076,8 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, case Intrinsic::floor: case Intrinsic::trunc: case Intrinsic::rint: + case Intrinsic::lrint: + case Intrinsic::llrint: case Intrinsic::round: case Intrinsic::roundeven: { // These all use the same code. 
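For illustration, here is a minimal IR sketch (not part of the patch; the function names and the v4f32/v2f64 types are chosen arbitrarily) of the vector overloads that the LangRef change above documents and the new Verifier checks accept. On RISC-V these calls are lowered through lowerVectorXRINT / RISCVISD::VFCVT_X_F_VL when the V extension is available; on other targets they should now be legalized by the generic scalarize/split/widen support added above rather than rejected.

  ; Argument and result must agree on vector-ness and element count,
  ; as enforced by the new llvm.lrint / llvm.llrint verifier checks.
  define <4 x i64> @lrint_example(<4 x float> %x) {
    %r = call <4 x i64> @llvm.lrint.v4i64.v4f32(<4 x float> %x)
    ret <4 x i64> %r
  }
  declare <4 x i64> @llvm.lrint.v4i64.v4f32(<4 x float>)

  define <2 x i64> @llrint_example(<2 x double> %x) {
    %r = call <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double> %x)
    ret <2 x i64> %r
  }
  declare <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double>)

The cost-model and codegen tests that follow exercise exactly these shapes, including scalable-vector variants.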
diff --git a/llvm/test/Analysis/CostModel/RISCV/fround.ll b/llvm/test/Analysis/CostModel/RISCV/fround.ll index 84fc4c454b7f7..71dd64d765128 100644 --- a/llvm/test/Analysis/CostModel/RISCV/fround.ll +++ b/llvm/test/Analysis/CostModel/RISCV/fround.ll @@ -181,6 +181,96 @@ define void @rint() { ret void } +define void @lrint() { +; CHECK-LABEL: 'lrint' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = call i64 @llvm.lrint.i64.f32(float undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x i64> @llvm.lrint.v2i64.v2f32(<2 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x i64> @llvm.lrint.v4i64.v4f32(<4 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x i64> @llvm.lrint.v8i64.v8f32(<8 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = call <16 x i64> @llvm.lrint.v16i64.v16f32(<16 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = call @llvm.lrint.nxv1i64.nxv1f32( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call @llvm.lrint.nxv2i64.nxv2f32( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call @llvm.lrint.nxv4i64.nxv4f32( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = call @llvm.lrint.nxv8i64.nxv8f32( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %10 = call @llvm.lrint.nxv16i64.nxv16f32( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = call i64 @llvm.lrint.i64.f64(double undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i64> @llvm.lrint.v2i64.v2f64(<2 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x i64> @llvm.lrint.v4i64.v4f64(<4 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x i64> @llvm.lrint.v8i64.v8f64(<8 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %15 = call <16 x i64> @llvm.lrint.v16i64.v16f64(<16 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = call @llvm.lrint.nxv1i64.nxv1f64( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %17 = call @llvm.lrint.nxv2i64.nxv2f64( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %18 = call @llvm.lrint.nxv4i64.nxv4f64( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %19 = call @llvm.lrint.nxv8i64.nxv8f64( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + call i64 @llvm.lrint.i64.f32(float undef) + call <2 x i64> @llvm.lrint.v2i64.v2f32(<2 x float> undef) + call <4 x i64> @llvm.lrint.v4i64.v4f32(<4 x float> undef) + call <8 x i64> @llvm.lrint.v8i64.v8f32(<8 x float> undef) + call <16 x i64> @llvm.lrint.v16i64.v16f32(<16 x float> undef) + call @llvm.lrint.nvx1i64.nvx1f32( undef) + call @llvm.lrint.nvx2i64.nvx2f32( undef) + call @llvm.lrint.nvx4i64.nvx4f32( undef) + call @llvm.lrint.nvx8i64.nvx8f32( undef) + call @llvm.lrint.nvx16i64.nvx16f32( undef) + call i64 @llvm.lrint.i64.f64(double undef) + call <2 x i64> @llvm.lrint.v2i64.v2f64(<2 x double> undef) + call <4 x i64> @llvm.lrint.v4i64.v4f64(<4 x double> undef) + call <8 x i64> 
@llvm.lrint.v8i64.v8f64(<8 x double> undef) + call <16 x i64> @llvm.lrint.v16i64.v16f64(<16 x double> undef) + call @llvm.lrint.nvx1i64.nvx1f64( undef) + call @llvm.lrint.nvx2i64.nvx2f64( undef) + call @llvm.lrint.nvx4i64.nvx4f64( undef) + call @llvm.lrint.nvx8i64.nvx8f64( undef) + ret void +} + +define void @llrint() { +; CHECK-LABEL: 'llrint' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = call i64 @llvm.llrint.i64.f32(float undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = call <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = call @llvm.llrint.nxv1i64.nxv1f32( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call @llvm.llrint.nxv2i64.nxv2f32( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call @llvm.llrint.nxv4i64.nxv4f32( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = call @llvm.llrint.nxv8i64.nxv8f32( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %10 = call @llvm.llrint.nxv16i64.nxv16f32( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = call i64 @llvm.llrint.i64.f64(double undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %15 = call <16 x i64> @llvm.llrint.v16i64.v16f64(<16 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = call @llvm.llrint.nxv1i64.nxv1f64( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %17 = call @llvm.llrint.nxv2i64.nxv2f64( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %18 = call @llvm.llrint.nxv4i64.nxv4f64( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %19 = call @llvm.llrint.nxv8i64.nxv8f64( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + call i64 @llvm.llrint.i64.f32(float undef) + call <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float> undef) + call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> undef) + call <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float> undef) + call <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float> undef) + call @llvm.llrint.nvx1i64.nvx1f32( undef) + call @llvm.llrint.nvx2i64.nvx2f32( undef) + call @llvm.llrint.nvx4i64.nvx4f32( undef) + call @llvm.llrint.nvx8i64.nvx8f32( undef) + call @llvm.llrint.nvx16i64.nvx16f32( undef) + call i64 @llvm.llrint.i64.f64(double undef) + call <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double> undef) + call <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double> 
undef) + call <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double> undef) + call <16 x i64> @llvm.llrint.v16i64.v16f64(<16 x double> undef) + call @llvm.llrint.nvx1i64.nvx1f64( undef) + call @llvm.llrint.nvx2i64.nvx2f64( undef) + call @llvm.llrint.nvx4i64.nvx4f64( undef) + call @llvm.llrint.nvx8i64.nvx8f64( undef) + ret void +} + define void @nearbyint() { ; CHECK-LABEL: 'nearbyint' ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %1 = call float @llvm.nearbyint.f32(float undef) @@ -683,6 +773,46 @@ declare @llvm.rint.nvx2f64() declare @llvm.rint.nvx4f64() declare @llvm.rint.nvx8f64() +declare i64 @llvm.lrint.i64.f32(float) +declare <2 x i64> @llvm.lrint.v2i64.v2f32(<2 x float>) +declare <4 x i64> @llvm.lrint.v4i64.v4f32(<4 x float>) +declare <8 x i64> @llvm.lrint.v8i64.v8f32(<8 x float>) +declare <16 x i64> @llvm.lrint.v16i64.v16f32(<16 x float>) +declare @llvm.lrint.nvx1i64.nvx1f32() +declare @llvm.lrint.nvx2i64.nvx2f32() +declare @llvm.lrint.nvx4i64.nvx4f32() +declare @llvm.lrint.nvx8i64.nvx8f32() +declare @llvm.lrint.nvx16i64.nvx16f32() +declare i64 @llvm.lrint.i64.f64(double) +declare <2 x i64> @llvm.lrint.v2i64.v2f64(<2 x double>) +declare <4 x i64> @llvm.lrint.v4i64.v4f64(<4 x double>) +declare <8 x i64> @llvm.lrint.v8i64.v8f64(<8 x double>) +declare <16 x i64> @llvm.lrint.v16i64.v16f64(<16 x double>) +declare @llvm.lrint.nvx1i64.nvx1f64() +declare @llvm.lrint.nvx2i64.nvx2f64() +declare @llvm.lrint.nvx4i64.nvx4f64() +declare @llvm.lrint.nvx8i64.nvx8f64() + +declare i64 @llvm.llrint.i64.f32(float) +declare <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float>) +declare <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float>) +declare <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float>) +declare <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float>) +declare @llvm.llrint.nvx1i64.nvx1f32() +declare @llvm.llrint.nvx2i64.nvx2f32() +declare @llvm.llrint.nvx4i64.nvx4f32() +declare @llvm.llrint.nvx8i64.nvx8f32() +declare @llvm.llrint.nvx16i64.nvx16f32() +declare i64 @llvm.llrint.i64.f64(double) +declare <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double>) +declare <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double>) +declare <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double>) +declare <16 x i64> @llvm.llrint.v16i64.v16f64(<16 x double>) +declare @llvm.llrint.nvx1i64.nvx1f64() +declare @llvm.llrint.nvx2i64.nvx2f64() +declare @llvm.llrint.nvx4i64.nvx4f64() +declare @llvm.llrint.nvx8i64.nvx8f64() + declare float @llvm.nearbyint.f32(float) declare <2 x float> @llvm.nearbyint.v2f32(<2 x float>) declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>) diff --git a/llvm/test/CodeGen/AArch64/vector-llrint.ll b/llvm/test/CodeGen/AArch64/vector-llrint.ll new file mode 100644 index 0000000000000..beb2b6a134600 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/vector-llrint.ll @@ -0,0 +1,621 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=aarch64 -mattr=+neon | FileCheck %s + +define <1 x i64> @llrint_v1i64_v1f16(<1 x half> %x) { +; CHECK-LABEL: llrint_v1i64_v1f16: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: frintx s0, s0 +; CHECK-NEXT: fcvtzs x8, s0 +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: ret + %a = call <1 x i64> @llvm.llrint.v1i64.v1f16(<1 x half> %x) + ret <1 x i64> %a +} +declare <1 x i64> @llvm.llrint.v1i64.v1f16(<1 x half>) + +define <2 x i64> @llrint_v1i64_v2f16(<2 x half> %x) { +; CHECK-LABEL: llrint_v1i64_v2f16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: mov h1, v0.h[1] +; CHECK-NEXT: fcvt s0, 
h0 +; CHECK-NEXT: fcvt s1, h1 +; CHECK-NEXT: frintx s0, s0 +; CHECK-NEXT: frintx s1, s1 +; CHECK-NEXT: fcvtzs x8, s0 +; CHECK-NEXT: fcvtzs x9, s1 +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: mov v0.d[1], x9 +; CHECK-NEXT: ret + %a = call <2 x i64> @llvm.llrint.v2i64.v2f16(<2 x half> %x) + ret <2 x i64> %a +} +declare <2 x i64> @llvm.llrint.v2i64.v2f16(<2 x half>) + +define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) { +; CHECK-LABEL: llrint_v4i64_v4f16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: mov h1, v0.h[2] +; CHECK-NEXT: mov h2, v0.h[1] +; CHECK-NEXT: mov h3, v0.h[3] +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: fcvt s1, h1 +; CHECK-NEXT: fcvt s2, h2 +; CHECK-NEXT: fcvt s3, h3 +; CHECK-NEXT: frintx s0, s0 +; CHECK-NEXT: frintx s1, s1 +; CHECK-NEXT: frintx s2, s2 +; CHECK-NEXT: frintx s3, s3 +; CHECK-NEXT: fcvtzs x8, s0 +; CHECK-NEXT: fcvtzs x9, s1 +; CHECK-NEXT: fcvtzs x10, s2 +; CHECK-NEXT: fcvtzs x11, s3 +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: fmov d1, x9 +; CHECK-NEXT: mov v0.d[1], x10 +; CHECK-NEXT: mov v1.d[1], x11 +; CHECK-NEXT: ret + %a = call <4 x i64> @llvm.llrint.v4i64.v4f16(<4 x half> %x) + ret <4 x i64> %a +} +declare <4 x i64> @llvm.llrint.v4i64.v4f16(<4 x half>) + +define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) { +; CHECK-LABEL: llrint_v8i64_v8f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECK-NEXT: mov h4, v0.h[2] +; CHECK-NEXT: mov h3, v0.h[1] +; CHECK-NEXT: mov h7, v0.h[3] +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: mov h2, v1.h[2] +; CHECK-NEXT: mov h5, v1.h[1] +; CHECK-NEXT: mov h6, v1.h[3] +; CHECK-NEXT: fcvt s1, h1 +; CHECK-NEXT: fcvt s4, h4 +; CHECK-NEXT: fcvt s3, h3 +; CHECK-NEXT: fcvt s7, h7 +; CHECK-NEXT: frintx s0, s0 +; CHECK-NEXT: fcvt s2, h2 +; CHECK-NEXT: fcvt s5, h5 +; CHECK-NEXT: fcvt s6, h6 +; CHECK-NEXT: frintx s1, s1 +; CHECK-NEXT: frintx s4, s4 +; CHECK-NEXT: frintx s3, s3 +; CHECK-NEXT: frintx s7, s7 +; CHECK-NEXT: fcvtzs x9, s0 +; CHECK-NEXT: frintx s2, s2 +; CHECK-NEXT: frintx s5, s5 +; CHECK-NEXT: frintx s6, s6 +; CHECK-NEXT: fcvtzs x8, s1 +; CHECK-NEXT: fcvtzs x12, s4 +; CHECK-NEXT: fcvtzs x11, s3 +; CHECK-NEXT: fcvtzs x15, s7 +; CHECK-NEXT: fmov d0, x9 +; CHECK-NEXT: fcvtzs x10, s2 +; CHECK-NEXT: fcvtzs x13, s5 +; CHECK-NEXT: fcvtzs x14, s6 +; CHECK-NEXT: fmov d2, x8 +; CHECK-NEXT: fmov d1, x12 +; CHECK-NEXT: mov v0.d[1], x11 +; CHECK-NEXT: fmov d3, x10 +; CHECK-NEXT: mov v2.d[1], x13 +; CHECK-NEXT: mov v1.d[1], x15 +; CHECK-NEXT: mov v3.d[1], x14 +; CHECK-NEXT: ret + %a = call <8 x i64> @llvm.llrint.v8i64.v8f16(<8 x half> %x) + ret <8 x i64> %a +} +declare <8 x i64> @llvm.llrint.v8i64.v8f16(<8 x half>) + +define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) { +; CHECK-LABEL: llrint_v16i64_v16f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ext v2.16b, v0.16b, v0.16b, #8 +; CHECK-NEXT: ext v3.16b, v1.16b, v1.16b, #8 +; CHECK-NEXT: mov h17, v0.h[1] +; CHECK-NEXT: mov h19, v0.h[2] +; CHECK-NEXT: fcvt s18, h0 +; CHECK-NEXT: mov h0, v0.h[3] +; CHECK-NEXT: mov h4, v2.h[1] +; CHECK-NEXT: mov h5, v2.h[2] +; CHECK-NEXT: fcvt s7, h3 +; CHECK-NEXT: fcvt s6, h2 +; CHECK-NEXT: mov h16, v3.h[2] +; CHECK-NEXT: mov h2, v2.h[3] +; CHECK-NEXT: fcvt s17, h17 +; CHECK-NEXT: fcvt s19, h19 +; CHECK-NEXT: frintx s18, s18 +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: fcvt s4, h4 +; CHECK-NEXT: fcvt s5, h5 +; CHECK-NEXT: frintx s7, s7 +; CHECK-NEXT: frintx s6, s6 +; CHECK-NEXT: fcvt s16, h16 +; CHECK-NEXT: fcvt s2, h2 +; CHECK-NEXT: frintx s17, s17 +; CHECK-NEXT: frintx s19, s19 +; CHECK-NEXT: fcvtzs x13, 
s18 +; CHECK-NEXT: frintx s0, s0 +; CHECK-NEXT: frintx s4, s4 +; CHECK-NEXT: frintx s5, s5 +; CHECK-NEXT: fcvtzs x9, s7 +; CHECK-NEXT: mov h7, v1.h[2] +; CHECK-NEXT: fcvtzs x8, s6 +; CHECK-NEXT: mov h6, v1.h[1] +; CHECK-NEXT: frintx s16, s16 +; CHECK-NEXT: fcvtzs x14, s17 +; CHECK-NEXT: fcvtzs x15, s19 +; CHECK-NEXT: fcvtzs x10, s4 +; CHECK-NEXT: mov h4, v3.h[1] +; CHECK-NEXT: fcvtzs x11, s5 +; CHECK-NEXT: mov h5, v1.h[3] +; CHECK-NEXT: mov h3, v3.h[3] +; CHECK-NEXT: fcvt s1, h1 +; CHECK-NEXT: fcvt s7, h7 +; CHECK-NEXT: fcvt s6, h6 +; CHECK-NEXT: fcvtzs x12, s16 +; CHECK-NEXT: frintx s16, s2 +; CHECK-NEXT: fmov d2, x8 +; CHECK-NEXT: fcvt s4, h4 +; CHECK-NEXT: fcvt s3, h3 +; CHECK-NEXT: fcvt s5, h5 +; CHECK-NEXT: frintx s1, s1 +; CHECK-NEXT: frintx s7, s7 +; CHECK-NEXT: frintx s17, s6 +; CHECK-NEXT: fmov d6, x9 +; CHECK-NEXT: mov v2.d[1], x10 +; CHECK-NEXT: frintx s4, s4 +; CHECK-NEXT: frintx s18, s3 +; CHECK-NEXT: frintx s5, s5 +; CHECK-NEXT: fcvtzs x8, s1 +; CHECK-NEXT: fcvtzs x9, s7 +; CHECK-NEXT: fmov d3, x11 +; CHECK-NEXT: fcvtzs x11, s0 +; CHECK-NEXT: fmov d7, x12 +; CHECK-NEXT: fcvtzs x12, s16 +; CHECK-NEXT: fcvtzs x16, s17 +; CHECK-NEXT: fcvtzs x17, s4 +; CHECK-NEXT: fmov d0, x13 +; CHECK-NEXT: fmov d1, x15 +; CHECK-NEXT: fcvtzs x18, s18 +; CHECK-NEXT: fcvtzs x0, s5 +; CHECK-NEXT: fmov d4, x8 +; CHECK-NEXT: fmov d5, x9 +; CHECK-NEXT: mov v0.d[1], x14 +; CHECK-NEXT: mov v1.d[1], x11 +; CHECK-NEXT: mov v3.d[1], x12 +; CHECK-NEXT: mov v4.d[1], x16 +; CHECK-NEXT: mov v6.d[1], x17 +; CHECK-NEXT: mov v7.d[1], x18 +; CHECK-NEXT: mov v5.d[1], x0 +; CHECK-NEXT: ret + %a = call <16 x i64> @llvm.llrint.v16i64.v16f16(<16 x half> %x) + ret <16 x i64> %a +} +declare <16 x i64> @llvm.llrint.v16i64.v16f16(<16 x half>) + +define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) { +; CHECK-LABEL: llrint_v32i64_v32f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ext v4.16b, v1.16b, v1.16b, #8 +; CHECK-NEXT: ext v5.16b, v2.16b, v2.16b, #8 +; CHECK-NEXT: ext v6.16b, v3.16b, v3.16b, #8 +; CHECK-NEXT: ext v7.16b, v0.16b, v0.16b, #8 +; CHECK-NEXT: mov h19, v0.h[1] +; CHECK-NEXT: fcvt s21, h0 +; CHECK-NEXT: mov h23, v1.h[2] +; CHECK-NEXT: fcvt s22, h1 +; CHECK-NEXT: fcvt s26, h2 +; CHECK-NEXT: mov h27, v2.h[1] +; CHECK-NEXT: mov h28, v2.h[2] +; CHECK-NEXT: mov h16, v4.h[2] +; CHECK-NEXT: fcvt s17, h5 +; CHECK-NEXT: mov h18, v5.h[2] +; CHECK-NEXT: mov h20, v6.h[2] +; CHECK-NEXT: fcvt s24, h7 +; CHECK-NEXT: fcvt s25, h6 +; CHECK-NEXT: fcvt s19, h19 +; CHECK-NEXT: frintx s22, s22 +; CHECK-NEXT: fcvt s16, h16 +; CHECK-NEXT: frintx s17, s17 +; CHECK-NEXT: fcvt s18, h18 +; CHECK-NEXT: fcvt s20, h20 +; CHECK-NEXT: frintx s16, s16 +; CHECK-NEXT: fcvtzs x12, s17 +; CHECK-NEXT: frintx s17, s18 +; CHECK-NEXT: frintx s18, s21 +; CHECK-NEXT: fcvt s21, h23 +; CHECK-NEXT: frintx s23, s24 +; CHECK-NEXT: frintx s24, s25 +; CHECK-NEXT: frintx s25, s19 +; CHECK-NEXT: mov h19, v7.h[1] +; CHECK-NEXT: fcvtzs x13, s16 +; CHECK-NEXT: frintx s16, s20 +; CHECK-NEXT: frintx s20, s26 +; CHECK-NEXT: fcvtzs x9, s23 +; CHECK-NEXT: mov h23, v3.h[2] +; CHECK-NEXT: fcvt s26, h27 +; CHECK-NEXT: fcvtzs x15, s24 +; CHECK-NEXT: fcvtzs x10, s25 +; CHECK-NEXT: fcvt s24, h28 +; CHECK-NEXT: mov h25, v3.h[3] +; CHECK-NEXT: fcvtzs x14, s17 +; CHECK-NEXT: frintx s21, s21 +; CHECK-NEXT: fmov d17, x12 +; CHECK-NEXT: fcvtzs x12, s16 +; CHECK-NEXT: fmov d16, x13 +; CHECK-NEXT: fcvtzs x13, s22 +; CHECK-NEXT: fcvt s22, h3 +; CHECK-NEXT: mov h3, v3.h[1] +; CHECK-NEXT: mov h27, v0.h[2] +; CHECK-NEXT: mov h28, v2.h[3] +; CHECK-NEXT: fcvt s23, h23 +; CHECK-NEXT: frintx 
s26, s26 +; CHECK-NEXT: fcvtzs x16, s20 +; CHECK-NEXT: frintx s20, s24 +; CHECK-NEXT: fcvt s24, h25 +; CHECK-NEXT: fcvtzs x11, s18 +; CHECK-NEXT: fmov d18, x14 +; CHECK-NEXT: fcvtzs x14, s21 +; CHECK-NEXT: frintx s22, s22 +; CHECK-NEXT: fcvt s3, h3 +; CHECK-NEXT: fcvt s25, h27 +; CHECK-NEXT: fcvt s27, h28 +; CHECK-NEXT: frintx s23, s23 +; CHECK-NEXT: mov h21, v1.h[3] +; CHECK-NEXT: fmov d2, x15 +; CHECK-NEXT: fcvtzs x15, s26 +; CHECK-NEXT: fmov d26, x13 +; CHECK-NEXT: mov h1, v1.h[1] +; CHECK-NEXT: fcvtzs x13, s20 +; CHECK-NEXT: frintx s20, s24 +; CHECK-NEXT: fmov d24, x14 +; CHECK-NEXT: fcvtzs x14, s22 +; CHECK-NEXT: frintx s3, s3 +; CHECK-NEXT: fmov d22, x16 +; CHECK-NEXT: frintx s27, s27 +; CHECK-NEXT: fcvtzs x16, s23 +; CHECK-NEXT: fcvt s21, h21 +; CHECK-NEXT: frintx s25, s25 +; CHECK-NEXT: fcvt s1, h1 +; CHECK-NEXT: mov h0, v0.h[3] +; CHECK-NEXT: mov h23, v7.h[2] +; CHECK-NEXT: mov v22.d[1], x15 +; CHECK-NEXT: fcvtzs x15, s20 +; CHECK-NEXT: fmov d20, x13 +; CHECK-NEXT: fcvtzs x13, s3 +; CHECK-NEXT: fmov d3, x14 +; CHECK-NEXT: fcvtzs x14, s27 +; CHECK-NEXT: fmov d27, x16 +; CHECK-NEXT: frintx s21, s21 +; CHECK-NEXT: mov h7, v7.h[3] +; CHECK-NEXT: frintx s1, s1 +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: fcvt s23, h23 +; CHECK-NEXT: fcvt s19, h19 +; CHECK-NEXT: mov v27.d[1], x15 +; CHECK-NEXT: fcvtzs x15, s25 +; CHECK-NEXT: mov h25, v6.h[3] +; CHECK-NEXT: mov h6, v6.h[1] +; CHECK-NEXT: mov v3.d[1], x13 +; CHECK-NEXT: fcvtzs x13, s21 +; CHECK-NEXT: mov h21, v5.h[1] +; CHECK-NEXT: mov h5, v5.h[3] +; CHECK-NEXT: mov v20.d[1], x14 +; CHECK-NEXT: fcvtzs x14, s1 +; CHECK-NEXT: mov h1, v4.h[1] +; CHECK-NEXT: frintx s0, s0 +; CHECK-NEXT: fcvt s25, h25 +; CHECK-NEXT: fcvt s7, h7 +; CHECK-NEXT: stp q3, q27, [x8, #192] +; CHECK-NEXT: fcvt s6, h6 +; CHECK-NEXT: mov h3, v4.h[3] +; CHECK-NEXT: stp q22, q20, [x8, #128] +; CHECK-NEXT: fcvt s21, h21 +; CHECK-NEXT: fcvt s5, h5 +; CHECK-NEXT: mov v24.d[1], x13 +; CHECK-NEXT: mov v26.d[1], x14 +; CHECK-NEXT: fcvt s4, h4 +; CHECK-NEXT: frintx s22, s25 +; CHECK-NEXT: fmov d20, x12 +; CHECK-NEXT: fcvt s1, h1 +; CHECK-NEXT: frintx s6, s6 +; CHECK-NEXT: fcvt s3, h3 +; CHECK-NEXT: fcvtzs x12, s0 +; CHECK-NEXT: frintx s5, s5 +; CHECK-NEXT: frintx s21, s21 +; CHECK-NEXT: fmov d0, x11 +; CHECK-NEXT: stp q26, q24, [x8, #64] +; CHECK-NEXT: fmov d24, x15 +; CHECK-NEXT: frintx s4, s4 +; CHECK-NEXT: fcvtzs x11, s22 +; CHECK-NEXT: frintx s22, s23 +; CHECK-NEXT: frintx s1, s1 +; CHECK-NEXT: fcvtzs x13, s6 +; CHECK-NEXT: frintx s3, s3 +; CHECK-NEXT: frintx s6, s7 +; CHECK-NEXT: fcvtzs x14, s5 +; CHECK-NEXT: mov v24.d[1], x12 +; CHECK-NEXT: frintx s5, s19 +; CHECK-NEXT: fcvtzs x12, s21 +; CHECK-NEXT: mov v0.d[1], x10 +; CHECK-NEXT: fcvtzs x10, s4 +; CHECK-NEXT: mov v20.d[1], x11 +; CHECK-NEXT: fcvtzs x11, s22 +; CHECK-NEXT: mov v2.d[1], x13 +; CHECK-NEXT: fcvtzs x15, s3 +; CHECK-NEXT: fcvtzs x13, s1 +; CHECK-NEXT: mov v18.d[1], x14 +; CHECK-NEXT: fcvtzs x14, s6 +; CHECK-NEXT: stp q0, q24, [x8] +; CHECK-NEXT: mov v17.d[1], x12 +; CHECK-NEXT: fcvtzs x12, s5 +; CHECK-NEXT: fmov d0, x10 +; CHECK-NEXT: fmov d1, x11 +; CHECK-NEXT: stp q2, q20, [x8, #224] +; CHECK-NEXT: fmov d2, x9 +; CHECK-NEXT: mov v16.d[1], x15 +; CHECK-NEXT: stp q17, q18, [x8, #160] +; CHECK-NEXT: mov v0.d[1], x13 +; CHECK-NEXT: mov v1.d[1], x14 +; CHECK-NEXT: mov v2.d[1], x12 +; CHECK-NEXT: stp q0, q16, [x8, #96] +; CHECK-NEXT: stp q2, q1, [x8, #32] +; CHECK-NEXT: ret + %a = call <32 x i64> @llvm.llrint.v32i64.v32f16(<32 x half> %x) + ret <32 x i64> %a +} +declare <32 x i64> @llvm.llrint.v32i64.v32f16(<32 x 
half>) + +define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) { +; CHECK-LABEL: llrint_v1i64_v1f32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: frintx s0, s0 +; CHECK-NEXT: fcvtzs x8, s0 +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: ret + %a = call <1 x i64> @llvm.llrint.v1i64.v1f32(<1 x float> %x) + ret <1 x i64> %a +} +declare <1 x i64> @llvm.llrint.v1i64.v1f32(<1 x float>) + +define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) { +; CHECK-LABEL: llrint_v2i64_v2f32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: mov s1, v0.s[1] +; CHECK-NEXT: frintx s0, s0 +; CHECK-NEXT: frintx s1, s1 +; CHECK-NEXT: fcvtzs x8, s0 +; CHECK-NEXT: fcvtzs x9, s1 +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: mov v0.d[1], x9 +; CHECK-NEXT: ret + %a = call <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float> %x) + ret <2 x i64> %a +} +declare <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float>) + +define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) { +; CHECK-LABEL: llrint_v4i64_v4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECK-NEXT: mov s3, v0.s[1] +; CHECK-NEXT: frintx s0, s0 +; CHECK-NEXT: mov s2, v1.s[1] +; CHECK-NEXT: frintx s1, s1 +; CHECK-NEXT: frintx s3, s3 +; CHECK-NEXT: fcvtzs x9, s0 +; CHECK-NEXT: frintx s2, s2 +; CHECK-NEXT: fcvtzs x8, s1 +; CHECK-NEXT: fcvtzs x11, s3 +; CHECK-NEXT: fmov d0, x9 +; CHECK-NEXT: fcvtzs x10, s2 +; CHECK-NEXT: fmov d1, x8 +; CHECK-NEXT: mov v0.d[1], x11 +; CHECK-NEXT: mov v1.d[1], x10 +; CHECK-NEXT: ret + %a = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> %x) + ret <4 x i64> %a +} +declare <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float>) + +define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) { +; CHECK-LABEL: llrint_v8i64_v8f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ext v2.16b, v0.16b, v0.16b, #8 +; CHECK-NEXT: ext v3.16b, v1.16b, v1.16b, #8 +; CHECK-NEXT: mov s4, v0.s[1] +; CHECK-NEXT: mov s7, v1.s[1] +; CHECK-NEXT: frintx s0, s0 +; CHECK-NEXT: frintx s1, s1 +; CHECK-NEXT: mov s5, v2.s[1] +; CHECK-NEXT: mov s6, v3.s[1] +; CHECK-NEXT: frintx s2, s2 +; CHECK-NEXT: frintx s3, s3 +; CHECK-NEXT: frintx s4, s4 +; CHECK-NEXT: frintx s7, s7 +; CHECK-NEXT: fcvtzs x9, s0 +; CHECK-NEXT: fcvtzs x12, s1 +; CHECK-NEXT: frintx s5, s5 +; CHECK-NEXT: frintx s6, s6 +; CHECK-NEXT: fcvtzs x8, s2 +; CHECK-NEXT: fcvtzs x10, s3 +; CHECK-NEXT: fcvtzs x11, s4 +; CHECK-NEXT: fcvtzs x15, s7 +; CHECK-NEXT: fmov d0, x9 +; CHECK-NEXT: fmov d2, x12 +; CHECK-NEXT: fcvtzs x13, s5 +; CHECK-NEXT: fcvtzs x14, s6 +; CHECK-NEXT: fmov d1, x8 +; CHECK-NEXT: fmov d3, x10 +; CHECK-NEXT: mov v0.d[1], x11 +; CHECK-NEXT: mov v2.d[1], x15 +; CHECK-NEXT: mov v1.d[1], x13 +; CHECK-NEXT: mov v3.d[1], x14 +; CHECK-NEXT: ret + %a = call <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float> %x) + ret <8 x i64> %a +} +declare <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float>) + +define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) { +; CHECK-LABEL: llrint_v16i64_v16f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ext v4.16b, v0.16b, v0.16b, #8 +; CHECK-NEXT: ext v5.16b, v1.16b, v1.16b, #8 +; CHECK-NEXT: ext v6.16b, v2.16b, v2.16b, #8 +; CHECK-NEXT: frintx s7, s0 +; CHECK-NEXT: ext v16.16b, v3.16b, v3.16b, #8 +; CHECK-NEXT: mov s0, v0.s[1] +; CHECK-NEXT: frintx s17, s4 +; CHECK-NEXT: mov s4, v4.s[1] +; CHECK-NEXT: mov s18, v5.s[1] +; CHECK-NEXT: frintx s5, s5 +; CHECK-NEXT: frintx s19, s6 +; CHECK-NEXT: fcvtzs x8, s7 +; CHECK-NEXT: frintx s7, s16 +; CHECK-NEXT: mov s6, v6.s[1] +; CHECK-NEXT: mov s16, v16.s[1] +; CHECK-NEXT: frintx s0, s0 +; 
CHECK-NEXT: frintx s4, s4 +; CHECK-NEXT: fcvtzs x9, s17 +; CHECK-NEXT: frintx s17, s1 +; CHECK-NEXT: mov s1, v1.s[1] +; CHECK-NEXT: frintx s18, s18 +; CHECK-NEXT: fcvtzs x10, s5 +; CHECK-NEXT: mov s5, v2.s[1] +; CHECK-NEXT: fcvtzs x11, s19 +; CHECK-NEXT: mov s19, v3.s[1] +; CHECK-NEXT: frintx s2, s2 +; CHECK-NEXT: fcvtzs x12, s7 +; CHECK-NEXT: frintx s6, s6 +; CHECK-NEXT: fcvtzs x13, s4 +; CHECK-NEXT: frintx s4, s3 +; CHECK-NEXT: frintx s16, s16 +; CHECK-NEXT: fcvtzs x14, s18 +; CHECK-NEXT: frintx s18, s1 +; CHECK-NEXT: fcvtzs x15, s17 +; CHECK-NEXT: frintx s20, s5 +; CHECK-NEXT: frintx s17, s19 +; CHECK-NEXT: fmov d1, x9 +; CHECK-NEXT: fcvtzs x9, s2 +; CHECK-NEXT: fmov d5, x11 +; CHECK-NEXT: fmov d3, x10 +; CHECK-NEXT: fcvtzs x11, s4 +; CHECK-NEXT: fcvtzs x10, s0 +; CHECK-NEXT: fmov d7, x12 +; CHECK-NEXT: fcvtzs x12, s18 +; CHECK-NEXT: fcvtzs x17, s6 +; CHECK-NEXT: fcvtzs x18, s16 +; CHECK-NEXT: fcvtzs x16, s20 +; CHECK-NEXT: fcvtzs x0, s17 +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: fmov d2, x15 +; CHECK-NEXT: fmov d4, x9 +; CHECK-NEXT: mov v1.d[1], x13 +; CHECK-NEXT: fmov d6, x11 +; CHECK-NEXT: mov v3.d[1], x14 +; CHECK-NEXT: mov v0.d[1], x10 +; CHECK-NEXT: mov v5.d[1], x17 +; CHECK-NEXT: mov v7.d[1], x18 +; CHECK-NEXT: mov v2.d[1], x12 +; CHECK-NEXT: mov v4.d[1], x16 +; CHECK-NEXT: mov v6.d[1], x0 +; CHECK-NEXT: ret + %a = call <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float> %x) + ret <16 x i64> %a +} +declare <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float>) + +define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) { +; CHECK-LABEL: llrint_v1i64_v1f64: +; CHECK: // %bb.0: +; CHECK-NEXT: frintx d0, d0 +; CHECK-NEXT: fcvtzs x8, d0 +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: ret + %a = call <1 x i64> @llvm.llrint.v1i64.v1f64(<1 x double> %x) + ret <1 x i64> %a +} +declare <1 x i64> @llvm.llrint.v1i64.v1f64(<1 x double>) + +define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x) { +; CHECK-LABEL: llrint_v2i64_v2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov d1, v0.d[1] +; CHECK-NEXT: frintx d0, d0 +; CHECK-NEXT: frintx d1, d1 +; CHECK-NEXT: fcvtzs x8, d0 +; CHECK-NEXT: fcvtzs x9, d1 +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: mov v0.d[1], x9 +; CHECK-NEXT: ret + %a = call <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double> %x) + ret <2 x i64> %a +} +declare <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double>) + +define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) { +; CHECK-LABEL: llrint_v4i64_v4f64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov d2, v0.d[1] +; CHECK-NEXT: mov d3, v1.d[1] +; CHECK-NEXT: frintx d0, d0 +; CHECK-NEXT: frintx d1, d1 +; CHECK-NEXT: frintx d2, d2 +; CHECK-NEXT: frintx d3, d3 +; CHECK-NEXT: fcvtzs x8, d0 +; CHECK-NEXT: fcvtzs x9, d1 +; CHECK-NEXT: fcvtzs x10, d2 +; CHECK-NEXT: fcvtzs x11, d3 +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: fmov d1, x9 +; CHECK-NEXT: mov v0.d[1], x10 +; CHECK-NEXT: mov v1.d[1], x11 +; CHECK-NEXT: ret + %a = call <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double> %x) + ret <4 x i64> %a +} +declare <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double>) + +define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) { +; CHECK-LABEL: llrint_v8i64_v8f64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov d4, v0.d[1] +; CHECK-NEXT: mov d5, v1.d[1] +; CHECK-NEXT: mov d6, v2.d[1] +; CHECK-NEXT: mov d7, v3.d[1] +; CHECK-NEXT: frintx d0, d0 +; CHECK-NEXT: frintx d1, d1 +; CHECK-NEXT: frintx d2, d2 +; CHECK-NEXT: frintx d3, d3 +; CHECK-NEXT: frintx d4, d4 +; CHECK-NEXT: frintx d5, d5 +; CHECK-NEXT: frintx d6, d6 +; CHECK-NEXT: frintx d7, d7 +; CHECK-NEXT: fcvtzs x8, d0 +; CHECK-NEXT: fcvtzs x9, 
d1 +; CHECK-NEXT: fcvtzs x10, d2 +; CHECK-NEXT: fcvtzs x11, d3 +; CHECK-NEXT: fcvtzs x12, d4 +; CHECK-NEXT: fcvtzs x13, d5 +; CHECK-NEXT: fcvtzs x14, d6 +; CHECK-NEXT: fcvtzs x15, d7 +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: fmov d1, x9 +; CHECK-NEXT: fmov d2, x10 +; CHECK-NEXT: fmov d3, x11 +; CHECK-NEXT: mov v0.d[1], x12 +; CHECK-NEXT: mov v1.d[1], x13 +; CHECK-NEXT: mov v2.d[1], x14 +; CHECK-NEXT: mov v3.d[1], x15 +; CHECK-NEXT: ret + %a = call <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double> %x) + ret <8 x i64> %a +} +declare <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double>) diff --git a/llvm/test/CodeGen/AArch64/vector-lrint.ll b/llvm/test/CodeGen/AArch64/vector-lrint.ll new file mode 100644 index 0000000000000..9c46cf69cb0bf --- /dev/null +++ b/llvm/test/CodeGen/AArch64/vector-lrint.ll @@ -0,0 +1,622 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=aarch64 -mattr=+neon | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=aarch64 -mattr=+neon | FileCheck %s + +define <1 x i64> @lrint_v1f16(<1 x half> %x) { +; CHECK-LABEL: lrint_v1f16: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: frintx s0, s0 +; CHECK-NEXT: fcvtzs x8, s0 +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: ret + %a = call <1 x i64> @llvm.lrint.v1i64.v1f16(<1 x half> %x) + ret <1 x i64> %a +} +declare <1 x i64> @llvm.lrint.v1i64.v1f16(<1 x half>) + +define <2 x i64> @lrint_v2f16(<2 x half> %x) { +; CHECK-LABEL: lrint_v2f16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: mov h1, v0.h[1] +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: fcvt s1, h1 +; CHECK-NEXT: frintx s0, s0 +; CHECK-NEXT: frintx s1, s1 +; CHECK-NEXT: fcvtzs x8, s0 +; CHECK-NEXT: fcvtzs x9, s1 +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: mov v0.d[1], x9 +; CHECK-NEXT: ret + %a = call <2 x i64> @llvm.lrint.v2i64.v2f16(<2 x half> %x) + ret <2 x i64> %a +} +declare <2 x i64> @llvm.lrint.v2i64.v2f16(<2 x half>) + +define <4 x i64> @lrint_v4f16(<4 x half> %x) { +; CHECK-LABEL: lrint_v4f16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: mov h1, v0.h[2] +; CHECK-NEXT: mov h2, v0.h[1] +; CHECK-NEXT: mov h3, v0.h[3] +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: fcvt s1, h1 +; CHECK-NEXT: fcvt s2, h2 +; CHECK-NEXT: fcvt s3, h3 +; CHECK-NEXT: frintx s0, s0 +; CHECK-NEXT: frintx s1, s1 +; CHECK-NEXT: frintx s2, s2 +; CHECK-NEXT: frintx s3, s3 +; CHECK-NEXT: fcvtzs x8, s0 +; CHECK-NEXT: fcvtzs x9, s1 +; CHECK-NEXT: fcvtzs x10, s2 +; CHECK-NEXT: fcvtzs x11, s3 +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: fmov d1, x9 +; CHECK-NEXT: mov v0.d[1], x10 +; CHECK-NEXT: mov v1.d[1], x11 +; CHECK-NEXT: ret + %a = call <4 x i64> @llvm.lrint.v4i64.v4f16(<4 x half> %x) + ret <4 x i64> %a +} +declare <4 x i64> @llvm.lrint.v4i64.v4f16(<4 x half>) + +define <8 x i64> @lrint_v8f16(<8 x half> %x) { +; CHECK-LABEL: lrint_v8f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECK-NEXT: mov h4, v0.h[2] +; CHECK-NEXT: mov h3, v0.h[1] +; CHECK-NEXT: mov h7, v0.h[3] +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: mov h2, v1.h[2] +; CHECK-NEXT: mov h5, v1.h[1] +; CHECK-NEXT: mov h6, v1.h[3] +; CHECK-NEXT: fcvt s1, h1 +; CHECK-NEXT: fcvt s4, h4 +; CHECK-NEXT: fcvt s3, h3 +; CHECK-NEXT: fcvt s7, h7 +; CHECK-NEXT: frintx s0, s0 +; CHECK-NEXT: fcvt s2, h2 +; CHECK-NEXT: fcvt s5, h5 +; CHECK-NEXT: fcvt s6, h6 +; CHECK-NEXT: frintx s1, s1 +; CHECK-NEXT: frintx s4, s4 +; CHECK-NEXT: frintx s3, s3 +; CHECK-NEXT: frintx s7, s7 +; 
CHECK-NEXT: fcvtzs x9, s0 +; CHECK-NEXT: frintx s2, s2 +; CHECK-NEXT: frintx s5, s5 +; CHECK-NEXT: frintx s6, s6 +; CHECK-NEXT: fcvtzs x8, s1 +; CHECK-NEXT: fcvtzs x12, s4 +; CHECK-NEXT: fcvtzs x11, s3 +; CHECK-NEXT: fcvtzs x15, s7 +; CHECK-NEXT: fmov d0, x9 +; CHECK-NEXT: fcvtzs x10, s2 +; CHECK-NEXT: fcvtzs x13, s5 +; CHECK-NEXT: fcvtzs x14, s6 +; CHECK-NEXT: fmov d2, x8 +; CHECK-NEXT: fmov d1, x12 +; CHECK-NEXT: mov v0.d[1], x11 +; CHECK-NEXT: fmov d3, x10 +; CHECK-NEXT: mov v2.d[1], x13 +; CHECK-NEXT: mov v1.d[1], x15 +; CHECK-NEXT: mov v3.d[1], x14 +; CHECK-NEXT: ret + %a = call <8 x i64> @llvm.lrint.v8i64.v8f16(<8 x half> %x) + ret <8 x i64> %a +} +declare <8 x i64> @llvm.lrint.v8i64.v8f16(<8 x half>) + +define <16 x i64> @lrint_v16i64_v16f16(<16 x half> %x) { +; CHECK-LABEL: lrint_v16i64_v16f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ext v2.16b, v0.16b, v0.16b, #8 +; CHECK-NEXT: ext v3.16b, v1.16b, v1.16b, #8 +; CHECK-NEXT: mov h17, v0.h[1] +; CHECK-NEXT: mov h19, v0.h[2] +; CHECK-NEXT: fcvt s18, h0 +; CHECK-NEXT: mov h0, v0.h[3] +; CHECK-NEXT: mov h4, v2.h[1] +; CHECK-NEXT: mov h5, v2.h[2] +; CHECK-NEXT: fcvt s7, h3 +; CHECK-NEXT: fcvt s6, h2 +; CHECK-NEXT: mov h16, v3.h[2] +; CHECK-NEXT: mov h2, v2.h[3] +; CHECK-NEXT: fcvt s17, h17 +; CHECK-NEXT: fcvt s19, h19 +; CHECK-NEXT: frintx s18, s18 +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: fcvt s4, h4 +; CHECK-NEXT: fcvt s5, h5 +; CHECK-NEXT: frintx s7, s7 +; CHECK-NEXT: frintx s6, s6 +; CHECK-NEXT: fcvt s16, h16 +; CHECK-NEXT: fcvt s2, h2 +; CHECK-NEXT: frintx s17, s17 +; CHECK-NEXT: frintx s19, s19 +; CHECK-NEXT: fcvtzs x13, s18 +; CHECK-NEXT: frintx s0, s0 +; CHECK-NEXT: frintx s4, s4 +; CHECK-NEXT: frintx s5, s5 +; CHECK-NEXT: fcvtzs x9, s7 +; CHECK-NEXT: mov h7, v1.h[2] +; CHECK-NEXT: fcvtzs x8, s6 +; CHECK-NEXT: mov h6, v1.h[1] +; CHECK-NEXT: frintx s16, s16 +; CHECK-NEXT: fcvtzs x14, s17 +; CHECK-NEXT: fcvtzs x15, s19 +; CHECK-NEXT: fcvtzs x10, s4 +; CHECK-NEXT: mov h4, v3.h[1] +; CHECK-NEXT: fcvtzs x11, s5 +; CHECK-NEXT: mov h5, v1.h[3] +; CHECK-NEXT: mov h3, v3.h[3] +; CHECK-NEXT: fcvt s1, h1 +; CHECK-NEXT: fcvt s7, h7 +; CHECK-NEXT: fcvt s6, h6 +; CHECK-NEXT: fcvtzs x12, s16 +; CHECK-NEXT: frintx s16, s2 +; CHECK-NEXT: fmov d2, x8 +; CHECK-NEXT: fcvt s4, h4 +; CHECK-NEXT: fcvt s3, h3 +; CHECK-NEXT: fcvt s5, h5 +; CHECK-NEXT: frintx s1, s1 +; CHECK-NEXT: frintx s7, s7 +; CHECK-NEXT: frintx s17, s6 +; CHECK-NEXT: fmov d6, x9 +; CHECK-NEXT: mov v2.d[1], x10 +; CHECK-NEXT: frintx s4, s4 +; CHECK-NEXT: frintx s18, s3 +; CHECK-NEXT: frintx s5, s5 +; CHECK-NEXT: fcvtzs x8, s1 +; CHECK-NEXT: fcvtzs x9, s7 +; CHECK-NEXT: fmov d3, x11 +; CHECK-NEXT: fcvtzs x11, s0 +; CHECK-NEXT: fmov d7, x12 +; CHECK-NEXT: fcvtzs x12, s16 +; CHECK-NEXT: fcvtzs x16, s17 +; CHECK-NEXT: fcvtzs x17, s4 +; CHECK-NEXT: fmov d0, x13 +; CHECK-NEXT: fmov d1, x15 +; CHECK-NEXT: fcvtzs x18, s18 +; CHECK-NEXT: fcvtzs x0, s5 +; CHECK-NEXT: fmov d4, x8 +; CHECK-NEXT: fmov d5, x9 +; CHECK-NEXT: mov v0.d[1], x14 +; CHECK-NEXT: mov v1.d[1], x11 +; CHECK-NEXT: mov v3.d[1], x12 +; CHECK-NEXT: mov v4.d[1], x16 +; CHECK-NEXT: mov v6.d[1], x17 +; CHECK-NEXT: mov v7.d[1], x18 +; CHECK-NEXT: mov v5.d[1], x0 +; CHECK-NEXT: ret + %a = call <16 x i64> @llvm.lrint.v16i64.v16f16(<16 x half> %x) + ret <16 x i64> %a +} +declare <16 x i64> @llvm.lrint.v16i64.v16f16(<16 x half>) + +define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) { +; CHECK-LABEL: lrint_v32i64_v32f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ext v4.16b, v1.16b, v1.16b, #8 +; CHECK-NEXT: ext v5.16b, v2.16b, v2.16b, #8 +; 
CHECK-NEXT: ext v6.16b, v3.16b, v3.16b, #8 +; CHECK-NEXT: ext v7.16b, v0.16b, v0.16b, #8 +; CHECK-NEXT: mov h19, v0.h[1] +; CHECK-NEXT: fcvt s21, h0 +; CHECK-NEXT: mov h23, v1.h[2] +; CHECK-NEXT: fcvt s22, h1 +; CHECK-NEXT: fcvt s26, h2 +; CHECK-NEXT: mov h27, v2.h[1] +; CHECK-NEXT: mov h28, v2.h[2] +; CHECK-NEXT: mov h16, v4.h[2] +; CHECK-NEXT: fcvt s17, h5 +; CHECK-NEXT: mov h18, v5.h[2] +; CHECK-NEXT: mov h20, v6.h[2] +; CHECK-NEXT: fcvt s24, h7 +; CHECK-NEXT: fcvt s25, h6 +; CHECK-NEXT: fcvt s19, h19 +; CHECK-NEXT: frintx s22, s22 +; CHECK-NEXT: fcvt s16, h16 +; CHECK-NEXT: frintx s17, s17 +; CHECK-NEXT: fcvt s18, h18 +; CHECK-NEXT: fcvt s20, h20 +; CHECK-NEXT: frintx s16, s16 +; CHECK-NEXT: fcvtzs x12, s17 +; CHECK-NEXT: frintx s17, s18 +; CHECK-NEXT: frintx s18, s21 +; CHECK-NEXT: fcvt s21, h23 +; CHECK-NEXT: frintx s23, s24 +; CHECK-NEXT: frintx s24, s25 +; CHECK-NEXT: frintx s25, s19 +; CHECK-NEXT: mov h19, v7.h[1] +; CHECK-NEXT: fcvtzs x13, s16 +; CHECK-NEXT: frintx s16, s20 +; CHECK-NEXT: frintx s20, s26 +; CHECK-NEXT: fcvtzs x9, s23 +; CHECK-NEXT: mov h23, v3.h[2] +; CHECK-NEXT: fcvt s26, h27 +; CHECK-NEXT: fcvtzs x15, s24 +; CHECK-NEXT: fcvtzs x10, s25 +; CHECK-NEXT: fcvt s24, h28 +; CHECK-NEXT: mov h25, v3.h[3] +; CHECK-NEXT: fcvtzs x14, s17 +; CHECK-NEXT: frintx s21, s21 +; CHECK-NEXT: fmov d17, x12 +; CHECK-NEXT: fcvtzs x12, s16 +; CHECK-NEXT: fmov d16, x13 +; CHECK-NEXT: fcvtzs x13, s22 +; CHECK-NEXT: fcvt s22, h3 +; CHECK-NEXT: mov h3, v3.h[1] +; CHECK-NEXT: mov h27, v0.h[2] +; CHECK-NEXT: mov h28, v2.h[3] +; CHECK-NEXT: fcvt s23, h23 +; CHECK-NEXT: frintx s26, s26 +; CHECK-NEXT: fcvtzs x16, s20 +; CHECK-NEXT: frintx s20, s24 +; CHECK-NEXT: fcvt s24, h25 +; CHECK-NEXT: fcvtzs x11, s18 +; CHECK-NEXT: fmov d18, x14 +; CHECK-NEXT: fcvtzs x14, s21 +; CHECK-NEXT: frintx s22, s22 +; CHECK-NEXT: fcvt s3, h3 +; CHECK-NEXT: fcvt s25, h27 +; CHECK-NEXT: fcvt s27, h28 +; CHECK-NEXT: frintx s23, s23 +; CHECK-NEXT: mov h21, v1.h[3] +; CHECK-NEXT: fmov d2, x15 +; CHECK-NEXT: fcvtzs x15, s26 +; CHECK-NEXT: fmov d26, x13 +; CHECK-NEXT: mov h1, v1.h[1] +; CHECK-NEXT: fcvtzs x13, s20 +; CHECK-NEXT: frintx s20, s24 +; CHECK-NEXT: fmov d24, x14 +; CHECK-NEXT: fcvtzs x14, s22 +; CHECK-NEXT: frintx s3, s3 +; CHECK-NEXT: fmov d22, x16 +; CHECK-NEXT: frintx s27, s27 +; CHECK-NEXT: fcvtzs x16, s23 +; CHECK-NEXT: fcvt s21, h21 +; CHECK-NEXT: frintx s25, s25 +; CHECK-NEXT: fcvt s1, h1 +; CHECK-NEXT: mov h0, v0.h[3] +; CHECK-NEXT: mov h23, v7.h[2] +; CHECK-NEXT: mov v22.d[1], x15 +; CHECK-NEXT: fcvtzs x15, s20 +; CHECK-NEXT: fmov d20, x13 +; CHECK-NEXT: fcvtzs x13, s3 +; CHECK-NEXT: fmov d3, x14 +; CHECK-NEXT: fcvtzs x14, s27 +; CHECK-NEXT: fmov d27, x16 +; CHECK-NEXT: frintx s21, s21 +; CHECK-NEXT: mov h7, v7.h[3] +; CHECK-NEXT: frintx s1, s1 +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: fcvt s23, h23 +; CHECK-NEXT: fcvt s19, h19 +; CHECK-NEXT: mov v27.d[1], x15 +; CHECK-NEXT: fcvtzs x15, s25 +; CHECK-NEXT: mov h25, v6.h[3] +; CHECK-NEXT: mov h6, v6.h[1] +; CHECK-NEXT: mov v3.d[1], x13 +; CHECK-NEXT: fcvtzs x13, s21 +; CHECK-NEXT: mov h21, v5.h[1] +; CHECK-NEXT: mov h5, v5.h[3] +; CHECK-NEXT: mov v20.d[1], x14 +; CHECK-NEXT: fcvtzs x14, s1 +; CHECK-NEXT: mov h1, v4.h[1] +; CHECK-NEXT: frintx s0, s0 +; CHECK-NEXT: fcvt s25, h25 +; CHECK-NEXT: fcvt s7, h7 +; CHECK-NEXT: stp q3, q27, [x8, #192] +; CHECK-NEXT: fcvt s6, h6 +; CHECK-NEXT: mov h3, v4.h[3] +; CHECK-NEXT: stp q22, q20, [x8, #128] +; CHECK-NEXT: fcvt s21, h21 +; CHECK-NEXT: fcvt s5, h5 +; CHECK-NEXT: mov v24.d[1], x13 +; CHECK-NEXT: mov v26.d[1], 
x14 +; CHECK-NEXT: fcvt s4, h4 +; CHECK-NEXT: frintx s22, s25 +; CHECK-NEXT: fmov d20, x12 +; CHECK-NEXT: fcvt s1, h1 +; CHECK-NEXT: frintx s6, s6 +; CHECK-NEXT: fcvt s3, h3 +; CHECK-NEXT: fcvtzs x12, s0 +; CHECK-NEXT: frintx s5, s5 +; CHECK-NEXT: frintx s21, s21 +; CHECK-NEXT: fmov d0, x11 +; CHECK-NEXT: stp q26, q24, [x8, #64] +; CHECK-NEXT: fmov d24, x15 +; CHECK-NEXT: frintx s4, s4 +; CHECK-NEXT: fcvtzs x11, s22 +; CHECK-NEXT: frintx s22, s23 +; CHECK-NEXT: frintx s1, s1 +; CHECK-NEXT: fcvtzs x13, s6 +; CHECK-NEXT: frintx s3, s3 +; CHECK-NEXT: frintx s6, s7 +; CHECK-NEXT: fcvtzs x14, s5 +; CHECK-NEXT: mov v24.d[1], x12 +; CHECK-NEXT: frintx s5, s19 +; CHECK-NEXT: fcvtzs x12, s21 +; CHECK-NEXT: mov v0.d[1], x10 +; CHECK-NEXT: fcvtzs x10, s4 +; CHECK-NEXT: mov v20.d[1], x11 +; CHECK-NEXT: fcvtzs x11, s22 +; CHECK-NEXT: mov v2.d[1], x13 +; CHECK-NEXT: fcvtzs x15, s3 +; CHECK-NEXT: fcvtzs x13, s1 +; CHECK-NEXT: mov v18.d[1], x14 +; CHECK-NEXT: fcvtzs x14, s6 +; CHECK-NEXT: stp q0, q24, [x8] +; CHECK-NEXT: mov v17.d[1], x12 +; CHECK-NEXT: fcvtzs x12, s5 +; CHECK-NEXT: fmov d0, x10 +; CHECK-NEXT: fmov d1, x11 +; CHECK-NEXT: stp q2, q20, [x8, #224] +; CHECK-NEXT: fmov d2, x9 +; CHECK-NEXT: mov v16.d[1], x15 +; CHECK-NEXT: stp q17, q18, [x8, #160] +; CHECK-NEXT: mov v0.d[1], x13 +; CHECK-NEXT: mov v1.d[1], x14 +; CHECK-NEXT: mov v2.d[1], x12 +; CHECK-NEXT: stp q0, q16, [x8, #96] +; CHECK-NEXT: stp q2, q1, [x8, #32] +; CHECK-NEXT: ret + %a = call <32 x i64> @llvm.lrint.v32i64.v32f16(<32 x half> %x) + ret <32 x i64> %a +} +declare <32 x i64> @llvm.lrint.v32i64.v32f16(<32 x half>) + +define <1 x i64> @lrint_v1f32(<1 x float> %x) { +; CHECK-LABEL: lrint_v1f32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: frintx s0, s0 +; CHECK-NEXT: fcvtzs x8, s0 +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: ret + %a = call <1 x i64> @llvm.lrint.v1i64.v1f32(<1 x float> %x) + ret <1 x i64> %a +} +declare <1 x i64> @llvm.lrint.v1i64.v1f32(<1 x float>) + +define <2 x i64> @lrint_v2f32(<2 x float> %x) { +; CHECK-LABEL: lrint_v2f32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: mov s1, v0.s[1] +; CHECK-NEXT: frintx s0, s0 +; CHECK-NEXT: frintx s1, s1 +; CHECK-NEXT: fcvtzs x8, s0 +; CHECK-NEXT: fcvtzs x9, s1 +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: mov v0.d[1], x9 +; CHECK-NEXT: ret + %a = call <2 x i64> @llvm.lrint.v2i64.v2f32(<2 x float> %x) + ret <2 x i64> %a +} +declare <2 x i64> @llvm.lrint.v2i64.v2f32(<2 x float>) + +define <4 x i64> @lrint_v4f32(<4 x float> %x) { +; CHECK-LABEL: lrint_v4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECK-NEXT: mov s3, v0.s[1] +; CHECK-NEXT: frintx s0, s0 +; CHECK-NEXT: mov s2, v1.s[1] +; CHECK-NEXT: frintx s1, s1 +; CHECK-NEXT: frintx s3, s3 +; CHECK-NEXT: fcvtzs x9, s0 +; CHECK-NEXT: frintx s2, s2 +; CHECK-NEXT: fcvtzs x8, s1 +; CHECK-NEXT: fcvtzs x11, s3 +; CHECK-NEXT: fmov d0, x9 +; CHECK-NEXT: fcvtzs x10, s2 +; CHECK-NEXT: fmov d1, x8 +; CHECK-NEXT: mov v0.d[1], x11 +; CHECK-NEXT: mov v1.d[1], x10 +; CHECK-NEXT: ret + %a = call <4 x i64> @llvm.lrint.v4i64.v4f32(<4 x float> %x) + ret <4 x i64> %a +} +declare <4 x i64> @llvm.lrint.v4i64.v4f32(<4 x float>) + +define <8 x i64> @lrint_v8f32(<8 x float> %x) { +; CHECK-LABEL: lrint_v8f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ext v2.16b, v0.16b, v0.16b, #8 +; CHECK-NEXT: ext v3.16b, v1.16b, v1.16b, #8 +; CHECK-NEXT: mov s4, v0.s[1] +; CHECK-NEXT: mov s7, v1.s[1] +; CHECK-NEXT: frintx s0, s0 +; CHECK-NEXT: frintx s1, s1 +; 
CHECK-NEXT: mov s5, v2.s[1] +; CHECK-NEXT: mov s6, v3.s[1] +; CHECK-NEXT: frintx s2, s2 +; CHECK-NEXT: frintx s3, s3 +; CHECK-NEXT: frintx s4, s4 +; CHECK-NEXT: frintx s7, s7 +; CHECK-NEXT: fcvtzs x9, s0 +; CHECK-NEXT: fcvtzs x12, s1 +; CHECK-NEXT: frintx s5, s5 +; CHECK-NEXT: frintx s6, s6 +; CHECK-NEXT: fcvtzs x8, s2 +; CHECK-NEXT: fcvtzs x10, s3 +; CHECK-NEXT: fcvtzs x11, s4 +; CHECK-NEXT: fcvtzs x15, s7 +; CHECK-NEXT: fmov d0, x9 +; CHECK-NEXT: fmov d2, x12 +; CHECK-NEXT: fcvtzs x13, s5 +; CHECK-NEXT: fcvtzs x14, s6 +; CHECK-NEXT: fmov d1, x8 +; CHECK-NEXT: fmov d3, x10 +; CHECK-NEXT: mov v0.d[1], x11 +; CHECK-NEXT: mov v2.d[1], x15 +; CHECK-NEXT: mov v1.d[1], x13 +; CHECK-NEXT: mov v3.d[1], x14 +; CHECK-NEXT: ret + %a = call <8 x i64> @llvm.lrint.v8i64.v8f32(<8 x float> %x) + ret <8 x i64> %a +} +declare <8 x i64> @llvm.lrint.v8i64.v8f32(<8 x float>) + +define <16 x i64> @lrint_v16i64_v16f32(<16 x float> %x) { +; CHECK-LABEL: lrint_v16i64_v16f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ext v4.16b, v0.16b, v0.16b, #8 +; CHECK-NEXT: ext v5.16b, v1.16b, v1.16b, #8 +; CHECK-NEXT: ext v6.16b, v2.16b, v2.16b, #8 +; CHECK-NEXT: frintx s7, s0 +; CHECK-NEXT: ext v16.16b, v3.16b, v3.16b, #8 +; CHECK-NEXT: mov s0, v0.s[1] +; CHECK-NEXT: frintx s17, s4 +; CHECK-NEXT: mov s4, v4.s[1] +; CHECK-NEXT: mov s18, v5.s[1] +; CHECK-NEXT: frintx s5, s5 +; CHECK-NEXT: frintx s19, s6 +; CHECK-NEXT: fcvtzs x8, s7 +; CHECK-NEXT: frintx s7, s16 +; CHECK-NEXT: mov s6, v6.s[1] +; CHECK-NEXT: mov s16, v16.s[1] +; CHECK-NEXT: frintx s0, s0 +; CHECK-NEXT: frintx s4, s4 +; CHECK-NEXT: fcvtzs x9, s17 +; CHECK-NEXT: frintx s17, s1 +; CHECK-NEXT: mov s1, v1.s[1] +; CHECK-NEXT: frintx s18, s18 +; CHECK-NEXT: fcvtzs x10, s5 +; CHECK-NEXT: mov s5, v2.s[1] +; CHECK-NEXT: fcvtzs x11, s19 +; CHECK-NEXT: mov s19, v3.s[1] +; CHECK-NEXT: frintx s2, s2 +; CHECK-NEXT: fcvtzs x12, s7 +; CHECK-NEXT: frintx s6, s6 +; CHECK-NEXT: fcvtzs x13, s4 +; CHECK-NEXT: frintx s4, s3 +; CHECK-NEXT: frintx s16, s16 +; CHECK-NEXT: fcvtzs x14, s18 +; CHECK-NEXT: frintx s18, s1 +; CHECK-NEXT: fcvtzs x15, s17 +; CHECK-NEXT: frintx s20, s5 +; CHECK-NEXT: frintx s17, s19 +; CHECK-NEXT: fmov d1, x9 +; CHECK-NEXT: fcvtzs x9, s2 +; CHECK-NEXT: fmov d5, x11 +; CHECK-NEXT: fmov d3, x10 +; CHECK-NEXT: fcvtzs x11, s4 +; CHECK-NEXT: fcvtzs x10, s0 +; CHECK-NEXT: fmov d7, x12 +; CHECK-NEXT: fcvtzs x12, s18 +; CHECK-NEXT: fcvtzs x17, s6 +; CHECK-NEXT: fcvtzs x18, s16 +; CHECK-NEXT: fcvtzs x16, s20 +; CHECK-NEXT: fcvtzs x0, s17 +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: fmov d2, x15 +; CHECK-NEXT: fmov d4, x9 +; CHECK-NEXT: mov v1.d[1], x13 +; CHECK-NEXT: fmov d6, x11 +; CHECK-NEXT: mov v3.d[1], x14 +; CHECK-NEXT: mov v0.d[1], x10 +; CHECK-NEXT: mov v5.d[1], x17 +; CHECK-NEXT: mov v7.d[1], x18 +; CHECK-NEXT: mov v2.d[1], x12 +; CHECK-NEXT: mov v4.d[1], x16 +; CHECK-NEXT: mov v6.d[1], x0 +; CHECK-NEXT: ret + %a = call <16 x i64> @llvm.lrint.v16i64.v16f32(<16 x float> %x) + ret <16 x i64> %a +} +declare <16 x i64> @llvm.lrint.v16i64.v16f32(<16 x float>) + +define <1 x i64> @lrint_v1f64(<1 x double> %x) { +; CHECK-LABEL: lrint_v1f64: +; CHECK: // %bb.0: +; CHECK-NEXT: frintx d0, d0 +; CHECK-NEXT: fcvtzs x8, d0 +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: ret + %a = call <1 x i64> @llvm.lrint.v1i64.v1f64(<1 x double> %x) + ret <1 x i64> %a +} +declare <1 x i64> @llvm.lrint.v1i64.v1f64(<1 x double>) + +define <2 x i64> @lrint_v2f64(<2 x double> %x) { +; CHECK-LABEL: lrint_v2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov d1, v0.d[1] +; CHECK-NEXT: frintx d0, d0 +; CHECK-NEXT: 
frintx d1, d1
+; CHECK-NEXT: fcvtzs x8, d0
+; CHECK-NEXT: fcvtzs x9, d1
+; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: mov v0.d[1], x9
+; CHECK-NEXT: ret
+  %a = call <2 x i64> @llvm.lrint.v2i64.v2f64(<2 x double> %x)
+  ret <2 x i64> %a
+}
+declare <2 x i64> @llvm.lrint.v2i64.v2f64(<2 x double>)
+
+define <4 x i64> @lrint_v4f64(<4 x double> %x) {
+; CHECK-LABEL: lrint_v4f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov d2, v0.d[1]
+; CHECK-NEXT: mov d3, v1.d[1]
+; CHECK-NEXT: frintx d0, d0
+; CHECK-NEXT: frintx d1, d1
+; CHECK-NEXT: frintx d2, d2
+; CHECK-NEXT: frintx d3, d3
+; CHECK-NEXT: fcvtzs x8, d0
+; CHECK-NEXT: fcvtzs x9, d1
+; CHECK-NEXT: fcvtzs x10, d2
+; CHECK-NEXT: fcvtzs x11, d3
+; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: fmov d1, x9
+; CHECK-NEXT: mov v0.d[1], x10
+; CHECK-NEXT: mov v1.d[1], x11
+; CHECK-NEXT: ret
+  %a = call <4 x i64> @llvm.lrint.v4i64.v4f64(<4 x double> %x)
+  ret <4 x i64> %a
+}
+declare <4 x i64> @llvm.lrint.v4i64.v4f64(<4 x double>)
+
+define <8 x i64> @lrint_v8f64(<8 x double> %x) {
+; CHECK-LABEL: lrint_v8f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov d4, v0.d[1]
+; CHECK-NEXT: mov d5, v1.d[1]
+; CHECK-NEXT: mov d6, v2.d[1]
+; CHECK-NEXT: mov d7, v3.d[1]
+; CHECK-NEXT: frintx d0, d0
+; CHECK-NEXT: frintx d1, d1
+; CHECK-NEXT: frintx d2, d2
+; CHECK-NEXT: frintx d3, d3
+; CHECK-NEXT: frintx d4, d4
+; CHECK-NEXT: frintx d5, d5
+; CHECK-NEXT: frintx d6, d6
+; CHECK-NEXT: frintx d7, d7
+; CHECK-NEXT: fcvtzs x8, d0
+; CHECK-NEXT: fcvtzs x9, d1
+; CHECK-NEXT: fcvtzs x10, d2
+; CHECK-NEXT: fcvtzs x11, d3
+; CHECK-NEXT: fcvtzs x12, d4
+; CHECK-NEXT: fcvtzs x13, d5
+; CHECK-NEXT: fcvtzs x14, d6
+; CHECK-NEXT: fcvtzs x15, d7
+; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: fmov d1, x9
+; CHECK-NEXT: fmov d2, x10
+; CHECK-NEXT: fmov d3, x11
+; CHECK-NEXT: mov v0.d[1], x12
+; CHECK-NEXT: mov v1.d[1], x13
+; CHECK-NEXT: mov v2.d[1], x14
+; CHECK-NEXT: mov v3.d[1], x15
+; CHECK-NEXT: ret
+  %a = call <8 x i64> @llvm.lrint.v8i64.v8f64(<8 x double> %x)
+  ret <8 x i64> %a
+}
+declare <8 x i64> @llvm.lrint.v8i64.v8f64(<8 x double>)
diff --git a/llvm/test/CodeGen/PowerPC/vector-llrint.ll b/llvm/test/CodeGen/PowerPC/vector-llrint.ll
new file mode 100644
index 0000000000000..4321b213b631c
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/vector-llrint.ll
@@ -0,0 +1,4848 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mcpu=pwr7 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
+; RUN:   -mtriple=powerpc64-unknown-unknown -verify-machineinstrs < %s | \
+; RUN:   FileCheck %s --check-prefix=BE
+; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
+; RUN:   -mtriple=powerpc64le-unknown-unknown -verify-machineinstrs < %s | \
+; RUN:   FileCheck %s
+; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
+; RUN:   -mtriple=powerpc64le-unknown-unknown -verify-machineinstrs < %s \
+; RUN:   --enable-unsafe-fp-math | FileCheck %s --check-prefix=FAST
+
+define <1 x i64> @llrint_v1i64_v1f16(<1 x half> %x) {
+; BE-LABEL: llrint_v1i64_v1f16:
+; BE: # %bb.0:
+; BE-NEXT: mflr r0
+; BE-NEXT: stdu r1, -112(r1)
+; BE-NEXT: std r0, 128(r1)
+; BE-NEXT: .cfi_def_cfa_offset 112
+; BE-NEXT: .cfi_offset lr, 16
+; BE-NEXT: bl __gnu_f2h_ieee
+; BE-NEXT: nop
+; BE-NEXT: clrldi r3, r3, 48
+; BE-NEXT: bl __gnu_h2f_ieee
+; BE-NEXT: nop
+; BE-NEXT: bl llrintf
+; BE-NEXT: nop
+; BE-NEXT: addi r1, r1, 112
+; BE-NEXT: ld r0, 16(r1)
+; BE-NEXT: mtlr r0
+; BE-NEXT: blr
+;
+; CHECK-LABEL: llrint_v1i64_v1f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: mflr r0
+; CHECK-NEXT: stdu r1, -32(r1)
+; CHECK-NEXT: std r0, 48(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r3, 48 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: addi r1, r1, 32 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +; +; FAST-LABEL: llrint_v1i64_v1f16: +; FAST: # %bb.0: +; FAST-NEXT: mflr r0 +; FAST-NEXT: stdu r1, -32(r1) +; FAST-NEXT: std r0, 48(r1) +; FAST-NEXT: .cfi_def_cfa_offset 32 +; FAST-NEXT: .cfi_offset lr, 16 +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: addi r1, r1, 32 +; FAST-NEXT: ld r0, 16(r1) +; FAST-NEXT: mtlr r0 +; FAST-NEXT: blr + %a = call <1 x i64> @llvm.llrint.v1i64.v1f16(<1 x half> %x) + ret <1 x i64> %a +} +declare <1 x i64> @llvm.llrint.v1i64.v1f16(<1 x half>) + +define <2 x i64> @llrint_v1i64_v2f16(<2 x half> %x) { +; BE-LABEL: llrint_v1i64_v2f16: +; BE: # %bb.0: +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -160(r1) +; BE-NEXT: std r0, 176(r1) +; BE-NEXT: .cfi_def_cfa_offset 160 +; BE-NEXT: .cfi_offset lr, 16 +; BE-NEXT: .cfi_offset r30, -24 +; BE-NEXT: .cfi_offset f31, -8 +; BE-NEXT: stfd f31, 152(r1) # 8-byte Folded Spill +; BE-NEXT: fmr f31, f1 +; BE-NEXT: fmr f1, f2 +; BE-NEXT: std r30, 136(r1) # 8-byte Folded Spill +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: fmr f1, f31 +; BE-NEXT: mr r30, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r3, 48 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r30, 48 +; BE-NEXT: fmr f31, f1 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f31 +; BE-NEXT: std r3, 120(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: std r3, 112(r1) +; BE-NEXT: addi r3, r1, 112 +; BE-NEXT: ld r30, 136(r1) # 8-byte Folded Reload +; BE-NEXT: lfd f31, 152(r1) # 8-byte Folded Reload +; BE-NEXT: lxvd2x v2, 0, r3 +; BE-NEXT: addi r1, r1, 160 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr +; +; CHECK-LABEL: llrint_v1i64_v2f16: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -96(r1) +; CHECK-NEXT: std r0, 112(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 96 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: .cfi_offset r30, -24 +; CHECK-NEXT: .cfi_offset f31, -8 +; CHECK-NEXT: .cfi_offset v31, -48 +; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: std r30, 72(r1) # 8-byte Folded Spill +; CHECK-NEXT: stfd f31, 88(r1) # 8-byte Folded Spill +; CHECK-NEXT: fmr f31, f2 +; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f31 +; CHECK-NEXT: mr r30, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r3, 48 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r30, 48 +; CHECK-NEXT: fmr f31, f1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f31 +; CHECK-NEXT: mtvsrd v31, r3 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: lfd f31, 88(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r30, 72(r1) # 8-byte Folded Reload +; CHECK-NEXT: xxmrghd v2, vs0, v31 +; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: addi r1, r1, 96 +; CHECK-NEXT: ld r0, 
16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +; +; FAST-LABEL: llrint_v1i64_v2f16: +; FAST: # %bb.0: +; FAST-NEXT: mflr r0 +; FAST-NEXT: .cfi_def_cfa_offset 48 +; FAST-NEXT: .cfi_offset lr, 16 +; FAST-NEXT: .cfi_offset f30, -16 +; FAST-NEXT: .cfi_offset f31, -8 +; FAST-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill +; FAST-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill +; FAST-NEXT: stdu r1, -48(r1) +; FAST-NEXT: fmr f31, f1 +; FAST-NEXT: fmr f1, f2 +; FAST-NEXT: std r0, 64(r1) +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f30, f1 +; FAST-NEXT: fmr f1, f31 +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: fctid f1, f30 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: mffprd r3, f1 +; FAST-NEXT: mtfprd f1, r3 +; FAST-NEXT: xxmrghd v2, vs1, vs0 +; FAST-NEXT: addi r1, r1, 48 +; FAST-NEXT: ld r0, 16(r1) +; FAST-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload +; FAST-NEXT: lfd f30, -16(r1) # 8-byte Folded Reload +; FAST-NEXT: mtlr r0 +; FAST-NEXT: blr + %a = call <2 x i64> @llvm.llrint.v2i64.v2f16(<2 x half> %x) + ret <2 x i64> %a +} +declare <2 x i64> @llvm.llrint.v2i64.v2f16(<2 x half>) + +define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) { +; BE-LABEL: llrint_v4i64_v4f16: +; BE: # %bb.0: +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -208(r1) +; BE-NEXT: std r0, 224(r1) +; BE-NEXT: .cfi_def_cfa_offset 208 +; BE-NEXT: .cfi_offset lr, 16 +; BE-NEXT: .cfi_offset r28, -56 +; BE-NEXT: .cfi_offset r29, -48 +; BE-NEXT: .cfi_offset r30, -40 +; BE-NEXT: .cfi_offset f29, -24 +; BE-NEXT: .cfi_offset f30, -16 +; BE-NEXT: .cfi_offset f31, -8 +; BE-NEXT: stfd f29, 184(r1) # 8-byte Folded Spill +; BE-NEXT: fmr f29, f1 +; BE-NEXT: fmr f1, f2 +; BE-NEXT: std r28, 152(r1) # 8-byte Folded Spill +; BE-NEXT: std r29, 160(r1) # 8-byte Folded Spill +; BE-NEXT: std r30, 168(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f30, 192(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f31, 200(r1) # 8-byte Folded Spill +; BE-NEXT: fmr f31, f4 +; BE-NEXT: fmr f30, f3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: fmr f1, f29 +; BE-NEXT: mr r30, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: fmr f1, f31 +; BE-NEXT: mr r29, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: fmr f1, f30 +; BE-NEXT: mr r28, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r3, 48 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r28, 48 +; BE-NEXT: fmr f31, f1 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r29, 48 +; BE-NEXT: fmr f30, f1 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r30, 48 +; BE-NEXT: fmr f29, f1 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f29 +; BE-NEXT: std r3, 120(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f30 +; BE-NEXT: std r3, 112(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f31 +; BE-NEXT: std r3, 136(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: std r3, 128(r1) +; BE-NEXT: addi r3, r1, 112 +; BE-NEXT: ld r30, 168(r1) # 8-byte Folded Reload +; BE-NEXT: lfd f31, 200(r1) # 8-byte Folded Reload +; BE-NEXT: lfd f30, 192(r1) # 8-byte Folded Reload +; BE-NEXT: lfd f29, 184(r1) # 8-byte Folded Reload +; BE-NEXT: lxvd2x v2, 0, r3 +; BE-NEXT: addi r3, r1, 128 +; BE-NEXT: ld 
r29, 160(r1) # 8-byte Folded Reload +; BE-NEXT: ld r28, 152(r1) # 8-byte Folded Reload +; BE-NEXT: lxvd2x v3, 0, r3 +; BE-NEXT: addi r1, r1, 208 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr +; +; CHECK-LABEL: llrint_v4i64_v4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -144(r1) +; CHECK-NEXT: std r0, 160(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 144 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: .cfi_offset r28, -56 +; CHECK-NEXT: .cfi_offset r29, -48 +; CHECK-NEXT: .cfi_offset r30, -40 +; CHECK-NEXT: .cfi_offset f29, -24 +; CHECK-NEXT: .cfi_offset f30, -16 +; CHECK-NEXT: .cfi_offset f31, -8 +; CHECK-NEXT: .cfi_offset v30, -96 +; CHECK-NEXT: .cfi_offset v31, -80 +; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: std r28, 88(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r29, 96(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r30, 104(r1) # 8-byte Folded Spill +; CHECK-NEXT: stfd f29, 120(r1) # 8-byte Folded Spill +; CHECK-NEXT: fmr f29, f2 +; CHECK-NEXT: stfd f30, 128(r1) # 8-byte Folded Spill +; CHECK-NEXT: fmr f30, f3 +; CHECK-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 64 +; CHECK-NEXT: stfd f31, 136(r1) # 8-byte Folded Spill +; CHECK-NEXT: fmr f31, f4 +; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f29 +; CHECK-NEXT: mr r30, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f30 +; CHECK-NEXT: mr r29, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f31 +; CHECK-NEXT: mr r28, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r3, 48 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r28, 48 +; CHECK-NEXT: fmr f31, f1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r29, 48 +; CHECK-NEXT: fmr f30, f1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r30, 48 +; CHECK-NEXT: fmr f29, f1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f29 +; CHECK-NEXT: mtvsrd v31, r3 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f30 +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: xxmrghd v31, vs0, v31 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f31 +; CHECK-NEXT: mtvsrd v30, r3 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: li r3, 64 +; CHECK-NEXT: vmr v2, v31 +; CHECK-NEXT: lfd f31, 136(r1) # 8-byte Folded Reload +; CHECK-NEXT: lfd f30, 128(r1) # 8-byte Folded Reload +; CHECK-NEXT: lfd f29, 120(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r30, 104(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r29, 96(r1) # 8-byte Folded Reload +; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: ld r28, 88(r1) # 8-byte Folded Reload +; CHECK-NEXT: xxmrghd v3, vs0, v30 +; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: addi r1, r1, 144 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +; +; FAST-LABEL: llrint_v4i64_v4f16: +; FAST: # %bb.0: +; FAST-NEXT: mflr r0 +; FAST-NEXT: .cfi_def_cfa_offset 64 +; FAST-NEXT: .cfi_offset lr, 16 +; FAST-NEXT: .cfi_offset f28, -32 +; FAST-NEXT: .cfi_offset f29, -24 +; FAST-NEXT: .cfi_offset f30, -16 +; FAST-NEXT: .cfi_offset f31, -8 +; FAST-NEXT: stfd f28, -32(r1) # 8-byte Folded Spill +; FAST-NEXT: stfd f29, -24(r1) # 8-byte Folded Spill +; FAST-NEXT: stfd f30, 
-16(r1) # 8-byte Folded Spill +; FAST-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill +; FAST-NEXT: stdu r1, -64(r1) +; FAST-NEXT: fmr f29, f1 +; FAST-NEXT: fmr f1, f4 +; FAST-NEXT: std r0, 80(r1) +; FAST-NEXT: fmr f31, f3 +; FAST-NEXT: fmr f30, f2 +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f28, f1 +; FAST-NEXT: fmr f1, f31 +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f31, f1 +; FAST-NEXT: fmr f1, f30 +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f30, f1 +; FAST-NEXT: fmr f1, f29 +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fctid f0, f30 +; FAST-NEXT: fctid f2, f31 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: fctid f1, f1 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: mffprd r3, f2 +; FAST-NEXT: mtfprd f2, r3 +; FAST-NEXT: mffprd r3, f1 +; FAST-NEXT: mtfprd f1, r3 +; FAST-NEXT: xxmrghd v2, vs0, vs1 +; FAST-NEXT: fctid f0, f28 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: xxmrghd v3, vs0, vs2 +; FAST-NEXT: addi r1, r1, 64 +; FAST-NEXT: ld r0, 16(r1) +; FAST-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload +; FAST-NEXT: lfd f30, -16(r1) # 8-byte Folded Reload +; FAST-NEXT: mtlr r0 +; FAST-NEXT: lfd f29, -24(r1) # 8-byte Folded Reload +; FAST-NEXT: lfd f28, -32(r1) # 8-byte Folded Reload +; FAST-NEXT: blr + %a = call <4 x i64> @llvm.llrint.v4i64.v4f16(<4 x half> %x) + ret <4 x i64> %a +} +declare <4 x i64> @llvm.llrint.v4i64.v4f16(<4 x half>) + +define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) { +; BE-LABEL: llrint_v8i64_v8f16: +; BE: # %bb.0: +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -304(r1) +; BE-NEXT: std r0, 320(r1) +; BE-NEXT: .cfi_def_cfa_offset 304 +; BE-NEXT: .cfi_offset lr, 16 +; BE-NEXT: .cfi_offset r24, -120 +; BE-NEXT: .cfi_offset r25, -112 +; BE-NEXT: .cfi_offset r26, -104 +; BE-NEXT: .cfi_offset r27, -96 +; BE-NEXT: .cfi_offset r28, -88 +; BE-NEXT: .cfi_offset r29, -80 +; BE-NEXT: .cfi_offset r30, -72 +; BE-NEXT: .cfi_offset f25, -56 +; BE-NEXT: .cfi_offset f26, -48 +; BE-NEXT: .cfi_offset f27, -40 +; BE-NEXT: .cfi_offset f28, -32 +; BE-NEXT: .cfi_offset f29, -24 +; BE-NEXT: .cfi_offset f30, -16 +; BE-NEXT: .cfi_offset f31, -8 +; BE-NEXT: stfd f25, 248(r1) # 8-byte Folded Spill +; BE-NEXT: fmr f25, f1 +; BE-NEXT: fmr f1, f2 +; BE-NEXT: std r24, 184(r1) # 8-byte Folded Spill +; BE-NEXT: std r25, 192(r1) # 8-byte Folded Spill +; BE-NEXT: std r26, 200(r1) # 8-byte Folded Spill +; BE-NEXT: std r27, 208(r1) # 8-byte Folded Spill +; BE-NEXT: std r28, 216(r1) # 8-byte Folded Spill +; BE-NEXT: std r29, 224(r1) # 8-byte Folded Spill +; BE-NEXT: std r30, 232(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f26, 256(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f27, 264(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f28, 272(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f29, 280(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f30, 288(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f31, 296(r1) # 8-byte Folded Spill +; BE-NEXT: fmr f31, f8 +; BE-NEXT: fmr f30, f7 +; BE-NEXT: fmr f29, f6 +; BE-NEXT: fmr f28, f5 +; BE-NEXT: fmr f27, f4 +; BE-NEXT: fmr f26, f3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: fmr f1, f25 +; BE-NEXT: mr r30, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop 
+; BE-NEXT: fmr f1, f27 +; BE-NEXT: mr r29, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: fmr f1, f26 +; BE-NEXT: mr r28, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: fmr f1, f29 +; BE-NEXT: mr r27, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: fmr f1, f28 +; BE-NEXT: mr r26, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: fmr f1, f31 +; BE-NEXT: mr r25, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: fmr f1, f30 +; BE-NEXT: mr r24, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r3, 48 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r24, 48 +; BE-NEXT: fmr f31, f1 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r25, 48 +; BE-NEXT: fmr f30, f1 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r26, 48 +; BE-NEXT: fmr f29, f1 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r27, 48 +; BE-NEXT: fmr f28, f1 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r28, 48 +; BE-NEXT: fmr f27, f1 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r29, 48 +; BE-NEXT: fmr f26, f1 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r30, 48 +; BE-NEXT: fmr f25, f1 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f25 +; BE-NEXT: std r3, 120(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f26 +; BE-NEXT: std r3, 112(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f27 +; BE-NEXT: std r3, 136(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f28 +; BE-NEXT: std r3, 128(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f29 +; BE-NEXT: std r3, 152(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f30 +; BE-NEXT: std r3, 144(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f31 +; BE-NEXT: std r3, 168(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: std r3, 160(r1) +; BE-NEXT: addi r3, r1, 112 +; BE-NEXT: ld r30, 232(r1) # 8-byte Folded Reload +; BE-NEXT: lfd f31, 296(r1) # 8-byte Folded Reload +; BE-NEXT: lfd f30, 288(r1) # 8-byte Folded Reload +; BE-NEXT: lfd f29, 280(r1) # 8-byte Folded Reload +; BE-NEXT: lxvd2x v2, 0, r3 +; BE-NEXT: addi r3, r1, 128 +; BE-NEXT: lfd f28, 272(r1) # 8-byte Folded Reload +; BE-NEXT: lfd f27, 264(r1) # 8-byte Folded Reload +; BE-NEXT: lfd f26, 256(r1) # 8-byte Folded Reload +; BE-NEXT: ld r29, 224(r1) # 8-byte Folded Reload +; BE-NEXT: ld r28, 216(r1) # 8-byte Folded Reload +; BE-NEXT: lxvd2x v3, 0, r3 +; BE-NEXT: addi r3, r1, 144 +; BE-NEXT: lfd f25, 248(r1) # 8-byte Folded Reload +; BE-NEXT: ld r27, 208(r1) # 8-byte Folded Reload +; BE-NEXT: ld r26, 200(r1) # 8-byte Folded Reload +; BE-NEXT: ld r25, 192(r1) # 8-byte Folded Reload +; BE-NEXT: ld r24, 184(r1) # 8-byte Folded Reload +; BE-NEXT: lxvd2x v4, 0, r3 +; BE-NEXT: addi r3, r1, 160 +; BE-NEXT: lxvd2x v5, 0, r3 +; BE-NEXT: addi r1, r1, 304 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr +; +; CHECK-LABEL: llrint_v8i64_v8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -240(r1) +; CHECK-NEXT: std r0, 256(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 240 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: .cfi_offset r24, -120 +; CHECK-NEXT: .cfi_offset r25, -112 +; CHECK-NEXT: .cfi_offset r26, -104 +; CHECK-NEXT: .cfi_offset r27, -96 +; CHECK-NEXT: .cfi_offset r28, -88 +; CHECK-NEXT: .cfi_offset r29, -80 +; CHECK-NEXT: .cfi_offset 
r30, -72 +; CHECK-NEXT: .cfi_offset f25, -56 +; CHECK-NEXT: .cfi_offset f26, -48 +; CHECK-NEXT: .cfi_offset f27, -40 +; CHECK-NEXT: .cfi_offset f28, -32 +; CHECK-NEXT: .cfi_offset f29, -24 +; CHECK-NEXT: .cfi_offset f30, -16 +; CHECK-NEXT: .cfi_offset f31, -8 +; CHECK-NEXT: .cfi_offset v28, -192 +; CHECK-NEXT: .cfi_offset v29, -176 +; CHECK-NEXT: .cfi_offset v30, -160 +; CHECK-NEXT: .cfi_offset v31, -144 +; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: std r24, 120(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r25, 128(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r26, 136(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r27, 144(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r28, 152(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r29, 160(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r30, 168(r1) # 8-byte Folded Spill +; CHECK-NEXT: stvx v28, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 64 +; CHECK-NEXT: stfd f25, 184(r1) # 8-byte Folded Spill +; CHECK-NEXT: fmr f25, f2 +; CHECK-NEXT: stfd f26, 192(r1) # 8-byte Folded Spill +; CHECK-NEXT: fmr f26, f3 +; CHECK-NEXT: stfd f27, 200(r1) # 8-byte Folded Spill +; CHECK-NEXT: fmr f27, f4 +; CHECK-NEXT: stvx v29, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 80 +; CHECK-NEXT: stfd f28, 208(r1) # 8-byte Folded Spill +; CHECK-NEXT: fmr f28, f5 +; CHECK-NEXT: stfd f29, 216(r1) # 8-byte Folded Spill +; CHECK-NEXT: fmr f29, f6 +; CHECK-NEXT: stfd f30, 224(r1) # 8-byte Folded Spill +; CHECK-NEXT: fmr f30, f7 +; CHECK-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 96 +; CHECK-NEXT: stfd f31, 232(r1) # 8-byte Folded Spill +; CHECK-NEXT: fmr f31, f8 +; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f25 +; CHECK-NEXT: mr r30, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f26 +; CHECK-NEXT: mr r29, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f27 +; CHECK-NEXT: mr r28, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f28 +; CHECK-NEXT: mr r27, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f29 +; CHECK-NEXT: mr r26, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f30 +; CHECK-NEXT: mr r25, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f31 +; CHECK-NEXT: mr r24, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r3, 48 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r24, 48 +; CHECK-NEXT: fmr f31, f1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r25, 48 +; CHECK-NEXT: fmr f30, f1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r26, 48 +; CHECK-NEXT: fmr f29, f1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r27, 48 +; CHECK-NEXT: fmr f28, f1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r28, 48 +; CHECK-NEXT: fmr f27, f1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r29, 48 +; CHECK-NEXT: fmr f26, f1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r30, 48 +; CHECK-NEXT: fmr f25, f1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f25 +; CHECK-NEXT: mtvsrd v31, r3 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f26 +; CHECK-NEXT: mtfprd f0, r3 +; 
CHECK-NEXT: xxmrghd v31, vs0, v31 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f27 +; CHECK-NEXT: mtvsrd v30, r3 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f28 +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: xxmrghd v30, vs0, v30 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f29 +; CHECK-NEXT: mtvsrd v29, r3 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f30 +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: xxmrghd v29, vs0, v29 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f31 +; CHECK-NEXT: mtvsrd v28, r3 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: li r3, 96 +; CHECK-NEXT: vmr v2, v31 +; CHECK-NEXT: lfd f31, 232(r1) # 8-byte Folded Reload +; CHECK-NEXT: vmr v3, v30 +; CHECK-NEXT: vmr v4, v29 +; CHECK-NEXT: lfd f30, 224(r1) # 8-byte Folded Reload +; CHECK-NEXT: lfd f29, 216(r1) # 8-byte Folded Reload +; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 80 +; CHECK-NEXT: lfd f28, 208(r1) # 8-byte Folded Reload +; CHECK-NEXT: lfd f27, 200(r1) # 8-byte Folded Reload +; CHECK-NEXT: lfd f26, 192(r1) # 8-byte Folded Reload +; CHECK-NEXT: lfd f25, 184(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r30, 168(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r29, 160(r1) # 8-byte Folded Reload +; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 64 +; CHECK-NEXT: ld r28, 152(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r27, 144(r1) # 8-byte Folded Reload +; CHECK-NEXT: xxmrghd v5, vs0, v28 +; CHECK-NEXT: ld r26, 136(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r25, 128(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r24, 120(r1) # 8-byte Folded Reload +; CHECK-NEXT: lvx v29, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: lvx v28, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: addi r1, r1, 240 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +; +; FAST-LABEL: llrint_v8i64_v8f16: +; FAST: # %bb.0: +; FAST-NEXT: mflr r0 +; FAST-NEXT: .cfi_def_cfa_offset 96 +; FAST-NEXT: .cfi_offset lr, 16 +; FAST-NEXT: .cfi_offset f24, -64 +; FAST-NEXT: .cfi_offset f25, -56 +; FAST-NEXT: .cfi_offset f26, -48 +; FAST-NEXT: .cfi_offset f27, -40 +; FAST-NEXT: .cfi_offset f28, -32 +; FAST-NEXT: .cfi_offset f29, -24 +; FAST-NEXT: .cfi_offset f30, -16 +; FAST-NEXT: .cfi_offset f31, -8 +; FAST-NEXT: stfd f24, -64(r1) # 8-byte Folded Spill +; FAST-NEXT: stfd f25, -56(r1) # 8-byte Folded Spill +; FAST-NEXT: stfd f26, -48(r1) # 8-byte Folded Spill +; FAST-NEXT: stfd f27, -40(r1) # 8-byte Folded Spill +; FAST-NEXT: stfd f28, -32(r1) # 8-byte Folded Spill +; FAST-NEXT: stfd f29, -24(r1) # 8-byte Folded Spill +; FAST-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill +; FAST-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill +; FAST-NEXT: stdu r1, -96(r1) +; FAST-NEXT: fmr f24, f1 +; FAST-NEXT: fmr f1, f8 +; FAST-NEXT: std r0, 112(r1) +; FAST-NEXT: fmr f30, f7 +; FAST-NEXT: fmr f29, f6 +; FAST-NEXT: fmr f28, f5 +; FAST-NEXT: fmr f27, f4 +; FAST-NEXT: fmr f26, f3 +; FAST-NEXT: fmr f25, f2 +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f31, f1 +; FAST-NEXT: fmr f1, f30 +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f30, f1 +; FAST-NEXT: fmr f1, f29 +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: 
clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f29, f1 +; FAST-NEXT: fmr f1, f28 +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f28, f1 +; FAST-NEXT: fmr f1, f27 +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f27, f1 +; FAST-NEXT: fmr f1, f26 +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f26, f1 +; FAST-NEXT: fmr f1, f25 +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f25, f1 +; FAST-NEXT: fmr f1, f24 +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fctid f0, f25 +; FAST-NEXT: fctid f2, f26 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: fctid f3, f27 +; FAST-NEXT: fctid f4, f28 +; FAST-NEXT: fctid f5, f29 +; FAST-NEXT: fctid f6, f30 +; FAST-NEXT: fctid f1, f1 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: mffprd r3, f2 +; FAST-NEXT: mtfprd f2, r3 +; FAST-NEXT: mffprd r3, f3 +; FAST-NEXT: mtfprd f3, r3 +; FAST-NEXT: mffprd r3, f4 +; FAST-NEXT: mtfprd f4, r3 +; FAST-NEXT: mffprd r3, f5 +; FAST-NEXT: mtfprd f5, r3 +; FAST-NEXT: mffprd r3, f6 +; FAST-NEXT: mtfprd f6, r3 +; FAST-NEXT: mffprd r3, f1 +; FAST-NEXT: mtfprd f1, r3 +; FAST-NEXT: xxmrghd v3, vs3, vs2 +; FAST-NEXT: xxmrghd v4, vs5, vs4 +; FAST-NEXT: xxmrghd v2, vs0, vs1 +; FAST-NEXT: fctid f0, f31 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: xxmrghd v5, vs0, vs6 +; FAST-NEXT: addi r1, r1, 96 +; FAST-NEXT: ld r0, 16(r1) +; FAST-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload +; FAST-NEXT: lfd f30, -16(r1) # 8-byte Folded Reload +; FAST-NEXT: mtlr r0 +; FAST-NEXT: lfd f29, -24(r1) # 8-byte Folded Reload +; FAST-NEXT: lfd f28, -32(r1) # 8-byte Folded Reload +; FAST-NEXT: lfd f27, -40(r1) # 8-byte Folded Reload +; FAST-NEXT: lfd f26, -48(r1) # 8-byte Folded Reload +; FAST-NEXT: lfd f25, -56(r1) # 8-byte Folded Reload +; FAST-NEXT: lfd f24, -64(r1) # 8-byte Folded Reload +; FAST-NEXT: blr + %a = call <8 x i64> @llvm.llrint.v8i64.v8f16(<8 x half> %x) + ret <8 x i64> %a +} +declare <8 x i64> @llvm.llrint.v8i64.v8f16(<8 x half>) + +define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) { +; BE-LABEL: llrint_v16i64_v16f16: +; BE: # %bb.0: +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -496(r1) +; BE-NEXT: std r0, 512(r1) +; BE-NEXT: .cfi_def_cfa_offset 496 +; BE-NEXT: .cfi_offset lr, 16 +; BE-NEXT: .cfi_offset r16, -248 +; BE-NEXT: .cfi_offset r17, -240 +; BE-NEXT: .cfi_offset r18, -232 +; BE-NEXT: .cfi_offset r19, -224 +; BE-NEXT: .cfi_offset r20, -216 +; BE-NEXT: .cfi_offset r21, -208 +; BE-NEXT: .cfi_offset r22, -200 +; BE-NEXT: .cfi_offset r23, -192 +; BE-NEXT: .cfi_offset r24, -184 +; BE-NEXT: .cfi_offset r25, -176 +; BE-NEXT: .cfi_offset r26, -168 +; BE-NEXT: .cfi_offset r27, -160 +; BE-NEXT: .cfi_offset r28, -152 +; BE-NEXT: .cfi_offset r29, -144 +; BE-NEXT: .cfi_offset r30, -136 +; BE-NEXT: .cfi_offset f17, -120 +; BE-NEXT: .cfi_offset f18, -112 +; BE-NEXT: .cfi_offset f19, -104 +; BE-NEXT: .cfi_offset f20, -96 +; BE-NEXT: .cfi_offset f21, -88 +; BE-NEXT: .cfi_offset f22, -80 +; BE-NEXT: .cfi_offset f23, -72 +; BE-NEXT: .cfi_offset f24, -64 +; BE-NEXT: .cfi_offset f25, -56 +; BE-NEXT: 
.cfi_offset f26, -48 +; BE-NEXT: .cfi_offset f27, -40 +; BE-NEXT: .cfi_offset f28, -32 +; BE-NEXT: .cfi_offset f29, -24 +; BE-NEXT: .cfi_offset f30, -16 +; BE-NEXT: .cfi_offset f31, -8 +; BE-NEXT: stfd f20, 400(r1) # 8-byte Folded Spill +; BE-NEXT: fmr f20, f1 +; BE-NEXT: fmr f1, f2 +; BE-NEXT: std r16, 248(r1) # 8-byte Folded Spill +; BE-NEXT: std r17, 256(r1) # 8-byte Folded Spill +; BE-NEXT: std r18, 264(r1) # 8-byte Folded Spill +; BE-NEXT: std r19, 272(r1) # 8-byte Folded Spill +; BE-NEXT: std r20, 280(r1) # 8-byte Folded Spill +; BE-NEXT: std r21, 288(r1) # 8-byte Folded Spill +; BE-NEXT: std r22, 296(r1) # 8-byte Folded Spill +; BE-NEXT: std r23, 304(r1) # 8-byte Folded Spill +; BE-NEXT: std r24, 312(r1) # 8-byte Folded Spill +; BE-NEXT: std r25, 320(r1) # 8-byte Folded Spill +; BE-NEXT: std r26, 328(r1) # 8-byte Folded Spill +; BE-NEXT: std r27, 336(r1) # 8-byte Folded Spill +; BE-NEXT: std r28, 344(r1) # 8-byte Folded Spill +; BE-NEXT: std r29, 352(r1) # 8-byte Folded Spill +; BE-NEXT: std r30, 360(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f17, 376(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f18, 384(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f19, 392(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f21, 408(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f22, 416(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f23, 424(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f24, 432(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f25, 440(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f26, 448(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f27, 456(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f28, 464(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f29, 472(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f30, 480(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f31, 488(r1) # 8-byte Folded Spill +; BE-NEXT: fmr f31, f13 +; BE-NEXT: fmr f29, f12 +; BE-NEXT: fmr f30, f11 +; BE-NEXT: fmr f28, f10 +; BE-NEXT: fmr f27, f9 +; BE-NEXT: fmr f26, f8 +; BE-NEXT: fmr f25, f7 +; BE-NEXT: fmr f24, f6 +; BE-NEXT: fmr f23, f5 +; BE-NEXT: fmr f22, f4 +; BE-NEXT: fmr f21, f3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: fmr f1, f20 +; BE-NEXT: mr r30, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: fmr f1, f22 +; BE-NEXT: mr r29, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: fmr f1, f21 +; BE-NEXT: mr r28, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: fmr f1, f24 +; BE-NEXT: mr r27, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: fmr f1, f23 +; BE-NEXT: mr r26, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: fmr f1, f26 +; BE-NEXT: mr r25, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: fmr f1, f25 +; BE-NEXT: mr r24, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: fmr f1, f28 +; BE-NEXT: mr r23, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: fmr f1, f27 +; BE-NEXT: mr r22, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: fmr f1, f29 +; BE-NEXT: mr r21, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: fmr f1, f30 +; BE-NEXT: mr r20, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: lfs f1, 652(r1) +; BE-NEXT: mr r19, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: fmr f1, f31 +; BE-NEXT: mr r18, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: lfs f1, 668(r1) +; BE-NEXT: mr r17, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: lfs f1, 660(r1) +; BE-NEXT: mr r16, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r3, 48 +; BE-NEXT: bl 
__gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r16, 48 +; BE-NEXT: fmr f31, f1 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r17, 48 +; BE-NEXT: fmr f30, f1 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r18, 48 +; BE-NEXT: fmr f29, f1 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r19, 48 +; BE-NEXT: fmr f28, f1 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r20, 48 +; BE-NEXT: fmr f27, f1 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r21, 48 +; BE-NEXT: fmr f26, f1 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r22, 48 +; BE-NEXT: fmr f25, f1 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r23, 48 +; BE-NEXT: fmr f24, f1 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r24, 48 +; BE-NEXT: fmr f23, f1 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r25, 48 +; BE-NEXT: fmr f22, f1 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r26, 48 +; BE-NEXT: fmr f21, f1 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r27, 48 +; BE-NEXT: fmr f20, f1 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r28, 48 +; BE-NEXT: fmr f19, f1 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r29, 48 +; BE-NEXT: fmr f18, f1 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r30, 48 +; BE-NEXT: fmr f17, f1 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f17 +; BE-NEXT: std r3, 120(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f18 +; BE-NEXT: std r3, 112(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f19 +; BE-NEXT: std r3, 136(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f20 +; BE-NEXT: std r3, 128(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f21 +; BE-NEXT: std r3, 152(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f22 +; BE-NEXT: std r3, 144(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f23 +; BE-NEXT: std r3, 168(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f24 +; BE-NEXT: std r3, 160(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f25 +; BE-NEXT: std r3, 184(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f26 +; BE-NEXT: std r3, 176(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f27 +; BE-NEXT: std r3, 200(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f28 +; BE-NEXT: std r3, 192(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f29 +; BE-NEXT: std r3, 216(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f30 +; BE-NEXT: std r3, 208(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f31 +; BE-NEXT: std r3, 232(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: std r3, 224(r1) +; BE-NEXT: addi r3, r1, 112 +; BE-NEXT: ld r30, 360(r1) # 8-byte Folded Reload +; BE-NEXT: lfd f31, 488(r1) # 8-byte Folded Reload +; BE-NEXT: lfd f30, 480(r1) # 8-byte Folded Reload +; BE-NEXT: lfd f29, 472(r1) # 8-byte Folded Reload +; BE-NEXT: lxvd2x v2, 0, r3 +; BE-NEXT: addi r3, r1, 128 +; BE-NEXT: lfd f28, 464(r1) # 8-byte Folded Reload +; BE-NEXT: lfd f27, 456(r1) # 8-byte Folded Reload +; BE-NEXT: lfd f26, 448(r1) # 8-byte Folded Reload +; BE-NEXT: ld r29, 352(r1) # 8-byte Folded Reload +; BE-NEXT: ld r28, 344(r1) # 8-byte Folded Reload +; BE-NEXT: lxvd2x v3, 
0, r3 +; BE-NEXT: addi r3, r1, 144 +; BE-NEXT: lfd f25, 440(r1) # 8-byte Folded Reload +; BE-NEXT: lfd f24, 432(r1) # 8-byte Folded Reload +; BE-NEXT: lfd f23, 424(r1) # 8-byte Folded Reload +; BE-NEXT: ld r27, 336(r1) # 8-byte Folded Reload +; BE-NEXT: ld r26, 328(r1) # 8-byte Folded Reload +; BE-NEXT: lxvd2x v4, 0, r3 +; BE-NEXT: addi r3, r1, 160 +; BE-NEXT: lfd f22, 416(r1) # 8-byte Folded Reload +; BE-NEXT: lfd f21, 408(r1) # 8-byte Folded Reload +; BE-NEXT: lfd f20, 400(r1) # 8-byte Folded Reload +; BE-NEXT: ld r25, 320(r1) # 8-byte Folded Reload +; BE-NEXT: ld r24, 312(r1) # 8-byte Folded Reload +; BE-NEXT: lxvd2x v5, 0, r3 +; BE-NEXT: addi r3, r1, 176 +; BE-NEXT: lfd f19, 392(r1) # 8-byte Folded Reload +; BE-NEXT: lfd f18, 384(r1) # 8-byte Folded Reload +; BE-NEXT: lfd f17, 376(r1) # 8-byte Folded Reload +; BE-NEXT: ld r23, 304(r1) # 8-byte Folded Reload +; BE-NEXT: ld r22, 296(r1) # 8-byte Folded Reload +; BE-NEXT: lxvd2x v6, 0, r3 +; BE-NEXT: addi r3, r1, 192 +; BE-NEXT: ld r21, 288(r1) # 8-byte Folded Reload +; BE-NEXT: ld r20, 280(r1) # 8-byte Folded Reload +; BE-NEXT: ld r19, 272(r1) # 8-byte Folded Reload +; BE-NEXT: ld r18, 264(r1) # 8-byte Folded Reload +; BE-NEXT: ld r17, 256(r1) # 8-byte Folded Reload +; BE-NEXT: ld r16, 248(r1) # 8-byte Folded Reload +; BE-NEXT: lxvd2x v7, 0, r3 +; BE-NEXT: addi r3, r1, 208 +; BE-NEXT: lxvd2x v8, 0, r3 +; BE-NEXT: addi r3, r1, 224 +; BE-NEXT: lxvd2x v9, 0, r3 +; BE-NEXT: addi r1, r1, 496 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr +; +; CHECK-LABEL: llrint_v16i64_v16f16: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -432(r1) +; CHECK-NEXT: std r0, 448(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 432 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: .cfi_offset r16, -248 +; CHECK-NEXT: .cfi_offset r17, -240 +; CHECK-NEXT: .cfi_offset r18, -232 +; CHECK-NEXT: .cfi_offset r19, -224 +; CHECK-NEXT: .cfi_offset r20, -216 +; CHECK-NEXT: .cfi_offset r21, -208 +; CHECK-NEXT: .cfi_offset r22, -200 +; CHECK-NEXT: .cfi_offset r23, -192 +; CHECK-NEXT: .cfi_offset r24, -184 +; CHECK-NEXT: .cfi_offset r25, -176 +; CHECK-NEXT: .cfi_offset r26, -168 +; CHECK-NEXT: .cfi_offset r27, -160 +; CHECK-NEXT: .cfi_offset r28, -152 +; CHECK-NEXT: .cfi_offset r29, -144 +; CHECK-NEXT: .cfi_offset r30, -136 +; CHECK-NEXT: .cfi_offset f17, -120 +; CHECK-NEXT: .cfi_offset f18, -112 +; CHECK-NEXT: .cfi_offset f19, -104 +; CHECK-NEXT: .cfi_offset f20, -96 +; CHECK-NEXT: .cfi_offset f21, -88 +; CHECK-NEXT: .cfi_offset f22, -80 +; CHECK-NEXT: .cfi_offset f23, -72 +; CHECK-NEXT: .cfi_offset f24, -64 +; CHECK-NEXT: .cfi_offset f25, -56 +; CHECK-NEXT: .cfi_offset f26, -48 +; CHECK-NEXT: .cfi_offset f27, -40 +; CHECK-NEXT: .cfi_offset f28, -32 +; CHECK-NEXT: .cfi_offset f29, -24 +; CHECK-NEXT: .cfi_offset f30, -16 +; CHECK-NEXT: .cfi_offset f31, -8 +; CHECK-NEXT: .cfi_offset v24, -384 +; CHECK-NEXT: .cfi_offset v25, -368 +; CHECK-NEXT: .cfi_offset v26, -352 +; CHECK-NEXT: .cfi_offset v27, -336 +; CHECK-NEXT: .cfi_offset v28, -320 +; CHECK-NEXT: .cfi_offset v29, -304 +; CHECK-NEXT: .cfi_offset v30, -288 +; CHECK-NEXT: .cfi_offset v31, -272 +; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: std r16, 184(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r17, 192(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r18, 200(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r19, 208(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r20, 216(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r21, 224(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r22, 232(r1) # 8-byte Folded Spill +; 
CHECK-NEXT: stvx v24, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 64 +; CHECK-NEXT: std r23, 240(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r24, 248(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r25, 256(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r26, 264(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r27, 272(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r28, 280(r1) # 8-byte Folded Spill +; CHECK-NEXT: stvx v25, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 80 +; CHECK-NEXT: std r29, 288(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r30, 296(r1) # 8-byte Folded Spill +; CHECK-NEXT: stfd f17, 312(r1) # 8-byte Folded Spill +; CHECK-NEXT: stfd f18, 320(r1) # 8-byte Folded Spill +; CHECK-NEXT: stfd f19, 328(r1) # 8-byte Folded Spill +; CHECK-NEXT: stfd f20, 336(r1) # 8-byte Folded Spill +; CHECK-NEXT: fmr f20, f2 +; CHECK-NEXT: stvx v26, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 96 +; CHECK-NEXT: stfd f21, 344(r1) # 8-byte Folded Spill +; CHECK-NEXT: fmr f21, f3 +; CHECK-NEXT: stfd f22, 352(r1) # 8-byte Folded Spill +; CHECK-NEXT: fmr f22, f4 +; CHECK-NEXT: stfd f23, 360(r1) # 8-byte Folded Spill +; CHECK-NEXT: fmr f23, f5 +; CHECK-NEXT: stvx v27, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 112 +; CHECK-NEXT: stfd f24, 368(r1) # 8-byte Folded Spill +; CHECK-NEXT: fmr f24, f6 +; CHECK-NEXT: stfd f25, 376(r1) # 8-byte Folded Spill +; CHECK-NEXT: fmr f25, f7 +; CHECK-NEXT: stfd f26, 384(r1) # 8-byte Folded Spill +; CHECK-NEXT: fmr f26, f8 +; CHECK-NEXT: stvx v28, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 128 +; CHECK-NEXT: stfd f27, 392(r1) # 8-byte Folded Spill +; CHECK-NEXT: fmr f27, f9 +; CHECK-NEXT: stfd f28, 400(r1) # 8-byte Folded Spill +; CHECK-NEXT: fmr f28, f10 +; CHECK-NEXT: stfd f29, 408(r1) # 8-byte Folded Spill +; CHECK-NEXT: fmr f29, f11 +; CHECK-NEXT: stvx v29, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 144 +; CHECK-NEXT: stfd f30, 416(r1) # 8-byte Folded Spill +; CHECK-NEXT: fmr f30, f12 +; CHECK-NEXT: stfd f31, 424(r1) # 8-byte Folded Spill +; CHECK-NEXT: fmr f31, f13 +; CHECK-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 160 +; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f20 +; CHECK-NEXT: mr r30, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f21 +; CHECK-NEXT: mr r29, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f22 +; CHECK-NEXT: mr r28, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f23 +; CHECK-NEXT: mr r27, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f24 +; CHECK-NEXT: mr r26, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f25 +; CHECK-NEXT: mr r25, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f26 +; CHECK-NEXT: mr r24, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f27 +; CHECK-NEXT: mr r23, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f28 +; CHECK-NEXT: mr r22, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f29 +; CHECK-NEXT: mr r21, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f30 +; CHECK-NEXT: mr r20, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f31 +; CHECK-NEXT: mr r19, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: lfs f1, 568(r1) +; 
CHECK-NEXT: mr r18, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: lfs f1, 576(r1) +; CHECK-NEXT: mr r17, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: lfs f1, 584(r1) +; CHECK-NEXT: mr r16, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r3, 48 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r16, 48 +; CHECK-NEXT: fmr f31, f1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r17, 48 +; CHECK-NEXT: fmr f30, f1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r18, 48 +; CHECK-NEXT: fmr f29, f1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r19, 48 +; CHECK-NEXT: fmr f28, f1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r20, 48 +; CHECK-NEXT: fmr f27, f1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r21, 48 +; CHECK-NEXT: fmr f26, f1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r22, 48 +; CHECK-NEXT: fmr f25, f1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r23, 48 +; CHECK-NEXT: fmr f24, f1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r24, 48 +; CHECK-NEXT: fmr f23, f1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r25, 48 +; CHECK-NEXT: fmr f22, f1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r26, 48 +; CHECK-NEXT: fmr f21, f1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r27, 48 +; CHECK-NEXT: fmr f20, f1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r28, 48 +; CHECK-NEXT: fmr f19, f1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r29, 48 +; CHECK-NEXT: fmr f18, f1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r30, 48 +; CHECK-NEXT: fmr f17, f1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f17 +; CHECK-NEXT: mtvsrd v31, r3 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f18 +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: xxmrghd v31, vs0, v31 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f19 +; CHECK-NEXT: mtvsrd v30, r3 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f20 +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: xxmrghd v30, vs0, v30 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f21 +; CHECK-NEXT: mtvsrd v29, r3 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f22 +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: xxmrghd v29, vs0, v29 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f23 +; CHECK-NEXT: mtvsrd v28, r3 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f24 +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: xxmrghd v28, vs0, v28 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f25 +; CHECK-NEXT: mtvsrd v27, r3 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f26 +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: xxmrghd v27, vs0, v27 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f27 +; CHECK-NEXT: mtvsrd v26, r3 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f28 +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: xxmrghd v26, vs0, v26 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: 
nop +; CHECK-NEXT: fmr f1, f29 +; CHECK-NEXT: mtvsrd v25, r3 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f30 +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: xxmrghd v25, vs0, v25 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f31 +; CHECK-NEXT: mtvsrd v24, r3 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: li r3, 160 +; CHECK-NEXT: vmr v2, v31 +; CHECK-NEXT: lfd f31, 424(r1) # 8-byte Folded Reload +; CHECK-NEXT: vmr v3, v30 +; CHECK-NEXT: vmr v4, v29 +; CHECK-NEXT: lfd f30, 416(r1) # 8-byte Folded Reload +; CHECK-NEXT: lfd f29, 408(r1) # 8-byte Folded Reload +; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 144 +; CHECK-NEXT: vmr v5, v28 +; CHECK-NEXT: vmr v6, v27 +; CHECK-NEXT: vmr v7, v26 +; CHECK-NEXT: vmr v8, v25 +; CHECK-NEXT: lfd f28, 400(r1) # 8-byte Folded Reload +; CHECK-NEXT: lfd f27, 392(r1) # 8-byte Folded Reload +; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 128 +; CHECK-NEXT: lfd f26, 384(r1) # 8-byte Folded Reload +; CHECK-NEXT: lfd f25, 376(r1) # 8-byte Folded Reload +; CHECK-NEXT: xxmrghd v9, vs0, v24 +; CHECK-NEXT: lfd f24, 368(r1) # 8-byte Folded Reload +; CHECK-NEXT: lfd f23, 360(r1) # 8-byte Folded Reload +; CHECK-NEXT: lfd f22, 352(r1) # 8-byte Folded Reload +; CHECK-NEXT: lvx v29, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 112 +; CHECK-NEXT: lfd f21, 344(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r30, 296(r1) # 8-byte Folded Reload +; CHECK-NEXT: lfd f20, 336(r1) # 8-byte Folded Reload +; CHECK-NEXT: lfd f19, 328(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r29, 288(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r28, 280(r1) # 8-byte Folded Reload +; CHECK-NEXT: lvx v28, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 96 +; CHECK-NEXT: lfd f18, 320(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r27, 272(r1) # 8-byte Folded Reload +; CHECK-NEXT: lfd f17, 312(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r26, 264(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r25, 256(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r24, 248(r1) # 8-byte Folded Reload +; CHECK-NEXT: lvx v27, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 80 +; CHECK-NEXT: ld r23, 240(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r22, 232(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r21, 224(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r20, 216(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r19, 208(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r18, 200(r1) # 8-byte Folded Reload +; CHECK-NEXT: lvx v26, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 64 +; CHECK-NEXT: ld r17, 192(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r16, 184(r1) # 8-byte Folded Reload +; CHECK-NEXT: lvx v25, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: lvx v24, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: addi r1, r1, 432 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +; +; FAST-LABEL: llrint_v16i64_v16f16: +; FAST: # %bb.0: +; FAST-NEXT: mflr r0 +; FAST-NEXT: .cfi_def_cfa_offset 160 +; FAST-NEXT: .cfi_offset lr, 16 +; FAST-NEXT: .cfi_offset f16, -128 +; FAST-NEXT: .cfi_offset f17, -120 +; FAST-NEXT: .cfi_offset f18, -112 +; FAST-NEXT: .cfi_offset f19, -104 +; FAST-NEXT: .cfi_offset f20, -96 +; FAST-NEXT: .cfi_offset f21, -88 +; FAST-NEXT: .cfi_offset f22, -80 +; FAST-NEXT: .cfi_offset f23, -72 +; FAST-NEXT: .cfi_offset f24, -64 +; FAST-NEXT: .cfi_offset f25, -56 +; FAST-NEXT: .cfi_offset f26, -48 +; FAST-NEXT: .cfi_offset f27, 
-40 +; FAST-NEXT: .cfi_offset f28, -32 +; FAST-NEXT: .cfi_offset f29, -24 +; FAST-NEXT: .cfi_offset f30, -16 +; FAST-NEXT: .cfi_offset f31, -8 +; FAST-NEXT: stfd f16, -128(r1) # 8-byte Folded Spill +; FAST-NEXT: stfd f17, -120(r1) # 8-byte Folded Spill +; FAST-NEXT: stfd f18, -112(r1) # 8-byte Folded Spill +; FAST-NEXT: stfd f19, -104(r1) # 8-byte Folded Spill +; FAST-NEXT: stfd f20, -96(r1) # 8-byte Folded Spill +; FAST-NEXT: stfd f21, -88(r1) # 8-byte Folded Spill +; FAST-NEXT: stfd f22, -80(r1) # 8-byte Folded Spill +; FAST-NEXT: stfd f23, -72(r1) # 8-byte Folded Spill +; FAST-NEXT: stfd f24, -64(r1) # 8-byte Folded Spill +; FAST-NEXT: stfd f25, -56(r1) # 8-byte Folded Spill +; FAST-NEXT: stfd f26, -48(r1) # 8-byte Folded Spill +; FAST-NEXT: stfd f27, -40(r1) # 8-byte Folded Spill +; FAST-NEXT: stfd f28, -32(r1) # 8-byte Folded Spill +; FAST-NEXT: stfd f29, -24(r1) # 8-byte Folded Spill +; FAST-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill +; FAST-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill +; FAST-NEXT: stdu r1, -160(r1) +; FAST-NEXT: fmr f26, f1 +; FAST-NEXT: lfs f1, 312(r1) +; FAST-NEXT: std r0, 176(r1) +; FAST-NEXT: fmr f28, f13 +; FAST-NEXT: fmr f27, f12 +; FAST-NEXT: fmr f24, f11 +; FAST-NEXT: fmr f21, f10 +; FAST-NEXT: fmr f19, f9 +; FAST-NEXT: fmr f18, f8 +; FAST-NEXT: fmr f17, f7 +; FAST-NEXT: fmr f16, f6 +; FAST-NEXT: fmr f20, f5 +; FAST-NEXT: fmr f22, f4 +; FAST-NEXT: fmr f23, f3 +; FAST-NEXT: fmr f25, f2 +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f31, f1 +; FAST-NEXT: lfs f1, 304(r1) +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f30, f1 +; FAST-NEXT: lfs f1, 296(r1) +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f29, f1 +; FAST-NEXT: fmr f1, f28 +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f28, f1 +; FAST-NEXT: fmr f1, f27 +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f27, f1 +; FAST-NEXT: fmr f1, f24 +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f24, f1 +; FAST-NEXT: fmr f1, f21 +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f21, f1 +; FAST-NEXT: fmr f1, f19 +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f19, f1 +; FAST-NEXT: fmr f1, f18 +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f18, f1 +; FAST-NEXT: fmr f1, f17 +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f17, f1 +; FAST-NEXT: fmr f1, f16 +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f16, f1 +; FAST-NEXT: fmr f1, f20 +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 
48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f20, f1 +; FAST-NEXT: fmr f1, f22 +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f22, f1 +; FAST-NEXT: fmr f1, f23 +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f23, f1 +; FAST-NEXT: fmr f1, f25 +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f25, f1 +; FAST-NEXT: fmr f1, f26 +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fctid f0, f25 +; FAST-NEXT: fctid f2, f23 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: fctid f3, f22 +; FAST-NEXT: fctid f4, f20 +; FAST-NEXT: fctid f5, f16 +; FAST-NEXT: fctid f6, f17 +; FAST-NEXT: fctid f7, f18 +; FAST-NEXT: fctid f8, f19 +; FAST-NEXT: fctid f9, f21 +; FAST-NEXT: fctid f10, f24 +; FAST-NEXT: fctid f1, f1 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: mffprd r3, f2 +; FAST-NEXT: mtfprd f2, r3 +; FAST-NEXT: mffprd r3, f3 +; FAST-NEXT: mtfprd f3, r3 +; FAST-NEXT: mffprd r3, f4 +; FAST-NEXT: mtfprd f4, r3 +; FAST-NEXT: mffprd r3, f5 +; FAST-NEXT: mtfprd f5, r3 +; FAST-NEXT: mffprd r3, f6 +; FAST-NEXT: mtfprd f6, r3 +; FAST-NEXT: mffprd r3, f7 +; FAST-NEXT: mtfprd f7, r3 +; FAST-NEXT: mffprd r3, f8 +; FAST-NEXT: mtfprd f8, r3 +; FAST-NEXT: mffprd r3, f9 +; FAST-NEXT: mtfprd f9, r3 +; FAST-NEXT: mffprd r3, f10 +; FAST-NEXT: mtfprd f10, r3 +; FAST-NEXT: mffprd r3, f1 +; FAST-NEXT: mtfprd f1, r3 +; FAST-NEXT: xxmrghd v3, vs3, vs2 +; FAST-NEXT: xxmrghd v4, vs5, vs4 +; FAST-NEXT: xxmrghd v5, vs7, vs6 +; FAST-NEXT: xxmrghd v6, vs9, vs8 +; FAST-NEXT: xxmrghd v2, vs0, vs1 +; FAST-NEXT: fctid f0, f27 +; FAST-NEXT: fctid f1, f29 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: xxmrghd v7, vs0, vs10 +; FAST-NEXT: fctid f0, f28 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: mffprd r3, f1 +; FAST-NEXT: mtfprd f1, r3 +; FAST-NEXT: xxmrghd v8, vs1, vs0 +; FAST-NEXT: fctid f0, f30 +; FAST-NEXT: fctid f1, f31 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: mffprd r3, f1 +; FAST-NEXT: mtfprd f1, r3 +; FAST-NEXT: xxmrghd v9, vs1, vs0 +; FAST-NEXT: addi r1, r1, 160 +; FAST-NEXT: ld r0, 16(r1) +; FAST-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload +; FAST-NEXT: lfd f30, -16(r1) # 8-byte Folded Reload +; FAST-NEXT: mtlr r0 +; FAST-NEXT: lfd f29, -24(r1) # 8-byte Folded Reload +; FAST-NEXT: lfd f28, -32(r1) # 8-byte Folded Reload +; FAST-NEXT: lfd f27, -40(r1) # 8-byte Folded Reload +; FAST-NEXT: lfd f26, -48(r1) # 8-byte Folded Reload +; FAST-NEXT: lfd f25, -56(r1) # 8-byte Folded Reload +; FAST-NEXT: lfd f24, -64(r1) # 8-byte Folded Reload +; FAST-NEXT: lfd f23, -72(r1) # 8-byte Folded Reload +; FAST-NEXT: lfd f22, -80(r1) # 8-byte Folded Reload +; FAST-NEXT: lfd f21, -88(r1) # 8-byte Folded Reload +; FAST-NEXT: lfd f20, -96(r1) # 8-byte Folded Reload +; FAST-NEXT: lfd f19, -104(r1) # 8-byte Folded Reload +; FAST-NEXT: lfd f18, -112(r1) # 8-byte Folded Reload +; FAST-NEXT: lfd f17, -120(r1) # 8-byte Folded Reload +; FAST-NEXT: lfd f16, -128(r1) # 8-byte Folded Reload +; FAST-NEXT: blr + %a = call <16 x i64> @llvm.llrint.v16i64.v16f16(<16 x half> %x) + ret <16 x i64> %a +} +declare <16 x i64> @llvm.llrint.v16i64.v16f16(<16 x half>) + +define <32 x i64> 
@llrint_v32i64_v32f16(<32 x half> %x) { +; BE-LABEL: llrint_v32i64_v32f16: +; BE: # %bb.0: +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -864(r1) +; BE-NEXT: std r0, 880(r1) +; BE-NEXT: .cfi_def_cfa_offset 864 +; BE-NEXT: .cfi_offset lr, 16 +; BE-NEXT: .cfi_offset r14, -288 +; BE-NEXT: .cfi_offset r15, -280 +; BE-NEXT: .cfi_offset r16, -272 +; BE-NEXT: .cfi_offset r17, -264 +; BE-NEXT: .cfi_offset r18, -256 +; BE-NEXT: .cfi_offset r19, -248 +; BE-NEXT: .cfi_offset r20, -240 +; BE-NEXT: .cfi_offset r21, -232 +; BE-NEXT: .cfi_offset r22, -224 +; BE-NEXT: .cfi_offset r23, -216 +; BE-NEXT: .cfi_offset r24, -208 +; BE-NEXT: .cfi_offset r25, -200 +; BE-NEXT: .cfi_offset r26, -192 +; BE-NEXT: .cfi_offset r27, -184 +; BE-NEXT: .cfi_offset r28, -176 +; BE-NEXT: .cfi_offset r29, -168 +; BE-NEXT: .cfi_offset r30, -160 +; BE-NEXT: .cfi_offset r31, -152 +; BE-NEXT: .cfi_offset f14, -144 +; BE-NEXT: .cfi_offset f15, -136 +; BE-NEXT: .cfi_offset f16, -128 +; BE-NEXT: .cfi_offset f17, -120 +; BE-NEXT: .cfi_offset f18, -112 +; BE-NEXT: .cfi_offset f19, -104 +; BE-NEXT: .cfi_offset f20, -96 +; BE-NEXT: .cfi_offset f21, -88 +; BE-NEXT: .cfi_offset f22, -80 +; BE-NEXT: .cfi_offset f23, -72 +; BE-NEXT: .cfi_offset f24, -64 +; BE-NEXT: .cfi_offset f25, -56 +; BE-NEXT: .cfi_offset f26, -48 +; BE-NEXT: .cfi_offset f27, -40 +; BE-NEXT: .cfi_offset f28, -32 +; BE-NEXT: .cfi_offset f29, -24 +; BE-NEXT: .cfi_offset f30, -16 +; BE-NEXT: .cfi_offset f31, -8 +; BE-NEXT: stfd f20, 768(r1) # 8-byte Folded Spill +; BE-NEXT: fmr f20, f1 +; BE-NEXT: fmr f1, f2 +; BE-NEXT: std r14, 576(r1) # 8-byte Folded Spill +; BE-NEXT: std r15, 584(r1) # 8-byte Folded Spill +; BE-NEXT: std r16, 592(r1) # 8-byte Folded Spill +; BE-NEXT: std r17, 600(r1) # 8-byte Folded Spill +; BE-NEXT: std r18, 608(r1) # 8-byte Folded Spill +; BE-NEXT: std r19, 616(r1) # 8-byte Folded Spill +; BE-NEXT: std r20, 624(r1) # 8-byte Folded Spill +; BE-NEXT: std r21, 632(r1) # 8-byte Folded Spill +; BE-NEXT: std r22, 640(r1) # 8-byte Folded Spill +; BE-NEXT: std r23, 648(r1) # 8-byte Folded Spill +; BE-NEXT: std r24, 656(r1) # 8-byte Folded Spill +; BE-NEXT: std r25, 664(r1) # 8-byte Folded Spill +; BE-NEXT: std r26, 672(r1) # 8-byte Folded Spill +; BE-NEXT: std r27, 680(r1) # 8-byte Folded Spill +; BE-NEXT: std r28, 688(r1) # 8-byte Folded Spill +; BE-NEXT: std r29, 696(r1) # 8-byte Folded Spill +; BE-NEXT: std r30, 704(r1) # 8-byte Folded Spill +; BE-NEXT: std r31, 712(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f14, 720(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f15, 728(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f16, 736(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f17, 744(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f18, 752(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f19, 760(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f21, 776(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f22, 784(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f23, 792(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f24, 800(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f25, 808(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f26, 816(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f27, 824(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f28, 832(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f29, 840(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f30, 848(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f31, 856(r1) # 8-byte Folded Spill +; BE-NEXT: fmr f31, f13 +; BE-NEXT: fmr f29, f12 +; BE-NEXT: fmr f30, f11 +; BE-NEXT: fmr f28, f10 +; BE-NEXT: fmr f27, f9 +; BE-NEXT: fmr f26, f8 +; BE-NEXT: fmr f25, f7 +; BE-NEXT: fmr f24, f6 +; 
BE-NEXT: fmr f23, f5 +; BE-NEXT: fmr f22, f4 +; BE-NEXT: fmr f21, f3 +; BE-NEXT: mr r30, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: fmr f1, f20 +; BE-NEXT: std r3, 304(r1) # 8-byte Folded Spill +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: fmr f1, f22 +; BE-NEXT: std r3, 296(r1) # 8-byte Folded Spill +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: fmr f1, f21 +; BE-NEXT: std r3, 280(r1) # 8-byte Folded Spill +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: fmr f1, f24 +; BE-NEXT: std r3, 264(r1) # 8-byte Folded Spill +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: fmr f1, f23 +; BE-NEXT: std r3, 248(r1) # 8-byte Folded Spill +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: fmr f1, f26 +; BE-NEXT: std r3, 232(r1) # 8-byte Folded Spill +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: fmr f1, f25 +; BE-NEXT: std r3, 216(r1) # 8-byte Folded Spill +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: fmr f1, f28 +; BE-NEXT: std r3, 200(r1) # 8-byte Folded Spill +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: fmr f1, f27 +; BE-NEXT: std r3, 184(r1) # 8-byte Folded Spill +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: fmr f1, f29 +; BE-NEXT: std r3, 168(r1) # 8-byte Folded Spill +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: fmr f1, f30 +; BE-NEXT: std r3, 152(r1) # 8-byte Folded Spill +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: lfs f1, 1028(r1) +; BE-NEXT: std r3, 136(r1) # 8-byte Folded Spill +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: fmr f1, f31 +; BE-NEXT: std r3, 120(r1) # 8-byte Folded Spill +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: lfs f1, 1044(r1) +; BE-NEXT: std r3, 112(r1) # 8-byte Folded Spill +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: lfs f1, 1036(r1) +; BE-NEXT: mr r15, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: lfs f1, 1060(r1) +; BE-NEXT: mr r14, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: lfs f1, 1052(r1) +; BE-NEXT: mr r31, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: lfs f1, 1076(r1) +; BE-NEXT: mr r29, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: lfs f1, 1068(r1) +; BE-NEXT: mr r28, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: lfs f1, 1092(r1) +; BE-NEXT: mr r27, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: lfs f1, 1084(r1) +; BE-NEXT: mr r26, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: lfs f1, 1108(r1) +; BE-NEXT: mr r25, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: lfs f1, 1100(r1) +; BE-NEXT: mr r24, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: lfs f1, 1124(r1) +; BE-NEXT: mr r23, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: lfs f1, 1116(r1) +; BE-NEXT: mr r22, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: lfs f1, 1140(r1) +; BE-NEXT: mr r21, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: lfs f1, 1132(r1) +; BE-NEXT: mr r20, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: lfs f1, 1156(r1) +; BE-NEXT: mr r19, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: lfs f1, 1148(r1) +; BE-NEXT: mr r18, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: lfs f1, 1172(r1) +; BE-NEXT: mr r17, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: lfs f1, 1164(r1) +; BE-NEXT: mr r16, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r3, 48 +; BE-NEXT: bl __gnu_h2f_ieee 
+; BE-NEXT: nop +; BE-NEXT: clrldi r3, r16, 48 +; BE-NEXT: stfs f1, 316(r1) # 4-byte Folded Spill +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r17, 48 +; BE-NEXT: stfs f1, 312(r1) # 4-byte Folded Spill +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r18, 48 +; BE-NEXT: stfs f1, 292(r1) # 4-byte Folded Spill +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r19, 48 +; BE-NEXT: stfs f1, 276(r1) # 4-byte Folded Spill +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r20, 48 +; BE-NEXT: stfs f1, 260(r1) # 4-byte Folded Spill +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r21, 48 +; BE-NEXT: stfs f1, 244(r1) # 4-byte Folded Spill +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r22, 48 +; BE-NEXT: stfs f1, 228(r1) # 4-byte Folded Spill +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r23, 48 +; BE-NEXT: stfs f1, 212(r1) # 4-byte Folded Spill +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r24, 48 +; BE-NEXT: stfs f1, 196(r1) # 4-byte Folded Spill +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r25, 48 +; BE-NEXT: stfs f1, 180(r1) # 4-byte Folded Spill +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r26, 48 +; BE-NEXT: stfs f1, 164(r1) # 4-byte Folded Spill +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r27, 48 +; BE-NEXT: stfs f1, 148(r1) # 4-byte Folded Spill +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r28, 48 +; BE-NEXT: stfs f1, 132(r1) # 4-byte Folded Spill +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r29, 48 +; BE-NEXT: fmr f18, f1 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r31, 48 +; BE-NEXT: fmr f17, f1 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r14, 48 +; BE-NEXT: fmr f16, f1 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r15, 48 +; BE-NEXT: fmr f15, f1 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: ld r3, 112(r1) # 8-byte Folded Reload +; BE-NEXT: fmr f14, f1 +; BE-NEXT: clrldi r3, r3, 48 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: ld r3, 120(r1) # 8-byte Folded Reload +; BE-NEXT: fmr f31, f1 +; BE-NEXT: clrldi r3, r3, 48 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: ld r3, 136(r1) # 8-byte Folded Reload +; BE-NEXT: fmr f30, f1 +; BE-NEXT: clrldi r3, r3, 48 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: ld r3, 152(r1) # 8-byte Folded Reload +; BE-NEXT: fmr f29, f1 +; BE-NEXT: clrldi r3, r3, 48 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: ld r3, 168(r1) # 8-byte Folded Reload +; BE-NEXT: fmr f28, f1 +; BE-NEXT: clrldi r3, r3, 48 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: ld r3, 184(r1) # 8-byte Folded Reload +; BE-NEXT: fmr f27, f1 +; BE-NEXT: clrldi r3, r3, 48 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: ld r3, 200(r1) # 8-byte Folded Reload +; BE-NEXT: fmr f26, f1 +; BE-NEXT: clrldi r3, r3, 48 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: ld r3, 216(r1) # 8-byte Folded Reload +; BE-NEXT: fmr f25, f1 +; BE-NEXT: clrldi r3, r3, 48 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: ld r3, 232(r1) # 8-byte Folded Reload +; BE-NEXT: fmr f24, f1 +; BE-NEXT: clrldi r3, r3, 48 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: ld r3, 248(r1) # 8-byte Folded Reload +; BE-NEXT: fmr f23, f1 +; BE-NEXT: clrldi r3, r3, 48 +; BE-NEXT: bl 
__gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: ld r3, 264(r1) # 8-byte Folded Reload +; BE-NEXT: fmr f22, f1 +; BE-NEXT: clrldi r3, r3, 48 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: ld r3, 280(r1) # 8-byte Folded Reload +; BE-NEXT: fmr f21, f1 +; BE-NEXT: clrldi r3, r3, 48 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: ld r3, 296(r1) # 8-byte Folded Reload +; BE-NEXT: fmr f20, f1 +; BE-NEXT: clrldi r3, r3, 48 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: ld r3, 304(r1) # 8-byte Folded Reload +; BE-NEXT: fmr f19, f1 +; BE-NEXT: clrldi r3, r3, 48 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f19 +; BE-NEXT: std r3, 328(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f20 +; BE-NEXT: std r3, 320(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f21 +; BE-NEXT: std r3, 344(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f22 +; BE-NEXT: std r3, 336(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f23 +; BE-NEXT: std r3, 360(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f24 +; BE-NEXT: std r3, 352(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f25 +; BE-NEXT: std r3, 376(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f26 +; BE-NEXT: std r3, 368(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f27 +; BE-NEXT: std r3, 392(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f28 +; BE-NEXT: std r3, 384(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f29 +; BE-NEXT: std r3, 408(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f30 +; BE-NEXT: std r3, 400(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f31 +; BE-NEXT: std r3, 424(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f14 +; BE-NEXT: std r3, 416(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f15 +; BE-NEXT: std r3, 440(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f16 +; BE-NEXT: std r3, 432(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f17 +; BE-NEXT: std r3, 456(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f18 +; BE-NEXT: std r3, 448(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: lfs f1, 132(r1) # 4-byte Folded Reload +; BE-NEXT: std r3, 472(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: lfs f1, 148(r1) # 4-byte Folded Reload +; BE-NEXT: std r3, 464(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: lfs f1, 164(r1) # 4-byte Folded Reload +; BE-NEXT: std r3, 488(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: lfs f1, 180(r1) # 4-byte Folded Reload +; BE-NEXT: std r3, 480(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: lfs f1, 196(r1) # 4-byte Folded Reload +; BE-NEXT: std r3, 504(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: lfs f1, 212(r1) # 4-byte Folded Reload +; BE-NEXT: std r3, 496(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: lfs f1, 228(r1) # 4-byte Folded Reload +; BE-NEXT: std r3, 520(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: lfs f1, 244(r1) # 4-byte Folded Reload +; BE-NEXT: std r3, 512(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: lfs f1, 260(r1) # 4-byte Folded Reload +; BE-NEXT: std r3, 536(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: lfs f1, 276(r1) # 4-byte Folded Reload +; BE-NEXT: std r3, 528(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: lfs f1, 292(r1) # 4-byte Folded Reload +; BE-NEXT: std 
r3, 552(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: lfs f1, 312(r1) # 4-byte Folded Reload +; BE-NEXT: std r3, 544(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: lfs f1, 316(r1) # 4-byte Folded Reload +; BE-NEXT: std r3, 568(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: std r3, 560(r1) +; BE-NEXT: addi r3, r1, 320 +; BE-NEXT: lxvd2x vs0, 0, r3 +; BE-NEXT: addi r3, r1, 336 +; BE-NEXT: lxvd2x vs1, 0, r3 +; BE-NEXT: addi r3, r1, 352 +; BE-NEXT: lxvd2x vs2, 0, r3 +; BE-NEXT: addi r3, r1, 368 +; BE-NEXT: lxvd2x vs3, 0, r3 +; BE-NEXT: addi r3, r1, 384 +; BE-NEXT: lxvd2x vs4, 0, r3 +; BE-NEXT: addi r3, r1, 400 +; BE-NEXT: lxvd2x vs5, 0, r3 +; BE-NEXT: addi r3, r1, 416 +; BE-NEXT: lxvd2x vs6, 0, r3 +; BE-NEXT: addi r3, r1, 432 +; BE-NEXT: lxvd2x vs7, 0, r3 +; BE-NEXT: addi r3, r1, 448 +; BE-NEXT: lxvd2x vs8, 0, r3 +; BE-NEXT: addi r3, r1, 464 +; BE-NEXT: lxvd2x vs9, 0, r3 +; BE-NEXT: addi r3, r1, 480 +; BE-NEXT: lxvd2x vs10, 0, r3 +; BE-NEXT: addi r3, r1, 496 +; BE-NEXT: lxvd2x vs11, 0, r3 +; BE-NEXT: addi r3, r1, 512 +; BE-NEXT: lxvd2x vs12, 0, r3 +; BE-NEXT: addi r3, r1, 528 +; BE-NEXT: lxvd2x vs13, 0, r3 +; BE-NEXT: addi r3, r1, 544 +; BE-NEXT: lxvd2x v2, 0, r3 +; BE-NEXT: addi r3, r1, 560 +; BE-NEXT: lxvd2x v3, 0, r3 +; BE-NEXT: li r3, 240 +; BE-NEXT: stxvd2x v3, r30, r3 +; BE-NEXT: li r3, 224 +; BE-NEXT: stxvd2x v2, r30, r3 +; BE-NEXT: li r3, 208 +; BE-NEXT: stxvd2x vs13, r30, r3 +; BE-NEXT: li r3, 192 +; BE-NEXT: stxvd2x vs12, r30, r3 +; BE-NEXT: li r3, 176 +; BE-NEXT: stxvd2x vs11, r30, r3 +; BE-NEXT: li r3, 160 +; BE-NEXT: stxvd2x vs10, r30, r3 +; BE-NEXT: li r3, 144 +; BE-NEXT: stxvd2x vs9, r30, r3 +; BE-NEXT: li r3, 128 +; BE-NEXT: stxvd2x vs8, r30, r3 +; BE-NEXT: li r3, 112 +; BE-NEXT: stxvd2x vs7, r30, r3 +; BE-NEXT: li r3, 96 +; BE-NEXT: stxvd2x vs6, r30, r3 +; BE-NEXT: li r3, 80 +; BE-NEXT: stxvd2x vs5, r30, r3 +; BE-NEXT: li r3, 64 +; BE-NEXT: stxvd2x vs4, r30, r3 +; BE-NEXT: li r3, 48 +; BE-NEXT: stxvd2x vs3, r30, r3 +; BE-NEXT: li r3, 32 +; BE-NEXT: stxvd2x vs2, r30, r3 +; BE-NEXT: li r3, 16 +; BE-NEXT: stxvd2x vs1, r30, r3 +; BE-NEXT: stxvd2x vs0, 0, r30 +; BE-NEXT: lfd f31, 856(r1) # 8-byte Folded Reload +; BE-NEXT: lfd f30, 848(r1) # 8-byte Folded Reload +; BE-NEXT: lfd f29, 840(r1) # 8-byte Folded Reload +; BE-NEXT: lfd f28, 832(r1) # 8-byte Folded Reload +; BE-NEXT: lfd f27, 824(r1) # 8-byte Folded Reload +; BE-NEXT: lfd f26, 816(r1) # 8-byte Folded Reload +; BE-NEXT: lfd f25, 808(r1) # 8-byte Folded Reload +; BE-NEXT: lfd f24, 800(r1) # 8-byte Folded Reload +; BE-NEXT: lfd f23, 792(r1) # 8-byte Folded Reload +; BE-NEXT: lfd f22, 784(r1) # 8-byte Folded Reload +; BE-NEXT: lfd f21, 776(r1) # 8-byte Folded Reload +; BE-NEXT: lfd f20, 768(r1) # 8-byte Folded Reload +; BE-NEXT: lfd f19, 760(r1) # 8-byte Folded Reload +; BE-NEXT: lfd f18, 752(r1) # 8-byte Folded Reload +; BE-NEXT: lfd f17, 744(r1) # 8-byte Folded Reload +; BE-NEXT: lfd f16, 736(r1) # 8-byte Folded Reload +; BE-NEXT: lfd f15, 728(r1) # 8-byte Folded Reload +; BE-NEXT: lfd f14, 720(r1) # 8-byte Folded Reload +; BE-NEXT: ld r31, 712(r1) # 8-byte Folded Reload +; BE-NEXT: ld r30, 704(r1) # 8-byte Folded Reload +; BE-NEXT: ld r29, 696(r1) # 8-byte Folded Reload +; BE-NEXT: ld r28, 688(r1) # 8-byte Folded Reload +; BE-NEXT: ld r27, 680(r1) # 8-byte Folded Reload +; BE-NEXT: ld r26, 672(r1) # 8-byte Folded Reload +; BE-NEXT: ld r25, 664(r1) # 8-byte Folded Reload +; BE-NEXT: ld r24, 656(r1) # 8-byte Folded Reload +; BE-NEXT: ld r23, 648(r1) # 8-byte Folded Reload +; BE-NEXT: ld r22, 640(r1) # 
8-byte Folded Reload +; BE-NEXT: ld r21, 632(r1) # 8-byte Folded Reload +; BE-NEXT: ld r20, 624(r1) # 8-byte Folded Reload +; BE-NEXT: ld r19, 616(r1) # 8-byte Folded Reload +; BE-NEXT: ld r18, 608(r1) # 8-byte Folded Reload +; BE-NEXT: ld r17, 600(r1) # 8-byte Folded Reload +; BE-NEXT: ld r16, 592(r1) # 8-byte Folded Reload +; BE-NEXT: ld r15, 584(r1) # 8-byte Folded Reload +; BE-NEXT: ld r14, 576(r1) # 8-byte Folded Reload +; BE-NEXT: addi r1, r1, 864 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr +; +; CHECK-LABEL: llrint_v32i64_v32f16: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -688(r1) +; CHECK-NEXT: std r0, 704(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 688 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: .cfi_offset r14, -288 +; CHECK-NEXT: .cfi_offset r15, -280 +; CHECK-NEXT: .cfi_offset r16, -272 +; CHECK-NEXT: .cfi_offset r17, -264 +; CHECK-NEXT: .cfi_offset r18, -256 +; CHECK-NEXT: .cfi_offset r19, -248 +; CHECK-NEXT: .cfi_offset r20, -240 +; CHECK-NEXT: .cfi_offset r21, -232 +; CHECK-NEXT: .cfi_offset r22, -224 +; CHECK-NEXT: .cfi_offset r23, -216 +; CHECK-NEXT: .cfi_offset r24, -208 +; CHECK-NEXT: .cfi_offset r25, -200 +; CHECK-NEXT: .cfi_offset r26, -192 +; CHECK-NEXT: .cfi_offset r27, -184 +; CHECK-NEXT: .cfi_offset r28, -176 +; CHECK-NEXT: .cfi_offset r29, -168 +; CHECK-NEXT: .cfi_offset r30, -160 +; CHECK-NEXT: .cfi_offset r31, -152 +; CHECK-NEXT: .cfi_offset f14, -144 +; CHECK-NEXT: .cfi_offset f15, -136 +; CHECK-NEXT: .cfi_offset f16, -128 +; CHECK-NEXT: .cfi_offset f17, -120 +; CHECK-NEXT: .cfi_offset f18, -112 +; CHECK-NEXT: .cfi_offset f19, -104 +; CHECK-NEXT: .cfi_offset f20, -96 +; CHECK-NEXT: .cfi_offset f21, -88 +; CHECK-NEXT: .cfi_offset f22, -80 +; CHECK-NEXT: .cfi_offset f23, -72 +; CHECK-NEXT: .cfi_offset f24, -64 +; CHECK-NEXT: .cfi_offset f25, -56 +; CHECK-NEXT: .cfi_offset f26, -48 +; CHECK-NEXT: .cfi_offset f27, -40 +; CHECK-NEXT: .cfi_offset f28, -32 +; CHECK-NEXT: .cfi_offset f29, -24 +; CHECK-NEXT: .cfi_offset f30, -16 +; CHECK-NEXT: .cfi_offset f31, -8 +; CHECK-NEXT: .cfi_offset v20, -480 +; CHECK-NEXT: .cfi_offset v21, -464 +; CHECK-NEXT: .cfi_offset v22, -448 +; CHECK-NEXT: .cfi_offset v23, -432 +; CHECK-NEXT: .cfi_offset v24, -416 +; CHECK-NEXT: .cfi_offset v25, -400 +; CHECK-NEXT: .cfi_offset v26, -384 +; CHECK-NEXT: .cfi_offset v27, -368 +; CHECK-NEXT: .cfi_offset v28, -352 +; CHECK-NEXT: .cfi_offset v29, -336 +; CHECK-NEXT: .cfi_offset v30, -320 +; CHECK-NEXT: .cfi_offset v31, -304 +; CHECK-NEXT: li r4, 208 +; CHECK-NEXT: std r14, 400(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r15, 408(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r16, 416(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r17, 424(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r18, 432(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r19, 440(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r20, 448(r1) # 8-byte Folded Spill +; CHECK-NEXT: stvx v20, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: li r4, 224 +; CHECK-NEXT: std r21, 456(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r22, 464(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r23, 472(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r24, 480(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r25, 488(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r26, 496(r1) # 8-byte Folded Spill +; CHECK-NEXT: stvx v21, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: li r4, 240 +; CHECK-NEXT: std r27, 504(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r28, 512(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r29, 520(r1) # 8-byte Folded Spill 
+; CHECK-NEXT: std r30, 528(r1) # 8-byte Folded Spill +; CHECK-NEXT: mr r30, r3 +; CHECK-NEXT: std r31, 536(r1) # 8-byte Folded Spill +; CHECK-NEXT: stvx v22, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: li r4, 256 +; CHECK-NEXT: stfd f14, 544(r1) # 8-byte Folded Spill +; CHECK-NEXT: stfd f15, 552(r1) # 8-byte Folded Spill +; CHECK-NEXT: stfd f16, 560(r1) # 8-byte Folded Spill +; CHECK-NEXT: stfd f17, 568(r1) # 8-byte Folded Spill +; CHECK-NEXT: stfd f18, 576(r1) # 8-byte Folded Spill +; CHECK-NEXT: stfd f19, 584(r1) # 8-byte Folded Spill +; CHECK-NEXT: stvx v23, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: li r4, 272 +; CHECK-NEXT: stfd f20, 592(r1) # 8-byte Folded Spill +; CHECK-NEXT: fmr f20, f2 +; CHECK-NEXT: stfd f21, 600(r1) # 8-byte Folded Spill +; CHECK-NEXT: fmr f21, f3 +; CHECK-NEXT: stfd f22, 608(r1) # 8-byte Folded Spill +; CHECK-NEXT: fmr f22, f4 +; CHECK-NEXT: stvx v24, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: li r4, 288 +; CHECK-NEXT: stfd f23, 616(r1) # 8-byte Folded Spill +; CHECK-NEXT: fmr f23, f5 +; CHECK-NEXT: stfd f24, 624(r1) # 8-byte Folded Spill +; CHECK-NEXT: fmr f24, f6 +; CHECK-NEXT: stfd f25, 632(r1) # 8-byte Folded Spill +; CHECK-NEXT: fmr f25, f7 +; CHECK-NEXT: stvx v25, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: li r4, 304 +; CHECK-NEXT: stfd f26, 640(r1) # 8-byte Folded Spill +; CHECK-NEXT: fmr f26, f8 +; CHECK-NEXT: stfd f27, 648(r1) # 8-byte Folded Spill +; CHECK-NEXT: fmr f27, f9 +; CHECK-NEXT: stfd f28, 656(r1) # 8-byte Folded Spill +; CHECK-NEXT: fmr f28, f10 +; CHECK-NEXT: stvx v26, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: li r4, 320 +; CHECK-NEXT: stfd f29, 664(r1) # 8-byte Folded Spill +; CHECK-NEXT: fmr f29, f11 +; CHECK-NEXT: stfd f30, 672(r1) # 8-byte Folded Spill +; CHECK-NEXT: fmr f30, f12 +; CHECK-NEXT: stfd f31, 680(r1) # 8-byte Folded Spill +; CHECK-NEXT: fmr f31, f13 +; CHECK-NEXT: stvx v27, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: li r4, 336 +; CHECK-NEXT: stvx v28, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: li r4, 352 +; CHECK-NEXT: stvx v29, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: li r4, 368 +; CHECK-NEXT: stvx v30, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: li r4, 384 +; CHECK-NEXT: stvx v31, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f20 +; CHECK-NEXT: std r3, 176(r1) # 8-byte Folded Spill +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f21 +; CHECK-NEXT: std r3, 160(r1) # 8-byte Folded Spill +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f22 +; CHECK-NEXT: std r3, 144(r1) # 8-byte Folded Spill +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f23 +; CHECK-NEXT: std r3, 128(r1) # 8-byte Folded Spill +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f24 +; CHECK-NEXT: std r3, 120(r1) # 8-byte Folded Spill +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f25 +; CHECK-NEXT: std r3, 112(r1) # 8-byte Folded Spill +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f26 +; CHECK-NEXT: std r3, 104(r1) # 8-byte Folded Spill +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f27 +; CHECK-NEXT: std r3, 96(r1) # 8-byte Folded Spill +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f28 +; CHECK-NEXT: std r3, 88(r1) # 8-byte Folded Spill +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f29 +; CHECK-NEXT: std r3, 80(r1) # 8-byte Folded Spill 
+; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f30 +; CHECK-NEXT: std r3, 72(r1) # 8-byte Folded Spill +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f31 +; CHECK-NEXT: std r3, 64(r1) # 8-byte Folded Spill +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: lfs f1, 832(r1) +; CHECK-NEXT: std r3, 56(r1) # 8-byte Folded Spill +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: lfs f1, 840(r1) +; CHECK-NEXT: std r3, 48(r1) # 8-byte Folded Spill +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: lfs f1, 848(r1) +; CHECK-NEXT: mr r15, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: lfs f1, 856(r1) +; CHECK-NEXT: mr r14, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: lfs f1, 864(r1) +; CHECK-NEXT: mr r31, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: lfs f1, 872(r1) +; CHECK-NEXT: mr r29, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: lfs f1, 880(r1) +; CHECK-NEXT: mr r28, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: lfs f1, 888(r1) +; CHECK-NEXT: mr r27, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: lfs f1, 896(r1) +; CHECK-NEXT: mr r26, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: lfs f1, 904(r1) +; CHECK-NEXT: mr r25, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: lfs f1, 912(r1) +; CHECK-NEXT: mr r24, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: lfs f1, 920(r1) +; CHECK-NEXT: mr r23, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: lfs f1, 928(r1) +; CHECK-NEXT: mr r22, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: lfs f1, 936(r1) +; CHECK-NEXT: mr r21, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: lfs f1, 944(r1) +; CHECK-NEXT: mr r20, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: lfs f1, 952(r1) +; CHECK-NEXT: mr r19, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: lfs f1, 960(r1) +; CHECK-NEXT: mr r18, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: lfs f1, 968(r1) +; CHECK-NEXT: mr r17, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: lfs f1, 976(r1) +; CHECK-NEXT: mr r16, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r3, 48 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: li r3, 204 +; CHECK-NEXT: stxsspx f1, r1, r3 # 4-byte Folded Spill +; CHECK-NEXT: clrldi r3, r16, 48 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: li r3, 200 +; CHECK-NEXT: stxsspx f1, r1, r3 # 4-byte Folded Spill +; CHECK-NEXT: clrldi r3, r17, 48 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r18, 48 +; CHECK-NEXT: fmr f29, f1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r19, 48 +; CHECK-NEXT: fmr f28, f1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r20, 48 +; CHECK-NEXT: fmr f27, f1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r21, 48 +; CHECK-NEXT: fmr f26, f1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r22, 48 +; CHECK-NEXT: fmr f25, f1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r23, 48 +; CHECK-NEXT: fmr f24, f1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: 
clrldi r3, r24, 48 +; CHECK-NEXT: fmr f23, f1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r25, 48 +; CHECK-NEXT: fmr f22, f1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r26, 48 +; CHECK-NEXT: fmr f21, f1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r27, 48 +; CHECK-NEXT: fmr f20, f1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r28, 48 +; CHECK-NEXT: fmr f19, f1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r29, 48 +; CHECK-NEXT: fmr f18, f1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r31, 48 +; CHECK-NEXT: fmr f17, f1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r14, 48 +; CHECK-NEXT: fmr f16, f1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r15, 48 +; CHECK-NEXT: fmr f15, f1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: ld r3, 48(r1) # 8-byte Folded Reload +; CHECK-NEXT: fmr f14, f1 +; CHECK-NEXT: clrldi r3, r3, 48 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: ld r3, 56(r1) # 8-byte Folded Reload +; CHECK-NEXT: fmr f30, f1 +; CHECK-NEXT: clrldi r3, r3, 48 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: ld r3, 64(r1) # 8-byte Folded Reload +; CHECK-NEXT: xxlor v30, f1, f1 +; CHECK-NEXT: clrldi r3, r3, 48 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: ld r3, 72(r1) # 8-byte Folded Reload +; CHECK-NEXT: xxlor v29, f1, f1 +; CHECK-NEXT: clrldi r3, r3, 48 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: ld r3, 80(r1) # 8-byte Folded Reload +; CHECK-NEXT: xxlor v28, f1, f1 +; CHECK-NEXT: clrldi r3, r3, 48 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: ld r3, 88(r1) # 8-byte Folded Reload +; CHECK-NEXT: xxlor v27, f1, f1 +; CHECK-NEXT: clrldi r3, r3, 48 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: ld r3, 96(r1) # 8-byte Folded Reload +; CHECK-NEXT: xxlor v26, f1, f1 +; CHECK-NEXT: clrldi r3, r3, 48 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: ld r3, 104(r1) # 8-byte Folded Reload +; CHECK-NEXT: xxlor v25, f1, f1 +; CHECK-NEXT: clrldi r3, r3, 48 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: ld r3, 112(r1) # 8-byte Folded Reload +; CHECK-NEXT: xxlor v24, f1, f1 +; CHECK-NEXT: clrldi r3, r3, 48 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: ld r3, 120(r1) # 8-byte Folded Reload +; CHECK-NEXT: xxlor v23, f1, f1 +; CHECK-NEXT: clrldi r3, r3, 48 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: ld r3, 128(r1) # 8-byte Folded Reload +; CHECK-NEXT: xxlor v22, f1, f1 +; CHECK-NEXT: clrldi r3, r3, 48 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: ld r3, 144(r1) # 8-byte Folded Reload +; CHECK-NEXT: xxlor v21, f1, f1 +; CHECK-NEXT: clrldi r3, r3, 48 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: ld r3, 160(r1) # 8-byte Folded Reload +; CHECK-NEXT: xxlor v20, f1, f1 +; CHECK-NEXT: clrldi r3, r3, 48 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: ld r3, 176(r1) # 8-byte Folded Reload +; CHECK-NEXT: fmr f31, f1 +; CHECK-NEXT: clrldi r3, r3, 48 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f31 +; CHECK-NEXT: mtvsrd v31, r3 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; 
CHECK-NEXT: li r3, 176 +; CHECK-NEXT: xxlor f1, v20, v20 +; CHECK-NEXT: xxmrghd vs0, vs0, v31 +; CHECK-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: xxlor f1, v21, v21 +; CHECK-NEXT: mtvsrd v31, r3 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: li r3, 160 +; CHECK-NEXT: xxlor f1, v22, v22 +; CHECK-NEXT: xxmrghd vs0, vs0, v31 +; CHECK-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: xxlor f1, v23, v23 +; CHECK-NEXT: mtvsrd v31, r3 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: li r3, 144 +; CHECK-NEXT: xxlor f1, v24, v24 +; CHECK-NEXT: xxmrghd vs0, vs0, v31 +; CHECK-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: xxlor f1, v25, v25 +; CHECK-NEXT: mtvsrd v31, r3 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: li r3, 128 +; CHECK-NEXT: xxlor f1, v26, v26 +; CHECK-NEXT: xxmrghd vs0, vs0, v31 +; CHECK-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: xxlor f1, v27, v27 +; CHECK-NEXT: mtvsrd v31, r3 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: xxlor f1, v28, v28 +; CHECK-NEXT: xxmrghd v27, vs0, v31 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: xxlor f1, v29, v29 +; CHECK-NEXT: mtvsrd v31, r3 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: xxlor f1, v30, v30 +; CHECK-NEXT: xxmrghd v29, vs0, v31 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f30 +; CHECK-NEXT: mtvsrd v31, r3 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f14 +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: xxmrghd v31, vs0, v31 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f15 +; CHECK-NEXT: mtvsrd v30, r3 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f16 +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: xxmrghd v30, vs0, v30 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f17 +; CHECK-NEXT: mtvsrd v28, r3 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f18 +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: xxmrghd v28, vs0, v28 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f19 +; CHECK-NEXT: mtvsrd v26, r3 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f20 +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: xxmrghd v26, vs0, v26 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f21 +; CHECK-NEXT: mtvsrd v24, r3 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f22 +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: xxmrghd v24, vs0, v24 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f23 +; CHECK-NEXT: mtvsrd v22, r3 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f24 +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: xxmrghd v22, vs0, v22 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f25 +; CHECK-NEXT: mtvsrd v20, r3 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f26 +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: xxmrghd v20, vs0, v20 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f27 +; CHECK-NEXT: mtvsrd v21, r3 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f28 +; CHECK-NEXT: mtfprd f0, 
r3 +; CHECK-NEXT: xxmrghd v21, vs0, v21 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f29 +; CHECK-NEXT: mtvsrd v23, r3 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: li r3, 200 +; CHECK-NEXT: lxsspx f1, r1, r3 # 4-byte Folded Reload +; CHECK-NEXT: xxmrghd v23, vs0, v23 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: mtvsrd v25, r3 +; CHECK-NEXT: li r3, 204 +; CHECK-NEXT: lxsspx f1, r1, r3 # 4-byte Folded Reload +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: li r3, 240 +; CHECK-NEXT: xxswapd vs1, v23 +; CHECK-NEXT: li r4, 128 +; CHECK-NEXT: xxswapd vs2, v21 +; CHECK-NEXT: xxswapd vs3, v31 +; CHECK-NEXT: xxmrghd v2, vs0, v25 +; CHECK-NEXT: xxswapd vs0, v2 +; CHECK-NEXT: stxvd2x vs0, r30, r3 +; CHECK-NEXT: li r3, 224 +; CHECK-NEXT: stxvd2x vs1, r30, r3 +; CHECK-NEXT: li r3, 208 +; CHECK-NEXT: stxvd2x vs2, r30, r3 +; CHECK-NEXT: li r3, 192 +; CHECK-NEXT: xxswapd vs0, v20 +; CHECK-NEXT: stxvd2x vs0, r30, r3 +; CHECK-NEXT: li r3, 176 +; CHECK-NEXT: xxswapd vs1, v22 +; CHECK-NEXT: stxvd2x vs1, r30, r3 +; CHECK-NEXT: li r3, 160 +; CHECK-NEXT: xxswapd vs2, v28 +; CHECK-NEXT: xxswapd vs0, v24 +; CHECK-NEXT: stxvd2x vs0, r30, r3 +; CHECK-NEXT: li r3, 144 +; CHECK-NEXT: xxswapd vs1, v26 +; CHECK-NEXT: stxvd2x vs1, r30, r3 +; CHECK-NEXT: li r3, 128 +; CHECK-NEXT: stxvd2x vs2, r30, r3 +; CHECK-NEXT: li r3, 112 +; CHECK-NEXT: xxswapd vs0, v30 +; CHECK-NEXT: stxvd2x vs0, r30, r3 +; CHECK-NEXT: li r3, 96 +; CHECK-NEXT: stxvd2x vs3, r30, r3 +; CHECK-NEXT: li r3, 80 +; CHECK-NEXT: lxvd2x vs2, r1, r4 # 16-byte Folded Reload +; CHECK-NEXT: li r4, 144 +; CHECK-NEXT: xxswapd vs1, v29 +; CHECK-NEXT: stxvd2x vs1, r30, r3 +; CHECK-NEXT: li r3, 64 +; CHECK-NEXT: lxvd2x vs1, r1, r4 # 16-byte Folded Reload +; CHECK-NEXT: li r4, 160 +; CHECK-NEXT: lxvd2x vs3, r1, r4 # 16-byte Folded Reload +; CHECK-NEXT: li r4, 176 +; CHECK-NEXT: lxvd2x vs4, r1, r4 # 16-byte Folded Reload +; CHECK-NEXT: xxswapd vs0, v27 +; CHECK-NEXT: stxvd2x vs0, r30, r3 +; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: xxswapd vs2, vs2 +; CHECK-NEXT: stxvd2x vs2, r30, r3 +; CHECK-NEXT: li r3, 32 +; CHECK-NEXT: xxswapd vs1, vs1 +; CHECK-NEXT: stxvd2x vs1, r30, r3 +; CHECK-NEXT: li r3, 16 +; CHECK-NEXT: xxswapd vs3, vs3 +; CHECK-NEXT: stxvd2x vs3, r30, r3 +; CHECK-NEXT: li r3, 384 +; CHECK-NEXT: xxswapd vs4, vs4 +; CHECK-NEXT: stxvd2x vs4, 0, r30 +; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 368 +; CHECK-NEXT: lfd f31, 680(r1) # 8-byte Folded Reload +; CHECK-NEXT: lfd f30, 672(r1) # 8-byte Folded Reload +; CHECK-NEXT: lfd f29, 664(r1) # 8-byte Folded Reload +; CHECK-NEXT: lfd f28, 656(r1) # 8-byte Folded Reload +; CHECK-NEXT: lfd f27, 648(r1) # 8-byte Folded Reload +; CHECK-NEXT: lfd f26, 640(r1) # 8-byte Folded Reload +; CHECK-NEXT: lfd f25, 632(r1) # 8-byte Folded Reload +; CHECK-NEXT: lfd f24, 624(r1) # 8-byte Folded Reload +; CHECK-NEXT: lfd f23, 616(r1) # 8-byte Folded Reload +; CHECK-NEXT: lfd f22, 608(r1) # 8-byte Folded Reload +; CHECK-NEXT: lfd f21, 600(r1) # 8-byte Folded Reload +; CHECK-NEXT: lfd f20, 592(r1) # 8-byte Folded Reload +; CHECK-NEXT: lfd f19, 584(r1) # 8-byte Folded Reload +; CHECK-NEXT: lfd f18, 576(r1) # 8-byte Folded Reload +; CHECK-NEXT: lfd f17, 568(r1) # 8-byte Folded Reload +; CHECK-NEXT: lfd f16, 560(r1) # 8-byte Folded Reload +; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 352 +; CHECK-NEXT: lfd f15, 552(r1) # 8-byte Folded Reload +; CHECK-NEXT: lfd f14, 
544(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r31, 536(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r30, 528(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r29, 520(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r28, 512(r1) # 8-byte Folded Reload +; CHECK-NEXT: lvx v29, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 336 +; CHECK-NEXT: ld r27, 504(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r26, 496(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r25, 488(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r24, 480(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r23, 472(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r22, 464(r1) # 8-byte Folded Reload +; CHECK-NEXT: lvx v28, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 320 +; CHECK-NEXT: ld r21, 456(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r20, 448(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r19, 440(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r18, 432(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r17, 424(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r16, 416(r1) # 8-byte Folded Reload +; CHECK-NEXT: lvx v27, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 304 +; CHECK-NEXT: ld r15, 408(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r14, 400(r1) # 8-byte Folded Reload +; CHECK-NEXT: lvx v26, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 288 +; CHECK-NEXT: lvx v25, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 272 +; CHECK-NEXT: lvx v24, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 256 +; CHECK-NEXT: lvx v23, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 240 +; CHECK-NEXT: lvx v22, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 224 +; CHECK-NEXT: lvx v21, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 208 +; CHECK-NEXT: lvx v20, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: addi r1, r1, 688 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +; +; FAST-LABEL: llrint_v32i64_v32f16: +; FAST: # %bb.0: +; FAST-NEXT: mflr r0 +; FAST-NEXT: stdu r1, -480(r1) +; FAST-NEXT: std r0, 496(r1) +; FAST-NEXT: .cfi_def_cfa_offset 480 +; FAST-NEXT: .cfi_offset lr, 16 +; FAST-NEXT: .cfi_offset r30, -160 +; FAST-NEXT: .cfi_offset f14, -144 +; FAST-NEXT: .cfi_offset f15, -136 +; FAST-NEXT: .cfi_offset f16, -128 +; FAST-NEXT: .cfi_offset f17, -120 +; FAST-NEXT: .cfi_offset f18, -112 +; FAST-NEXT: .cfi_offset f19, -104 +; FAST-NEXT: .cfi_offset f20, -96 +; FAST-NEXT: .cfi_offset f21, -88 +; FAST-NEXT: .cfi_offset f22, -80 +; FAST-NEXT: .cfi_offset f23, -72 +; FAST-NEXT: .cfi_offset f24, -64 +; FAST-NEXT: .cfi_offset f25, -56 +; FAST-NEXT: .cfi_offset f26, -48 +; FAST-NEXT: .cfi_offset f27, -40 +; FAST-NEXT: .cfi_offset f28, -32 +; FAST-NEXT: .cfi_offset f29, -24 +; FAST-NEXT: .cfi_offset f30, -16 +; FAST-NEXT: .cfi_offset f31, -8 +; FAST-NEXT: .cfi_offset v20, -352 +; FAST-NEXT: .cfi_offset v21, -336 +; FAST-NEXT: .cfi_offset v22, -320 +; FAST-NEXT: .cfi_offset v23, -304 +; FAST-NEXT: .cfi_offset v24, -288 +; FAST-NEXT: .cfi_offset v25, -272 +; FAST-NEXT: .cfi_offset v26, -256 +; FAST-NEXT: .cfi_offset v27, -240 +; FAST-NEXT: .cfi_offset v28, -224 +; FAST-NEXT: .cfi_offset v29, -208 +; FAST-NEXT: .cfi_offset v30, -192 +; FAST-NEXT: .cfi_offset v31, -176 +; FAST-NEXT: li r4, 128 +; FAST-NEXT: std r30, 320(r1) # 8-byte Folded Spill +; FAST-NEXT: stfd f14, 336(r1) # 8-byte Folded Spill +; FAST-NEXT: fmr f14, f5 +; FAST-NEXT: stfd f15, 344(r1) # 8-byte Folded Spill +; FAST-NEXT: stfd f16, 352(r1) # 8-byte Folded Spill +; FAST-NEXT: fmr f16, f4 +; FAST-NEXT: mr r30, r3 +; FAST-NEXT: stvx v20, r1, r4 
# 16-byte Folded Spill +; FAST-NEXT: li r4, 144 +; FAST-NEXT: stfd f17, 360(r1) # 8-byte Folded Spill +; FAST-NEXT: stfd f18, 368(r1) # 8-byte Folded Spill +; FAST-NEXT: stfd f19, 376(r1) # 8-byte Folded Spill +; FAST-NEXT: stfd f20, 384(r1) # 8-byte Folded Spill +; FAST-NEXT: stfd f21, 392(r1) # 8-byte Folded Spill +; FAST-NEXT: stfd f22, 400(r1) # 8-byte Folded Spill +; FAST-NEXT: stvx v21, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: li r4, 160 +; FAST-NEXT: stfd f23, 408(r1) # 8-byte Folded Spill +; FAST-NEXT: stfd f24, 416(r1) # 8-byte Folded Spill +; FAST-NEXT: stfd f25, 424(r1) # 8-byte Folded Spill +; FAST-NEXT: stfd f26, 432(r1) # 8-byte Folded Spill +; FAST-NEXT: stfd f27, 440(r1) # 8-byte Folded Spill +; FAST-NEXT: stfd f28, 448(r1) # 8-byte Folded Spill +; FAST-NEXT: stvx v22, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: li r4, 176 +; FAST-NEXT: xxlor v22, f3, f3 +; FAST-NEXT: stfd f29, 456(r1) # 8-byte Folded Spill +; FAST-NEXT: fmr f29, f9 +; FAST-NEXT: stfd f30, 464(r1) # 8-byte Folded Spill +; FAST-NEXT: stfd f31, 472(r1) # 8-byte Folded Spill +; FAST-NEXT: stvx v23, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: li r4, 192 +; FAST-NEXT: xxlor v23, f2, f2 +; FAST-NEXT: stvx v24, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: li r4, 208 +; FAST-NEXT: stvx v25, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: li r4, 224 +; FAST-NEXT: xxlor v25, f13, f13 +; FAST-NEXT: stvx v26, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: li r4, 240 +; FAST-NEXT: xxlor v26, f12, f12 +; FAST-NEXT: stvx v27, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: li r4, 256 +; FAST-NEXT: xxlor v27, f11, f11 +; FAST-NEXT: stvx v28, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: li r4, 272 +; FAST-NEXT: xxlor v28, f10, f10 +; FAST-NEXT: stvx v29, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: li r4, 288 +; FAST-NEXT: xxlor v29, f8, f8 +; FAST-NEXT: stvx v30, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: li r4, 304 +; FAST-NEXT: xxlor v30, f7, f7 +; FAST-NEXT: stvx v31, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: li r4, 44 +; FAST-NEXT: xxlor v31, f6, f6 +; FAST-NEXT: stxsspx f1, r1, r4 # 4-byte Folded Spill +; FAST-NEXT: lfs f1, 768(r1) +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: li r3, 120 +; FAST-NEXT: stxsdx f1, r1, r3 # 8-byte Folded Spill +; FAST-NEXT: lfs f1, 760(r1) +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: li r3, 112 +; FAST-NEXT: stxsdx f1, r1, r3 # 8-byte Folded Spill +; FAST-NEXT: lfs f1, 752(r1) +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: li r3, 104 +; FAST-NEXT: stxsdx f1, r1, r3 # 8-byte Folded Spill +; FAST-NEXT: lfs f1, 744(r1) +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: li r3, 96 +; FAST-NEXT: stxsdx f1, r1, r3 # 8-byte Folded Spill +; FAST-NEXT: lfs f1, 736(r1) +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: li r3, 88 +; FAST-NEXT: stxsdx f1, r1, r3 # 8-byte Folded Spill +; FAST-NEXT: lfs f1, 728(r1) +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: li r3, 80 +; FAST-NEXT: stxsdx f1, r1, r3 # 8-byte Folded Spill +; 
FAST-NEXT: lfs f1, 720(r1) +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: li r3, 72 +; FAST-NEXT: stxsdx f1, r1, r3 # 8-byte Folded Spill +; FAST-NEXT: lfs f1, 712(r1) +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: li r3, 64 +; FAST-NEXT: stxsdx f1, r1, r3 # 8-byte Folded Spill +; FAST-NEXT: lfs f1, 704(r1) +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: li r3, 56 +; FAST-NEXT: stxsdx f1, r1, r3 # 8-byte Folded Spill +; FAST-NEXT: lfs f1, 696(r1) +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: li r3, 48 +; FAST-NEXT: stxsdx f1, r1, r3 # 8-byte Folded Spill +; FAST-NEXT: lfs f1, 688(r1) +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: xxlor v21, f1, f1 +; FAST-NEXT: lfs f1, 680(r1) +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: xxlor v20, f1, f1 +; FAST-NEXT: lfs f1, 672(r1) +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: xxlor v24, f1, f1 +; FAST-NEXT: lfs f1, 664(r1) +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f31, f1 +; FAST-NEXT: lfs f1, 656(r1) +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f30, f1 +; FAST-NEXT: lfs f1, 648(r1) +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f28, f1 +; FAST-NEXT: lfs f1, 640(r1) +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f27, f1 +; FAST-NEXT: lfs f1, 632(r1) +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f26, f1 +; FAST-NEXT: lfs f1, 624(r1) +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f25, f1 +; FAST-NEXT: xxlor f1, v25, v25 +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f24, f1 +; FAST-NEXT: xxlor f1, v26, v26 +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f23, f1 +; FAST-NEXT: xxlor f1, v27, v27 +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f22, f1 +; FAST-NEXT: xxlor f1, v28, v28 +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f21, f1 +; FAST-NEXT: fmr f1, f29 +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: 
bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f20, f1 +; FAST-NEXT: xxlor f1, v29, v29 +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f19, f1 +; FAST-NEXT: xxlor f1, v30, v30 +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f18, f1 +; FAST-NEXT: xxlor f1, v31, v31 +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f29, f1 +; FAST-NEXT: fmr f1, f14 +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f14, f1 +; FAST-NEXT: fmr f1, f16 +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f16, f1 +; FAST-NEXT: xxlor f1, v22, v22 +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f17, f1 +; FAST-NEXT: xxlor f1, v23, v23 +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: li r3, 44 +; FAST-NEXT: fmr f15, f1 +; FAST-NEXT: lxsspx f1, r1, r3 # 4-byte Folded Reload +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fctid f3, f15 +; FAST-NEXT: fctid f4, f17 +; FAST-NEXT: mffprd r3, f3 +; FAST-NEXT: fctid f5, f16 +; FAST-NEXT: fctid f6, f14 +; FAST-NEXT: fctid f7, f18 +; FAST-NEXT: fctid f8, f19 +; FAST-NEXT: fctid f13, f1 +; FAST-NEXT: fctid f9, f20 +; FAST-NEXT: fctid f10, f22 +; FAST-NEXT: fctid f11, f24 +; FAST-NEXT: fctid f12, f25 +; FAST-NEXT: fctid f2, f23 +; FAST-NEXT: fctid f0, f21 +; FAST-NEXT: mtvsrd v2, r3 +; FAST-NEXT: mffprd r3, f4 +; FAST-NEXT: mtvsrd v3, r3 +; FAST-NEXT: mffprd r3, f5 +; FAST-NEXT: mtfprd f5, r3 +; FAST-NEXT: mffprd r3, f6 +; FAST-NEXT: mtfprd f1, r3 +; FAST-NEXT: mffprd r3, f7 +; FAST-NEXT: mtfprd f6, r3 +; FAST-NEXT: mffprd r3, f8 +; FAST-NEXT: mtfprd f7, r3 +; FAST-NEXT: mffprd r3, f9 +; FAST-NEXT: mtfprd f3, r3 +; FAST-NEXT: mffprd r3, f10 +; FAST-NEXT: mtfprd f4, r3 +; FAST-NEXT: mffprd r3, f11 +; FAST-NEXT: fctid f11, f31 +; FAST-NEXT: lfd f31, 56(r1) # 8-byte Folded Reload +; FAST-NEXT: mtfprd f8, r3 +; FAST-NEXT: mffprd r3, f12 +; FAST-NEXT: xxlor f12, v24, v24 +; FAST-NEXT: fctid f31, f31 +; FAST-NEXT: fctid f12, f12 +; FAST-NEXT: mtfprd f9, r3 +; FAST-NEXT: mffprd r3, f13 +; FAST-NEXT: lfd f13, 48(r1) # 8-byte Folded Reload +; FAST-NEXT: mtfprd f10, r3 +; FAST-NEXT: fctid f13, f13 +; FAST-NEXT: xxmrghd v3, vs5, v3 +; FAST-NEXT: fctid f5, f26 +; FAST-NEXT: mffprd r3, f5 +; FAST-NEXT: mtfprd f5, r3 +; FAST-NEXT: xxmrghd v4, vs7, vs6 +; FAST-NEXT: fctid f6, f27 +; FAST-NEXT: fctid f7, f28 +; FAST-NEXT: mffprd r3, f6 +; FAST-NEXT: lfd f28, 96(r1) # 8-byte Folded Reload +; FAST-NEXT: fctid f28, f28 +; FAST-NEXT: mtfprd f6, r3 +; FAST-NEXT: mffprd r3, f7 +; FAST-NEXT: mtfprd f7, r3 +; FAST-NEXT: xxmrghd v2, v2, vs10 +; FAST-NEXT: fctid f10, f30 +; FAST-NEXT: mffprd r3, f10 +; FAST-NEXT: lfd f30, 80(r1) # 8-byte Folded Reload +; FAST-NEXT: fctid f30, f30 +; FAST-NEXT: mtfprd f10, r3 +; FAST-NEXT: mffprd r3, f11 +; FAST-NEXT: mtfprd f11, r3 +; FAST-NEXT: mffprd r3, f12 +; FAST-NEXT: mtfprd 
f12, r3 +; FAST-NEXT: xxmrghd v5, vs12, vs11 +; FAST-NEXT: xxlor f11, v20, v20 +; FAST-NEXT: xxlor f12, v21, v21 +; FAST-NEXT: fctid f11, f11 +; FAST-NEXT: fctid f12, f12 +; FAST-NEXT: mffprd r3, f11 +; FAST-NEXT: mtfprd f11, r3 +; FAST-NEXT: mffprd r3, f12 +; FAST-NEXT: mtfprd f12, r3 +; FAST-NEXT: mffprd r3, f13 +; FAST-NEXT: mtfprd f13, r3 +; FAST-NEXT: mffprd r3, f31 +; FAST-NEXT: lfd f31, 64(r1) # 8-byte Folded Reload +; FAST-NEXT: fctid f31, f31 +; FAST-NEXT: mtvsrd v0, r3 +; FAST-NEXT: mffprd r3, f31 +; FAST-NEXT: lfd f31, 72(r1) # 8-byte Folded Reload +; FAST-NEXT: mtvsrd v1, r3 +; FAST-NEXT: mffprd r3, f30 +; FAST-NEXT: lfd f30, 88(r1) # 8-byte Folded Reload +; FAST-NEXT: fctid f31, f31 +; FAST-NEXT: mtvsrd v6, r3 +; FAST-NEXT: mffprd r3, f28 +; FAST-NEXT: lfd f28, 104(r1) # 8-byte Folded Reload +; FAST-NEXT: fctid f30, f30 +; FAST-NEXT: fctid f28, f28 +; FAST-NEXT: mtvsrd v7, r3 +; FAST-NEXT: mffprd r3, f28 +; FAST-NEXT: lfd f28, 112(r1) # 8-byte Folded Reload +; FAST-NEXT: fctid f28, f28 +; FAST-NEXT: mtvsrd v8, r3 +; FAST-NEXT: mffprd r3, f28 +; FAST-NEXT: lfd f28, 120(r1) # 8-byte Folded Reload +; FAST-NEXT: fctid f28, f28 +; FAST-NEXT: xxmrghd v10, vs12, vs11 +; FAST-NEXT: xxmrghd v0, v0, vs13 +; FAST-NEXT: xxswapd vs12, v0 +; FAST-NEXT: xxmrghd v0, vs9, vs8 +; FAST-NEXT: xxmrghd v7, v8, v7 +; FAST-NEXT: mtvsrd v8, r3 +; FAST-NEXT: mffprd r3, f28 +; FAST-NEXT: mtvsrd v9, r3 +; FAST-NEXT: mffprd r3, f30 +; FAST-NEXT: xxswapd v7, v7 +; FAST-NEXT: xxmrghd v8, v9, v8 +; FAST-NEXT: mtvsrd v9, r3 +; FAST-NEXT: mffprd r3, f31 +; FAST-NEXT: xxswapd v8, v8 +; FAST-NEXT: xxmrghd v6, v9, v6 +; FAST-NEXT: mtvsrd v9, r3 +; FAST-NEXT: li r3, 240 +; FAST-NEXT: stxvd2x v8, r30, r3 +; FAST-NEXT: li r3, 224 +; FAST-NEXT: stxvd2x v7, r30, r3 +; FAST-NEXT: li r3, 208 +; FAST-NEXT: xxswapd vs11, v6 +; FAST-NEXT: xxmrghd v6, vs10, vs7 +; FAST-NEXT: stxvd2x vs11, r30, r3 +; FAST-NEXT: li r3, 192 +; FAST-NEXT: xxmrghd v1, v9, v1 +; FAST-NEXT: xxswapd vs11, v1 +; FAST-NEXT: xxmrghd v1, vs6, vs5 +; FAST-NEXT: xxswapd vs5, v10 +; FAST-NEXT: xxswapd vs6, v5 +; FAST-NEXT: stxvd2x vs11, r30, r3 +; FAST-NEXT: li r3, 176 +; FAST-NEXT: stxvd2x vs12, r30, r3 +; FAST-NEXT: li r3, 160 +; FAST-NEXT: stxvd2x vs5, r30, r3 +; FAST-NEXT: li r3, 144 +; FAST-NEXT: stxvd2x vs6, r30, r3 +; FAST-NEXT: mffprd r3, f2 +; FAST-NEXT: mtfprd f7, r3 +; FAST-NEXT: li r3, 128 +; FAST-NEXT: xxswapd vs5, v6 +; FAST-NEXT: stxvd2x vs5, r30, r3 +; FAST-NEXT: li r3, 112 +; FAST-NEXT: xxswapd vs2, v1 +; FAST-NEXT: xxswapd vs6, v0 +; FAST-NEXT: stxvd2x vs2, r30, r3 +; FAST-NEXT: li r3, 96 +; FAST-NEXT: fctid f2, f29 +; FAST-NEXT: stxvd2x vs6, r30, r3 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: mffprd r3, f2 +; FAST-NEXT: mtfprd f2, r3 +; FAST-NEXT: li r3, 80 +; FAST-NEXT: xxmrghd v5, vs7, vs4 +; FAST-NEXT: xxswapd vs4, v2 +; FAST-NEXT: xxmrghd v0, vs0, vs3 +; FAST-NEXT: xxswapd vs0, v5 +; FAST-NEXT: xxswapd vs3, v3 +; FAST-NEXT: stxvd2x vs0, r30, r3 +; FAST-NEXT: li r3, 64 +; FAST-NEXT: xxswapd vs0, v0 +; FAST-NEXT: stxvd2x vs0, r30, r3 +; FAST-NEXT: li r3, 48 +; FAST-NEXT: xxmrghd v5, vs2, vs1 +; FAST-NEXT: xxswapd vs1, v4 +; FAST-NEXT: stxvd2x vs1, r30, r3 +; FAST-NEXT: li r3, 32 +; FAST-NEXT: xxswapd vs2, v5 +; FAST-NEXT: stxvd2x vs2, r30, r3 +; FAST-NEXT: li r3, 16 +; FAST-NEXT: stxvd2x vs3, r30, r3 +; FAST-NEXT: li r3, 304 +; FAST-NEXT: stxvd2x vs4, 0, r30 +; FAST-NEXT: lfd f31, 472(r1) # 8-byte Folded Reload +; FAST-NEXT: lfd f30, 464(r1) # 8-byte Folded Reload +; FAST-NEXT: lfd f29, 456(r1) # 8-byte 
Folded Reload +; FAST-NEXT: lfd f28, 448(r1) # 8-byte Folded Reload +; FAST-NEXT: lfd f27, 440(r1) # 8-byte Folded Reload +; FAST-NEXT: lfd f26, 432(r1) # 8-byte Folded Reload +; FAST-NEXT: lfd f25, 424(r1) # 8-byte Folded Reload +; FAST-NEXT: lfd f24, 416(r1) # 8-byte Folded Reload +; FAST-NEXT: lfd f23, 408(r1) # 8-byte Folded Reload +; FAST-NEXT: lfd f22, 400(r1) # 8-byte Folded Reload +; FAST-NEXT: lfd f21, 392(r1) # 8-byte Folded Reload +; FAST-NEXT: lfd f20, 384(r1) # 8-byte Folded Reload +; FAST-NEXT: lfd f19, 376(r1) # 8-byte Folded Reload +; FAST-NEXT: lfd f18, 368(r1) # 8-byte Folded Reload +; FAST-NEXT: lfd f17, 360(r1) # 8-byte Folded Reload +; FAST-NEXT: lfd f16, 352(r1) # 8-byte Folded Reload +; FAST-NEXT: lfd f15, 344(r1) # 8-byte Folded Reload +; FAST-NEXT: lfd f14, 336(r1) # 8-byte Folded Reload +; FAST-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 288 +; FAST-NEXT: ld r30, 320(r1) # 8-byte Folded Reload +; FAST-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 272 +; FAST-NEXT: lvx v29, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 256 +; FAST-NEXT: lvx v28, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 240 +; FAST-NEXT: lvx v27, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 224 +; FAST-NEXT: lvx v26, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 208 +; FAST-NEXT: lvx v25, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 192 +; FAST-NEXT: lvx v24, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 176 +; FAST-NEXT: lvx v23, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 160 +; FAST-NEXT: lvx v22, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 144 +; FAST-NEXT: lvx v21, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 128 +; FAST-NEXT: lvx v20, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: addi r1, r1, 480 +; FAST-NEXT: ld r0, 16(r1) +; FAST-NEXT: mtlr r0 +; FAST-NEXT: blr + %a = call <32 x i64> @llvm.llrint.v32i64.v32f16(<32 x half> %x) + ret <32 x i64> %a +} +declare <32 x i64> @llvm.llrint.v32i64.v32f16(<32 x half>) + +define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) { +; BE-LABEL: llrint_v1i64_v1f32: +; BE: # %bb.0: +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -112(r1) +; BE-NEXT: std r0, 128(r1) +; BE-NEXT: .cfi_def_cfa_offset 112 +; BE-NEXT: .cfi_offset lr, 16 +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: addi r1, r1, 112 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr +; +; CHECK-LABEL: llrint_v1i64_v1f32: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -32(r1) +; CHECK-NEXT: std r0, 48(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: addi r1, r1, 32 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +; +; FAST-LABEL: llrint_v1i64_v1f32: +; FAST: # %bb.0: +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: blr + %a = call <1 x i64> @llvm.llrint.v1i64.v1f32(<1 x float> %x) + ret <1 x i64> %a +} +declare <1 x i64> @llvm.llrint.v1i64.v1f32(<1 x float>) + +define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) { +; BE-LABEL: llrint_v2i64_v2f32: +; BE: # %bb.0: +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -144(r1) +; BE-NEXT: std r0, 160(r1) +; BE-NEXT: .cfi_def_cfa_offset 144 +; BE-NEXT: .cfi_offset lr, 16 +; BE-NEXT: addi r3, r1, 112 +; BE-NEXT: stxvw4x v2, 0, r3 +; BE-NEXT: lfs f1, 116(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: lfs f1, 112(r1) +; BE-NEXT: std r3, 136(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: std 
r3, 128(r1) +; BE-NEXT: addi r3, r1, 128 +; BE-NEXT: lxvd2x v2, 0, r3 +; BE-NEXT: addi r1, r1, 144 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr +; +; CHECK-LABEL: llrint_v2i64_v2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -64(r1) +; CHECK-NEXT: std r0, 80(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: .cfi_offset v31, -16 +; CHECK-NEXT: xxsldwi vs0, v2, v2, 3 +; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: xscvspdpn f1, vs0 +; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: vmr v31, v2 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: xxswapd vs0, v31 +; CHECK-NEXT: mtvsrd v31, r3 +; CHECK-NEXT: xscvspdpn f1, vs0 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: xxmrghd v2, vs0, v31 +; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: addi r1, r1, 64 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +; +; FAST-LABEL: llrint_v2i64_v2f32: +; FAST: # %bb.0: +; FAST-NEXT: xxsldwi vs0, v2, v2, 3 +; FAST-NEXT: xxswapd vs1, v2 +; FAST-NEXT: xscvspdpn f0, vs0 +; FAST-NEXT: xscvspdpn f1, vs1 +; FAST-NEXT: fctid f0, f0 +; FAST-NEXT: fctid f1, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: mffprd r3, f1 +; FAST-NEXT: mtfprd f1, r3 +; FAST-NEXT: xxmrghd v2, vs1, vs0 +; FAST-NEXT: blr + %a = call <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float> %x) + ret <2 x i64> %a +} +declare <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float>) + +define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) { +; BE-LABEL: llrint_v4i64_v4f32: +; BE: # %bb.0: +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -160(r1) +; BE-NEXT: std r0, 176(r1) +; BE-NEXT: .cfi_def_cfa_offset 160 +; BE-NEXT: .cfi_offset lr, 16 +; BE-NEXT: addi r3, r1, 112 +; BE-NEXT: stxvw4x v2, 0, r3 +; BE-NEXT: lfs f1, 116(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: lfs f1, 112(r1) +; BE-NEXT: std r3, 136(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: lfs f1, 124(r1) +; BE-NEXT: std r3, 128(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: lfs f1, 120(r1) +; BE-NEXT: std r3, 152(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: std r3, 144(r1) +; BE-NEXT: addi r3, r1, 128 +; BE-NEXT: lxvd2x v2, 0, r3 +; BE-NEXT: addi r3, r1, 144 +; BE-NEXT: lxvd2x v3, 0, r3 +; BE-NEXT: addi r1, r1, 160 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr +; +; CHECK-LABEL: llrint_v4i64_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -80(r1) +; CHECK-NEXT: std r0, 96(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 80 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: .cfi_offset v30, -32 +; CHECK-NEXT: .cfi_offset v31, -16 +; CHECK-NEXT: xxsldwi vs0, v2, v2, 3 +; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: xscvspdpn f1, vs0 +; CHECK-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 64 +; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: vmr v31, v2 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: xxswapd vs0, v31 +; CHECK-NEXT: mtvsrd v30, r3 +; CHECK-NEXT: xscvspdpn f1, vs0 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: xscvspdpn f1, v31 +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: xxmrghd v30, vs0, v30 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: xxsldwi vs0, v31, v31, 1 +; CHECK-NEXT: mtvsrd v31, r3 +; CHECK-NEXT: xscvspdpn f1, vs0 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: li r3, 64 +; 
CHECK-NEXT: vmr v2, v30 +; CHECK-NEXT: xxmrghd v3, v31, vs0 +; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: addi r1, r1, 80 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +; +; FAST-LABEL: llrint_v4i64_v4f32: +; FAST: # %bb.0: +; FAST-NEXT: xxsldwi vs0, v2, v2, 3 +; FAST-NEXT: xxswapd vs1, v2 +; FAST-NEXT: xscvspdpn f0, vs0 +; FAST-NEXT: xxsldwi vs2, v2, v2, 1 +; FAST-NEXT: fctid f0, f0 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: xscvspdpn f0, vs1 +; FAST-NEXT: mtfprd f1, r3 +; FAST-NEXT: fctid f0, f0 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: xxmrghd v4, vs0, vs1 +; FAST-NEXT: xscvspdpn f0, v2 +; FAST-NEXT: vmr v2, v4 +; FAST-NEXT: fctid f0, f0 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: xscvspdpn f0, vs2 +; FAST-NEXT: mtfprd f1, r3 +; FAST-NEXT: fctid f0, f0 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: xxmrghd v3, vs1, vs0 +; FAST-NEXT: blr + %a = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> %x) + ret <4 x i64> %a +} +declare <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float>) + +define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) { +; BE-LABEL: llrint_v8i64_v8f32: +; BE: # %bb.0: +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -208(r1) +; BE-NEXT: std r0, 224(r1) +; BE-NEXT: .cfi_def_cfa_offset 208 +; BE-NEXT: .cfi_offset lr, 16 +; BE-NEXT: addi r3, r1, 112 +; BE-NEXT: stxvw4x v2, 0, r3 +; BE-NEXT: addi r3, r1, 128 +; BE-NEXT: stxvw4x v3, 0, r3 +; BE-NEXT: lfs f1, 116(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: lfs f1, 112(r1) +; BE-NEXT: std r3, 152(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: lfs f1, 124(r1) +; BE-NEXT: std r3, 144(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: lfs f1, 120(r1) +; BE-NEXT: std r3, 168(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: lfs f1, 132(r1) +; BE-NEXT: std r3, 160(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: lfs f1, 128(r1) +; BE-NEXT: std r3, 184(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: lfs f1, 140(r1) +; BE-NEXT: std r3, 176(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: lfs f1, 136(r1) +; BE-NEXT: std r3, 200(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: std r3, 192(r1) +; BE-NEXT: addi r3, r1, 144 +; BE-NEXT: lxvd2x v2, 0, r3 +; BE-NEXT: addi r3, r1, 160 +; BE-NEXT: lxvd2x v3, 0, r3 +; BE-NEXT: addi r3, r1, 176 +; BE-NEXT: lxvd2x v4, 0, r3 +; BE-NEXT: addi r3, r1, 192 +; BE-NEXT: lxvd2x v5, 0, r3 +; BE-NEXT: addi r1, r1, 208 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr +; +; CHECK-LABEL: llrint_v8i64_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -112(r1) +; CHECK-NEXT: std r0, 128(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 112 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: .cfi_offset v28, -64 +; CHECK-NEXT: .cfi_offset v29, -48 +; CHECK-NEXT: .cfi_offset v30, -32 +; CHECK-NEXT: .cfi_offset v31, -16 +; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: xxsldwi vs0, v2, v2, 3 +; CHECK-NEXT: stvx v28, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 64 +; CHECK-NEXT: xscvspdpn f1, vs0 +; CHECK-NEXT: stvx v29, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 80 +; CHECK-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 96 +; CHECK-NEXT: vmr v30, v2 +; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: vmr v31, v3 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: xxswapd vs0, v30 +; CHECK-NEXT: mtvsrd v29, r3 +; CHECK-NEXT: 
xscvspdpn f1, vs0 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: xscvspdpn f1, v30 +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: xxmrghd v29, vs0, v29 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: xxsldwi vs0, v30, v30, 1 +; CHECK-NEXT: mtvsrd v30, r3 +; CHECK-NEXT: xscvspdpn f1, vs0 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: xxmrghd v30, v30, vs0 +; CHECK-NEXT: xxsldwi vs0, v31, v31, 3 +; CHECK-NEXT: xscvspdpn f1, vs0 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: xxswapd vs0, v31 +; CHECK-NEXT: mtvsrd v28, r3 +; CHECK-NEXT: xscvspdpn f1, vs0 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: xscvspdpn f1, v31 +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: xxmrghd v28, vs0, v28 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: xxsldwi vs0, v31, v31, 1 +; CHECK-NEXT: mtvsrd v31, r3 +; CHECK-NEXT: xscvspdpn f1, vs0 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: li r3, 96 +; CHECK-NEXT: vmr v3, v30 +; CHECK-NEXT: vmr v2, v29 +; CHECK-NEXT: vmr v4, v28 +; CHECK-NEXT: xxmrghd v5, v31, vs0 +; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 80 +; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 64 +; CHECK-NEXT: lvx v29, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: lvx v28, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: addi r1, r1, 112 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +; +; FAST-LABEL: llrint_v8i64_v8f32: +; FAST: # %bb.0: +; FAST-NEXT: xxsldwi vs0, v2, v2, 3 +; FAST-NEXT: xxswapd vs1, v2 +; FAST-NEXT: xscvspdpn f0, vs0 +; FAST-NEXT: xxsldwi vs2, v2, v2, 1 +; FAST-NEXT: xxsldwi vs3, v3, v3, 3 +; FAST-NEXT: xxswapd vs4, v3 +; FAST-NEXT: xxsldwi vs5, v3, v3, 1 +; FAST-NEXT: fctid f0, f0 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: xscvspdpn f0, vs1 +; FAST-NEXT: mtfprd f1, r3 +; FAST-NEXT: fctid f0, f0 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: xxmrghd v0, vs0, vs1 +; FAST-NEXT: xscvspdpn f0, v2 +; FAST-NEXT: vmr v2, v0 +; FAST-NEXT: fctid f0, f0 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: xscvspdpn f0, vs2 +; FAST-NEXT: mtfprd f1, r3 +; FAST-NEXT: fctid f0, f0 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: xxmrghd v1, vs1, vs0 +; FAST-NEXT: xscvspdpn f0, vs3 +; FAST-NEXT: fctid f0, f0 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: xscvspdpn f0, vs4 +; FAST-NEXT: mtfprd f1, r3 +; FAST-NEXT: fctid f0, f0 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: xxmrghd v4, vs0, vs1 +; FAST-NEXT: xscvspdpn f0, v3 +; FAST-NEXT: vmr v3, v1 +; FAST-NEXT: fctid f0, f0 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: xscvspdpn f0, vs5 +; FAST-NEXT: mtfprd f1, r3 +; FAST-NEXT: fctid f0, f0 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: xxmrghd v5, vs1, vs0 +; FAST-NEXT: blr + %a = call <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float> %x) + ret <8 x i64> %a +} +declare <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float>) + +define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) { +; BE-LABEL: llrint_v16i64_v16f32: +; BE: # %bb.0: +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -304(r1) +; BE-NEXT: std r0, 320(r1) +; BE-NEXT: .cfi_def_cfa_offset 304 +; BE-NEXT: .cfi_offset lr, 16 +; BE-NEXT: addi r3, r1, 112 +; BE-NEXT: stxvw4x v2, 0, r3 +; BE-NEXT: addi r3, r1, 128 +; BE-NEXT: stxvw4x v3, 0, r3 +; BE-NEXT: addi r3, r1, 144 +; BE-NEXT: stxvw4x v4, 0, r3 +; BE-NEXT: addi r3, r1, 160 +; BE-NEXT: 
stxvw4x v5, 0, r3 +; BE-NEXT: lfs f1, 116(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: lfs f1, 112(r1) +; BE-NEXT: std r3, 184(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: lfs f1, 124(r1) +; BE-NEXT: std r3, 176(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: lfs f1, 120(r1) +; BE-NEXT: std r3, 200(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: lfs f1, 132(r1) +; BE-NEXT: std r3, 192(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: lfs f1, 128(r1) +; BE-NEXT: std r3, 216(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: lfs f1, 140(r1) +; BE-NEXT: std r3, 208(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: lfs f1, 136(r1) +; BE-NEXT: std r3, 232(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: lfs f1, 148(r1) +; BE-NEXT: std r3, 224(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: lfs f1, 144(r1) +; BE-NEXT: std r3, 248(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: lfs f1, 156(r1) +; BE-NEXT: std r3, 240(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: lfs f1, 152(r1) +; BE-NEXT: std r3, 264(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: lfs f1, 164(r1) +; BE-NEXT: std r3, 256(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: lfs f1, 160(r1) +; BE-NEXT: std r3, 280(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: lfs f1, 172(r1) +; BE-NEXT: std r3, 272(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: lfs f1, 168(r1) +; BE-NEXT: std r3, 296(r1) +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: std r3, 288(r1) +; BE-NEXT: addi r3, r1, 176 +; BE-NEXT: lxvd2x v2, 0, r3 +; BE-NEXT: addi r3, r1, 192 +; BE-NEXT: lxvd2x v3, 0, r3 +; BE-NEXT: addi r3, r1, 208 +; BE-NEXT: lxvd2x v4, 0, r3 +; BE-NEXT: addi r3, r1, 224 +; BE-NEXT: lxvd2x v5, 0, r3 +; BE-NEXT: addi r3, r1, 240 +; BE-NEXT: lxvd2x v6, 0, r3 +; BE-NEXT: addi r3, r1, 256 +; BE-NEXT: lxvd2x v7, 0, r3 +; BE-NEXT: addi r3, r1, 272 +; BE-NEXT: lxvd2x v8, 0, r3 +; BE-NEXT: addi r3, r1, 288 +; BE-NEXT: lxvd2x v9, 0, r3 +; BE-NEXT: addi r1, r1, 304 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr +; +; CHECK-LABEL: llrint_v16i64_v16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -176(r1) +; CHECK-NEXT: std r0, 192(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 176 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: .cfi_offset v24, -128 +; CHECK-NEXT: .cfi_offset v25, -112 +; CHECK-NEXT: .cfi_offset v26, -96 +; CHECK-NEXT: .cfi_offset v27, -80 +; CHECK-NEXT: .cfi_offset v28, -64 +; CHECK-NEXT: .cfi_offset v29, -48 +; CHECK-NEXT: .cfi_offset v30, -32 +; CHECK-NEXT: .cfi_offset v31, -16 +; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: xxsldwi vs0, v2, v2, 3 +; CHECK-NEXT: stvx v24, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 64 +; CHECK-NEXT: xscvspdpn f1, vs0 +; CHECK-NEXT: stvx v25, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 80 +; CHECK-NEXT: stvx v26, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 96 +; CHECK-NEXT: vmr v26, v3 +; CHECK-NEXT: stvx v27, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 112 +; CHECK-NEXT: stvx v28, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 128 +; CHECK-NEXT: vmr v28, v4 +; CHECK-NEXT: stvx v29, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 144 +; CHECK-NEXT: vmr v29, v2 +; CHECK-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 160 +; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: vmr v31, v5 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: xxswapd vs0, v29 +; CHECK-NEXT: mtvsrd v30, r3 +; CHECK-NEXT: 
xscvspdpn f1, vs0 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: xscvspdpn f1, v29 +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: xxmrghd v30, vs0, v30 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: xxsldwi vs0, v29, v29, 1 +; CHECK-NEXT: mtvsrd v29, r3 +; CHECK-NEXT: xscvspdpn f1, vs0 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: xxmrghd v29, v29, vs0 +; CHECK-NEXT: xxsldwi vs0, v26, v26, 3 +; CHECK-NEXT: xscvspdpn f1, vs0 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: xxswapd vs0, v26 +; CHECK-NEXT: mtvsrd v27, r3 +; CHECK-NEXT: xscvspdpn f1, vs0 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: xscvspdpn f1, v26 +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: xxmrghd v27, vs0, v27 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: xxsldwi vs0, v26, v26, 1 +; CHECK-NEXT: mtvsrd v26, r3 +; CHECK-NEXT: xscvspdpn f1, vs0 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: xxmrghd v26, v26, vs0 +; CHECK-NEXT: xxsldwi vs0, v28, v28, 3 +; CHECK-NEXT: xscvspdpn f1, vs0 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: xxswapd vs0, v28 +; CHECK-NEXT: mtvsrd v25, r3 +; CHECK-NEXT: xscvspdpn f1, vs0 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: xscvspdpn f1, v28 +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: xxmrghd v25, vs0, v25 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: xxsldwi vs0, v28, v28, 1 +; CHECK-NEXT: mtvsrd v28, r3 +; CHECK-NEXT: xscvspdpn f1, vs0 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: xxmrghd v28, v28, vs0 +; CHECK-NEXT: xxsldwi vs0, v31, v31, 3 +; CHECK-NEXT: xscvspdpn f1, vs0 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: xxswapd vs0, v31 +; CHECK-NEXT: mtvsrd v24, r3 +; CHECK-NEXT: xscvspdpn f1, vs0 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: xscvspdpn f1, v31 +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: xxmrghd v24, vs0, v24 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: xxsldwi vs0, v31, v31, 1 +; CHECK-NEXT: mtvsrd v31, r3 +; CHECK-NEXT: xscvspdpn f1, vs0 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: li r3, 160 +; CHECK-NEXT: vmr v2, v30 +; CHECK-NEXT: vmr v3, v29 +; CHECK-NEXT: vmr v7, v28 +; CHECK-NEXT: vmr v4, v27 +; CHECK-NEXT: vmr v5, v26 +; CHECK-NEXT: vmr v6, v25 +; CHECK-NEXT: vmr v8, v24 +; CHECK-NEXT: xxmrghd v9, v31, vs0 +; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 144 +; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 128 +; CHECK-NEXT: lvx v29, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 112 +; CHECK-NEXT: lvx v28, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 96 +; CHECK-NEXT: lvx v27, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 80 +; CHECK-NEXT: lvx v26, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 64 +; CHECK-NEXT: lvx v25, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: lvx v24, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: addi r1, r1, 176 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +; +; FAST-LABEL: llrint_v16i64_v16f32: +; FAST: # %bb.0: +; FAST-NEXT: xxsldwi vs0, v2, v2, 3 +; FAST-NEXT: xxswapd vs1, v2 +; FAST-NEXT: xscvspdpn f0, vs0 +; FAST-NEXT: xxsldwi vs2, v2, v2, 1 +; FAST-NEXT: xxsldwi vs3, v3, v3, 3 +; FAST-NEXT: xxswapd vs4, v3 +; FAST-NEXT: xxsldwi vs5, v3, v3, 1 +; FAST-NEXT: xxsldwi vs6, v4, v4, 3 +; FAST-NEXT: 
xxswapd vs7, v4 +; FAST-NEXT: xxsldwi vs8, v4, v4, 1 +; FAST-NEXT: xxsldwi vs9, v5, v5, 3 +; FAST-NEXT: xxswapd vs10, v5 +; FAST-NEXT: xxsldwi vs11, v5, v5, 1 +; FAST-NEXT: fctid f0, f0 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: xscvspdpn f0, vs1 +; FAST-NEXT: mtfprd f1, r3 +; FAST-NEXT: fctid f0, f0 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: xxmrghd v0, vs0, vs1 +; FAST-NEXT: xscvspdpn f0, v2 +; FAST-NEXT: vmr v2, v0 +; FAST-NEXT: fctid f0, f0 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: xscvspdpn f0, vs2 +; FAST-NEXT: mtfprd f1, r3 +; FAST-NEXT: fctid f0, f0 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: xxmrghd v1, vs1, vs0 +; FAST-NEXT: xscvspdpn f0, vs3 +; FAST-NEXT: fctid f0, f0 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: xscvspdpn f0, vs4 +; FAST-NEXT: mtfprd f1, r3 +; FAST-NEXT: fctid f0, f0 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: xxmrghd v10, vs0, vs1 +; FAST-NEXT: xscvspdpn f0, v3 +; FAST-NEXT: vmr v3, v1 +; FAST-NEXT: fctid f0, f0 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: xscvspdpn f0, vs5 +; FAST-NEXT: mtfprd f1, r3 +; FAST-NEXT: fctid f0, f0 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: xxmrghd v11, vs1, vs0 +; FAST-NEXT: xscvspdpn f0, vs6 +; FAST-NEXT: fctid f0, f0 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: xscvspdpn f0, vs7 +; FAST-NEXT: mtfprd f1, r3 +; FAST-NEXT: fctid f0, f0 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: xxmrghd v6, vs0, vs1 +; FAST-NEXT: xscvspdpn f0, v4 +; FAST-NEXT: xscvspdpn f1, vs8 +; FAST-NEXT: vmr v4, v10 +; FAST-NEXT: fctid f0, f0 +; FAST-NEXT: fctid f1, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: mffprd r3, f1 +; FAST-NEXT: mtfprd f1, r3 +; FAST-NEXT: xxmrghd v7, vs0, vs1 +; FAST-NEXT: xscvspdpn f0, vs9 +; FAST-NEXT: xscvspdpn f1, vs10 +; FAST-NEXT: fctid f0, f0 +; FAST-NEXT: fctid f1, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: mffprd r3, f1 +; FAST-NEXT: mtfprd f1, r3 +; FAST-NEXT: xxmrghd v8, vs1, vs0 +; FAST-NEXT: xscvspdpn f0, v5 +; FAST-NEXT: xscvspdpn f1, vs11 +; FAST-NEXT: vmr v5, v11 +; FAST-NEXT: fctid f0, f0 +; FAST-NEXT: fctid f1, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: mffprd r3, f1 +; FAST-NEXT: mtfprd f1, r3 +; FAST-NEXT: xxmrghd v9, vs0, vs1 +; FAST-NEXT: blr + %a = call <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float> %x) + ret <16 x i64> %a +} +declare <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float>) + +define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) { +; BE-LABEL: llrint_v1i64_v1f64: +; BE: # %bb.0: +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -112(r1) +; BE-NEXT: std r0, 128(r1) +; BE-NEXT: .cfi_def_cfa_offset 112 +; BE-NEXT: .cfi_offset lr, 16 +; BE-NEXT: bl llrint +; BE-NEXT: nop +; BE-NEXT: addi r1, r1, 112 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr +; +; CHECK-LABEL: llrint_v1i64_v1f64: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -32(r1) +; CHECK-NEXT: std r0, 48(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: bl llrint +; CHECK-NEXT: nop +; CHECK-NEXT: addi r1, r1, 32 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +; +; FAST-LABEL: llrint_v1i64_v1f64: +; FAST: # %bb.0: +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: blr + %a = call <1 x i64> @llvm.llrint.v1i64.v1f64(<1 x double> %x) + ret <1 x i64> %a +} +declare <1 x i64> @llvm.llrint.v1i64.v1f64(<1 x double>) + +define <2 x i64> 
@llrint_v2i64_v2f64(<2 x double> %x) { +; BE-LABEL: llrint_v2i64_v2f64: +; BE: # %bb.0: +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -160(r1) +; BE-NEXT: std r0, 176(r1) +; BE-NEXT: .cfi_def_cfa_offset 160 +; BE-NEXT: .cfi_offset lr, 16 +; BE-NEXT: .cfi_offset v31, -16 +; BE-NEXT: li r3, 144 +; BE-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill +; BE-NEXT: vmr v31, v2 +; BE-NEXT: xxlor f1, v31, v31 +; BE-NEXT: bl llrint +; BE-NEXT: nop +; BE-NEXT: std r3, 128(r1) +; BE-NEXT: xxswapd vs1, v31 +; BE-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; BE-NEXT: bl llrint +; BE-NEXT: nop +; BE-NEXT: std r3, 136(r1) +; BE-NEXT: addi r3, r1, 128 +; BE-NEXT: lxvd2x v2, 0, r3 +; BE-NEXT: li r3, 144 +; BE-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload +; BE-NEXT: addi r1, r1, 160 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr +; +; CHECK-LABEL: llrint_v2i64_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -64(r1) +; CHECK-NEXT: std r0, 80(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: .cfi_offset v31, -16 +; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: vmr v31, v2 +; CHECK-NEXT: xxlor f1, v31, v31 +; CHECK-NEXT: bl llrint +; CHECK-NEXT: nop +; CHECK-NEXT: xxswapd vs1, v31 +; CHECK-NEXT: mtvsrd v31, r3 +; CHECK-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; CHECK-NEXT: bl llrint +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: xxmrghd v2, v31, vs0 +; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: addi r1, r1, 64 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +; +; FAST-LABEL: llrint_v2i64_v2f64: +; FAST: # %bb.0: +; FAST-NEXT: xxlor f1, v2, v2 +; FAST-NEXT: xxswapd vs0, v2 +; FAST-NEXT: fctid f1, f1 +; FAST-NEXT: fctid f0, f0 +; FAST-NEXT: mffprd r3, f1 +; FAST-NEXT: mtfprd f1, r3 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: xxmrghd v2, vs1, vs0 +; FAST-NEXT: blr + %a = call <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double> %x) + ret <2 x i64> %a +} +declare <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double>) + +define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) { +; BE-LABEL: llrint_v4i64_v4f64: +; BE: # %bb.0: +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -192(r1) +; BE-NEXT: std r0, 208(r1) +; BE-NEXT: .cfi_def_cfa_offset 192 +; BE-NEXT: .cfi_offset lr, 16 +; BE-NEXT: .cfi_offset v30, -32 +; BE-NEXT: .cfi_offset v31, -16 +; BE-NEXT: li r3, 160 +; BE-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill +; BE-NEXT: vmr v30, v2 +; BE-NEXT: li r3, 176 +; BE-NEXT: xxlor f1, v30, v30 +; BE-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill +; BE-NEXT: vmr v31, v3 +; BE-NEXT: bl llrint +; BE-NEXT: nop +; BE-NEXT: std r3, 128(r1) +; BE-NEXT: xxswapd vs1, v30 +; BE-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; BE-NEXT: bl llrint +; BE-NEXT: nop +; BE-NEXT: xxlor f1, v31, v31 +; BE-NEXT: std r3, 136(r1) +; BE-NEXT: bl llrint +; BE-NEXT: nop +; BE-NEXT: std r3, 144(r1) +; BE-NEXT: xxswapd vs1, v31 +; BE-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; BE-NEXT: bl llrint +; BE-NEXT: nop +; BE-NEXT: std r3, 152(r1) +; BE-NEXT: addi r3, r1, 128 +; BE-NEXT: lxvd2x v2, 0, r3 +; BE-NEXT: addi r3, r1, 144 +; BE-NEXT: lxvd2x v3, 0, r3 +; BE-NEXT: li r3, 176 +; BE-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload +; BE-NEXT: li r3, 160 +; BE-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload +; BE-NEXT: addi r1, r1, 192 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr +; +; 
CHECK-LABEL: llrint_v4i64_v4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -80(r1) +; CHECK-NEXT: std r0, 96(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 80 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: .cfi_offset v30, -32 +; CHECK-NEXT: .cfi_offset v31, -16 +; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: vmr v30, v2 +; CHECK-NEXT: li r3, 64 +; CHECK-NEXT: xxlor f1, v30, v30 +; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: vmr v31, v3 +; CHECK-NEXT: bl llrint +; CHECK-NEXT: nop +; CHECK-NEXT: xxswapd vs1, v30 +; CHECK-NEXT: mtvsrd v30, r3 +; CHECK-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; CHECK-NEXT: bl llrint +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: xxlor f1, v31, v31 +; CHECK-NEXT: xxmrghd v30, v30, vs0 +; CHECK-NEXT: bl llrint +; CHECK-NEXT: nop +; CHECK-NEXT: xxswapd vs1, v31 +; CHECK-NEXT: mtvsrd v31, r3 +; CHECK-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; CHECK-NEXT: bl llrint +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: li r3, 64 +; CHECK-NEXT: vmr v2, v30 +; CHECK-NEXT: xxmrghd v3, v31, vs0 +; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: addi r1, r1, 80 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +; +; FAST-LABEL: llrint_v4i64_v4f64: +; FAST: # %bb.0: +; FAST-NEXT: xxswapd vs0, v2 +; FAST-NEXT: xxlor f2, v2, v2 +; FAST-NEXT: xxswapd vs1, v3 +; FAST-NEXT: fctid f0, f0 +; FAST-NEXT: fctid f2, f2 +; FAST-NEXT: fctid f1, f1 +; FAST-NEXT: mffprd r4, f0 +; FAST-NEXT: xxlor f0, v3, v3 +; FAST-NEXT: mffprd r3, f2 +; FAST-NEXT: fctid f0, f0 +; FAST-NEXT: mtfprd f2, r4 +; FAST-NEXT: mffprd r5, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: mffprd r3, f1 +; FAST-NEXT: mtfprd f1, r3 +; FAST-NEXT: xxmrghd v2, vs0, vs2 +; FAST-NEXT: mtfprd f0, r5 +; FAST-NEXT: xxmrghd v3, vs0, vs1 +; FAST-NEXT: blr + %a = call <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double> %x) + ret <4 x i64> %a +} +declare <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double>) + +define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) { +; BE-LABEL: llrint_v8i64_v8f64: +; BE: # %bb.0: +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -256(r1) +; BE-NEXT: std r0, 272(r1) +; BE-NEXT: .cfi_def_cfa_offset 256 +; BE-NEXT: .cfi_offset lr, 16 +; BE-NEXT: .cfi_offset v28, -64 +; BE-NEXT: .cfi_offset v29, -48 +; BE-NEXT: .cfi_offset v30, -32 +; BE-NEXT: .cfi_offset v31, -16 +; BE-NEXT: li r3, 192 +; BE-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill +; BE-NEXT: li r3, 208 +; BE-NEXT: vmr v28, v2 +; BE-NEXT: xxlor f1, v28, v28 +; BE-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill +; BE-NEXT: li r3, 224 +; BE-NEXT: vmr v29, v3 +; BE-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill +; BE-NEXT: li r3, 240 +; BE-NEXT: vmr v30, v4 +; BE-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill +; BE-NEXT: vmr v31, v5 +; BE-NEXT: bl llrint +; BE-NEXT: nop +; BE-NEXT: std r3, 128(r1) +; BE-NEXT: xxswapd vs1, v28 +; BE-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; BE-NEXT: bl llrint +; BE-NEXT: nop +; BE-NEXT: xxlor f1, v29, v29 +; BE-NEXT: std r3, 136(r1) +; BE-NEXT: bl llrint +; BE-NEXT: nop +; BE-NEXT: std r3, 144(r1) +; BE-NEXT: xxswapd vs1, v29 +; BE-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; BE-NEXT: bl llrint +; BE-NEXT: nop +; BE-NEXT: xxlor f1, v30, v30 +; BE-NEXT: std r3, 152(r1) +; BE-NEXT: bl llrint +; BE-NEXT: nop +; BE-NEXT: std r3, 160(r1) +; BE-NEXT: xxswapd vs1, v30 +; BE-NEXT: 
# kill: def $f1 killed $f1 killed $vsl1 +; BE-NEXT: bl llrint +; BE-NEXT: nop +; BE-NEXT: xxlor f1, v31, v31 +; BE-NEXT: std r3, 168(r1) +; BE-NEXT: bl llrint +; BE-NEXT: nop +; BE-NEXT: std r3, 176(r1) +; BE-NEXT: xxswapd vs1, v31 +; BE-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; BE-NEXT: bl llrint +; BE-NEXT: nop +; BE-NEXT: std r3, 184(r1) +; BE-NEXT: addi r3, r1, 128 +; BE-NEXT: lxvd2x v2, 0, r3 +; BE-NEXT: addi r3, r1, 144 +; BE-NEXT: lxvd2x v3, 0, r3 +; BE-NEXT: addi r3, r1, 160 +; BE-NEXT: lxvd2x v4, 0, r3 +; BE-NEXT: addi r3, r1, 176 +; BE-NEXT: lxvd2x v5, 0, r3 +; BE-NEXT: li r3, 240 +; BE-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload +; BE-NEXT: li r3, 224 +; BE-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload +; BE-NEXT: li r3, 208 +; BE-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload +; BE-NEXT: li r3, 192 +; BE-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload +; BE-NEXT: addi r1, r1, 256 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr +; +; CHECK-LABEL: llrint_v8i64_v8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -112(r1) +; CHECK-NEXT: std r0, 128(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 112 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: .cfi_offset v28, -64 +; CHECK-NEXT: .cfi_offset v29, -48 +; CHECK-NEXT: .cfi_offset v30, -32 +; CHECK-NEXT: .cfi_offset v31, -16 +; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: stvx v28, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 64 +; CHECK-NEXT: vmr v28, v2 +; CHECK-NEXT: stvx v29, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 80 +; CHECK-NEXT: xxlor f1, v28, v28 +; CHECK-NEXT: vmr v29, v3 +; CHECK-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 96 +; CHECK-NEXT: vmr v30, v4 +; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: vmr v31, v5 +; CHECK-NEXT: bl llrint +; CHECK-NEXT: nop +; CHECK-NEXT: xxswapd vs1, v28 +; CHECK-NEXT: mtvsrd v28, r3 +; CHECK-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; CHECK-NEXT: bl llrint +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: xxlor f1, v29, v29 +; CHECK-NEXT: xxmrghd v28, v28, vs0 +; CHECK-NEXT: bl llrint +; CHECK-NEXT: nop +; CHECK-NEXT: xxswapd vs1, v29 +; CHECK-NEXT: mtvsrd v29, r3 +; CHECK-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; CHECK-NEXT: bl llrint +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: xxlor f1, v30, v30 +; CHECK-NEXT: xxmrghd v29, v29, vs0 +; CHECK-NEXT: bl llrint +; CHECK-NEXT: nop +; CHECK-NEXT: xxswapd vs1, v30 +; CHECK-NEXT: mtvsrd v30, r3 +; CHECK-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; CHECK-NEXT: bl llrint +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: xxlor f1, v31, v31 +; CHECK-NEXT: xxmrghd v30, v30, vs0 +; CHECK-NEXT: bl llrint +; CHECK-NEXT: nop +; CHECK-NEXT: xxswapd vs1, v31 +; CHECK-NEXT: mtvsrd v31, r3 +; CHECK-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; CHECK-NEXT: bl llrint +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: li r3, 96 +; CHECK-NEXT: vmr v4, v30 +; CHECK-NEXT: vmr v3, v29 +; CHECK-NEXT: vmr v2, v28 +; CHECK-NEXT: xxmrghd v5, v31, vs0 +; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 80 +; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 64 +; CHECK-NEXT: lvx v29, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: lvx v28, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: addi r1, r1, 112 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +; +; FAST-LABEL: llrint_v8i64_v8f64: +; FAST: # %bb.0: 
+; FAST-NEXT: xxswapd vs0, v2 +; FAST-NEXT: xxswapd vs1, v3 +; FAST-NEXT: xxlor f4, v2, v2 +; FAST-NEXT: xxswapd vs2, v4 +; FAST-NEXT: xxswapd vs3, v5 +; FAST-NEXT: fctid f0, f0 +; FAST-NEXT: fctid f4, f4 +; FAST-NEXT: mffprd r4, f0 +; FAST-NEXT: xxlor f0, v3, v3 +; FAST-NEXT: mffprd r3, f4 +; FAST-NEXT: fctid f0, f0 +; FAST-NEXT: mffprd r5, f0 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mtfprd f1, r4 +; FAST-NEXT: mffprd r6, f0 +; FAST-NEXT: xxlor f0, v4, v4 +; FAST-NEXT: fctid f0, f0 +; FAST-NEXT: mtfprd f4, r6 +; FAST-NEXT: mffprd r7, f0 +; FAST-NEXT: fctid f0, f2 +; FAST-NEXT: mtfprd f2, r5 +; FAST-NEXT: mtfprd f5, r7 +; FAST-NEXT: mffprd r8, f0 +; FAST-NEXT: xxlor f0, v5, v5 +; FAST-NEXT: fctid f0, f0 +; FAST-NEXT: mtfprd f6, r8 +; FAST-NEXT: mffprd r9, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: xxmrghd v3, vs2, vs4 +; FAST-NEXT: xxmrghd v4, vs5, vs6 +; FAST-NEXT: xxmrghd v2, vs0, vs1 +; FAST-NEXT: fctid f1, f3 +; FAST-NEXT: mtfprd f0, r9 +; FAST-NEXT: mffprd r3, f1 +; FAST-NEXT: mtfprd f1, r3 +; FAST-NEXT: xxmrghd v5, vs0, vs1 +; FAST-NEXT: blr + %a = call <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double> %x) + ret <8 x i64> %a +} +declare <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double>) diff --git a/llvm/test/CodeGen/PowerPC/vector-lrint.ll b/llvm/test/CodeGen/PowerPC/vector-lrint.ll new file mode 100644 index 0000000000000..9667a26120149 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/vector-lrint.ll @@ -0,0 +1,4859 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mcpu=pwr7 -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64-unknown-unknown \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=BE +; RUN: sed 's/iXLen/i32/g' %s | llc -mcpu=pwr8 -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64le-unknown-unknown \ +; RUN: -verify-machineinstrs | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mcpu=pwr8 -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64le-unknown-unknown \ +; RUN: -verify-machineinstrs --enable-unsafe-fp-math | \ +; RUN: FileCheck %s --check-prefixes=FAST +; RUN: sed 's/iXLen/i64/g' %s | llc -mcpu=pwr7 -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64-unknown-unknown \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=BE +; RUN: sed 's/iXLen/i64/g' %s | llc -mcpu=pwr8 -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64le-unknown-unknown \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK +; RUN: sed 's/iXLen/i64/g' %s | llc -mcpu=pwr8 -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64le-unknown-unknown \ +; RUN: -verify-machineinstrs --enable-unsafe-fp-math | \ +; RUN: FileCheck %s --check-prefixes=FAST + +define <1 x i64> @lrint_v1f16(<1 x half> %x) { +; BE-LABEL: lrint_v1f16: +; BE: # %bb.0: +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -112(r1) +; BE-NEXT: std r0, 128(r1) +; BE-NEXT: .cfi_def_cfa_offset 112 +; BE-NEXT: .cfi_offset lr, 16 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r3, 48 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: addi r1, r1, 112 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr +; +; CHECK-LABEL: lrint_v1f16: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -32(r1) +; CHECK-NEXT: std r0, 48(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: bl __gnu_f2h_ieee +; 
CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r3, 48 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: addi r1, r1, 32 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +; +; FAST-LABEL: lrint_v1f16: +; FAST: # %bb.0: +; FAST-NEXT: mflr r0 +; FAST-NEXT: stdu r1, -32(r1) +; FAST-NEXT: std r0, 48(r1) +; FAST-NEXT: .cfi_def_cfa_offset 32 +; FAST-NEXT: .cfi_offset lr, 16 +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: addi r1, r1, 32 +; FAST-NEXT: ld r0, 16(r1) +; FAST-NEXT: mtlr r0 +; FAST-NEXT: blr + %a = call <1 x i64> @llvm.lrint.v1i64.v1f16(<1 x half> %x) + ret <1 x i64> %a +} +declare <1 x i64> @llvm.lrint.v1i64.v1f16(<1 x half>) + +define <2 x i64> @lrint_v2f16(<2 x half> %x) { +; BE-LABEL: lrint_v2f16: +; BE: # %bb.0: +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -160(r1) +; BE-NEXT: std r0, 176(r1) +; BE-NEXT: .cfi_def_cfa_offset 160 +; BE-NEXT: .cfi_offset lr, 16 +; BE-NEXT: .cfi_offset r30, -24 +; BE-NEXT: .cfi_offset f31, -8 +; BE-NEXT: stfd f31, 152(r1) # 8-byte Folded Spill +; BE-NEXT: fmr f31, f1 +; BE-NEXT: fmr f1, f2 +; BE-NEXT: std r30, 136(r1) # 8-byte Folded Spill +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: fmr f1, f31 +; BE-NEXT: mr r30, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r3, 48 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r30, 48 +; BE-NEXT: fmr f31, f1 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f31 +; BE-NEXT: std r3, 120(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: std r3, 112(r1) +; BE-NEXT: addi r3, r1, 112 +; BE-NEXT: ld r30, 136(r1) # 8-byte Folded Reload +; BE-NEXT: lfd f31, 152(r1) # 8-byte Folded Reload +; BE-NEXT: lxvd2x v2, 0, r3 +; BE-NEXT: addi r1, r1, 160 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr +; +; CHECK-LABEL: lrint_v2f16: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -96(r1) +; CHECK-NEXT: std r0, 112(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 96 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: .cfi_offset r30, -24 +; CHECK-NEXT: .cfi_offset f31, -8 +; CHECK-NEXT: .cfi_offset v31, -48 +; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: std r30, 72(r1) # 8-byte Folded Spill +; CHECK-NEXT: stfd f31, 88(r1) # 8-byte Folded Spill +; CHECK-NEXT: fmr f31, f2 +; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f31 +; CHECK-NEXT: mr r30, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r3, 48 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r30, 48 +; CHECK-NEXT: fmr f31, f1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f31 +; CHECK-NEXT: mtvsrd v31, r3 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: lfd f31, 88(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r30, 72(r1) # 8-byte Folded Reload +; CHECK-NEXT: xxmrghd v2, vs0, v31 +; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: addi r1, r1, 96 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +; +; FAST-LABEL: lrint_v2f16: +; FAST: # %bb.0: +; FAST-NEXT: mflr r0 +; FAST-NEXT: .cfi_def_cfa_offset 48 +; FAST-NEXT: 
.cfi_offset lr, 16 +; FAST-NEXT: .cfi_offset f30, -16 +; FAST-NEXT: .cfi_offset f31, -8 +; FAST-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill +; FAST-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill +; FAST-NEXT: stdu r1, -48(r1) +; FAST-NEXT: fmr f31, f1 +; FAST-NEXT: fmr f1, f2 +; FAST-NEXT: std r0, 64(r1) +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f30, f1 +; FAST-NEXT: fmr f1, f31 +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: fctid f1, f30 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: mffprd r3, f1 +; FAST-NEXT: mtfprd f1, r3 +; FAST-NEXT: xxmrghd v2, vs1, vs0 +; FAST-NEXT: addi r1, r1, 48 +; FAST-NEXT: ld r0, 16(r1) +; FAST-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload +; FAST-NEXT: lfd f30, -16(r1) # 8-byte Folded Reload +; FAST-NEXT: mtlr r0 +; FAST-NEXT: blr + %a = call <2 x i64> @llvm.lrint.v2i64.v2f16(<2 x half> %x) + ret <2 x i64> %a +} +declare <2 x i64> @llvm.lrint.v2i64.v2f16(<2 x half>) + +define <4 x i64> @lrint_v4f16(<4 x half> %x) { +; BE-LABEL: lrint_v4f16: +; BE: # %bb.0: +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -208(r1) +; BE-NEXT: std r0, 224(r1) +; BE-NEXT: .cfi_def_cfa_offset 208 +; BE-NEXT: .cfi_offset lr, 16 +; BE-NEXT: .cfi_offset r28, -56 +; BE-NEXT: .cfi_offset r29, -48 +; BE-NEXT: .cfi_offset r30, -40 +; BE-NEXT: .cfi_offset f29, -24 +; BE-NEXT: .cfi_offset f30, -16 +; BE-NEXT: .cfi_offset f31, -8 +; BE-NEXT: stfd f29, 184(r1) # 8-byte Folded Spill +; BE-NEXT: fmr f29, f1 +; BE-NEXT: fmr f1, f2 +; BE-NEXT: std r28, 152(r1) # 8-byte Folded Spill +; BE-NEXT: std r29, 160(r1) # 8-byte Folded Spill +; BE-NEXT: std r30, 168(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f30, 192(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f31, 200(r1) # 8-byte Folded Spill +; BE-NEXT: fmr f31, f4 +; BE-NEXT: fmr f30, f3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: fmr f1, f29 +; BE-NEXT: mr r30, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: fmr f1, f31 +; BE-NEXT: mr r29, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: fmr f1, f30 +; BE-NEXT: mr r28, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r3, 48 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r28, 48 +; BE-NEXT: fmr f31, f1 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r29, 48 +; BE-NEXT: fmr f30, f1 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r30, 48 +; BE-NEXT: fmr f29, f1 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f29 +; BE-NEXT: std r3, 120(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f30 +; BE-NEXT: std r3, 112(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f31 +; BE-NEXT: std r3, 136(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: std r3, 128(r1) +; BE-NEXT: addi r3, r1, 112 +; BE-NEXT: ld r30, 168(r1) # 8-byte Folded Reload +; BE-NEXT: lfd f31, 200(r1) # 8-byte Folded Reload +; BE-NEXT: lfd f30, 192(r1) # 8-byte Folded Reload +; BE-NEXT: lfd f29, 184(r1) # 8-byte Folded Reload +; BE-NEXT: lxvd2x v2, 0, r3 +; BE-NEXT: addi r3, r1, 128 +; BE-NEXT: ld r29, 160(r1) # 8-byte Folded Reload +; BE-NEXT: ld r28, 152(r1) # 8-byte Folded Reload +; BE-NEXT: lxvd2x v3, 0, r3 +; BE-NEXT: addi r1, r1, 208 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; 
BE-NEXT: blr +; +; CHECK-LABEL: lrint_v4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -144(r1) +; CHECK-NEXT: std r0, 160(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 144 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: .cfi_offset r28, -56 +; CHECK-NEXT: .cfi_offset r29, -48 +; CHECK-NEXT: .cfi_offset r30, -40 +; CHECK-NEXT: .cfi_offset f29, -24 +; CHECK-NEXT: .cfi_offset f30, -16 +; CHECK-NEXT: .cfi_offset f31, -8 +; CHECK-NEXT: .cfi_offset v30, -96 +; CHECK-NEXT: .cfi_offset v31, -80 +; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: std r28, 88(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r29, 96(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r30, 104(r1) # 8-byte Folded Spill +; CHECK-NEXT: stfd f29, 120(r1) # 8-byte Folded Spill +; CHECK-NEXT: fmr f29, f2 +; CHECK-NEXT: stfd f30, 128(r1) # 8-byte Folded Spill +; CHECK-NEXT: fmr f30, f3 +; CHECK-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 64 +; CHECK-NEXT: stfd f31, 136(r1) # 8-byte Folded Spill +; CHECK-NEXT: fmr f31, f4 +; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f29 +; CHECK-NEXT: mr r30, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f30 +; CHECK-NEXT: mr r29, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f31 +; CHECK-NEXT: mr r28, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r3, 48 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r28, 48 +; CHECK-NEXT: fmr f31, f1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r29, 48 +; CHECK-NEXT: fmr f30, f1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r30, 48 +; CHECK-NEXT: fmr f29, f1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f29 +; CHECK-NEXT: mtvsrd v31, r3 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f30 +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: xxmrghd v31, vs0, v31 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f31 +; CHECK-NEXT: mtvsrd v30, r3 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: li r3, 64 +; CHECK-NEXT: vmr v2, v31 +; CHECK-NEXT: lfd f31, 136(r1) # 8-byte Folded Reload +; CHECK-NEXT: lfd f30, 128(r1) # 8-byte Folded Reload +; CHECK-NEXT: lfd f29, 120(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r30, 104(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r29, 96(r1) # 8-byte Folded Reload +; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: ld r28, 88(r1) # 8-byte Folded Reload +; CHECK-NEXT: xxmrghd v3, vs0, v30 +; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: addi r1, r1, 144 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +; +; FAST-LABEL: lrint_v4f16: +; FAST: # %bb.0: +; FAST-NEXT: mflr r0 +; FAST-NEXT: .cfi_def_cfa_offset 64 +; FAST-NEXT: .cfi_offset lr, 16 +; FAST-NEXT: .cfi_offset f28, -32 +; FAST-NEXT: .cfi_offset f29, -24 +; FAST-NEXT: .cfi_offset f30, -16 +; FAST-NEXT: .cfi_offset f31, -8 +; FAST-NEXT: stfd f28, -32(r1) # 8-byte Folded Spill +; FAST-NEXT: stfd f29, -24(r1) # 8-byte Folded Spill +; FAST-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill +; FAST-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill +; FAST-NEXT: stdu r1, -64(r1) +; FAST-NEXT: fmr f29, f1 +; FAST-NEXT: fmr f1, f4 +; FAST-NEXT: std r0, 80(r1) +; FAST-NEXT: fmr 
f31, f3 +; FAST-NEXT: fmr f30, f2 +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f28, f1 +; FAST-NEXT: fmr f1, f31 +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f31, f1 +; FAST-NEXT: fmr f1, f30 +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f30, f1 +; FAST-NEXT: fmr f1, f29 +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fctid f0, f30 +; FAST-NEXT: fctid f2, f31 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: fctid f1, f1 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: mffprd r3, f2 +; FAST-NEXT: mtfprd f2, r3 +; FAST-NEXT: mffprd r3, f1 +; FAST-NEXT: mtfprd f1, r3 +; FAST-NEXT: xxmrghd v2, vs0, vs1 +; FAST-NEXT: fctid f0, f28 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: xxmrghd v3, vs0, vs2 +; FAST-NEXT: addi r1, r1, 64 +; FAST-NEXT: ld r0, 16(r1) +; FAST-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload +; FAST-NEXT: lfd f30, -16(r1) # 8-byte Folded Reload +; FAST-NEXT: mtlr r0 +; FAST-NEXT: lfd f29, -24(r1) # 8-byte Folded Reload +; FAST-NEXT: lfd f28, -32(r1) # 8-byte Folded Reload +; FAST-NEXT: blr + %a = call <4 x i64> @llvm.lrint.v4i64.v4f16(<4 x half> %x) + ret <4 x i64> %a +} +declare <4 x i64> @llvm.lrint.v4i64.v4f16(<4 x half>) + +define <8 x i64> @lrint_v8f16(<8 x half> %x) { +; BE-LABEL: lrint_v8f16: +; BE: # %bb.0: +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -304(r1) +; BE-NEXT: std r0, 320(r1) +; BE-NEXT: .cfi_def_cfa_offset 304 +; BE-NEXT: .cfi_offset lr, 16 +; BE-NEXT: .cfi_offset r24, -120 +; BE-NEXT: .cfi_offset r25, -112 +; BE-NEXT: .cfi_offset r26, -104 +; BE-NEXT: .cfi_offset r27, -96 +; BE-NEXT: .cfi_offset r28, -88 +; BE-NEXT: .cfi_offset r29, -80 +; BE-NEXT: .cfi_offset r30, -72 +; BE-NEXT: .cfi_offset f25, -56 +; BE-NEXT: .cfi_offset f26, -48 +; BE-NEXT: .cfi_offset f27, -40 +; BE-NEXT: .cfi_offset f28, -32 +; BE-NEXT: .cfi_offset f29, -24 +; BE-NEXT: .cfi_offset f30, -16 +; BE-NEXT: .cfi_offset f31, -8 +; BE-NEXT: stfd f25, 248(r1) # 8-byte Folded Spill +; BE-NEXT: fmr f25, f1 +; BE-NEXT: fmr f1, f2 +; BE-NEXT: std r24, 184(r1) # 8-byte Folded Spill +; BE-NEXT: std r25, 192(r1) # 8-byte Folded Spill +; BE-NEXT: std r26, 200(r1) # 8-byte Folded Spill +; BE-NEXT: std r27, 208(r1) # 8-byte Folded Spill +; BE-NEXT: std r28, 216(r1) # 8-byte Folded Spill +; BE-NEXT: std r29, 224(r1) # 8-byte Folded Spill +; BE-NEXT: std r30, 232(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f26, 256(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f27, 264(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f28, 272(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f29, 280(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f30, 288(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f31, 296(r1) # 8-byte Folded Spill +; BE-NEXT: fmr f31, f8 +; BE-NEXT: fmr f30, f7 +; BE-NEXT: fmr f29, f6 +; BE-NEXT: fmr f28, f5 +; BE-NEXT: fmr f27, f4 +; BE-NEXT: fmr f26, f3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: fmr f1, f25 +; BE-NEXT: mr r30, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: fmr f1, f27 +; BE-NEXT: mr r29, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: fmr f1, f26 +; BE-NEXT: mr r28, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: fmr f1, f29 +; BE-NEXT: mr 
r27, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: fmr f1, f28 +; BE-NEXT: mr r26, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: fmr f1, f31 +; BE-NEXT: mr r25, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: fmr f1, f30 +; BE-NEXT: mr r24, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r3, 48 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r24, 48 +; BE-NEXT: fmr f31, f1 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r25, 48 +; BE-NEXT: fmr f30, f1 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r26, 48 +; BE-NEXT: fmr f29, f1 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r27, 48 +; BE-NEXT: fmr f28, f1 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r28, 48 +; BE-NEXT: fmr f27, f1 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r29, 48 +; BE-NEXT: fmr f26, f1 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r30, 48 +; BE-NEXT: fmr f25, f1 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f25 +; BE-NEXT: std r3, 120(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f26 +; BE-NEXT: std r3, 112(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f27 +; BE-NEXT: std r3, 136(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f28 +; BE-NEXT: std r3, 128(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f29 +; BE-NEXT: std r3, 152(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f30 +; BE-NEXT: std r3, 144(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f31 +; BE-NEXT: std r3, 168(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: std r3, 160(r1) +; BE-NEXT: addi r3, r1, 112 +; BE-NEXT: ld r30, 232(r1) # 8-byte Folded Reload +; BE-NEXT: lfd f31, 296(r1) # 8-byte Folded Reload +; BE-NEXT: lfd f30, 288(r1) # 8-byte Folded Reload +; BE-NEXT: lfd f29, 280(r1) # 8-byte Folded Reload +; BE-NEXT: lxvd2x v2, 0, r3 +; BE-NEXT: addi r3, r1, 128 +; BE-NEXT: lfd f28, 272(r1) # 8-byte Folded Reload +; BE-NEXT: lfd f27, 264(r1) # 8-byte Folded Reload +; BE-NEXT: lfd f26, 256(r1) # 8-byte Folded Reload +; BE-NEXT: ld r29, 224(r1) # 8-byte Folded Reload +; BE-NEXT: ld r28, 216(r1) # 8-byte Folded Reload +; BE-NEXT: lxvd2x v3, 0, r3 +; BE-NEXT: addi r3, r1, 144 +; BE-NEXT: lfd f25, 248(r1) # 8-byte Folded Reload +; BE-NEXT: ld r27, 208(r1) # 8-byte Folded Reload +; BE-NEXT: ld r26, 200(r1) # 8-byte Folded Reload +; BE-NEXT: ld r25, 192(r1) # 8-byte Folded Reload +; BE-NEXT: ld r24, 184(r1) # 8-byte Folded Reload +; BE-NEXT: lxvd2x v4, 0, r3 +; BE-NEXT: addi r3, r1, 160 +; BE-NEXT: lxvd2x v5, 0, r3 +; BE-NEXT: addi r1, r1, 304 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr +; +; CHECK-LABEL: lrint_v8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -240(r1) +; CHECK-NEXT: std r0, 256(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 240 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: .cfi_offset r24, -120 +; CHECK-NEXT: .cfi_offset r25, -112 +; CHECK-NEXT: .cfi_offset r26, -104 +; CHECK-NEXT: .cfi_offset r27, -96 +; CHECK-NEXT: .cfi_offset r28, -88 +; CHECK-NEXT: .cfi_offset r29, -80 +; CHECK-NEXT: .cfi_offset r30, -72 +; CHECK-NEXT: .cfi_offset f25, -56 +; CHECK-NEXT: .cfi_offset f26, -48 +; CHECK-NEXT: .cfi_offset f27, -40 +; CHECK-NEXT: .cfi_offset f28, -32 +; CHECK-NEXT: .cfi_offset f29, -24 +; CHECK-NEXT: .cfi_offset f30, -16 +; CHECK-NEXT: 
.cfi_offset f31, -8 +; CHECK-NEXT: .cfi_offset v28, -192 +; CHECK-NEXT: .cfi_offset v29, -176 +; CHECK-NEXT: .cfi_offset v30, -160 +; CHECK-NEXT: .cfi_offset v31, -144 +; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: std r24, 120(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r25, 128(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r26, 136(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r27, 144(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r28, 152(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r29, 160(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r30, 168(r1) # 8-byte Folded Spill +; CHECK-NEXT: stvx v28, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 64 +; CHECK-NEXT: stfd f25, 184(r1) # 8-byte Folded Spill +; CHECK-NEXT: fmr f25, f2 +; CHECK-NEXT: stfd f26, 192(r1) # 8-byte Folded Spill +; CHECK-NEXT: fmr f26, f3 +; CHECK-NEXT: stfd f27, 200(r1) # 8-byte Folded Spill +; CHECK-NEXT: fmr f27, f4 +; CHECK-NEXT: stvx v29, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 80 +; CHECK-NEXT: stfd f28, 208(r1) # 8-byte Folded Spill +; CHECK-NEXT: fmr f28, f5 +; CHECK-NEXT: stfd f29, 216(r1) # 8-byte Folded Spill +; CHECK-NEXT: fmr f29, f6 +; CHECK-NEXT: stfd f30, 224(r1) # 8-byte Folded Spill +; CHECK-NEXT: fmr f30, f7 +; CHECK-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 96 +; CHECK-NEXT: stfd f31, 232(r1) # 8-byte Folded Spill +; CHECK-NEXT: fmr f31, f8 +; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f25 +; CHECK-NEXT: mr r30, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f26 +; CHECK-NEXT: mr r29, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f27 +; CHECK-NEXT: mr r28, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f28 +; CHECK-NEXT: mr r27, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f29 +; CHECK-NEXT: mr r26, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f30 +; CHECK-NEXT: mr r25, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f31 +; CHECK-NEXT: mr r24, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r3, 48 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r24, 48 +; CHECK-NEXT: fmr f31, f1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r25, 48 +; CHECK-NEXT: fmr f30, f1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r26, 48 +; CHECK-NEXT: fmr f29, f1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r27, 48 +; CHECK-NEXT: fmr f28, f1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r28, 48 +; CHECK-NEXT: fmr f27, f1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r29, 48 +; CHECK-NEXT: fmr f26, f1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r30, 48 +; CHECK-NEXT: fmr f25, f1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f25 +; CHECK-NEXT: mtvsrd v31, r3 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f26 +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: xxmrghd v31, vs0, v31 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f27 +; CHECK-NEXT: mtvsrd v30, r3 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f28 +; CHECK-NEXT: mtfprd f0, r3 +; 
CHECK-NEXT: xxmrghd v30, vs0, v30 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f29 +; CHECK-NEXT: mtvsrd v29, r3 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f30 +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: xxmrghd v29, vs0, v29 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f31 +; CHECK-NEXT: mtvsrd v28, r3 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: li r3, 96 +; CHECK-NEXT: vmr v2, v31 +; CHECK-NEXT: lfd f31, 232(r1) # 8-byte Folded Reload +; CHECK-NEXT: vmr v3, v30 +; CHECK-NEXT: vmr v4, v29 +; CHECK-NEXT: lfd f30, 224(r1) # 8-byte Folded Reload +; CHECK-NEXT: lfd f29, 216(r1) # 8-byte Folded Reload +; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 80 +; CHECK-NEXT: lfd f28, 208(r1) # 8-byte Folded Reload +; CHECK-NEXT: lfd f27, 200(r1) # 8-byte Folded Reload +; CHECK-NEXT: lfd f26, 192(r1) # 8-byte Folded Reload +; CHECK-NEXT: lfd f25, 184(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r30, 168(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r29, 160(r1) # 8-byte Folded Reload +; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 64 +; CHECK-NEXT: ld r28, 152(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r27, 144(r1) # 8-byte Folded Reload +; CHECK-NEXT: xxmrghd v5, vs0, v28 +; CHECK-NEXT: ld r26, 136(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r25, 128(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r24, 120(r1) # 8-byte Folded Reload +; CHECK-NEXT: lvx v29, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: lvx v28, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: addi r1, r1, 240 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +; +; FAST-LABEL: lrint_v8f16: +; FAST: # %bb.0: +; FAST-NEXT: mflr r0 +; FAST-NEXT: .cfi_def_cfa_offset 96 +; FAST-NEXT: .cfi_offset lr, 16 +; FAST-NEXT: .cfi_offset f24, -64 +; FAST-NEXT: .cfi_offset f25, -56 +; FAST-NEXT: .cfi_offset f26, -48 +; FAST-NEXT: .cfi_offset f27, -40 +; FAST-NEXT: .cfi_offset f28, -32 +; FAST-NEXT: .cfi_offset f29, -24 +; FAST-NEXT: .cfi_offset f30, -16 +; FAST-NEXT: .cfi_offset f31, -8 +; FAST-NEXT: stfd f24, -64(r1) # 8-byte Folded Spill +; FAST-NEXT: stfd f25, -56(r1) # 8-byte Folded Spill +; FAST-NEXT: stfd f26, -48(r1) # 8-byte Folded Spill +; FAST-NEXT: stfd f27, -40(r1) # 8-byte Folded Spill +; FAST-NEXT: stfd f28, -32(r1) # 8-byte Folded Spill +; FAST-NEXT: stfd f29, -24(r1) # 8-byte Folded Spill +; FAST-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill +; FAST-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill +; FAST-NEXT: stdu r1, -96(r1) +; FAST-NEXT: fmr f24, f1 +; FAST-NEXT: fmr f1, f8 +; FAST-NEXT: std r0, 112(r1) +; FAST-NEXT: fmr f30, f7 +; FAST-NEXT: fmr f29, f6 +; FAST-NEXT: fmr f28, f5 +; FAST-NEXT: fmr f27, f4 +; FAST-NEXT: fmr f26, f3 +; FAST-NEXT: fmr f25, f2 +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f31, f1 +; FAST-NEXT: fmr f1, f30 +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f30, f1 +; FAST-NEXT: fmr f1, f29 +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f29, f1 +; FAST-NEXT: fmr f1, f28 +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; 
FAST-NEXT: fmr f28, f1 +; FAST-NEXT: fmr f1, f27 +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f27, f1 +; FAST-NEXT: fmr f1, f26 +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f26, f1 +; FAST-NEXT: fmr f1, f25 +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f25, f1 +; FAST-NEXT: fmr f1, f24 +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fctid f0, f25 +; FAST-NEXT: fctid f2, f26 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: fctid f3, f27 +; FAST-NEXT: fctid f4, f28 +; FAST-NEXT: fctid f5, f29 +; FAST-NEXT: fctid f6, f30 +; FAST-NEXT: fctid f1, f1 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: mffprd r3, f2 +; FAST-NEXT: mtfprd f2, r3 +; FAST-NEXT: mffprd r3, f3 +; FAST-NEXT: mtfprd f3, r3 +; FAST-NEXT: mffprd r3, f4 +; FAST-NEXT: mtfprd f4, r3 +; FAST-NEXT: mffprd r3, f5 +; FAST-NEXT: mtfprd f5, r3 +; FAST-NEXT: mffprd r3, f6 +; FAST-NEXT: mtfprd f6, r3 +; FAST-NEXT: mffprd r3, f1 +; FAST-NEXT: mtfprd f1, r3 +; FAST-NEXT: xxmrghd v3, vs3, vs2 +; FAST-NEXT: xxmrghd v4, vs5, vs4 +; FAST-NEXT: xxmrghd v2, vs0, vs1 +; FAST-NEXT: fctid f0, f31 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: xxmrghd v5, vs0, vs6 +; FAST-NEXT: addi r1, r1, 96 +; FAST-NEXT: ld r0, 16(r1) +; FAST-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload +; FAST-NEXT: lfd f30, -16(r1) # 8-byte Folded Reload +; FAST-NEXT: mtlr r0 +; FAST-NEXT: lfd f29, -24(r1) # 8-byte Folded Reload +; FAST-NEXT: lfd f28, -32(r1) # 8-byte Folded Reload +; FAST-NEXT: lfd f27, -40(r1) # 8-byte Folded Reload +; FAST-NEXT: lfd f26, -48(r1) # 8-byte Folded Reload +; FAST-NEXT: lfd f25, -56(r1) # 8-byte Folded Reload +; FAST-NEXT: lfd f24, -64(r1) # 8-byte Folded Reload +; FAST-NEXT: blr + %a = call <8 x i64> @llvm.lrint.v8i64.v8f16(<8 x half> %x) + ret <8 x i64> %a +} +declare <8 x i64> @llvm.lrint.v8i64.v8f16(<8 x half>) + +define <16 x i64> @lrint_v16i64_v16f16(<16 x half> %x) { +; BE-LABEL: lrint_v16i64_v16f16: +; BE: # %bb.0: +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -496(r1) +; BE-NEXT: std r0, 512(r1) +; BE-NEXT: .cfi_def_cfa_offset 496 +; BE-NEXT: .cfi_offset lr, 16 +; BE-NEXT: .cfi_offset r16, -248 +; BE-NEXT: .cfi_offset r17, -240 +; BE-NEXT: .cfi_offset r18, -232 +; BE-NEXT: .cfi_offset r19, -224 +; BE-NEXT: .cfi_offset r20, -216 +; BE-NEXT: .cfi_offset r21, -208 +; BE-NEXT: .cfi_offset r22, -200 +; BE-NEXT: .cfi_offset r23, -192 +; BE-NEXT: .cfi_offset r24, -184 +; BE-NEXT: .cfi_offset r25, -176 +; BE-NEXT: .cfi_offset r26, -168 +; BE-NEXT: .cfi_offset r27, -160 +; BE-NEXT: .cfi_offset r28, -152 +; BE-NEXT: .cfi_offset r29, -144 +; BE-NEXT: .cfi_offset r30, -136 +; BE-NEXT: .cfi_offset f17, -120 +; BE-NEXT: .cfi_offset f18, -112 +; BE-NEXT: .cfi_offset f19, -104 +; BE-NEXT: .cfi_offset f20, -96 +; BE-NEXT: .cfi_offset f21, -88 +; BE-NEXT: .cfi_offset f22, -80 +; BE-NEXT: .cfi_offset f23, -72 +; BE-NEXT: .cfi_offset f24, -64 +; BE-NEXT: .cfi_offset f25, -56 +; BE-NEXT: .cfi_offset f26, -48 +; BE-NEXT: .cfi_offset f27, -40 +; BE-NEXT: .cfi_offset f28, -32 +; BE-NEXT: .cfi_offset f29, -24 +; BE-NEXT: .cfi_offset f30, -16 +; BE-NEXT: .cfi_offset f31, -8 +; BE-NEXT: stfd f20, 400(r1) # 8-byte Folded Spill +; BE-NEXT: fmr f20, f1 +; 
BE-NEXT: fmr f1, f2 +; BE-NEXT: std r16, 248(r1) # 8-byte Folded Spill +; BE-NEXT: std r17, 256(r1) # 8-byte Folded Spill +; BE-NEXT: std r18, 264(r1) # 8-byte Folded Spill +; BE-NEXT: std r19, 272(r1) # 8-byte Folded Spill +; BE-NEXT: std r20, 280(r1) # 8-byte Folded Spill +; BE-NEXT: std r21, 288(r1) # 8-byte Folded Spill +; BE-NEXT: std r22, 296(r1) # 8-byte Folded Spill +; BE-NEXT: std r23, 304(r1) # 8-byte Folded Spill +; BE-NEXT: std r24, 312(r1) # 8-byte Folded Spill +; BE-NEXT: std r25, 320(r1) # 8-byte Folded Spill +; BE-NEXT: std r26, 328(r1) # 8-byte Folded Spill +; BE-NEXT: std r27, 336(r1) # 8-byte Folded Spill +; BE-NEXT: std r28, 344(r1) # 8-byte Folded Spill +; BE-NEXT: std r29, 352(r1) # 8-byte Folded Spill +; BE-NEXT: std r30, 360(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f17, 376(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f18, 384(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f19, 392(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f21, 408(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f22, 416(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f23, 424(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f24, 432(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f25, 440(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f26, 448(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f27, 456(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f28, 464(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f29, 472(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f30, 480(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f31, 488(r1) # 8-byte Folded Spill +; BE-NEXT: fmr f31, f13 +; BE-NEXT: fmr f29, f12 +; BE-NEXT: fmr f30, f11 +; BE-NEXT: fmr f28, f10 +; BE-NEXT: fmr f27, f9 +; BE-NEXT: fmr f26, f8 +; BE-NEXT: fmr f25, f7 +; BE-NEXT: fmr f24, f6 +; BE-NEXT: fmr f23, f5 +; BE-NEXT: fmr f22, f4 +; BE-NEXT: fmr f21, f3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: fmr f1, f20 +; BE-NEXT: mr r30, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: fmr f1, f22 +; BE-NEXT: mr r29, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: fmr f1, f21 +; BE-NEXT: mr r28, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: fmr f1, f24 +; BE-NEXT: mr r27, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: fmr f1, f23 +; BE-NEXT: mr r26, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: fmr f1, f26 +; BE-NEXT: mr r25, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: fmr f1, f25 +; BE-NEXT: mr r24, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: fmr f1, f28 +; BE-NEXT: mr r23, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: fmr f1, f27 +; BE-NEXT: mr r22, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: fmr f1, f29 +; BE-NEXT: mr r21, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: fmr f1, f30 +; BE-NEXT: mr r20, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: lfs f1, 652(r1) +; BE-NEXT: mr r19, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: fmr f1, f31 +; BE-NEXT: mr r18, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: lfs f1, 668(r1) +; BE-NEXT: mr r17, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: lfs f1, 660(r1) +; BE-NEXT: mr r16, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r3, 48 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r16, 48 +; BE-NEXT: fmr f31, f1 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r17, 48 +; BE-NEXT: fmr f30, f1 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r18, 48 +; BE-NEXT: 
fmr f29, f1 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r19, 48 +; BE-NEXT: fmr f28, f1 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r20, 48 +; BE-NEXT: fmr f27, f1 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r21, 48 +; BE-NEXT: fmr f26, f1 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r22, 48 +; BE-NEXT: fmr f25, f1 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r23, 48 +; BE-NEXT: fmr f24, f1 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r24, 48 +; BE-NEXT: fmr f23, f1 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r25, 48 +; BE-NEXT: fmr f22, f1 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r26, 48 +; BE-NEXT: fmr f21, f1 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r27, 48 +; BE-NEXT: fmr f20, f1 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r28, 48 +; BE-NEXT: fmr f19, f1 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r29, 48 +; BE-NEXT: fmr f18, f1 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r30, 48 +; BE-NEXT: fmr f17, f1 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f17 +; BE-NEXT: std r3, 120(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f18 +; BE-NEXT: std r3, 112(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f19 +; BE-NEXT: std r3, 136(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f20 +; BE-NEXT: std r3, 128(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f21 +; BE-NEXT: std r3, 152(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f22 +; BE-NEXT: std r3, 144(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f23 +; BE-NEXT: std r3, 168(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f24 +; BE-NEXT: std r3, 160(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f25 +; BE-NEXT: std r3, 184(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f26 +; BE-NEXT: std r3, 176(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f27 +; BE-NEXT: std r3, 200(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f28 +; BE-NEXT: std r3, 192(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f29 +; BE-NEXT: std r3, 216(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f30 +; BE-NEXT: std r3, 208(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f31 +; BE-NEXT: std r3, 232(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: std r3, 224(r1) +; BE-NEXT: addi r3, r1, 112 +; BE-NEXT: ld r30, 360(r1) # 8-byte Folded Reload +; BE-NEXT: lfd f31, 488(r1) # 8-byte Folded Reload +; BE-NEXT: lfd f30, 480(r1) # 8-byte Folded Reload +; BE-NEXT: lfd f29, 472(r1) # 8-byte Folded Reload +; BE-NEXT: lxvd2x v2, 0, r3 +; BE-NEXT: addi r3, r1, 128 +; BE-NEXT: lfd f28, 464(r1) # 8-byte Folded Reload +; BE-NEXT: lfd f27, 456(r1) # 8-byte Folded Reload +; BE-NEXT: lfd f26, 448(r1) # 8-byte Folded Reload +; BE-NEXT: ld r29, 352(r1) # 8-byte Folded Reload +; BE-NEXT: ld r28, 344(r1) # 8-byte Folded Reload +; BE-NEXT: lxvd2x v3, 0, r3 +; BE-NEXT: addi r3, r1, 144 +; BE-NEXT: lfd f25, 440(r1) # 8-byte Folded Reload +; BE-NEXT: lfd f24, 432(r1) # 8-byte Folded Reload +; BE-NEXT: lfd f23, 424(r1) # 8-byte Folded Reload +; BE-NEXT: ld r27, 336(r1) # 8-byte Folded Reload +; BE-NEXT: ld r26, 328(r1) # 8-byte Folded Reload 
+; BE-NEXT: lxvd2x v4, 0, r3 +; BE-NEXT: addi r3, r1, 160 +; BE-NEXT: lfd f22, 416(r1) # 8-byte Folded Reload +; BE-NEXT: lfd f21, 408(r1) # 8-byte Folded Reload +; BE-NEXT: lfd f20, 400(r1) # 8-byte Folded Reload +; BE-NEXT: ld r25, 320(r1) # 8-byte Folded Reload +; BE-NEXT: ld r24, 312(r1) # 8-byte Folded Reload +; BE-NEXT: lxvd2x v5, 0, r3 +; BE-NEXT: addi r3, r1, 176 +; BE-NEXT: lfd f19, 392(r1) # 8-byte Folded Reload +; BE-NEXT: lfd f18, 384(r1) # 8-byte Folded Reload +; BE-NEXT: lfd f17, 376(r1) # 8-byte Folded Reload +; BE-NEXT: ld r23, 304(r1) # 8-byte Folded Reload +; BE-NEXT: ld r22, 296(r1) # 8-byte Folded Reload +; BE-NEXT: lxvd2x v6, 0, r3 +; BE-NEXT: addi r3, r1, 192 +; BE-NEXT: ld r21, 288(r1) # 8-byte Folded Reload +; BE-NEXT: ld r20, 280(r1) # 8-byte Folded Reload +; BE-NEXT: ld r19, 272(r1) # 8-byte Folded Reload +; BE-NEXT: ld r18, 264(r1) # 8-byte Folded Reload +; BE-NEXT: ld r17, 256(r1) # 8-byte Folded Reload +; BE-NEXT: ld r16, 248(r1) # 8-byte Folded Reload +; BE-NEXT: lxvd2x v7, 0, r3 +; BE-NEXT: addi r3, r1, 208 +; BE-NEXT: lxvd2x v8, 0, r3 +; BE-NEXT: addi r3, r1, 224 +; BE-NEXT: lxvd2x v9, 0, r3 +; BE-NEXT: addi r1, r1, 496 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr +; +; CHECK-LABEL: lrint_v16i64_v16f16: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -432(r1) +; CHECK-NEXT: std r0, 448(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 432 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: .cfi_offset r16, -248 +; CHECK-NEXT: .cfi_offset r17, -240 +; CHECK-NEXT: .cfi_offset r18, -232 +; CHECK-NEXT: .cfi_offset r19, -224 +; CHECK-NEXT: .cfi_offset r20, -216 +; CHECK-NEXT: .cfi_offset r21, -208 +; CHECK-NEXT: .cfi_offset r22, -200 +; CHECK-NEXT: .cfi_offset r23, -192 +; CHECK-NEXT: .cfi_offset r24, -184 +; CHECK-NEXT: .cfi_offset r25, -176 +; CHECK-NEXT: .cfi_offset r26, -168 +; CHECK-NEXT: .cfi_offset r27, -160 +; CHECK-NEXT: .cfi_offset r28, -152 +; CHECK-NEXT: .cfi_offset r29, -144 +; CHECK-NEXT: .cfi_offset r30, -136 +; CHECK-NEXT: .cfi_offset f17, -120 +; CHECK-NEXT: .cfi_offset f18, -112 +; CHECK-NEXT: .cfi_offset f19, -104 +; CHECK-NEXT: .cfi_offset f20, -96 +; CHECK-NEXT: .cfi_offset f21, -88 +; CHECK-NEXT: .cfi_offset f22, -80 +; CHECK-NEXT: .cfi_offset f23, -72 +; CHECK-NEXT: .cfi_offset f24, -64 +; CHECK-NEXT: .cfi_offset f25, -56 +; CHECK-NEXT: .cfi_offset f26, -48 +; CHECK-NEXT: .cfi_offset f27, -40 +; CHECK-NEXT: .cfi_offset f28, -32 +; CHECK-NEXT: .cfi_offset f29, -24 +; CHECK-NEXT: .cfi_offset f30, -16 +; CHECK-NEXT: .cfi_offset f31, -8 +; CHECK-NEXT: .cfi_offset v24, -384 +; CHECK-NEXT: .cfi_offset v25, -368 +; CHECK-NEXT: .cfi_offset v26, -352 +; CHECK-NEXT: .cfi_offset v27, -336 +; CHECK-NEXT: .cfi_offset v28, -320 +; CHECK-NEXT: .cfi_offset v29, -304 +; CHECK-NEXT: .cfi_offset v30, -288 +; CHECK-NEXT: .cfi_offset v31, -272 +; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: std r16, 184(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r17, 192(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r18, 200(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r19, 208(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r20, 216(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r21, 224(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r22, 232(r1) # 8-byte Folded Spill +; CHECK-NEXT: stvx v24, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 64 +; CHECK-NEXT: std r23, 240(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r24, 248(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r25, 256(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r26, 264(r1) # 8-byte Folded Spill +; 
CHECK-NEXT: std r27, 272(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r28, 280(r1) # 8-byte Folded Spill +; CHECK-NEXT: stvx v25, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 80 +; CHECK-NEXT: std r29, 288(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r30, 296(r1) # 8-byte Folded Spill +; CHECK-NEXT: stfd f17, 312(r1) # 8-byte Folded Spill +; CHECK-NEXT: stfd f18, 320(r1) # 8-byte Folded Spill +; CHECK-NEXT: stfd f19, 328(r1) # 8-byte Folded Spill +; CHECK-NEXT: stfd f20, 336(r1) # 8-byte Folded Spill +; CHECK-NEXT: fmr f20, f2 +; CHECK-NEXT: stvx v26, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 96 +; CHECK-NEXT: stfd f21, 344(r1) # 8-byte Folded Spill +; CHECK-NEXT: fmr f21, f3 +; CHECK-NEXT: stfd f22, 352(r1) # 8-byte Folded Spill +; CHECK-NEXT: fmr f22, f4 +; CHECK-NEXT: stfd f23, 360(r1) # 8-byte Folded Spill +; CHECK-NEXT: fmr f23, f5 +; CHECK-NEXT: stvx v27, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 112 +; CHECK-NEXT: stfd f24, 368(r1) # 8-byte Folded Spill +; CHECK-NEXT: fmr f24, f6 +; CHECK-NEXT: stfd f25, 376(r1) # 8-byte Folded Spill +; CHECK-NEXT: fmr f25, f7 +; CHECK-NEXT: stfd f26, 384(r1) # 8-byte Folded Spill +; CHECK-NEXT: fmr f26, f8 +; CHECK-NEXT: stvx v28, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 128 +; CHECK-NEXT: stfd f27, 392(r1) # 8-byte Folded Spill +; CHECK-NEXT: fmr f27, f9 +; CHECK-NEXT: stfd f28, 400(r1) # 8-byte Folded Spill +; CHECK-NEXT: fmr f28, f10 +; CHECK-NEXT: stfd f29, 408(r1) # 8-byte Folded Spill +; CHECK-NEXT: fmr f29, f11 +; CHECK-NEXT: stvx v29, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 144 +; CHECK-NEXT: stfd f30, 416(r1) # 8-byte Folded Spill +; CHECK-NEXT: fmr f30, f12 +; CHECK-NEXT: stfd f31, 424(r1) # 8-byte Folded Spill +; CHECK-NEXT: fmr f31, f13 +; CHECK-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 160 +; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f20 +; CHECK-NEXT: mr r30, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f21 +; CHECK-NEXT: mr r29, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f22 +; CHECK-NEXT: mr r28, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f23 +; CHECK-NEXT: mr r27, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f24 +; CHECK-NEXT: mr r26, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f25 +; CHECK-NEXT: mr r25, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f26 +; CHECK-NEXT: mr r24, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f27 +; CHECK-NEXT: mr r23, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f28 +; CHECK-NEXT: mr r22, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f29 +; CHECK-NEXT: mr r21, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f30 +; CHECK-NEXT: mr r20, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f31 +; CHECK-NEXT: mr r19, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: lfs f1, 568(r1) +; CHECK-NEXT: mr r18, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: lfs f1, 576(r1) +; CHECK-NEXT: mr r17, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: lfs f1, 584(r1) +; CHECK-NEXT: mr r16, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; 
CHECK-NEXT: clrldi r3, r3, 48 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r16, 48 +; CHECK-NEXT: fmr f31, f1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r17, 48 +; CHECK-NEXT: fmr f30, f1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r18, 48 +; CHECK-NEXT: fmr f29, f1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r19, 48 +; CHECK-NEXT: fmr f28, f1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r20, 48 +; CHECK-NEXT: fmr f27, f1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r21, 48 +; CHECK-NEXT: fmr f26, f1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r22, 48 +; CHECK-NEXT: fmr f25, f1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r23, 48 +; CHECK-NEXT: fmr f24, f1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r24, 48 +; CHECK-NEXT: fmr f23, f1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r25, 48 +; CHECK-NEXT: fmr f22, f1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r26, 48 +; CHECK-NEXT: fmr f21, f1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r27, 48 +; CHECK-NEXT: fmr f20, f1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r28, 48 +; CHECK-NEXT: fmr f19, f1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r29, 48 +; CHECK-NEXT: fmr f18, f1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r30, 48 +; CHECK-NEXT: fmr f17, f1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f17 +; CHECK-NEXT: mtvsrd v31, r3 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f18 +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: xxmrghd v31, vs0, v31 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f19 +; CHECK-NEXT: mtvsrd v30, r3 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f20 +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: xxmrghd v30, vs0, v30 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f21 +; CHECK-NEXT: mtvsrd v29, r3 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f22 +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: xxmrghd v29, vs0, v29 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f23 +; CHECK-NEXT: mtvsrd v28, r3 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f24 +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: xxmrghd v28, vs0, v28 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f25 +; CHECK-NEXT: mtvsrd v27, r3 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f26 +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: xxmrghd v27, vs0, v27 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f27 +; CHECK-NEXT: mtvsrd v26, r3 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f28 +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: xxmrghd v26, vs0, v26 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f29 +; CHECK-NEXT: mtvsrd v25, r3 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f30 +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: xxmrghd v25, vs0, v25 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f31 +; CHECK-NEXT: mtvsrd v24, r3 +; 
CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: li r3, 160 +; CHECK-NEXT: vmr v2, v31 +; CHECK-NEXT: lfd f31, 424(r1) # 8-byte Folded Reload +; CHECK-NEXT: vmr v3, v30 +; CHECK-NEXT: vmr v4, v29 +; CHECK-NEXT: lfd f30, 416(r1) # 8-byte Folded Reload +; CHECK-NEXT: lfd f29, 408(r1) # 8-byte Folded Reload +; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 144 +; CHECK-NEXT: vmr v5, v28 +; CHECK-NEXT: vmr v6, v27 +; CHECK-NEXT: vmr v7, v26 +; CHECK-NEXT: vmr v8, v25 +; CHECK-NEXT: lfd f28, 400(r1) # 8-byte Folded Reload +; CHECK-NEXT: lfd f27, 392(r1) # 8-byte Folded Reload +; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 128 +; CHECK-NEXT: lfd f26, 384(r1) # 8-byte Folded Reload +; CHECK-NEXT: lfd f25, 376(r1) # 8-byte Folded Reload +; CHECK-NEXT: xxmrghd v9, vs0, v24 +; CHECK-NEXT: lfd f24, 368(r1) # 8-byte Folded Reload +; CHECK-NEXT: lfd f23, 360(r1) # 8-byte Folded Reload +; CHECK-NEXT: lfd f22, 352(r1) # 8-byte Folded Reload +; CHECK-NEXT: lvx v29, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 112 +; CHECK-NEXT: lfd f21, 344(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r30, 296(r1) # 8-byte Folded Reload +; CHECK-NEXT: lfd f20, 336(r1) # 8-byte Folded Reload +; CHECK-NEXT: lfd f19, 328(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r29, 288(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r28, 280(r1) # 8-byte Folded Reload +; CHECK-NEXT: lvx v28, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 96 +; CHECK-NEXT: lfd f18, 320(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r27, 272(r1) # 8-byte Folded Reload +; CHECK-NEXT: lfd f17, 312(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r26, 264(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r25, 256(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r24, 248(r1) # 8-byte Folded Reload +; CHECK-NEXT: lvx v27, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 80 +; CHECK-NEXT: ld r23, 240(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r22, 232(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r21, 224(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r20, 216(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r19, 208(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r18, 200(r1) # 8-byte Folded Reload +; CHECK-NEXT: lvx v26, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 64 +; CHECK-NEXT: ld r17, 192(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r16, 184(r1) # 8-byte Folded Reload +; CHECK-NEXT: lvx v25, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: lvx v24, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: addi r1, r1, 432 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +; +; FAST-LABEL: lrint_v16i64_v16f16: +; FAST: # %bb.0: +; FAST-NEXT: mflr r0 +; FAST-NEXT: .cfi_def_cfa_offset 160 +; FAST-NEXT: .cfi_offset lr, 16 +; FAST-NEXT: .cfi_offset f16, -128 +; FAST-NEXT: .cfi_offset f17, -120 +; FAST-NEXT: .cfi_offset f18, -112 +; FAST-NEXT: .cfi_offset f19, -104 +; FAST-NEXT: .cfi_offset f20, -96 +; FAST-NEXT: .cfi_offset f21, -88 +; FAST-NEXT: .cfi_offset f22, -80 +; FAST-NEXT: .cfi_offset f23, -72 +; FAST-NEXT: .cfi_offset f24, -64 +; FAST-NEXT: .cfi_offset f25, -56 +; FAST-NEXT: .cfi_offset f26, -48 +; FAST-NEXT: .cfi_offset f27, -40 +; FAST-NEXT: .cfi_offset f28, -32 +; FAST-NEXT: .cfi_offset f29, -24 +; FAST-NEXT: .cfi_offset f30, -16 +; FAST-NEXT: .cfi_offset f31, -8 +; FAST-NEXT: stfd f16, -128(r1) # 8-byte Folded Spill +; FAST-NEXT: stfd f17, -120(r1) # 8-byte Folded Spill +; FAST-NEXT: stfd f18, -112(r1) # 8-byte Folded Spill 
+; FAST-NEXT: stfd f19, -104(r1) # 8-byte Folded Spill +; FAST-NEXT: stfd f20, -96(r1) # 8-byte Folded Spill +; FAST-NEXT: stfd f21, -88(r1) # 8-byte Folded Spill +; FAST-NEXT: stfd f22, -80(r1) # 8-byte Folded Spill +; FAST-NEXT: stfd f23, -72(r1) # 8-byte Folded Spill +; FAST-NEXT: stfd f24, -64(r1) # 8-byte Folded Spill +; FAST-NEXT: stfd f25, -56(r1) # 8-byte Folded Spill +; FAST-NEXT: stfd f26, -48(r1) # 8-byte Folded Spill +; FAST-NEXT: stfd f27, -40(r1) # 8-byte Folded Spill +; FAST-NEXT: stfd f28, -32(r1) # 8-byte Folded Spill +; FAST-NEXT: stfd f29, -24(r1) # 8-byte Folded Spill +; FAST-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill +; FAST-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill +; FAST-NEXT: stdu r1, -160(r1) +; FAST-NEXT: fmr f26, f1 +; FAST-NEXT: lfs f1, 312(r1) +; FAST-NEXT: std r0, 176(r1) +; FAST-NEXT: fmr f28, f13 +; FAST-NEXT: fmr f27, f12 +; FAST-NEXT: fmr f24, f11 +; FAST-NEXT: fmr f21, f10 +; FAST-NEXT: fmr f19, f9 +; FAST-NEXT: fmr f18, f8 +; FAST-NEXT: fmr f17, f7 +; FAST-NEXT: fmr f16, f6 +; FAST-NEXT: fmr f20, f5 +; FAST-NEXT: fmr f22, f4 +; FAST-NEXT: fmr f23, f3 +; FAST-NEXT: fmr f25, f2 +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f31, f1 +; FAST-NEXT: lfs f1, 304(r1) +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f30, f1 +; FAST-NEXT: lfs f1, 296(r1) +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f29, f1 +; FAST-NEXT: fmr f1, f28 +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f28, f1 +; FAST-NEXT: fmr f1, f27 +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f27, f1 +; FAST-NEXT: fmr f1, f24 +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f24, f1 +; FAST-NEXT: fmr f1, f21 +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f21, f1 +; FAST-NEXT: fmr f1, f19 +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f19, f1 +; FAST-NEXT: fmr f1, f18 +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f18, f1 +; FAST-NEXT: fmr f1, f17 +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f17, f1 +; FAST-NEXT: fmr f1, f16 +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f16, f1 +; FAST-NEXT: fmr f1, f20 +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f20, f1 +; FAST-NEXT: fmr f1, f22 +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f22, f1 +; FAST-NEXT: fmr f1, f23 +; FAST-NEXT: bl 
__gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f23, f1 +; FAST-NEXT: fmr f1, f25 +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f25, f1 +; FAST-NEXT: fmr f1, f26 +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fctid f0, f25 +; FAST-NEXT: fctid f2, f23 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: fctid f3, f22 +; FAST-NEXT: fctid f4, f20 +; FAST-NEXT: fctid f5, f16 +; FAST-NEXT: fctid f6, f17 +; FAST-NEXT: fctid f7, f18 +; FAST-NEXT: fctid f8, f19 +; FAST-NEXT: fctid f9, f21 +; FAST-NEXT: fctid f10, f24 +; FAST-NEXT: fctid f1, f1 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: mffprd r3, f2 +; FAST-NEXT: mtfprd f2, r3 +; FAST-NEXT: mffprd r3, f3 +; FAST-NEXT: mtfprd f3, r3 +; FAST-NEXT: mffprd r3, f4 +; FAST-NEXT: mtfprd f4, r3 +; FAST-NEXT: mffprd r3, f5 +; FAST-NEXT: mtfprd f5, r3 +; FAST-NEXT: mffprd r3, f6 +; FAST-NEXT: mtfprd f6, r3 +; FAST-NEXT: mffprd r3, f7 +; FAST-NEXT: mtfprd f7, r3 +; FAST-NEXT: mffprd r3, f8 +; FAST-NEXT: mtfprd f8, r3 +; FAST-NEXT: mffprd r3, f9 +; FAST-NEXT: mtfprd f9, r3 +; FAST-NEXT: mffprd r3, f10 +; FAST-NEXT: mtfprd f10, r3 +; FAST-NEXT: mffprd r3, f1 +; FAST-NEXT: mtfprd f1, r3 +; FAST-NEXT: xxmrghd v3, vs3, vs2 +; FAST-NEXT: xxmrghd v4, vs5, vs4 +; FAST-NEXT: xxmrghd v5, vs7, vs6 +; FAST-NEXT: xxmrghd v6, vs9, vs8 +; FAST-NEXT: xxmrghd v2, vs0, vs1 +; FAST-NEXT: fctid f0, f27 +; FAST-NEXT: fctid f1, f29 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: xxmrghd v7, vs0, vs10 +; FAST-NEXT: fctid f0, f28 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: mffprd r3, f1 +; FAST-NEXT: mtfprd f1, r3 +; FAST-NEXT: xxmrghd v8, vs1, vs0 +; FAST-NEXT: fctid f0, f30 +; FAST-NEXT: fctid f1, f31 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: mffprd r3, f1 +; FAST-NEXT: mtfprd f1, r3 +; FAST-NEXT: xxmrghd v9, vs1, vs0 +; FAST-NEXT: addi r1, r1, 160 +; FAST-NEXT: ld r0, 16(r1) +; FAST-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload +; FAST-NEXT: lfd f30, -16(r1) # 8-byte Folded Reload +; FAST-NEXT: mtlr r0 +; FAST-NEXT: lfd f29, -24(r1) # 8-byte Folded Reload +; FAST-NEXT: lfd f28, -32(r1) # 8-byte Folded Reload +; FAST-NEXT: lfd f27, -40(r1) # 8-byte Folded Reload +; FAST-NEXT: lfd f26, -48(r1) # 8-byte Folded Reload +; FAST-NEXT: lfd f25, -56(r1) # 8-byte Folded Reload +; FAST-NEXT: lfd f24, -64(r1) # 8-byte Folded Reload +; FAST-NEXT: lfd f23, -72(r1) # 8-byte Folded Reload +; FAST-NEXT: lfd f22, -80(r1) # 8-byte Folded Reload +; FAST-NEXT: lfd f21, -88(r1) # 8-byte Folded Reload +; FAST-NEXT: lfd f20, -96(r1) # 8-byte Folded Reload +; FAST-NEXT: lfd f19, -104(r1) # 8-byte Folded Reload +; FAST-NEXT: lfd f18, -112(r1) # 8-byte Folded Reload +; FAST-NEXT: lfd f17, -120(r1) # 8-byte Folded Reload +; FAST-NEXT: lfd f16, -128(r1) # 8-byte Folded Reload +; FAST-NEXT: blr + %a = call <16 x i64> @llvm.lrint.v16i64.v16f16(<16 x half> %x) + ret <16 x i64> %a +} +declare <16 x i64> @llvm.lrint.v16i64.v16f16(<16 x half>) + +define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) { +; BE-LABEL: lrint_v32i64_v32f16: +; BE: # %bb.0: +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -864(r1) +; BE-NEXT: std r0, 880(r1) +; BE-NEXT: .cfi_def_cfa_offset 864 +; BE-NEXT: .cfi_offset lr, 16 +; BE-NEXT: .cfi_offset r14, -288 +; BE-NEXT: .cfi_offset r15, -280 +; 
BE-NEXT: .cfi_offset r16, -272 +; BE-NEXT: .cfi_offset r17, -264 +; BE-NEXT: .cfi_offset r18, -256 +; BE-NEXT: .cfi_offset r19, -248 +; BE-NEXT: .cfi_offset r20, -240 +; BE-NEXT: .cfi_offset r21, -232 +; BE-NEXT: .cfi_offset r22, -224 +; BE-NEXT: .cfi_offset r23, -216 +; BE-NEXT: .cfi_offset r24, -208 +; BE-NEXT: .cfi_offset r25, -200 +; BE-NEXT: .cfi_offset r26, -192 +; BE-NEXT: .cfi_offset r27, -184 +; BE-NEXT: .cfi_offset r28, -176 +; BE-NEXT: .cfi_offset r29, -168 +; BE-NEXT: .cfi_offset r30, -160 +; BE-NEXT: .cfi_offset r31, -152 +; BE-NEXT: .cfi_offset f14, -144 +; BE-NEXT: .cfi_offset f15, -136 +; BE-NEXT: .cfi_offset f16, -128 +; BE-NEXT: .cfi_offset f17, -120 +; BE-NEXT: .cfi_offset f18, -112 +; BE-NEXT: .cfi_offset f19, -104 +; BE-NEXT: .cfi_offset f20, -96 +; BE-NEXT: .cfi_offset f21, -88 +; BE-NEXT: .cfi_offset f22, -80 +; BE-NEXT: .cfi_offset f23, -72 +; BE-NEXT: .cfi_offset f24, -64 +; BE-NEXT: .cfi_offset f25, -56 +; BE-NEXT: .cfi_offset f26, -48 +; BE-NEXT: .cfi_offset f27, -40 +; BE-NEXT: .cfi_offset f28, -32 +; BE-NEXT: .cfi_offset f29, -24 +; BE-NEXT: .cfi_offset f30, -16 +; BE-NEXT: .cfi_offset f31, -8 +; BE-NEXT: stfd f20, 768(r1) # 8-byte Folded Spill +; BE-NEXT: fmr f20, f1 +; BE-NEXT: fmr f1, f2 +; BE-NEXT: std r14, 576(r1) # 8-byte Folded Spill +; BE-NEXT: std r15, 584(r1) # 8-byte Folded Spill +; BE-NEXT: std r16, 592(r1) # 8-byte Folded Spill +; BE-NEXT: std r17, 600(r1) # 8-byte Folded Spill +; BE-NEXT: std r18, 608(r1) # 8-byte Folded Spill +; BE-NEXT: std r19, 616(r1) # 8-byte Folded Spill +; BE-NEXT: std r20, 624(r1) # 8-byte Folded Spill +; BE-NEXT: std r21, 632(r1) # 8-byte Folded Spill +; BE-NEXT: std r22, 640(r1) # 8-byte Folded Spill +; BE-NEXT: std r23, 648(r1) # 8-byte Folded Spill +; BE-NEXT: std r24, 656(r1) # 8-byte Folded Spill +; BE-NEXT: std r25, 664(r1) # 8-byte Folded Spill +; BE-NEXT: std r26, 672(r1) # 8-byte Folded Spill +; BE-NEXT: std r27, 680(r1) # 8-byte Folded Spill +; BE-NEXT: std r28, 688(r1) # 8-byte Folded Spill +; BE-NEXT: std r29, 696(r1) # 8-byte Folded Spill +; BE-NEXT: std r30, 704(r1) # 8-byte Folded Spill +; BE-NEXT: std r31, 712(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f14, 720(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f15, 728(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f16, 736(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f17, 744(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f18, 752(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f19, 760(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f21, 776(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f22, 784(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f23, 792(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f24, 800(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f25, 808(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f26, 816(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f27, 824(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f28, 832(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f29, 840(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f30, 848(r1) # 8-byte Folded Spill +; BE-NEXT: stfd f31, 856(r1) # 8-byte Folded Spill +; BE-NEXT: fmr f31, f13 +; BE-NEXT: fmr f29, f12 +; BE-NEXT: fmr f30, f11 +; BE-NEXT: fmr f28, f10 +; BE-NEXT: fmr f27, f9 +; BE-NEXT: fmr f26, f8 +; BE-NEXT: fmr f25, f7 +; BE-NEXT: fmr f24, f6 +; BE-NEXT: fmr f23, f5 +; BE-NEXT: fmr f22, f4 +; BE-NEXT: fmr f21, f3 +; BE-NEXT: mr r30, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: fmr f1, f20 +; BE-NEXT: std r3, 304(r1) # 8-byte Folded Spill +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: fmr f1, f22 +; BE-NEXT: std r3, 296(r1) # 
8-byte Folded Spill +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: fmr f1, f21 +; BE-NEXT: std r3, 280(r1) # 8-byte Folded Spill +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: fmr f1, f24 +; BE-NEXT: std r3, 264(r1) # 8-byte Folded Spill +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: fmr f1, f23 +; BE-NEXT: std r3, 248(r1) # 8-byte Folded Spill +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: fmr f1, f26 +; BE-NEXT: std r3, 232(r1) # 8-byte Folded Spill +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: fmr f1, f25 +; BE-NEXT: std r3, 216(r1) # 8-byte Folded Spill +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: fmr f1, f28 +; BE-NEXT: std r3, 200(r1) # 8-byte Folded Spill +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: fmr f1, f27 +; BE-NEXT: std r3, 184(r1) # 8-byte Folded Spill +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: fmr f1, f29 +; BE-NEXT: std r3, 168(r1) # 8-byte Folded Spill +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: fmr f1, f30 +; BE-NEXT: std r3, 152(r1) # 8-byte Folded Spill +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: lfs f1, 1028(r1) +; BE-NEXT: std r3, 136(r1) # 8-byte Folded Spill +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: fmr f1, f31 +; BE-NEXT: std r3, 120(r1) # 8-byte Folded Spill +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: lfs f1, 1044(r1) +; BE-NEXT: std r3, 112(r1) # 8-byte Folded Spill +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: lfs f1, 1036(r1) +; BE-NEXT: mr r15, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: lfs f1, 1060(r1) +; BE-NEXT: mr r14, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: lfs f1, 1052(r1) +; BE-NEXT: mr r31, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: lfs f1, 1076(r1) +; BE-NEXT: mr r29, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: lfs f1, 1068(r1) +; BE-NEXT: mr r28, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: lfs f1, 1092(r1) +; BE-NEXT: mr r27, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: lfs f1, 1084(r1) +; BE-NEXT: mr r26, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: lfs f1, 1108(r1) +; BE-NEXT: mr r25, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: lfs f1, 1100(r1) +; BE-NEXT: mr r24, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: lfs f1, 1124(r1) +; BE-NEXT: mr r23, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: lfs f1, 1116(r1) +; BE-NEXT: mr r22, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: lfs f1, 1140(r1) +; BE-NEXT: mr r21, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: lfs f1, 1132(r1) +; BE-NEXT: mr r20, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: lfs f1, 1156(r1) +; BE-NEXT: mr r19, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: lfs f1, 1148(r1) +; BE-NEXT: mr r18, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: lfs f1, 1172(r1) +; BE-NEXT: mr r17, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: lfs f1, 1164(r1) +; BE-NEXT: mr r16, r3 +; BE-NEXT: bl __gnu_f2h_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r3, 48 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r16, 48 +; BE-NEXT: stfs f1, 316(r1) # 4-byte Folded Spill +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r17, 48 +; BE-NEXT: stfs f1, 312(r1) # 4-byte Folded Spill +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r18, 48 +; 
BE-NEXT: stfs f1, 292(r1) # 4-byte Folded Spill +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r19, 48 +; BE-NEXT: stfs f1, 276(r1) # 4-byte Folded Spill +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r20, 48 +; BE-NEXT: stfs f1, 260(r1) # 4-byte Folded Spill +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r21, 48 +; BE-NEXT: stfs f1, 244(r1) # 4-byte Folded Spill +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r22, 48 +; BE-NEXT: stfs f1, 228(r1) # 4-byte Folded Spill +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r23, 48 +; BE-NEXT: stfs f1, 212(r1) # 4-byte Folded Spill +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r24, 48 +; BE-NEXT: stfs f1, 196(r1) # 4-byte Folded Spill +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r25, 48 +; BE-NEXT: stfs f1, 180(r1) # 4-byte Folded Spill +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r26, 48 +; BE-NEXT: stfs f1, 164(r1) # 4-byte Folded Spill +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r27, 48 +; BE-NEXT: stfs f1, 148(r1) # 4-byte Folded Spill +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r28, 48 +; BE-NEXT: stfs f1, 132(r1) # 4-byte Folded Spill +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r29, 48 +; BE-NEXT: fmr f18, f1 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r31, 48 +; BE-NEXT: fmr f17, f1 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r14, 48 +; BE-NEXT: fmr f16, f1 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: clrldi r3, r15, 48 +; BE-NEXT: fmr f15, f1 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: ld r3, 112(r1) # 8-byte Folded Reload +; BE-NEXT: fmr f14, f1 +; BE-NEXT: clrldi r3, r3, 48 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: ld r3, 120(r1) # 8-byte Folded Reload +; BE-NEXT: fmr f31, f1 +; BE-NEXT: clrldi r3, r3, 48 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: ld r3, 136(r1) # 8-byte Folded Reload +; BE-NEXT: fmr f30, f1 +; BE-NEXT: clrldi r3, r3, 48 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: ld r3, 152(r1) # 8-byte Folded Reload +; BE-NEXT: fmr f29, f1 +; BE-NEXT: clrldi r3, r3, 48 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: ld r3, 168(r1) # 8-byte Folded Reload +; BE-NEXT: fmr f28, f1 +; BE-NEXT: clrldi r3, r3, 48 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: ld r3, 184(r1) # 8-byte Folded Reload +; BE-NEXT: fmr f27, f1 +; BE-NEXT: clrldi r3, r3, 48 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: ld r3, 200(r1) # 8-byte Folded Reload +; BE-NEXT: fmr f26, f1 +; BE-NEXT: clrldi r3, r3, 48 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: ld r3, 216(r1) # 8-byte Folded Reload +; BE-NEXT: fmr f25, f1 +; BE-NEXT: clrldi r3, r3, 48 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: ld r3, 232(r1) # 8-byte Folded Reload +; BE-NEXT: fmr f24, f1 +; BE-NEXT: clrldi r3, r3, 48 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: ld r3, 248(r1) # 8-byte Folded Reload +; BE-NEXT: fmr f23, f1 +; BE-NEXT: clrldi r3, r3, 48 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: ld r3, 264(r1) # 8-byte Folded Reload +; BE-NEXT: fmr f22, f1 +; BE-NEXT: clrldi r3, r3, 48 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: ld r3, 280(r1) # 8-byte Folded Reload +; BE-NEXT: fmr f21, f1 +; BE-NEXT: clrldi r3, r3, 48 +; BE-NEXT: bl 
__gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: ld r3, 296(r1) # 8-byte Folded Reload +; BE-NEXT: fmr f20, f1 +; BE-NEXT: clrldi r3, r3, 48 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: ld r3, 304(r1) # 8-byte Folded Reload +; BE-NEXT: fmr f19, f1 +; BE-NEXT: clrldi r3, r3, 48 +; BE-NEXT: bl __gnu_h2f_ieee +; BE-NEXT: nop +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f19 +; BE-NEXT: std r3, 328(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f20 +; BE-NEXT: std r3, 320(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f21 +; BE-NEXT: std r3, 344(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f22 +; BE-NEXT: std r3, 336(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f23 +; BE-NEXT: std r3, 360(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f24 +; BE-NEXT: std r3, 352(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f25 +; BE-NEXT: std r3, 376(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f26 +; BE-NEXT: std r3, 368(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f27 +; BE-NEXT: std r3, 392(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f28 +; BE-NEXT: std r3, 384(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f29 +; BE-NEXT: std r3, 408(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f30 +; BE-NEXT: std r3, 400(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f31 +; BE-NEXT: std r3, 424(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f14 +; BE-NEXT: std r3, 416(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f15 +; BE-NEXT: std r3, 440(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f16 +; BE-NEXT: std r3, 432(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f17 +; BE-NEXT: std r3, 456(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: fmr f1, f18 +; BE-NEXT: std r3, 448(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: lfs f1, 132(r1) # 4-byte Folded Reload +; BE-NEXT: std r3, 472(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: lfs f1, 148(r1) # 4-byte Folded Reload +; BE-NEXT: std r3, 464(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: lfs f1, 164(r1) # 4-byte Folded Reload +; BE-NEXT: std r3, 488(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: lfs f1, 180(r1) # 4-byte Folded Reload +; BE-NEXT: std r3, 480(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: lfs f1, 196(r1) # 4-byte Folded Reload +; BE-NEXT: std r3, 504(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: lfs f1, 212(r1) # 4-byte Folded Reload +; BE-NEXT: std r3, 496(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: lfs f1, 228(r1) # 4-byte Folded Reload +; BE-NEXT: std r3, 520(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: lfs f1, 244(r1) # 4-byte Folded Reload +; BE-NEXT: std r3, 512(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: lfs f1, 260(r1) # 4-byte Folded Reload +; BE-NEXT: std r3, 536(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: lfs f1, 276(r1) # 4-byte Folded Reload +; BE-NEXT: std r3, 528(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: lfs f1, 292(r1) # 4-byte Folded Reload +; BE-NEXT: std r3, 552(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: lfs f1, 312(r1) # 4-byte Folded Reload +; BE-NEXT: std r3, 544(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: lfs f1, 316(r1) # 4-byte Folded Reload +; BE-NEXT: std r3, 568(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: std r3, 560(r1) +; BE-NEXT: addi 
r3, r1, 320 +; BE-NEXT: lxvd2x vs0, 0, r3 +; BE-NEXT: addi r3, r1, 336 +; BE-NEXT: lxvd2x vs1, 0, r3 +; BE-NEXT: addi r3, r1, 352 +; BE-NEXT: lxvd2x vs2, 0, r3 +; BE-NEXT: addi r3, r1, 368 +; BE-NEXT: lxvd2x vs3, 0, r3 +; BE-NEXT: addi r3, r1, 384 +; BE-NEXT: lxvd2x vs4, 0, r3 +; BE-NEXT: addi r3, r1, 400 +; BE-NEXT: lxvd2x vs5, 0, r3 +; BE-NEXT: addi r3, r1, 416 +; BE-NEXT: lxvd2x vs6, 0, r3 +; BE-NEXT: addi r3, r1, 432 +; BE-NEXT: lxvd2x vs7, 0, r3 +; BE-NEXT: addi r3, r1, 448 +; BE-NEXT: lxvd2x vs8, 0, r3 +; BE-NEXT: addi r3, r1, 464 +; BE-NEXT: lxvd2x vs9, 0, r3 +; BE-NEXT: addi r3, r1, 480 +; BE-NEXT: lxvd2x vs10, 0, r3 +; BE-NEXT: addi r3, r1, 496 +; BE-NEXT: lxvd2x vs11, 0, r3 +; BE-NEXT: addi r3, r1, 512 +; BE-NEXT: lxvd2x vs12, 0, r3 +; BE-NEXT: addi r3, r1, 528 +; BE-NEXT: lxvd2x vs13, 0, r3 +; BE-NEXT: addi r3, r1, 544 +; BE-NEXT: lxvd2x v2, 0, r3 +; BE-NEXT: addi r3, r1, 560 +; BE-NEXT: lxvd2x v3, 0, r3 +; BE-NEXT: li r3, 240 +; BE-NEXT: stxvd2x v3, r30, r3 +; BE-NEXT: li r3, 224 +; BE-NEXT: stxvd2x v2, r30, r3 +; BE-NEXT: li r3, 208 +; BE-NEXT: stxvd2x vs13, r30, r3 +; BE-NEXT: li r3, 192 +; BE-NEXT: stxvd2x vs12, r30, r3 +; BE-NEXT: li r3, 176 +; BE-NEXT: stxvd2x vs11, r30, r3 +; BE-NEXT: li r3, 160 +; BE-NEXT: stxvd2x vs10, r30, r3 +; BE-NEXT: li r3, 144 +; BE-NEXT: stxvd2x vs9, r30, r3 +; BE-NEXT: li r3, 128 +; BE-NEXT: stxvd2x vs8, r30, r3 +; BE-NEXT: li r3, 112 +; BE-NEXT: stxvd2x vs7, r30, r3 +; BE-NEXT: li r3, 96 +; BE-NEXT: stxvd2x vs6, r30, r3 +; BE-NEXT: li r3, 80 +; BE-NEXT: stxvd2x vs5, r30, r3 +; BE-NEXT: li r3, 64 +; BE-NEXT: stxvd2x vs4, r30, r3 +; BE-NEXT: li r3, 48 +; BE-NEXT: stxvd2x vs3, r30, r3 +; BE-NEXT: li r3, 32 +; BE-NEXT: stxvd2x vs2, r30, r3 +; BE-NEXT: li r3, 16 +; BE-NEXT: stxvd2x vs1, r30, r3 +; BE-NEXT: stxvd2x vs0, 0, r30 +; BE-NEXT: lfd f31, 856(r1) # 8-byte Folded Reload +; BE-NEXT: lfd f30, 848(r1) # 8-byte Folded Reload +; BE-NEXT: lfd f29, 840(r1) # 8-byte Folded Reload +; BE-NEXT: lfd f28, 832(r1) # 8-byte Folded Reload +; BE-NEXT: lfd f27, 824(r1) # 8-byte Folded Reload +; BE-NEXT: lfd f26, 816(r1) # 8-byte Folded Reload +; BE-NEXT: lfd f25, 808(r1) # 8-byte Folded Reload +; BE-NEXT: lfd f24, 800(r1) # 8-byte Folded Reload +; BE-NEXT: lfd f23, 792(r1) # 8-byte Folded Reload +; BE-NEXT: lfd f22, 784(r1) # 8-byte Folded Reload +; BE-NEXT: lfd f21, 776(r1) # 8-byte Folded Reload +; BE-NEXT: lfd f20, 768(r1) # 8-byte Folded Reload +; BE-NEXT: lfd f19, 760(r1) # 8-byte Folded Reload +; BE-NEXT: lfd f18, 752(r1) # 8-byte Folded Reload +; BE-NEXT: lfd f17, 744(r1) # 8-byte Folded Reload +; BE-NEXT: lfd f16, 736(r1) # 8-byte Folded Reload +; BE-NEXT: lfd f15, 728(r1) # 8-byte Folded Reload +; BE-NEXT: lfd f14, 720(r1) # 8-byte Folded Reload +; BE-NEXT: ld r31, 712(r1) # 8-byte Folded Reload +; BE-NEXT: ld r30, 704(r1) # 8-byte Folded Reload +; BE-NEXT: ld r29, 696(r1) # 8-byte Folded Reload +; BE-NEXT: ld r28, 688(r1) # 8-byte Folded Reload +; BE-NEXT: ld r27, 680(r1) # 8-byte Folded Reload +; BE-NEXT: ld r26, 672(r1) # 8-byte Folded Reload +; BE-NEXT: ld r25, 664(r1) # 8-byte Folded Reload +; BE-NEXT: ld r24, 656(r1) # 8-byte Folded Reload +; BE-NEXT: ld r23, 648(r1) # 8-byte Folded Reload +; BE-NEXT: ld r22, 640(r1) # 8-byte Folded Reload +; BE-NEXT: ld r21, 632(r1) # 8-byte Folded Reload +; BE-NEXT: ld r20, 624(r1) # 8-byte Folded Reload +; BE-NEXT: ld r19, 616(r1) # 8-byte Folded Reload +; BE-NEXT: ld r18, 608(r1) # 8-byte Folded Reload +; BE-NEXT: ld r17, 600(r1) # 8-byte Folded Reload +; BE-NEXT: ld r16, 592(r1) # 8-byte Folded Reload +; 
BE-NEXT: ld r15, 584(r1) # 8-byte Folded Reload +; BE-NEXT: ld r14, 576(r1) # 8-byte Folded Reload +; BE-NEXT: addi r1, r1, 864 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr +; +; CHECK-LABEL: lrint_v32i64_v32f16: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -688(r1) +; CHECK-NEXT: std r0, 704(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 688 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: .cfi_offset r14, -288 +; CHECK-NEXT: .cfi_offset r15, -280 +; CHECK-NEXT: .cfi_offset r16, -272 +; CHECK-NEXT: .cfi_offset r17, -264 +; CHECK-NEXT: .cfi_offset r18, -256 +; CHECK-NEXT: .cfi_offset r19, -248 +; CHECK-NEXT: .cfi_offset r20, -240 +; CHECK-NEXT: .cfi_offset r21, -232 +; CHECK-NEXT: .cfi_offset r22, -224 +; CHECK-NEXT: .cfi_offset r23, -216 +; CHECK-NEXT: .cfi_offset r24, -208 +; CHECK-NEXT: .cfi_offset r25, -200 +; CHECK-NEXT: .cfi_offset r26, -192 +; CHECK-NEXT: .cfi_offset r27, -184 +; CHECK-NEXT: .cfi_offset r28, -176 +; CHECK-NEXT: .cfi_offset r29, -168 +; CHECK-NEXT: .cfi_offset r30, -160 +; CHECK-NEXT: .cfi_offset r31, -152 +; CHECK-NEXT: .cfi_offset f14, -144 +; CHECK-NEXT: .cfi_offset f15, -136 +; CHECK-NEXT: .cfi_offset f16, -128 +; CHECK-NEXT: .cfi_offset f17, -120 +; CHECK-NEXT: .cfi_offset f18, -112 +; CHECK-NEXT: .cfi_offset f19, -104 +; CHECK-NEXT: .cfi_offset f20, -96 +; CHECK-NEXT: .cfi_offset f21, -88 +; CHECK-NEXT: .cfi_offset f22, -80 +; CHECK-NEXT: .cfi_offset f23, -72 +; CHECK-NEXT: .cfi_offset f24, -64 +; CHECK-NEXT: .cfi_offset f25, -56 +; CHECK-NEXT: .cfi_offset f26, -48 +; CHECK-NEXT: .cfi_offset f27, -40 +; CHECK-NEXT: .cfi_offset f28, -32 +; CHECK-NEXT: .cfi_offset f29, -24 +; CHECK-NEXT: .cfi_offset f30, -16 +; CHECK-NEXT: .cfi_offset f31, -8 +; CHECK-NEXT: .cfi_offset v20, -480 +; CHECK-NEXT: .cfi_offset v21, -464 +; CHECK-NEXT: .cfi_offset v22, -448 +; CHECK-NEXT: .cfi_offset v23, -432 +; CHECK-NEXT: .cfi_offset v24, -416 +; CHECK-NEXT: .cfi_offset v25, -400 +; CHECK-NEXT: .cfi_offset v26, -384 +; CHECK-NEXT: .cfi_offset v27, -368 +; CHECK-NEXT: .cfi_offset v28, -352 +; CHECK-NEXT: .cfi_offset v29, -336 +; CHECK-NEXT: .cfi_offset v30, -320 +; CHECK-NEXT: .cfi_offset v31, -304 +; CHECK-NEXT: li r4, 208 +; CHECK-NEXT: std r14, 400(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r15, 408(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r16, 416(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r17, 424(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r18, 432(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r19, 440(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r20, 448(r1) # 8-byte Folded Spill +; CHECK-NEXT: stvx v20, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: li r4, 224 +; CHECK-NEXT: std r21, 456(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r22, 464(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r23, 472(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r24, 480(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r25, 488(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r26, 496(r1) # 8-byte Folded Spill +; CHECK-NEXT: stvx v21, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: li r4, 240 +; CHECK-NEXT: std r27, 504(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r28, 512(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r29, 520(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r30, 528(r1) # 8-byte Folded Spill +; CHECK-NEXT: mr r30, r3 +; CHECK-NEXT: std r31, 536(r1) # 8-byte Folded Spill +; CHECK-NEXT: stvx v22, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: li r4, 256 +; CHECK-NEXT: stfd f14, 544(r1) # 8-byte Folded Spill +; CHECK-NEXT: stfd f15, 552(r1) # 8-byte Folded Spill +; 
CHECK-NEXT: stfd f16, 560(r1) # 8-byte Folded Spill +; CHECK-NEXT: stfd f17, 568(r1) # 8-byte Folded Spill +; CHECK-NEXT: stfd f18, 576(r1) # 8-byte Folded Spill +; CHECK-NEXT: stfd f19, 584(r1) # 8-byte Folded Spill +; CHECK-NEXT: stvx v23, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: li r4, 272 +; CHECK-NEXT: stfd f20, 592(r1) # 8-byte Folded Spill +; CHECK-NEXT: fmr f20, f2 +; CHECK-NEXT: stfd f21, 600(r1) # 8-byte Folded Spill +; CHECK-NEXT: fmr f21, f3 +; CHECK-NEXT: stfd f22, 608(r1) # 8-byte Folded Spill +; CHECK-NEXT: fmr f22, f4 +; CHECK-NEXT: stvx v24, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: li r4, 288 +; CHECK-NEXT: stfd f23, 616(r1) # 8-byte Folded Spill +; CHECK-NEXT: fmr f23, f5 +; CHECK-NEXT: stfd f24, 624(r1) # 8-byte Folded Spill +; CHECK-NEXT: fmr f24, f6 +; CHECK-NEXT: stfd f25, 632(r1) # 8-byte Folded Spill +; CHECK-NEXT: fmr f25, f7 +; CHECK-NEXT: stvx v25, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: li r4, 304 +; CHECK-NEXT: stfd f26, 640(r1) # 8-byte Folded Spill +; CHECK-NEXT: fmr f26, f8 +; CHECK-NEXT: stfd f27, 648(r1) # 8-byte Folded Spill +; CHECK-NEXT: fmr f27, f9 +; CHECK-NEXT: stfd f28, 656(r1) # 8-byte Folded Spill +; CHECK-NEXT: fmr f28, f10 +; CHECK-NEXT: stvx v26, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: li r4, 320 +; CHECK-NEXT: stfd f29, 664(r1) # 8-byte Folded Spill +; CHECK-NEXT: fmr f29, f11 +; CHECK-NEXT: stfd f30, 672(r1) # 8-byte Folded Spill +; CHECK-NEXT: fmr f30, f12 +; CHECK-NEXT: stfd f31, 680(r1) # 8-byte Folded Spill +; CHECK-NEXT: fmr f31, f13 +; CHECK-NEXT: stvx v27, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: li r4, 336 +; CHECK-NEXT: stvx v28, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: li r4, 352 +; CHECK-NEXT: stvx v29, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: li r4, 368 +; CHECK-NEXT: stvx v30, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: li r4, 384 +; CHECK-NEXT: stvx v31, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f20 +; CHECK-NEXT: std r3, 176(r1) # 8-byte Folded Spill +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f21 +; CHECK-NEXT: std r3, 160(r1) # 8-byte Folded Spill +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f22 +; CHECK-NEXT: std r3, 144(r1) # 8-byte Folded Spill +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f23 +; CHECK-NEXT: std r3, 128(r1) # 8-byte Folded Spill +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f24 +; CHECK-NEXT: std r3, 120(r1) # 8-byte Folded Spill +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f25 +; CHECK-NEXT: std r3, 112(r1) # 8-byte Folded Spill +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f26 +; CHECK-NEXT: std r3, 104(r1) # 8-byte Folded Spill +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f27 +; CHECK-NEXT: std r3, 96(r1) # 8-byte Folded Spill +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f28 +; CHECK-NEXT: std r3, 88(r1) # 8-byte Folded Spill +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f29 +; CHECK-NEXT: std r3, 80(r1) # 8-byte Folded Spill +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f30 +; CHECK-NEXT: std r3, 72(r1) # 8-byte Folded Spill +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f31 +; CHECK-NEXT: std r3, 64(r1) # 8-byte Folded Spill +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: 
lfs f1, 832(r1) +; CHECK-NEXT: std r3, 56(r1) # 8-byte Folded Spill +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: lfs f1, 840(r1) +; CHECK-NEXT: std r3, 48(r1) # 8-byte Folded Spill +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: lfs f1, 848(r1) +; CHECK-NEXT: mr r15, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: lfs f1, 856(r1) +; CHECK-NEXT: mr r14, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: lfs f1, 864(r1) +; CHECK-NEXT: mr r31, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: lfs f1, 872(r1) +; CHECK-NEXT: mr r29, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: lfs f1, 880(r1) +; CHECK-NEXT: mr r28, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: lfs f1, 888(r1) +; CHECK-NEXT: mr r27, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: lfs f1, 896(r1) +; CHECK-NEXT: mr r26, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: lfs f1, 904(r1) +; CHECK-NEXT: mr r25, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: lfs f1, 912(r1) +; CHECK-NEXT: mr r24, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: lfs f1, 920(r1) +; CHECK-NEXT: mr r23, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: lfs f1, 928(r1) +; CHECK-NEXT: mr r22, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: lfs f1, 936(r1) +; CHECK-NEXT: mr r21, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: lfs f1, 944(r1) +; CHECK-NEXT: mr r20, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: lfs f1, 952(r1) +; CHECK-NEXT: mr r19, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: lfs f1, 960(r1) +; CHECK-NEXT: mr r18, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: lfs f1, 968(r1) +; CHECK-NEXT: mr r17, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: lfs f1, 976(r1) +; CHECK-NEXT: mr r16, r3 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r3, 48 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: li r3, 204 +; CHECK-NEXT: stxsspx f1, r1, r3 # 4-byte Folded Spill +; CHECK-NEXT: clrldi r3, r16, 48 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: li r3, 200 +; CHECK-NEXT: stxsspx f1, r1, r3 # 4-byte Folded Spill +; CHECK-NEXT: clrldi r3, r17, 48 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r18, 48 +; CHECK-NEXT: fmr f29, f1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r19, 48 +; CHECK-NEXT: fmr f28, f1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r20, 48 +; CHECK-NEXT: fmr f27, f1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r21, 48 +; CHECK-NEXT: fmr f26, f1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r22, 48 +; CHECK-NEXT: fmr f25, f1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r23, 48 +; CHECK-NEXT: fmr f24, f1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r24, 48 +; CHECK-NEXT: fmr f23, f1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r25, 48 +; CHECK-NEXT: fmr f22, f1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r26, 48 +; CHECK-NEXT: fmr f21, f1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; 
CHECK-NEXT: clrldi r3, r27, 48 +; CHECK-NEXT: fmr f20, f1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r28, 48 +; CHECK-NEXT: fmr f19, f1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r29, 48 +; CHECK-NEXT: fmr f18, f1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r31, 48 +; CHECK-NEXT: fmr f17, f1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r14, 48 +; CHECK-NEXT: fmr f16, f1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r15, 48 +; CHECK-NEXT: fmr f15, f1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: ld r3, 48(r1) # 8-byte Folded Reload +; CHECK-NEXT: fmr f14, f1 +; CHECK-NEXT: clrldi r3, r3, 48 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: ld r3, 56(r1) # 8-byte Folded Reload +; CHECK-NEXT: fmr f30, f1 +; CHECK-NEXT: clrldi r3, r3, 48 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: ld r3, 64(r1) # 8-byte Folded Reload +; CHECK-NEXT: xxlor v30, f1, f1 +; CHECK-NEXT: clrldi r3, r3, 48 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: ld r3, 72(r1) # 8-byte Folded Reload +; CHECK-NEXT: xxlor v29, f1, f1 +; CHECK-NEXT: clrldi r3, r3, 48 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: ld r3, 80(r1) # 8-byte Folded Reload +; CHECK-NEXT: xxlor v28, f1, f1 +; CHECK-NEXT: clrldi r3, r3, 48 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: ld r3, 88(r1) # 8-byte Folded Reload +; CHECK-NEXT: xxlor v27, f1, f1 +; CHECK-NEXT: clrldi r3, r3, 48 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: ld r3, 96(r1) # 8-byte Folded Reload +; CHECK-NEXT: xxlor v26, f1, f1 +; CHECK-NEXT: clrldi r3, r3, 48 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: ld r3, 104(r1) # 8-byte Folded Reload +; CHECK-NEXT: xxlor v25, f1, f1 +; CHECK-NEXT: clrldi r3, r3, 48 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: ld r3, 112(r1) # 8-byte Folded Reload +; CHECK-NEXT: xxlor v24, f1, f1 +; CHECK-NEXT: clrldi r3, r3, 48 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: ld r3, 120(r1) # 8-byte Folded Reload +; CHECK-NEXT: xxlor v23, f1, f1 +; CHECK-NEXT: clrldi r3, r3, 48 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: ld r3, 128(r1) # 8-byte Folded Reload +; CHECK-NEXT: xxlor v22, f1, f1 +; CHECK-NEXT: clrldi r3, r3, 48 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: ld r3, 144(r1) # 8-byte Folded Reload +; CHECK-NEXT: xxlor v21, f1, f1 +; CHECK-NEXT: clrldi r3, r3, 48 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: ld r3, 160(r1) # 8-byte Folded Reload +; CHECK-NEXT: xxlor v20, f1, f1 +; CHECK-NEXT: clrldi r3, r3, 48 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: ld r3, 176(r1) # 8-byte Folded Reload +; CHECK-NEXT: fmr f31, f1 +; CHECK-NEXT: clrldi r3, r3, 48 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f31 +; CHECK-NEXT: mtvsrd v31, r3 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: li r3, 176 +; CHECK-NEXT: xxlor f1, v20, v20 +; CHECK-NEXT: xxmrghd vs0, vs0, v31 +; CHECK-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: xxlor f1, v21, v21 +; CHECK-NEXT: mtvsrd v31, r3 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 
+; CHECK-NEXT: li r3, 160 +; CHECK-NEXT: xxlor f1, v22, v22 +; CHECK-NEXT: xxmrghd vs0, vs0, v31 +; CHECK-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: xxlor f1, v23, v23 +; CHECK-NEXT: mtvsrd v31, r3 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: li r3, 144 +; CHECK-NEXT: xxlor f1, v24, v24 +; CHECK-NEXT: xxmrghd vs0, vs0, v31 +; CHECK-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: xxlor f1, v25, v25 +; CHECK-NEXT: mtvsrd v31, r3 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: li r3, 128 +; CHECK-NEXT: xxlor f1, v26, v26 +; CHECK-NEXT: xxmrghd vs0, vs0, v31 +; CHECK-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: xxlor f1, v27, v27 +; CHECK-NEXT: mtvsrd v31, r3 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: xxlor f1, v28, v28 +; CHECK-NEXT: xxmrghd v27, vs0, v31 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: xxlor f1, v29, v29 +; CHECK-NEXT: mtvsrd v31, r3 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: xxlor f1, v30, v30 +; CHECK-NEXT: xxmrghd v29, vs0, v31 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f30 +; CHECK-NEXT: mtvsrd v31, r3 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f14 +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: xxmrghd v31, vs0, v31 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f15 +; CHECK-NEXT: mtvsrd v30, r3 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f16 +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: xxmrghd v30, vs0, v30 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f17 +; CHECK-NEXT: mtvsrd v28, r3 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f18 +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: xxmrghd v28, vs0, v28 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f19 +; CHECK-NEXT: mtvsrd v26, r3 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f20 +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: xxmrghd v26, vs0, v26 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f21 +; CHECK-NEXT: mtvsrd v24, r3 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f22 +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: xxmrghd v24, vs0, v24 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f23 +; CHECK-NEXT: mtvsrd v22, r3 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f24 +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: xxmrghd v22, vs0, v22 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f25 +; CHECK-NEXT: mtvsrd v20, r3 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f26 +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: xxmrghd v20, vs0, v20 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f27 +; CHECK-NEXT: mtvsrd v21, r3 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f28 +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: xxmrghd v21, vs0, v21 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f1, f29 +; CHECK-NEXT: mtvsrd v23, r3 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: li r3, 200 +; CHECK-NEXT: lxsspx f1, r1, r3 # 4-byte Folded Reload +; CHECK-NEXT: xxmrghd v23, vs0, v23 +; CHECK-NEXT: bl lrintf 
+; CHECK-NEXT: nop +; CHECK-NEXT: mtvsrd v25, r3 +; CHECK-NEXT: li r3, 204 +; CHECK-NEXT: lxsspx f1, r1, r3 # 4-byte Folded Reload +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: li r3, 240 +; CHECK-NEXT: xxswapd vs1, v23 +; CHECK-NEXT: li r4, 128 +; CHECK-NEXT: xxswapd vs2, v21 +; CHECK-NEXT: xxswapd vs3, v31 +; CHECK-NEXT: xxmrghd v2, vs0, v25 +; CHECK-NEXT: xxswapd vs0, v2 +; CHECK-NEXT: stxvd2x vs0, r30, r3 +; CHECK-NEXT: li r3, 224 +; CHECK-NEXT: stxvd2x vs1, r30, r3 +; CHECK-NEXT: li r3, 208 +; CHECK-NEXT: stxvd2x vs2, r30, r3 +; CHECK-NEXT: li r3, 192 +; CHECK-NEXT: xxswapd vs0, v20 +; CHECK-NEXT: stxvd2x vs0, r30, r3 +; CHECK-NEXT: li r3, 176 +; CHECK-NEXT: xxswapd vs1, v22 +; CHECK-NEXT: stxvd2x vs1, r30, r3 +; CHECK-NEXT: li r3, 160 +; CHECK-NEXT: xxswapd vs2, v28 +; CHECK-NEXT: xxswapd vs0, v24 +; CHECK-NEXT: stxvd2x vs0, r30, r3 +; CHECK-NEXT: li r3, 144 +; CHECK-NEXT: xxswapd vs1, v26 +; CHECK-NEXT: stxvd2x vs1, r30, r3 +; CHECK-NEXT: li r3, 128 +; CHECK-NEXT: stxvd2x vs2, r30, r3 +; CHECK-NEXT: li r3, 112 +; CHECK-NEXT: xxswapd vs0, v30 +; CHECK-NEXT: stxvd2x vs0, r30, r3 +; CHECK-NEXT: li r3, 96 +; CHECK-NEXT: stxvd2x vs3, r30, r3 +; CHECK-NEXT: li r3, 80 +; CHECK-NEXT: lxvd2x vs2, r1, r4 # 16-byte Folded Reload +; CHECK-NEXT: li r4, 144 +; CHECK-NEXT: xxswapd vs1, v29 +; CHECK-NEXT: stxvd2x vs1, r30, r3 +; CHECK-NEXT: li r3, 64 +; CHECK-NEXT: lxvd2x vs1, r1, r4 # 16-byte Folded Reload +; CHECK-NEXT: li r4, 160 +; CHECK-NEXT: lxvd2x vs3, r1, r4 # 16-byte Folded Reload +; CHECK-NEXT: li r4, 176 +; CHECK-NEXT: lxvd2x vs4, r1, r4 # 16-byte Folded Reload +; CHECK-NEXT: xxswapd vs0, v27 +; CHECK-NEXT: stxvd2x vs0, r30, r3 +; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: xxswapd vs2, vs2 +; CHECK-NEXT: stxvd2x vs2, r30, r3 +; CHECK-NEXT: li r3, 32 +; CHECK-NEXT: xxswapd vs1, vs1 +; CHECK-NEXT: stxvd2x vs1, r30, r3 +; CHECK-NEXT: li r3, 16 +; CHECK-NEXT: xxswapd vs3, vs3 +; CHECK-NEXT: stxvd2x vs3, r30, r3 +; CHECK-NEXT: li r3, 384 +; CHECK-NEXT: xxswapd vs4, vs4 +; CHECK-NEXT: stxvd2x vs4, 0, r30 +; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 368 +; CHECK-NEXT: lfd f31, 680(r1) # 8-byte Folded Reload +; CHECK-NEXT: lfd f30, 672(r1) # 8-byte Folded Reload +; CHECK-NEXT: lfd f29, 664(r1) # 8-byte Folded Reload +; CHECK-NEXT: lfd f28, 656(r1) # 8-byte Folded Reload +; CHECK-NEXT: lfd f27, 648(r1) # 8-byte Folded Reload +; CHECK-NEXT: lfd f26, 640(r1) # 8-byte Folded Reload +; CHECK-NEXT: lfd f25, 632(r1) # 8-byte Folded Reload +; CHECK-NEXT: lfd f24, 624(r1) # 8-byte Folded Reload +; CHECK-NEXT: lfd f23, 616(r1) # 8-byte Folded Reload +; CHECK-NEXT: lfd f22, 608(r1) # 8-byte Folded Reload +; CHECK-NEXT: lfd f21, 600(r1) # 8-byte Folded Reload +; CHECK-NEXT: lfd f20, 592(r1) # 8-byte Folded Reload +; CHECK-NEXT: lfd f19, 584(r1) # 8-byte Folded Reload +; CHECK-NEXT: lfd f18, 576(r1) # 8-byte Folded Reload +; CHECK-NEXT: lfd f17, 568(r1) # 8-byte Folded Reload +; CHECK-NEXT: lfd f16, 560(r1) # 8-byte Folded Reload +; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 352 +; CHECK-NEXT: lfd f15, 552(r1) # 8-byte Folded Reload +; CHECK-NEXT: lfd f14, 544(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r31, 536(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r30, 528(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r29, 520(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r28, 512(r1) # 8-byte Folded Reload +; CHECK-NEXT: lvx v29, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 336 +; CHECK-NEXT: ld r27, 504(r1) # 
8-byte Folded Reload +; CHECK-NEXT: ld r26, 496(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r25, 488(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r24, 480(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r23, 472(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r22, 464(r1) # 8-byte Folded Reload +; CHECK-NEXT: lvx v28, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 320 +; CHECK-NEXT: ld r21, 456(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r20, 448(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r19, 440(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r18, 432(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r17, 424(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r16, 416(r1) # 8-byte Folded Reload +; CHECK-NEXT: lvx v27, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 304 +; CHECK-NEXT: ld r15, 408(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r14, 400(r1) # 8-byte Folded Reload +; CHECK-NEXT: lvx v26, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 288 +; CHECK-NEXT: lvx v25, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 272 +; CHECK-NEXT: lvx v24, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 256 +; CHECK-NEXT: lvx v23, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 240 +; CHECK-NEXT: lvx v22, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 224 +; CHECK-NEXT: lvx v21, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 208 +; CHECK-NEXT: lvx v20, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: addi r1, r1, 688 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +; +; FAST-LABEL: lrint_v32i64_v32f16: +; FAST: # %bb.0: +; FAST-NEXT: mflr r0 +; FAST-NEXT: stdu r1, -480(r1) +; FAST-NEXT: std r0, 496(r1) +; FAST-NEXT: .cfi_def_cfa_offset 480 +; FAST-NEXT: .cfi_offset lr, 16 +; FAST-NEXT: .cfi_offset r30, -160 +; FAST-NEXT: .cfi_offset f14, -144 +; FAST-NEXT: .cfi_offset f15, -136 +; FAST-NEXT: .cfi_offset f16, -128 +; FAST-NEXT: .cfi_offset f17, -120 +; FAST-NEXT: .cfi_offset f18, -112 +; FAST-NEXT: .cfi_offset f19, -104 +; FAST-NEXT: .cfi_offset f20, -96 +; FAST-NEXT: .cfi_offset f21, -88 +; FAST-NEXT: .cfi_offset f22, -80 +; FAST-NEXT: .cfi_offset f23, -72 +; FAST-NEXT: .cfi_offset f24, -64 +; FAST-NEXT: .cfi_offset f25, -56 +; FAST-NEXT: .cfi_offset f26, -48 +; FAST-NEXT: .cfi_offset f27, -40 +; FAST-NEXT: .cfi_offset f28, -32 +; FAST-NEXT: .cfi_offset f29, -24 +; FAST-NEXT: .cfi_offset f30, -16 +; FAST-NEXT: .cfi_offset f31, -8 +; FAST-NEXT: .cfi_offset v20, -352 +; FAST-NEXT: .cfi_offset v21, -336 +; FAST-NEXT: .cfi_offset v22, -320 +; FAST-NEXT: .cfi_offset v23, -304 +; FAST-NEXT: .cfi_offset v24, -288 +; FAST-NEXT: .cfi_offset v25, -272 +; FAST-NEXT: .cfi_offset v26, -256 +; FAST-NEXT: .cfi_offset v27, -240 +; FAST-NEXT: .cfi_offset v28, -224 +; FAST-NEXT: .cfi_offset v29, -208 +; FAST-NEXT: .cfi_offset v30, -192 +; FAST-NEXT: .cfi_offset v31, -176 +; FAST-NEXT: li r4, 128 +; FAST-NEXT: std r30, 320(r1) # 8-byte Folded Spill +; FAST-NEXT: stfd f14, 336(r1) # 8-byte Folded Spill +; FAST-NEXT: fmr f14, f5 +; FAST-NEXT: stfd f15, 344(r1) # 8-byte Folded Spill +; FAST-NEXT: stfd f16, 352(r1) # 8-byte Folded Spill +; FAST-NEXT: fmr f16, f4 +; FAST-NEXT: mr r30, r3 +; FAST-NEXT: stvx v20, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: li r4, 144 +; FAST-NEXT: stfd f17, 360(r1) # 8-byte Folded Spill +; FAST-NEXT: stfd f18, 368(r1) # 8-byte Folded Spill +; FAST-NEXT: stfd f19, 376(r1) # 8-byte Folded Spill +; FAST-NEXT: stfd f20, 384(r1) # 8-byte Folded Spill +; FAST-NEXT: stfd f21, 392(r1) # 8-byte Folded Spill +; FAST-NEXT: stfd f22, 400(r1) # 8-byte 
Folded Spill +; FAST-NEXT: stvx v21, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: li r4, 160 +; FAST-NEXT: stfd f23, 408(r1) # 8-byte Folded Spill +; FAST-NEXT: stfd f24, 416(r1) # 8-byte Folded Spill +; FAST-NEXT: stfd f25, 424(r1) # 8-byte Folded Spill +; FAST-NEXT: stfd f26, 432(r1) # 8-byte Folded Spill +; FAST-NEXT: stfd f27, 440(r1) # 8-byte Folded Spill +; FAST-NEXT: stfd f28, 448(r1) # 8-byte Folded Spill +; FAST-NEXT: stvx v22, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: li r4, 176 +; FAST-NEXT: xxlor v22, f3, f3 +; FAST-NEXT: stfd f29, 456(r1) # 8-byte Folded Spill +; FAST-NEXT: fmr f29, f9 +; FAST-NEXT: stfd f30, 464(r1) # 8-byte Folded Spill +; FAST-NEXT: stfd f31, 472(r1) # 8-byte Folded Spill +; FAST-NEXT: stvx v23, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: li r4, 192 +; FAST-NEXT: xxlor v23, f2, f2 +; FAST-NEXT: stvx v24, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: li r4, 208 +; FAST-NEXT: stvx v25, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: li r4, 224 +; FAST-NEXT: xxlor v25, f13, f13 +; FAST-NEXT: stvx v26, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: li r4, 240 +; FAST-NEXT: xxlor v26, f12, f12 +; FAST-NEXT: stvx v27, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: li r4, 256 +; FAST-NEXT: xxlor v27, f11, f11 +; FAST-NEXT: stvx v28, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: li r4, 272 +; FAST-NEXT: xxlor v28, f10, f10 +; FAST-NEXT: stvx v29, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: li r4, 288 +; FAST-NEXT: xxlor v29, f8, f8 +; FAST-NEXT: stvx v30, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: li r4, 304 +; FAST-NEXT: xxlor v30, f7, f7 +; FAST-NEXT: stvx v31, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: li r4, 44 +; FAST-NEXT: xxlor v31, f6, f6 +; FAST-NEXT: stxsspx f1, r1, r4 # 4-byte Folded Spill +; FAST-NEXT: lfs f1, 768(r1) +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: li r3, 120 +; FAST-NEXT: stxsdx f1, r1, r3 # 8-byte Folded Spill +; FAST-NEXT: lfs f1, 760(r1) +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: li r3, 112 +; FAST-NEXT: stxsdx f1, r1, r3 # 8-byte Folded Spill +; FAST-NEXT: lfs f1, 752(r1) +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: li r3, 104 +; FAST-NEXT: stxsdx f1, r1, r3 # 8-byte Folded Spill +; FAST-NEXT: lfs f1, 744(r1) +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: li r3, 96 +; FAST-NEXT: stxsdx f1, r1, r3 # 8-byte Folded Spill +; FAST-NEXT: lfs f1, 736(r1) +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: li r3, 88 +; FAST-NEXT: stxsdx f1, r1, r3 # 8-byte Folded Spill +; FAST-NEXT: lfs f1, 728(r1) +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: li r3, 80 +; FAST-NEXT: stxsdx f1, r1, r3 # 8-byte Folded Spill +; FAST-NEXT: lfs f1, 720(r1) +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: li r3, 72 +; FAST-NEXT: stxsdx f1, r1, r3 # 8-byte Folded Spill +; FAST-NEXT: lfs f1, 712(r1) +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: 
bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: li r3, 64 +; FAST-NEXT: stxsdx f1, r1, r3 # 8-byte Folded Spill +; FAST-NEXT: lfs f1, 704(r1) +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: li r3, 56 +; FAST-NEXT: stxsdx f1, r1, r3 # 8-byte Folded Spill +; FAST-NEXT: lfs f1, 696(r1) +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: li r3, 48 +; FAST-NEXT: stxsdx f1, r1, r3 # 8-byte Folded Spill +; FAST-NEXT: lfs f1, 688(r1) +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: xxlor v21, f1, f1 +; FAST-NEXT: lfs f1, 680(r1) +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: xxlor v20, f1, f1 +; FAST-NEXT: lfs f1, 672(r1) +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: xxlor v24, f1, f1 +; FAST-NEXT: lfs f1, 664(r1) +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f31, f1 +; FAST-NEXT: lfs f1, 656(r1) +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f30, f1 +; FAST-NEXT: lfs f1, 648(r1) +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f28, f1 +; FAST-NEXT: lfs f1, 640(r1) +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f27, f1 +; FAST-NEXT: lfs f1, 632(r1) +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f26, f1 +; FAST-NEXT: lfs f1, 624(r1) +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f25, f1 +; FAST-NEXT: xxlor f1, v25, v25 +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f24, f1 +; FAST-NEXT: xxlor f1, v26, v26 +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f23, f1 +; FAST-NEXT: xxlor f1, v27, v27 +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f22, f1 +; FAST-NEXT: xxlor f1, v28, v28 +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f21, f1 +; FAST-NEXT: fmr f1, f29 +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f20, f1 +; FAST-NEXT: xxlor f1, v29, v29 +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f19, f1 +; FAST-NEXT: xxlor f1, v30, v30 +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 
48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f18, f1 +; FAST-NEXT: xxlor f1, v31, v31 +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f29, f1 +; FAST-NEXT: fmr f1, f14 +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f14, f1 +; FAST-NEXT: fmr f1, f16 +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f16, f1 +; FAST-NEXT: xxlor f1, v22, v22 +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fmr f17, f1 +; FAST-NEXT: xxlor f1, v23, v23 +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: li r3, 44 +; FAST-NEXT: fmr f15, f1 +; FAST-NEXT: lxsspx f1, r1, r3 # 4-byte Folded Reload +; FAST-NEXT: bl __gnu_f2h_ieee +; FAST-NEXT: nop +; FAST-NEXT: clrldi r3, r3, 48 +; FAST-NEXT: bl __gnu_h2f_ieee +; FAST-NEXT: nop +; FAST-NEXT: fctid f3, f15 +; FAST-NEXT: fctid f4, f17 +; FAST-NEXT: mffprd r3, f3 +; FAST-NEXT: fctid f5, f16 +; FAST-NEXT: fctid f6, f14 +; FAST-NEXT: fctid f7, f18 +; FAST-NEXT: fctid f8, f19 +; FAST-NEXT: fctid f13, f1 +; FAST-NEXT: fctid f9, f20 +; FAST-NEXT: fctid f10, f22 +; FAST-NEXT: fctid f11, f24 +; FAST-NEXT: fctid f12, f25 +; FAST-NEXT: fctid f2, f23 +; FAST-NEXT: fctid f0, f21 +; FAST-NEXT: mtvsrd v2, r3 +; FAST-NEXT: mffprd r3, f4 +; FAST-NEXT: mtvsrd v3, r3 +; FAST-NEXT: mffprd r3, f5 +; FAST-NEXT: mtfprd f5, r3 +; FAST-NEXT: mffprd r3, f6 +; FAST-NEXT: mtfprd f1, r3 +; FAST-NEXT: mffprd r3, f7 +; FAST-NEXT: mtfprd f6, r3 +; FAST-NEXT: mffprd r3, f8 +; FAST-NEXT: mtfprd f7, r3 +; FAST-NEXT: mffprd r3, f9 +; FAST-NEXT: mtfprd f3, r3 +; FAST-NEXT: mffprd r3, f10 +; FAST-NEXT: mtfprd f4, r3 +; FAST-NEXT: mffprd r3, f11 +; FAST-NEXT: fctid f11, f31 +; FAST-NEXT: lfd f31, 56(r1) # 8-byte Folded Reload +; FAST-NEXT: mtfprd f8, r3 +; FAST-NEXT: mffprd r3, f12 +; FAST-NEXT: xxlor f12, v24, v24 +; FAST-NEXT: fctid f31, f31 +; FAST-NEXT: fctid f12, f12 +; FAST-NEXT: mtfprd f9, r3 +; FAST-NEXT: mffprd r3, f13 +; FAST-NEXT: lfd f13, 48(r1) # 8-byte Folded Reload +; FAST-NEXT: mtfprd f10, r3 +; FAST-NEXT: fctid f13, f13 +; FAST-NEXT: xxmrghd v3, vs5, v3 +; FAST-NEXT: fctid f5, f26 +; FAST-NEXT: mffprd r3, f5 +; FAST-NEXT: mtfprd f5, r3 +; FAST-NEXT: xxmrghd v4, vs7, vs6 +; FAST-NEXT: fctid f6, f27 +; FAST-NEXT: fctid f7, f28 +; FAST-NEXT: mffprd r3, f6 +; FAST-NEXT: lfd f28, 96(r1) # 8-byte Folded Reload +; FAST-NEXT: fctid f28, f28 +; FAST-NEXT: mtfprd f6, r3 +; FAST-NEXT: mffprd r3, f7 +; FAST-NEXT: mtfprd f7, r3 +; FAST-NEXT: xxmrghd v2, v2, vs10 +; FAST-NEXT: fctid f10, f30 +; FAST-NEXT: mffprd r3, f10 +; FAST-NEXT: lfd f30, 80(r1) # 8-byte Folded Reload +; FAST-NEXT: fctid f30, f30 +; FAST-NEXT: mtfprd f10, r3 +; FAST-NEXT: mffprd r3, f11 +; FAST-NEXT: mtfprd f11, r3 +; FAST-NEXT: mffprd r3, f12 +; FAST-NEXT: mtfprd f12, r3 +; FAST-NEXT: xxmrghd v5, vs12, vs11 +; FAST-NEXT: xxlor f11, v20, v20 +; FAST-NEXT: xxlor f12, v21, v21 +; FAST-NEXT: fctid f11, f11 +; FAST-NEXT: fctid f12, f12 +; FAST-NEXT: mffprd r3, f11 +; FAST-NEXT: mtfprd f11, r3 +; FAST-NEXT: mffprd r3, f12 +; FAST-NEXT: mtfprd f12, r3 +; FAST-NEXT: mffprd r3, f13 +; FAST-NEXT: mtfprd f13, r3 +; FAST-NEXT: 
mffprd r3, f31 +; FAST-NEXT: lfd f31, 64(r1) # 8-byte Folded Reload +; FAST-NEXT: fctid f31, f31 +; FAST-NEXT: mtvsrd v0, r3 +; FAST-NEXT: mffprd r3, f31 +; FAST-NEXT: lfd f31, 72(r1) # 8-byte Folded Reload +; FAST-NEXT: mtvsrd v1, r3 +; FAST-NEXT: mffprd r3, f30 +; FAST-NEXT: lfd f30, 88(r1) # 8-byte Folded Reload +; FAST-NEXT: fctid f31, f31 +; FAST-NEXT: mtvsrd v6, r3 +; FAST-NEXT: mffprd r3, f28 +; FAST-NEXT: lfd f28, 104(r1) # 8-byte Folded Reload +; FAST-NEXT: fctid f30, f30 +; FAST-NEXT: fctid f28, f28 +; FAST-NEXT: mtvsrd v7, r3 +; FAST-NEXT: mffprd r3, f28 +; FAST-NEXT: lfd f28, 112(r1) # 8-byte Folded Reload +; FAST-NEXT: fctid f28, f28 +; FAST-NEXT: mtvsrd v8, r3 +; FAST-NEXT: mffprd r3, f28 +; FAST-NEXT: lfd f28, 120(r1) # 8-byte Folded Reload +; FAST-NEXT: fctid f28, f28 +; FAST-NEXT: xxmrghd v10, vs12, vs11 +; FAST-NEXT: xxmrghd v0, v0, vs13 +; FAST-NEXT: xxswapd vs12, v0 +; FAST-NEXT: xxmrghd v0, vs9, vs8 +; FAST-NEXT: xxmrghd v7, v8, v7 +; FAST-NEXT: mtvsrd v8, r3 +; FAST-NEXT: mffprd r3, f28 +; FAST-NEXT: mtvsrd v9, r3 +; FAST-NEXT: mffprd r3, f30 +; FAST-NEXT: xxswapd v7, v7 +; FAST-NEXT: xxmrghd v8, v9, v8 +; FAST-NEXT: mtvsrd v9, r3 +; FAST-NEXT: mffprd r3, f31 +; FAST-NEXT: xxswapd v8, v8 +; FAST-NEXT: xxmrghd v6, v9, v6 +; FAST-NEXT: mtvsrd v9, r3 +; FAST-NEXT: li r3, 240 +; FAST-NEXT: stxvd2x v8, r30, r3 +; FAST-NEXT: li r3, 224 +; FAST-NEXT: stxvd2x v7, r30, r3 +; FAST-NEXT: li r3, 208 +; FAST-NEXT: xxswapd vs11, v6 +; FAST-NEXT: xxmrghd v6, vs10, vs7 +; FAST-NEXT: stxvd2x vs11, r30, r3 +; FAST-NEXT: li r3, 192 +; FAST-NEXT: xxmrghd v1, v9, v1 +; FAST-NEXT: xxswapd vs11, v1 +; FAST-NEXT: xxmrghd v1, vs6, vs5 +; FAST-NEXT: xxswapd vs5, v10 +; FAST-NEXT: xxswapd vs6, v5 +; FAST-NEXT: stxvd2x vs11, r30, r3 +; FAST-NEXT: li r3, 176 +; FAST-NEXT: stxvd2x vs12, r30, r3 +; FAST-NEXT: li r3, 160 +; FAST-NEXT: stxvd2x vs5, r30, r3 +; FAST-NEXT: li r3, 144 +; FAST-NEXT: stxvd2x vs6, r30, r3 +; FAST-NEXT: mffprd r3, f2 +; FAST-NEXT: mtfprd f7, r3 +; FAST-NEXT: li r3, 128 +; FAST-NEXT: xxswapd vs5, v6 +; FAST-NEXT: stxvd2x vs5, r30, r3 +; FAST-NEXT: li r3, 112 +; FAST-NEXT: xxswapd vs2, v1 +; FAST-NEXT: xxswapd vs6, v0 +; FAST-NEXT: stxvd2x vs2, r30, r3 +; FAST-NEXT: li r3, 96 +; FAST-NEXT: fctid f2, f29 +; FAST-NEXT: stxvd2x vs6, r30, r3 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: mffprd r3, f2 +; FAST-NEXT: mtfprd f2, r3 +; FAST-NEXT: li r3, 80 +; FAST-NEXT: xxmrghd v5, vs7, vs4 +; FAST-NEXT: xxswapd vs4, v2 +; FAST-NEXT: xxmrghd v0, vs0, vs3 +; FAST-NEXT: xxswapd vs0, v5 +; FAST-NEXT: xxswapd vs3, v3 +; FAST-NEXT: stxvd2x vs0, r30, r3 +; FAST-NEXT: li r3, 64 +; FAST-NEXT: xxswapd vs0, v0 +; FAST-NEXT: stxvd2x vs0, r30, r3 +; FAST-NEXT: li r3, 48 +; FAST-NEXT: xxmrghd v5, vs2, vs1 +; FAST-NEXT: xxswapd vs1, v4 +; FAST-NEXT: stxvd2x vs1, r30, r3 +; FAST-NEXT: li r3, 32 +; FAST-NEXT: xxswapd vs2, v5 +; FAST-NEXT: stxvd2x vs2, r30, r3 +; FAST-NEXT: li r3, 16 +; FAST-NEXT: stxvd2x vs3, r30, r3 +; FAST-NEXT: li r3, 304 +; FAST-NEXT: stxvd2x vs4, 0, r30 +; FAST-NEXT: lfd f31, 472(r1) # 8-byte Folded Reload +; FAST-NEXT: lfd f30, 464(r1) # 8-byte Folded Reload +; FAST-NEXT: lfd f29, 456(r1) # 8-byte Folded Reload +; FAST-NEXT: lfd f28, 448(r1) # 8-byte Folded Reload +; FAST-NEXT: lfd f27, 440(r1) # 8-byte Folded Reload +; FAST-NEXT: lfd f26, 432(r1) # 8-byte Folded Reload +; FAST-NEXT: lfd f25, 424(r1) # 8-byte Folded Reload +; FAST-NEXT: lfd f24, 416(r1) # 8-byte Folded Reload +; FAST-NEXT: lfd f23, 408(r1) # 8-byte Folded Reload +; FAST-NEXT: lfd f22, 
400(r1) # 8-byte Folded Reload +; FAST-NEXT: lfd f21, 392(r1) # 8-byte Folded Reload +; FAST-NEXT: lfd f20, 384(r1) # 8-byte Folded Reload +; FAST-NEXT: lfd f19, 376(r1) # 8-byte Folded Reload +; FAST-NEXT: lfd f18, 368(r1) # 8-byte Folded Reload +; FAST-NEXT: lfd f17, 360(r1) # 8-byte Folded Reload +; FAST-NEXT: lfd f16, 352(r1) # 8-byte Folded Reload +; FAST-NEXT: lfd f15, 344(r1) # 8-byte Folded Reload +; FAST-NEXT: lfd f14, 336(r1) # 8-byte Folded Reload +; FAST-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 288 +; FAST-NEXT: ld r30, 320(r1) # 8-byte Folded Reload +; FAST-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 272 +; FAST-NEXT: lvx v29, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 256 +; FAST-NEXT: lvx v28, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 240 +; FAST-NEXT: lvx v27, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 224 +; FAST-NEXT: lvx v26, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 208 +; FAST-NEXT: lvx v25, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 192 +; FAST-NEXT: lvx v24, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 176 +; FAST-NEXT: lvx v23, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 160 +; FAST-NEXT: lvx v22, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 144 +; FAST-NEXT: lvx v21, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 128 +; FAST-NEXT: lvx v20, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: addi r1, r1, 480 +; FAST-NEXT: ld r0, 16(r1) +; FAST-NEXT: mtlr r0 +; FAST-NEXT: blr + %a = call <32 x i64> @llvm.lrint.v32i64.v32f16(<32 x half> %x) + ret <32 x i64> %a +} +declare <32 x i64> @llvm.lrint.v32i64.v32f16(<32 x half>) + +define <1 x i64> @lrint_v1f32(<1 x float> %x) { +; BE-LABEL: lrint_v1f32: +; BE: # %bb.0: +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -112(r1) +; BE-NEXT: std r0, 128(r1) +; BE-NEXT: .cfi_def_cfa_offset 112 +; BE-NEXT: .cfi_offset lr, 16 +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: addi r1, r1, 112 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr +; +; CHECK-LABEL: lrint_v1f32: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -32(r1) +; CHECK-NEXT: std r0, 48(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: addi r1, r1, 32 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +; +; FAST-LABEL: lrint_v1f32: +; FAST: # %bb.0: +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: blr + %a = call <1 x i64> @llvm.lrint.v1i64.v1f32(<1 x float> %x) + ret <1 x i64> %a +} +declare <1 x i64> @llvm.lrint.v1i64.v1f32(<1 x float>) + +define <2 x i64> @lrint_v2f32(<2 x float> %x) { +; BE-LABEL: lrint_v2f32: +; BE: # %bb.0: +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -144(r1) +; BE-NEXT: std r0, 160(r1) +; BE-NEXT: .cfi_def_cfa_offset 144 +; BE-NEXT: .cfi_offset lr, 16 +; BE-NEXT: addi r3, r1, 112 +; BE-NEXT: stxvw4x v2, 0, r3 +; BE-NEXT: lfs f1, 116(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: lfs f1, 112(r1) +; BE-NEXT: std r3, 136(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: std r3, 128(r1) +; BE-NEXT: addi r3, r1, 128 +; BE-NEXT: lxvd2x v2, 0, r3 +; BE-NEXT: addi r1, r1, 144 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr +; +; CHECK-LABEL: lrint_v2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -64(r1) +; CHECK-NEXT: std r0, 80(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: .cfi_offset v31, -16 +; 
CHECK-NEXT: xxsldwi vs0, v2, v2, 3 +; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: xscvspdpn f1, vs0 +; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: vmr v31, v2 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: xxswapd vs0, v31 +; CHECK-NEXT: mtvsrd v31, r3 +; CHECK-NEXT: xscvspdpn f1, vs0 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: xxmrghd v2, vs0, v31 +; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: addi r1, r1, 64 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +; +; FAST-LABEL: lrint_v2f32: +; FAST: # %bb.0: +; FAST-NEXT: xxsldwi vs0, v2, v2, 3 +; FAST-NEXT: xxswapd vs1, v2 +; FAST-NEXT: xscvspdpn f0, vs0 +; FAST-NEXT: xscvspdpn f1, vs1 +; FAST-NEXT: fctid f0, f0 +; FAST-NEXT: fctid f1, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: mffprd r3, f1 +; FAST-NEXT: mtfprd f1, r3 +; FAST-NEXT: xxmrghd v2, vs1, vs0 +; FAST-NEXT: blr + %a = call <2 x i64> @llvm.lrint.v2i64.v2f32(<2 x float> %x) + ret <2 x i64> %a +} +declare <2 x i64> @llvm.lrint.v2i64.v2f32(<2 x float>) + +define <4 x i64> @lrint_v4f32(<4 x float> %x) { +; BE-LABEL: lrint_v4f32: +; BE: # %bb.0: +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -160(r1) +; BE-NEXT: std r0, 176(r1) +; BE-NEXT: .cfi_def_cfa_offset 160 +; BE-NEXT: .cfi_offset lr, 16 +; BE-NEXT: addi r3, r1, 112 +; BE-NEXT: stxvw4x v2, 0, r3 +; BE-NEXT: lfs f1, 116(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: lfs f1, 112(r1) +; BE-NEXT: std r3, 136(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: lfs f1, 124(r1) +; BE-NEXT: std r3, 128(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: lfs f1, 120(r1) +; BE-NEXT: std r3, 152(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: std r3, 144(r1) +; BE-NEXT: addi r3, r1, 128 +; BE-NEXT: lxvd2x v2, 0, r3 +; BE-NEXT: addi r3, r1, 144 +; BE-NEXT: lxvd2x v3, 0, r3 +; BE-NEXT: addi r1, r1, 160 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr +; +; CHECK-LABEL: lrint_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -80(r1) +; CHECK-NEXT: std r0, 96(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 80 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: .cfi_offset v30, -32 +; CHECK-NEXT: .cfi_offset v31, -16 +; CHECK-NEXT: xxsldwi vs0, v2, v2, 3 +; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: xscvspdpn f1, vs0 +; CHECK-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 64 +; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: vmr v31, v2 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: xxswapd vs0, v31 +; CHECK-NEXT: mtvsrd v30, r3 +; CHECK-NEXT: xscvspdpn f1, vs0 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: xscvspdpn f1, v31 +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: xxmrghd v30, vs0, v30 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: xxsldwi vs0, v31, v31, 1 +; CHECK-NEXT: mtvsrd v31, r3 +; CHECK-NEXT: xscvspdpn f1, vs0 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: li r3, 64 +; CHECK-NEXT: vmr v2, v30 +; CHECK-NEXT: xxmrghd v3, v31, vs0 +; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: addi r1, r1, 80 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +; +; FAST-LABEL: lrint_v4f32: +; FAST: # %bb.0: +; FAST-NEXT: xxsldwi vs0, v2, v2, 3 +; FAST-NEXT: xxswapd vs1, v2 +; FAST-NEXT: xscvspdpn f0, vs0 +; FAST-NEXT: 
xxsldwi vs2, v2, v2, 1 +; FAST-NEXT: fctid f0, f0 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: xscvspdpn f0, vs1 +; FAST-NEXT: mtfprd f1, r3 +; FAST-NEXT: fctid f0, f0 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: xxmrghd v4, vs0, vs1 +; FAST-NEXT: xscvspdpn f0, v2 +; FAST-NEXT: vmr v2, v4 +; FAST-NEXT: fctid f0, f0 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: xscvspdpn f0, vs2 +; FAST-NEXT: mtfprd f1, r3 +; FAST-NEXT: fctid f0, f0 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: xxmrghd v3, vs1, vs0 +; FAST-NEXT: blr + %a = call <4 x i64> @llvm.lrint.v4i64.v4f32(<4 x float> %x) + ret <4 x i64> %a +} +declare <4 x i64> @llvm.lrint.v4i64.v4f32(<4 x float>) + +define <8 x i64> @lrint_v8f32(<8 x float> %x) { +; BE-LABEL: lrint_v8f32: +; BE: # %bb.0: +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -208(r1) +; BE-NEXT: std r0, 224(r1) +; BE-NEXT: .cfi_def_cfa_offset 208 +; BE-NEXT: .cfi_offset lr, 16 +; BE-NEXT: addi r3, r1, 112 +; BE-NEXT: stxvw4x v2, 0, r3 +; BE-NEXT: addi r3, r1, 128 +; BE-NEXT: stxvw4x v3, 0, r3 +; BE-NEXT: lfs f1, 116(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: lfs f1, 112(r1) +; BE-NEXT: std r3, 152(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: lfs f1, 124(r1) +; BE-NEXT: std r3, 144(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: lfs f1, 120(r1) +; BE-NEXT: std r3, 168(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: lfs f1, 132(r1) +; BE-NEXT: std r3, 160(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: lfs f1, 128(r1) +; BE-NEXT: std r3, 184(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: lfs f1, 140(r1) +; BE-NEXT: std r3, 176(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: lfs f1, 136(r1) +; BE-NEXT: std r3, 200(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: std r3, 192(r1) +; BE-NEXT: addi r3, r1, 144 +; BE-NEXT: lxvd2x v2, 0, r3 +; BE-NEXT: addi r3, r1, 160 +; BE-NEXT: lxvd2x v3, 0, r3 +; BE-NEXT: addi r3, r1, 176 +; BE-NEXT: lxvd2x v4, 0, r3 +; BE-NEXT: addi r3, r1, 192 +; BE-NEXT: lxvd2x v5, 0, r3 +; BE-NEXT: addi r1, r1, 208 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr +; +; CHECK-LABEL: lrint_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -112(r1) +; CHECK-NEXT: std r0, 128(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 112 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: .cfi_offset v28, -64 +; CHECK-NEXT: .cfi_offset v29, -48 +; CHECK-NEXT: .cfi_offset v30, -32 +; CHECK-NEXT: .cfi_offset v31, -16 +; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: xxsldwi vs0, v2, v2, 3 +; CHECK-NEXT: stvx v28, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 64 +; CHECK-NEXT: xscvspdpn f1, vs0 +; CHECK-NEXT: stvx v29, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 80 +; CHECK-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 96 +; CHECK-NEXT: vmr v30, v2 +; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: vmr v31, v3 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: xxswapd vs0, v30 +; CHECK-NEXT: mtvsrd v29, r3 +; CHECK-NEXT: xscvspdpn f1, vs0 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: xscvspdpn f1, v30 +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: xxmrghd v29, vs0, v29 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: xxsldwi vs0, v30, v30, 1 +; CHECK-NEXT: mtvsrd v30, r3 +; CHECK-NEXT: xscvspdpn f1, vs0 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: xxmrghd v30, v30, vs0 +; CHECK-NEXT: xxsldwi vs0, v31, v31, 3 +; CHECK-NEXT: xscvspdpn f1, vs0 +; 
CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: xxswapd vs0, v31 +; CHECK-NEXT: mtvsrd v28, r3 +; CHECK-NEXT: xscvspdpn f1, vs0 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: xscvspdpn f1, v31 +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: xxmrghd v28, vs0, v28 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: xxsldwi vs0, v31, v31, 1 +; CHECK-NEXT: mtvsrd v31, r3 +; CHECK-NEXT: xscvspdpn f1, vs0 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: li r3, 96 +; CHECK-NEXT: vmr v3, v30 +; CHECK-NEXT: vmr v2, v29 +; CHECK-NEXT: vmr v4, v28 +; CHECK-NEXT: xxmrghd v5, v31, vs0 +; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 80 +; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 64 +; CHECK-NEXT: lvx v29, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: lvx v28, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: addi r1, r1, 112 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +; +; FAST-LABEL: lrint_v8f32: +; FAST: # %bb.0: +; FAST-NEXT: xxsldwi vs0, v2, v2, 3 +; FAST-NEXT: xxswapd vs1, v2 +; FAST-NEXT: xscvspdpn f0, vs0 +; FAST-NEXT: xxsldwi vs2, v2, v2, 1 +; FAST-NEXT: xxsldwi vs3, v3, v3, 3 +; FAST-NEXT: xxswapd vs4, v3 +; FAST-NEXT: xxsldwi vs5, v3, v3, 1 +; FAST-NEXT: fctid f0, f0 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: xscvspdpn f0, vs1 +; FAST-NEXT: mtfprd f1, r3 +; FAST-NEXT: fctid f0, f0 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: xxmrghd v0, vs0, vs1 +; FAST-NEXT: xscvspdpn f0, v2 +; FAST-NEXT: vmr v2, v0 +; FAST-NEXT: fctid f0, f0 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: xscvspdpn f0, vs2 +; FAST-NEXT: mtfprd f1, r3 +; FAST-NEXT: fctid f0, f0 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: xxmrghd v1, vs1, vs0 +; FAST-NEXT: xscvspdpn f0, vs3 +; FAST-NEXT: fctid f0, f0 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: xscvspdpn f0, vs4 +; FAST-NEXT: mtfprd f1, r3 +; FAST-NEXT: fctid f0, f0 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: xxmrghd v4, vs0, vs1 +; FAST-NEXT: xscvspdpn f0, v3 +; FAST-NEXT: vmr v3, v1 +; FAST-NEXT: fctid f0, f0 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: xscvspdpn f0, vs5 +; FAST-NEXT: mtfprd f1, r3 +; FAST-NEXT: fctid f0, f0 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: xxmrghd v5, vs1, vs0 +; FAST-NEXT: blr + %a = call <8 x i64> @llvm.lrint.v8i64.v8f32(<8 x float> %x) + ret <8 x i64> %a +} +declare <8 x i64> @llvm.lrint.v8i64.v8f32(<8 x float>) + +define <16 x i64> @lrint_v16i64_v16f32(<16 x float> %x) { +; BE-LABEL: lrint_v16i64_v16f32: +; BE: # %bb.0: +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -304(r1) +; BE-NEXT: std r0, 320(r1) +; BE-NEXT: .cfi_def_cfa_offset 304 +; BE-NEXT: .cfi_offset lr, 16 +; BE-NEXT: addi r3, r1, 112 +; BE-NEXT: stxvw4x v2, 0, r3 +; BE-NEXT: addi r3, r1, 128 +; BE-NEXT: stxvw4x v3, 0, r3 +; BE-NEXT: addi r3, r1, 144 +; BE-NEXT: stxvw4x v4, 0, r3 +; BE-NEXT: addi r3, r1, 160 +; BE-NEXT: stxvw4x v5, 0, r3 +; BE-NEXT: lfs f1, 116(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: lfs f1, 112(r1) +; BE-NEXT: std r3, 184(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: lfs f1, 124(r1) +; BE-NEXT: std r3, 176(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: lfs f1, 120(r1) +; BE-NEXT: std r3, 200(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: lfs f1, 132(r1) +; BE-NEXT: std r3, 192(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: lfs f1, 128(r1) +; BE-NEXT: std r3, 216(r1) +; 
BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: lfs f1, 140(r1) +; BE-NEXT: std r3, 208(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: lfs f1, 136(r1) +; BE-NEXT: std r3, 232(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: lfs f1, 148(r1) +; BE-NEXT: std r3, 224(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: lfs f1, 144(r1) +; BE-NEXT: std r3, 248(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: lfs f1, 156(r1) +; BE-NEXT: std r3, 240(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: lfs f1, 152(r1) +; BE-NEXT: std r3, 264(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: lfs f1, 164(r1) +; BE-NEXT: std r3, 256(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: lfs f1, 160(r1) +; BE-NEXT: std r3, 280(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: lfs f1, 172(r1) +; BE-NEXT: std r3, 272(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: lfs f1, 168(r1) +; BE-NEXT: std r3, 296(r1) +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: std r3, 288(r1) +; BE-NEXT: addi r3, r1, 176 +; BE-NEXT: lxvd2x v2, 0, r3 +; BE-NEXT: addi r3, r1, 192 +; BE-NEXT: lxvd2x v3, 0, r3 +; BE-NEXT: addi r3, r1, 208 +; BE-NEXT: lxvd2x v4, 0, r3 +; BE-NEXT: addi r3, r1, 224 +; BE-NEXT: lxvd2x v5, 0, r3 +; BE-NEXT: addi r3, r1, 240 +; BE-NEXT: lxvd2x v6, 0, r3 +; BE-NEXT: addi r3, r1, 256 +; BE-NEXT: lxvd2x v7, 0, r3 +; BE-NEXT: addi r3, r1, 272 +; BE-NEXT: lxvd2x v8, 0, r3 +; BE-NEXT: addi r3, r1, 288 +; BE-NEXT: lxvd2x v9, 0, r3 +; BE-NEXT: addi r1, r1, 304 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr +; +; CHECK-LABEL: lrint_v16i64_v16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -176(r1) +; CHECK-NEXT: std r0, 192(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 176 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: .cfi_offset v24, -128 +; CHECK-NEXT: .cfi_offset v25, -112 +; CHECK-NEXT: .cfi_offset v26, -96 +; CHECK-NEXT: .cfi_offset v27, -80 +; CHECK-NEXT: .cfi_offset v28, -64 +; CHECK-NEXT: .cfi_offset v29, -48 +; CHECK-NEXT: .cfi_offset v30, -32 +; CHECK-NEXT: .cfi_offset v31, -16 +; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: xxsldwi vs0, v2, v2, 3 +; CHECK-NEXT: stvx v24, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 64 +; CHECK-NEXT: xscvspdpn f1, vs0 +; CHECK-NEXT: stvx v25, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 80 +; CHECK-NEXT: stvx v26, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 96 +; CHECK-NEXT: vmr v26, v3 +; CHECK-NEXT: stvx v27, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 112 +; CHECK-NEXT: stvx v28, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 128 +; CHECK-NEXT: vmr v28, v4 +; CHECK-NEXT: stvx v29, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 144 +; CHECK-NEXT: vmr v29, v2 +; CHECK-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 160 +; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: vmr v31, v5 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: xxswapd vs0, v29 +; CHECK-NEXT: mtvsrd v30, r3 +; CHECK-NEXT: xscvspdpn f1, vs0 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: xscvspdpn f1, v29 +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: xxmrghd v30, vs0, v30 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: xxsldwi vs0, v29, v29, 1 +; CHECK-NEXT: mtvsrd v29, r3 +; CHECK-NEXT: xscvspdpn f1, vs0 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: xxmrghd v29, v29, vs0 +; CHECK-NEXT: xxsldwi vs0, v26, v26, 3 +; CHECK-NEXT: xscvspdpn f1, vs0 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; 
CHECK-NEXT: xxswapd vs0, v26 +; CHECK-NEXT: mtvsrd v27, r3 +; CHECK-NEXT: xscvspdpn f1, vs0 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: xscvspdpn f1, v26 +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: xxmrghd v27, vs0, v27 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: xxsldwi vs0, v26, v26, 1 +; CHECK-NEXT: mtvsrd v26, r3 +; CHECK-NEXT: xscvspdpn f1, vs0 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: xxmrghd v26, v26, vs0 +; CHECK-NEXT: xxsldwi vs0, v28, v28, 3 +; CHECK-NEXT: xscvspdpn f1, vs0 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: xxswapd vs0, v28 +; CHECK-NEXT: mtvsrd v25, r3 +; CHECK-NEXT: xscvspdpn f1, vs0 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: xscvspdpn f1, v28 +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: xxmrghd v25, vs0, v25 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: xxsldwi vs0, v28, v28, 1 +; CHECK-NEXT: mtvsrd v28, r3 +; CHECK-NEXT: xscvspdpn f1, vs0 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: xxmrghd v28, v28, vs0 +; CHECK-NEXT: xxsldwi vs0, v31, v31, 3 +; CHECK-NEXT: xscvspdpn f1, vs0 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: xxswapd vs0, v31 +; CHECK-NEXT: mtvsrd v24, r3 +; CHECK-NEXT: xscvspdpn f1, vs0 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: xscvspdpn f1, v31 +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: xxmrghd v24, vs0, v24 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: xxsldwi vs0, v31, v31, 1 +; CHECK-NEXT: mtvsrd v31, r3 +; CHECK-NEXT: xscvspdpn f1, vs0 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: li r3, 160 +; CHECK-NEXT: vmr v2, v30 +; CHECK-NEXT: vmr v3, v29 +; CHECK-NEXT: vmr v7, v28 +; CHECK-NEXT: vmr v4, v27 +; CHECK-NEXT: vmr v5, v26 +; CHECK-NEXT: vmr v6, v25 +; CHECK-NEXT: vmr v8, v24 +; CHECK-NEXT: xxmrghd v9, v31, vs0 +; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 144 +; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 128 +; CHECK-NEXT: lvx v29, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 112 +; CHECK-NEXT: lvx v28, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 96 +; CHECK-NEXT: lvx v27, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 80 +; CHECK-NEXT: lvx v26, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 64 +; CHECK-NEXT: lvx v25, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: lvx v24, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: addi r1, r1, 176 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +; +; FAST-LABEL: lrint_v16i64_v16f32: +; FAST: # %bb.0: +; FAST-NEXT: xxsldwi vs0, v2, v2, 3 +; FAST-NEXT: xxswapd vs1, v2 +; FAST-NEXT: xscvspdpn f0, vs0 +; FAST-NEXT: xxsldwi vs2, v2, v2, 1 +; FAST-NEXT: xxsldwi vs3, v3, v3, 3 +; FAST-NEXT: xxswapd vs4, v3 +; FAST-NEXT: xxsldwi vs5, v3, v3, 1 +; FAST-NEXT: xxsldwi vs6, v4, v4, 3 +; FAST-NEXT: xxswapd vs7, v4 +; FAST-NEXT: xxsldwi vs8, v4, v4, 1 +; FAST-NEXT: xxsldwi vs9, v5, v5, 3 +; FAST-NEXT: xxswapd vs10, v5 +; FAST-NEXT: xxsldwi vs11, v5, v5, 1 +; FAST-NEXT: fctid f0, f0 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: xscvspdpn f0, vs1 +; FAST-NEXT: mtfprd f1, r3 +; FAST-NEXT: fctid f0, f0 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: xxmrghd v0, vs0, vs1 +; FAST-NEXT: xscvspdpn f0, v2 +; FAST-NEXT: vmr v2, v0 +; FAST-NEXT: fctid f0, f0 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: xscvspdpn f0, vs2 +; FAST-NEXT: mtfprd 
f1, r3 +; FAST-NEXT: fctid f0, f0 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: xxmrghd v1, vs1, vs0 +; FAST-NEXT: xscvspdpn f0, vs3 +; FAST-NEXT: fctid f0, f0 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: xscvspdpn f0, vs4 +; FAST-NEXT: mtfprd f1, r3 +; FAST-NEXT: fctid f0, f0 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: xxmrghd v10, vs0, vs1 +; FAST-NEXT: xscvspdpn f0, v3 +; FAST-NEXT: vmr v3, v1 +; FAST-NEXT: fctid f0, f0 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: xscvspdpn f0, vs5 +; FAST-NEXT: mtfprd f1, r3 +; FAST-NEXT: fctid f0, f0 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: xxmrghd v11, vs1, vs0 +; FAST-NEXT: xscvspdpn f0, vs6 +; FAST-NEXT: fctid f0, f0 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: xscvspdpn f0, vs7 +; FAST-NEXT: mtfprd f1, r3 +; FAST-NEXT: fctid f0, f0 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: xxmrghd v6, vs0, vs1 +; FAST-NEXT: xscvspdpn f0, v4 +; FAST-NEXT: xscvspdpn f1, vs8 +; FAST-NEXT: vmr v4, v10 +; FAST-NEXT: fctid f0, f0 +; FAST-NEXT: fctid f1, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: mffprd r3, f1 +; FAST-NEXT: mtfprd f1, r3 +; FAST-NEXT: xxmrghd v7, vs0, vs1 +; FAST-NEXT: xscvspdpn f0, vs9 +; FAST-NEXT: xscvspdpn f1, vs10 +; FAST-NEXT: fctid f0, f0 +; FAST-NEXT: fctid f1, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: mffprd r3, f1 +; FAST-NEXT: mtfprd f1, r3 +; FAST-NEXT: xxmrghd v8, vs1, vs0 +; FAST-NEXT: xscvspdpn f0, v5 +; FAST-NEXT: xscvspdpn f1, vs11 +; FAST-NEXT: vmr v5, v11 +; FAST-NEXT: fctid f0, f0 +; FAST-NEXT: fctid f1, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: mffprd r3, f1 +; FAST-NEXT: mtfprd f1, r3 +; FAST-NEXT: xxmrghd v9, vs0, vs1 +; FAST-NEXT: blr + %a = call <16 x i64> @llvm.lrint.v16i64.v16f32(<16 x float> %x) + ret <16 x i64> %a +} +declare <16 x i64> @llvm.lrint.v16i64.v16f32(<16 x float>) + +define <1 x i64> @lrint_v1f64(<1 x double> %x) { +; BE-LABEL: lrint_v1f64: +; BE: # %bb.0: +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -112(r1) +; BE-NEXT: std r0, 128(r1) +; BE-NEXT: .cfi_def_cfa_offset 112 +; BE-NEXT: .cfi_offset lr, 16 +; BE-NEXT: bl lrint +; BE-NEXT: nop +; BE-NEXT: addi r1, r1, 112 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr +; +; CHECK-LABEL: lrint_v1f64: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -32(r1) +; CHECK-NEXT: std r0, 48(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: bl lrint +; CHECK-NEXT: nop +; CHECK-NEXT: addi r1, r1, 32 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +; +; FAST-LABEL: lrint_v1f64: +; FAST: # %bb.0: +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: blr + %a = call <1 x i64> @llvm.lrint.v1i64.v1f64(<1 x double> %x) + ret <1 x i64> %a +} +declare <1 x i64> @llvm.lrint.v1i64.v1f64(<1 x double>) + +define <2 x i64> @lrint_v2f64(<2 x double> %x) { +; BE-LABEL: lrint_v2f64: +; BE: # %bb.0: +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -160(r1) +; BE-NEXT: std r0, 176(r1) +; BE-NEXT: .cfi_def_cfa_offset 160 +; BE-NEXT: .cfi_offset lr, 16 +; BE-NEXT: .cfi_offset v31, -16 +; BE-NEXT: li r3, 144 +; BE-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill +; BE-NEXT: vmr v31, v2 +; BE-NEXT: xxlor f1, v31, v31 +; BE-NEXT: bl lrint +; BE-NEXT: nop +; BE-NEXT: std r3, 128(r1) +; BE-NEXT: xxswapd vs1, v31 +; BE-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; BE-NEXT: bl lrint +; BE-NEXT: nop +; BE-NEXT: std r3, 
136(r1) +; BE-NEXT: addi r3, r1, 128 +; BE-NEXT: lxvd2x v2, 0, r3 +; BE-NEXT: li r3, 144 +; BE-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload +; BE-NEXT: addi r1, r1, 160 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr +; +; CHECK-LABEL: lrint_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -64(r1) +; CHECK-NEXT: std r0, 80(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: .cfi_offset v31, -16 +; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: vmr v31, v2 +; CHECK-NEXT: xxlor f1, v31, v31 +; CHECK-NEXT: bl lrint +; CHECK-NEXT: nop +; CHECK-NEXT: xxswapd vs1, v31 +; CHECK-NEXT: mtvsrd v31, r3 +; CHECK-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; CHECK-NEXT: bl lrint +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: xxmrghd v2, v31, vs0 +; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: addi r1, r1, 64 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +; +; FAST-LABEL: lrint_v2f64: +; FAST: # %bb.0: +; FAST-NEXT: xxlor f1, v2, v2 +; FAST-NEXT: xxswapd vs0, v2 +; FAST-NEXT: fctid f1, f1 +; FAST-NEXT: fctid f0, f0 +; FAST-NEXT: mffprd r3, f1 +; FAST-NEXT: mtfprd f1, r3 +; FAST-NEXT: mffprd r3, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: xxmrghd v2, vs1, vs0 +; FAST-NEXT: blr + %a = call <2 x i64> @llvm.lrint.v2i64.v2f64(<2 x double> %x) + ret <2 x i64> %a +} +declare <2 x i64> @llvm.lrint.v2i64.v2f64(<2 x double>) + +define <4 x i64> @lrint_v4f64(<4 x double> %x) { +; BE-LABEL: lrint_v4f64: +; BE: # %bb.0: +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -192(r1) +; BE-NEXT: std r0, 208(r1) +; BE-NEXT: .cfi_def_cfa_offset 192 +; BE-NEXT: .cfi_offset lr, 16 +; BE-NEXT: .cfi_offset v30, -32 +; BE-NEXT: .cfi_offset v31, -16 +; BE-NEXT: li r3, 160 +; BE-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill +; BE-NEXT: vmr v30, v2 +; BE-NEXT: li r3, 176 +; BE-NEXT: xxlor f1, v30, v30 +; BE-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill +; BE-NEXT: vmr v31, v3 +; BE-NEXT: bl lrint +; BE-NEXT: nop +; BE-NEXT: std r3, 128(r1) +; BE-NEXT: xxswapd vs1, v30 +; BE-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; BE-NEXT: bl lrint +; BE-NEXT: nop +; BE-NEXT: xxlor f1, v31, v31 +; BE-NEXT: std r3, 136(r1) +; BE-NEXT: bl lrint +; BE-NEXT: nop +; BE-NEXT: std r3, 144(r1) +; BE-NEXT: xxswapd vs1, v31 +; BE-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; BE-NEXT: bl lrint +; BE-NEXT: nop +; BE-NEXT: std r3, 152(r1) +; BE-NEXT: addi r3, r1, 128 +; BE-NEXT: lxvd2x v2, 0, r3 +; BE-NEXT: addi r3, r1, 144 +; BE-NEXT: lxvd2x v3, 0, r3 +; BE-NEXT: li r3, 176 +; BE-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload +; BE-NEXT: li r3, 160 +; BE-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload +; BE-NEXT: addi r1, r1, 192 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr +; +; CHECK-LABEL: lrint_v4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -80(r1) +; CHECK-NEXT: std r0, 96(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 80 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: .cfi_offset v30, -32 +; CHECK-NEXT: .cfi_offset v31, -16 +; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: vmr v30, v2 +; CHECK-NEXT: li r3, 64 +; CHECK-NEXT: xxlor f1, v30, v30 +; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: vmr v31, v3 +; CHECK-NEXT: bl lrint +; CHECK-NEXT: nop +; CHECK-NEXT: xxswapd vs1, v30 +; CHECK-NEXT: mtvsrd v30, r3 +; CHECK-NEXT: # kill: 
def $f1 killed $f1 killed $vsl1 +; CHECK-NEXT: bl lrint +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: xxlor f1, v31, v31 +; CHECK-NEXT: xxmrghd v30, v30, vs0 +; CHECK-NEXT: bl lrint +; CHECK-NEXT: nop +; CHECK-NEXT: xxswapd vs1, v31 +; CHECK-NEXT: mtvsrd v31, r3 +; CHECK-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; CHECK-NEXT: bl lrint +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: li r3, 64 +; CHECK-NEXT: vmr v2, v30 +; CHECK-NEXT: xxmrghd v3, v31, vs0 +; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: addi r1, r1, 80 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +; +; FAST-LABEL: lrint_v4f64: +; FAST: # %bb.0: +; FAST-NEXT: xxswapd vs0, v2 +; FAST-NEXT: xxlor f2, v2, v2 +; FAST-NEXT: xxswapd vs1, v3 +; FAST-NEXT: fctid f0, f0 +; FAST-NEXT: fctid f2, f2 +; FAST-NEXT: fctid f1, f1 +; FAST-NEXT: mffprd r4, f0 +; FAST-NEXT: xxlor f0, v3, v3 +; FAST-NEXT: mffprd r3, f2 +; FAST-NEXT: fctid f0, f0 +; FAST-NEXT: mtfprd f2, r4 +; FAST-NEXT: mffprd r5, f0 +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: mffprd r3, f1 +; FAST-NEXT: mtfprd f1, r3 +; FAST-NEXT: xxmrghd v2, vs0, vs2 +; FAST-NEXT: mtfprd f0, r5 +; FAST-NEXT: xxmrghd v3, vs0, vs1 +; FAST-NEXT: blr + %a = call <4 x i64> @llvm.lrint.v4i64.v4f64(<4 x double> %x) + ret <4 x i64> %a +} +declare <4 x i64> @llvm.lrint.v4i64.v4f64(<4 x double>) + +define <8 x i64> @lrint_v8f64(<8 x double> %x) { +; BE-LABEL: lrint_v8f64: +; BE: # %bb.0: +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -256(r1) +; BE-NEXT: std r0, 272(r1) +; BE-NEXT: .cfi_def_cfa_offset 256 +; BE-NEXT: .cfi_offset lr, 16 +; BE-NEXT: .cfi_offset v28, -64 +; BE-NEXT: .cfi_offset v29, -48 +; BE-NEXT: .cfi_offset v30, -32 +; BE-NEXT: .cfi_offset v31, -16 +; BE-NEXT: li r3, 192 +; BE-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill +; BE-NEXT: li r3, 208 +; BE-NEXT: vmr v28, v2 +; BE-NEXT: xxlor f1, v28, v28 +; BE-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill +; BE-NEXT: li r3, 224 +; BE-NEXT: vmr v29, v3 +; BE-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill +; BE-NEXT: li r3, 240 +; BE-NEXT: vmr v30, v4 +; BE-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill +; BE-NEXT: vmr v31, v5 +; BE-NEXT: bl lrint +; BE-NEXT: nop +; BE-NEXT: std r3, 128(r1) +; BE-NEXT: xxswapd vs1, v28 +; BE-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; BE-NEXT: bl lrint +; BE-NEXT: nop +; BE-NEXT: xxlor f1, v29, v29 +; BE-NEXT: std r3, 136(r1) +; BE-NEXT: bl lrint +; BE-NEXT: nop +; BE-NEXT: std r3, 144(r1) +; BE-NEXT: xxswapd vs1, v29 +; BE-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; BE-NEXT: bl lrint +; BE-NEXT: nop +; BE-NEXT: xxlor f1, v30, v30 +; BE-NEXT: std r3, 152(r1) +; BE-NEXT: bl lrint +; BE-NEXT: nop +; BE-NEXT: std r3, 160(r1) +; BE-NEXT: xxswapd vs1, v30 +; BE-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; BE-NEXT: bl lrint +; BE-NEXT: nop +; BE-NEXT: xxlor f1, v31, v31 +; BE-NEXT: std r3, 168(r1) +; BE-NEXT: bl lrint +; BE-NEXT: nop +; BE-NEXT: std r3, 176(r1) +; BE-NEXT: xxswapd vs1, v31 +; BE-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; BE-NEXT: bl lrint +; BE-NEXT: nop +; BE-NEXT: std r3, 184(r1) +; BE-NEXT: addi r3, r1, 128 +; BE-NEXT: lxvd2x v2, 0, r3 +; BE-NEXT: addi r3, r1, 144 +; BE-NEXT: lxvd2x v3, 0, r3 +; BE-NEXT: addi r3, r1, 160 +; BE-NEXT: lxvd2x v4, 0, r3 +; BE-NEXT: addi r3, r1, 176 +; BE-NEXT: lxvd2x v5, 0, r3 +; BE-NEXT: li r3, 240 +; BE-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload +; BE-NEXT: li r3, 224 +; 
BE-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload +; BE-NEXT: li r3, 208 +; BE-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload +; BE-NEXT: li r3, 192 +; BE-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload +; BE-NEXT: addi r1, r1, 256 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr +; +; CHECK-LABEL: lrint_v8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -112(r1) +; CHECK-NEXT: std r0, 128(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 112 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: .cfi_offset v28, -64 +; CHECK-NEXT: .cfi_offset v29, -48 +; CHECK-NEXT: .cfi_offset v30, -32 +; CHECK-NEXT: .cfi_offset v31, -16 +; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: stvx v28, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 64 +; CHECK-NEXT: vmr v28, v2 +; CHECK-NEXT: stvx v29, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 80 +; CHECK-NEXT: xxlor f1, v28, v28 +; CHECK-NEXT: vmr v29, v3 +; CHECK-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 96 +; CHECK-NEXT: vmr v30, v4 +; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: vmr v31, v5 +; CHECK-NEXT: bl lrint +; CHECK-NEXT: nop +; CHECK-NEXT: xxswapd vs1, v28 +; CHECK-NEXT: mtvsrd v28, r3 +; CHECK-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; CHECK-NEXT: bl lrint +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: xxlor f1, v29, v29 +; CHECK-NEXT: xxmrghd v28, v28, vs0 +; CHECK-NEXT: bl lrint +; CHECK-NEXT: nop +; CHECK-NEXT: xxswapd vs1, v29 +; CHECK-NEXT: mtvsrd v29, r3 +; CHECK-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; CHECK-NEXT: bl lrint +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: xxlor f1, v30, v30 +; CHECK-NEXT: xxmrghd v29, v29, vs0 +; CHECK-NEXT: bl lrint +; CHECK-NEXT: nop +; CHECK-NEXT: xxswapd vs1, v30 +; CHECK-NEXT: mtvsrd v30, r3 +; CHECK-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; CHECK-NEXT: bl lrint +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: xxlor f1, v31, v31 +; CHECK-NEXT: xxmrghd v30, v30, vs0 +; CHECK-NEXT: bl lrint +; CHECK-NEXT: nop +; CHECK-NEXT: xxswapd vs1, v31 +; CHECK-NEXT: mtvsrd v31, r3 +; CHECK-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; CHECK-NEXT: bl lrint +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: li r3, 96 +; CHECK-NEXT: vmr v4, v30 +; CHECK-NEXT: vmr v3, v29 +; CHECK-NEXT: vmr v2, v28 +; CHECK-NEXT: xxmrghd v5, v31, vs0 +; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 80 +; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 64 +; CHECK-NEXT: lvx v29, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: lvx v28, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: addi r1, r1, 112 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +; +; FAST-LABEL: lrint_v8f64: +; FAST: # %bb.0: +; FAST-NEXT: xxswapd vs0, v2 +; FAST-NEXT: xxswapd vs1, v3 +; FAST-NEXT: xxlor f4, v2, v2 +; FAST-NEXT: xxswapd vs2, v4 +; FAST-NEXT: xxswapd vs3, v5 +; FAST-NEXT: fctid f0, f0 +; FAST-NEXT: fctid f4, f4 +; FAST-NEXT: mffprd r4, f0 +; FAST-NEXT: xxlor f0, v3, v3 +; FAST-NEXT: mffprd r3, f4 +; FAST-NEXT: fctid f0, f0 +; FAST-NEXT: mffprd r5, f0 +; FAST-NEXT: fctid f0, f1 +; FAST-NEXT: mtfprd f1, r4 +; FAST-NEXT: mffprd r6, f0 +; FAST-NEXT: xxlor f0, v4, v4 +; FAST-NEXT: fctid f0, f0 +; FAST-NEXT: mtfprd f4, r6 +; FAST-NEXT: mffprd r7, f0 +; FAST-NEXT: fctid f0, f2 +; FAST-NEXT: mtfprd f2, r5 +; FAST-NEXT: mtfprd f5, r7 +; FAST-NEXT: mffprd r8, f0 +; FAST-NEXT: xxlor f0, v5, v5 +; FAST-NEXT: fctid 
f0, f0
+; FAST-NEXT: mtfprd f6, r8
+; FAST-NEXT: mffprd r9, f0
+; FAST-NEXT: mtfprd f0, r3
+; FAST-NEXT: xxmrghd v3, vs2, vs4
+; FAST-NEXT: xxmrghd v4, vs5, vs6
+; FAST-NEXT: xxmrghd v2, vs0, vs1
+; FAST-NEXT: fctid f1, f3
+; FAST-NEXT: mtfprd f0, r9
+; FAST-NEXT: mffprd r3, f1
+; FAST-NEXT: mtfprd f1, r3
+; FAST-NEXT: xxmrghd v5, vs0, vs1
+; FAST-NEXT: blr
+ %a = call <8 x i64> @llvm.lrint.v8i64.v8f64(<8 x double> %x)
+ ret <8 x i64> %a
+}
+declare <8 x i64> @llvm.lrint.v8i64.v8f64(<8 x double>)
diff --git a/llvm/test/CodeGen/RISCV/rvv/llrint-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/llrint-sdnode.ll
new file mode 100644
index 0000000000000..9a485a8b58be1
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/llrint-sdnode.ll
@@ -0,0 +1,108 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+v,+f,+d -target-abi=lp64d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s
+
+define <vscale x 1 x i64> @llrint_nxv1i64_nxv1f32(<vscale x 1 x float> %x) {
+; CHECK-LABEL: llrint_nxv1i64_nxv1f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vfwcvt.x.f.v v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+ %a = call <vscale x 1 x i64> @llvm.llrint.nxv1i64.nxv1f32(<vscale x 1 x float> %x)
+ ret <vscale x 1 x i64> %a
+}
+declare <vscale x 1 x i64> @llvm.llrint.nxv1i64.nxv1f32(<vscale x 1 x float>)
+
+define <vscale x 2 x i64> @llrint_nxv2i64_nxv2f32(<vscale x 2 x float> %x) {
+; CHECK-LABEL: llrint_nxv2i64_nxv2f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT: vfwcvt.x.f.v v10, v8
+; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: ret
+ %a = call <vscale x 2 x i64> @llvm.llrint.nxv2i64.nxv2f32(<vscale x 2 x float> %x)
+ ret <vscale x 2 x i64> %a
+}
+declare <vscale x 2 x i64> @llvm.llrint.nxv2i64.nxv2f32(<vscale x 2 x float>)
+
+define <vscale x 4 x i64> @llrint_nxv4i64_nxv4f32(<vscale x 4 x float> %x) {
+; CHECK-LABEL: llrint_nxv4i64_nxv4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT: vfwcvt.x.f.v v12, v8
+; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: ret
+ %a = call <vscale x 4 x i64> @llvm.llrint.nxv4i64.nxv4f32(<vscale x 4 x float> %x)
+ ret <vscale x 4 x i64> %a
+}
+declare <vscale x 4 x i64> @llvm.llrint.nxv4i64.nxv4f32(<vscale x 4 x float>)
+
+define <vscale x 8 x i64> @llrint_nxv8i64_nxv8f32(<vscale x 8 x float> %x) {
+; CHECK-LABEL: llrint_nxv8i64_nxv8f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
+; CHECK-NEXT: vfwcvt.x.f.v v16, v8
+; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: ret
+ %a = call <vscale x 8 x i64> @llvm.llrint.nxv8i64.nxv8f32(<vscale x 8 x float> %x)
+ ret <vscale x 8 x i64> %a
+}
+declare <vscale x 8 x i64> @llvm.llrint.nxv8i64.nxv8f32(<vscale x 8 x float>)
+
+define <vscale x 16 x i64> @llrint_nxv16i64_nxv16f32(<vscale x 16 x float> %x) {
+; CHECK-LABEL: llrint_nxv16i64_nxv16f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
+; CHECK-NEXT: vfwcvt.x.f.v v24, v8
+; CHECK-NEXT: vfwcvt.x.f.v v16, v12
+; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: ret
+ %a = call <vscale x 16 x i64> @llvm.llrint.nxv16i64.nxv16f32(<vscale x 16 x float> %x)
+ ret <vscale x 16 x i64> %a
+}
+declare <vscale x 16 x i64> @llvm.llrint.nxv16i64.nxv16f32(<vscale x 16 x float>)
+
+define <vscale x 1 x i64> @llrint_nxv1i64_nxv1f64(<vscale x 1 x double> %x) {
+; CHECK-LABEL: llrint_nxv1i64_nxv1f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT: vfcvt.x.f.v v8, v8
+; CHECK-NEXT: ret
+ %a = call <vscale x 1 x i64> @llvm.llrint.nxv1i64.nxv1f64(<vscale x 1 x double> %x)
+ ret <vscale x 1 x i64> %a
+}
+declare <vscale x 1 x i64> @llvm.llrint.nxv1i64.nxv1f64(<vscale x 1 x double>)
+
+define <vscale x 2 x i64> @llrint_nxv2i64_nxv2f64(<vscale x 2 x double> %x) {
+; CHECK-LABEL: llrint_nxv2i64_nxv2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma
+; CHECK-NEXT: vfcvt.x.f.v v8, v8
+; CHECK-NEXT: ret
+ %a = call <vscale x 2 x i64> @llvm.llrint.nxv2i64.nxv2f64(<vscale x 2 x double> %x)
+ ret <vscale x 2 x i64> %a
+}
+declare <vscale x 2 x i64> @llvm.llrint.nxv2i64.nxv2f64(<vscale x 2 x double>)
+
+define <vscale x 4 x i64> @llrint_nxv4i64_nxv4f64(<vscale x 4 x double> %x) {
+; CHECK-LABEL: llrint_nxv4i64_nxv4f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; CHECK-NEXT: vfcvt.x.f.v v8, v8
+; CHECK-NEXT: ret
+ %a = call <vscale x 4 x i64> @llvm.llrint.nxv4i64.nxv4f64(<vscale x 4 x double> %x)
+ ret <vscale x 4 x i64> %a
+}
+declare <vscale x 4 x i64> @llvm.llrint.nxv4i64.nxv4f64(<vscale x 4 x double>)
+
+define <vscale x 8 x i64> @llrint_nxv8i64_nxv8f64(<vscale x 8 x double> %x) {
+; CHECK-LABEL: llrint_nxv8i64_nxv8f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
+; CHECK-NEXT: vfcvt.x.f.v v8, v8
+; CHECK-NEXT: ret
+ %a = call <vscale x 8 x i64> @llvm.llrint.nxv8i64.nxv8f64(<vscale x 8 x double> %x)
+ ret <vscale x 8 x i64> %a
+}
+declare <vscale x 8 x i64> @llvm.llrint.nxv8i64.nxv8f64(<vscale x 8 x double>)
diff --git a/llvm/test/CodeGen/RISCV/rvv/lrint-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/lrint-sdnode.ll
new file mode 100644
index 0000000000000..61a5367b7fc5c
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/lrint-sdnode.ll
@@ -0,0 +1,155 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+f,+d \
+; RUN: -target-abi=ilp32d -verify-machineinstrs | FileCheck %s --check-prefix=RV32
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+f,+d \
+; RUN: -target-abi=lp64d -verify-machineinstrs | FileCheck %s --check-prefix=RV64
+
+define <vscale x 1 x iXLen> @lrint_nxv1f32(<vscale x 1 x float> %x) {
+; RV32-LABEL: lrint_nxv1f32:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; RV32-NEXT: vfcvt.x.f.v v8, v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: lrint_nxv1f32:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; RV64-NEXT: vfwcvt.x.f.v v9, v8
+; RV64-NEXT: vmv1r.v v8, v9
+; RV64-NEXT: ret
+ %a = call <vscale x 1 x iXLen> @llvm.lrint.nxv1iXLen.nxv1f32(<vscale x 1 x float> %x)
+ ret <vscale x 1 x iXLen> %a
+}
+declare <vscale x 1 x iXLen> @llvm.lrint.nxv1iXLen.nxv1f32(<vscale x 1 x float>)
+
+define <vscale x 2 x iXLen> @lrint_nxv2f32(<vscale x 2 x float> %x) {
+; RV32-LABEL: lrint_nxv2f32:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; RV32-NEXT: vfcvt.x.f.v v8, v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: lrint_nxv2f32:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; RV64-NEXT: vfwcvt.x.f.v v10, v8
+; RV64-NEXT: vmv2r.v v8, v10
+; RV64-NEXT: ret
+ %a = call <vscale x 2 x iXLen> @llvm.lrint.nxv2iXLen.nxv2f32(<vscale x 2 x float> %x)
+ ret <vscale x 2 x iXLen> %a
+}
+declare <vscale x 2 x iXLen> @llvm.lrint.nxv2iXLen.nxv2f32(<vscale x 2 x float>)
+
+define <vscale x 4 x iXLen> @lrint_nxv4f32(<vscale x 4 x float> %x) {
+; RV32-LABEL: lrint_nxv4f32:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; RV32-NEXT: vfcvt.x.f.v v8, v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: lrint_nxv4f32:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; RV64-NEXT: vfwcvt.x.f.v v12, v8
+; RV64-NEXT: vmv4r.v v8, v12
+; RV64-NEXT: ret
+ %a = call <vscale x 4 x iXLen> @llvm.lrint.nxv4iXLen.nxv4f32(<vscale x 4 x float> %x)
+ ret <vscale x 4 x iXLen> %a
+}
+declare <vscale x 4 x iXLen> @llvm.lrint.nxv4iXLen.nxv4f32(<vscale x 4 x float>)
+
+define <vscale x 8 x iXLen> @lrint_nxv8f32(<vscale x 8 x float> %x) {
+; RV32-LABEL: lrint_nxv8f32:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli a0, zero, e32, m4, ta, ma
+; RV32-NEXT: vfcvt.x.f.v v8, v8
+; RV32-NEXT: ret
+;
+; RV64-LABEL: lrint_nxv8f32:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a0, zero, e32, m4, ta, ma
+; RV64-NEXT: vfwcvt.x.f.v v16, v8
+; RV64-NEXT: vmv8r.v v8, v16
+; RV64-NEXT: ret
+ %a = call <vscale x 8 x iXLen> @llvm.lrint.nxv8iXLen.nxv8f32(<vscale x 8 x float> %x)
+ ret <vscale x 8 x iXLen> %a
+}
+declare <vscale x 8 x iXLen> @llvm.lrint.nxv8iXLen.nxv8f32(<vscale x 8 x float>)
+
+define <vscale x 16 x iXLen> @lrint_nxv16iXLen_nxv16f32(<vscale x 16 x float> %x) {
+ %a = call <vscale x 16 x iXLen> @llvm.lrint.nxv16iXLen.nxv16f32(<vscale x 16 x float> %x)
+ ret <vscale x 16 x iXLen> %a
+}
+declare <vscale x 16 x iXLen> @llvm.lrint.nxv16iXLen.nxv16f32(<vscale x 16 x float>)
+
+define <vscale x 1 x iXLen> @lrint_nxv1f64(<vscale x 1 x double> %x) {
+; RV32-LABEL: lrint_nxv1f64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; RV32-NEXT: vfncvt.x.f.w v9, v8
+; RV32-NEXT: vmv1r.v v8, v9
+; RV32-NEXT: ret
+;
+; RV64-LABEL: lrint_nxv1f64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; RV64-NEXT: vfcvt.x.f.v v8, v8
+; RV64-NEXT: ret
+ %a = call <vscale x 1 x iXLen> @llvm.lrint.nxv1iXLen.nxv1f64(<vscale x 1 x double> %x)
+ ret <vscale x 1 x iXLen> %a
+}
+declare <vscale x 1 x iXLen> @llvm.lrint.nxv1iXLen.nxv1f64(<vscale x 1 x double>)
+
+define <vscale x 2 x iXLen> @lrint_nxv2f64(<vscale x 2 x double> %x) {
+; RV32-LABEL: lrint_nxv2f64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; RV32-NEXT: vfncvt.x.f.w v10, v8
+; RV32-NEXT: vmv.v.v v8, v10
+; RV32-NEXT: ret
+;
+; RV64-LABEL: lrint_nxv2f64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma
+; RV64-NEXT: vfcvt.x.f.v v8, v8
+; RV64-NEXT: ret
+ %a = call <vscale x 2 x iXLen> @llvm.lrint.nxv2iXLen.nxv2f64(<vscale x 2 x double> %x)
+ ret <vscale x 2 x iXLen> %a
+}
+declare <vscale x 2 x iXLen> @llvm.lrint.nxv2iXLen.nxv2f64(<vscale x 2 x double>)
+
+define <vscale x 4 x iXLen> @lrint_nxv4f64(<vscale x 4 x double> %x) {
+; RV32-LABEL: lrint_nxv4f64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; RV32-NEXT: vfncvt.x.f.w v12, v8
+; RV32-NEXT: vmv.v.v v8, v12
+; RV32-NEXT: ret
+;
+; RV64-LABEL: lrint_nxv4f64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; RV64-NEXT: vfcvt.x.f.v v8, v8
+; RV64-NEXT: ret
+ %a = call <vscale x 4 x iXLen> @llvm.lrint.nxv4iXLen.nxv4f64(<vscale x 4 x double> %x)
+ ret <vscale x 4 x iXLen> %a
+}
+declare <vscale x 4 x iXLen> @llvm.lrint.nxv4iXLen.nxv4f64(<vscale x 4 x double>)
+
+define <vscale x 8 x iXLen> @lrint_nxv8f64(<vscale x 8 x double> %x) {
+; RV32-LABEL: lrint_nxv8f64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli a0, zero, e32, m4, ta, ma
+; RV32-NEXT: vfncvt.x.f.w v16, v8
+; RV32-NEXT: vmv.v.v v8, v16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: lrint_nxv8f64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a0, zero, e64, m8, ta, ma
+; RV64-NEXT: vfcvt.x.f.v v8, v8
+; RV64-NEXT: ret
+ %a = call <vscale x 8 x iXLen> @llvm.lrint.nxv8iXLen.nxv8f64(<vscale x 8 x double> %x)
+ ret <vscale x 8 x iXLen> %a
+}
+declare <vscale x 8 x iXLen> @llvm.lrint.nxv8iXLen.nxv8f64(<vscale x 8 x double>)
diff --git a/llvm/test/CodeGen/X86/vector-llrint.ll b/llvm/test/CodeGen/X86/vector-llrint.ll
new file mode 100644
index 0000000000000..46904f82fd5d6
--- /dev/null
+++ b/llvm/test/CodeGen/X86/vector-llrint.ll
@@ -0,0 +1,290 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefix=X64-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx | FileCheck %s --check-prefix=X64-AVX
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx512f | FileCheck %s --check-prefix=X64-AVX
+
+define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) {
+; X64-SSE-LABEL: llrint_v1i64_v1f32:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: cvtss2si %xmm0, %rax
+; X64-SSE-NEXT: retq
+;
+; X64-AVX-LABEL: llrint_v1i64_v1f32:
+; X64-AVX: # %bb.0:
+; X64-AVX-NEXT: vcvtss2si %xmm0, %rax
+; X64-AVX-NEXT: retq
+ %a = call <1 x i64> @llvm.llrint.v1i64.v1f32(<1 x float> %x)
+ ret <1 x i64> %a
+}
+declare <1 x i64> @llvm.llrint.v1i64.v1f32(<1 x float>)
+
+define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) {
+; X64-SSE-LABEL: llrint_v2i64_v2f32:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: cvtss2si %xmm0, %rax
+; X64-SSE-NEXT: movq %rax, %xmm1
+; X64-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; X64-SSE-NEXT: cvtss2si %xmm0, %rax
+; X64-SSE-NEXT: movq %rax, %xmm0
+; X64-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; X64-SSE-NEXT: movdqa %xmm1, %xmm0
+; X64-SSE-NEXT: retq
+;
+; X64-AVX-LABEL: llrint_v2i64_v2f32:
+; X64-AVX: # %bb.0:
+; X64-AVX-NEXT: vcvtss2si %xmm0, %rax
+; X64-AVX-NEXT: vmovq %rax, %xmm1
+; X64-AVX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; X64-AVX-NEXT: vcvtss2si %xmm0, %rax
+; X64-AVX-NEXT: vmovq %rax, %xmm0
+; X64-AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; X64-AVX-NEXT: retq
+ %a = call <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float> %x)
+ ret <2 x i64> %a
+}
+declare <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float>)
+
+define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) {
+; X64-SSE-LABEL: llrint_v4i64_v4f32:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: cvtss2si %xmm0, %rax
+; X64-SSE-NEXT: movq %rax, %xmm2
+; X64-SSE-NEXT: movaps %xmm0, %xmm1
+; X64-SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1]
+; X64-SSE-NEXT: cvtss2si %xmm1, %rax
+; X64-SSE-NEXT: movq %rax, %xmm1
+; X64-SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = 
xmm2[0],xmm1[0] +; X64-SSE-NEXT: movaps %xmm0, %xmm1 +; X64-SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,3],xmm0[3,3] +; X64-SSE-NEXT: cvtss2si %xmm1, %rax +; X64-SSE-NEXT: movq %rax, %xmm3 +; X64-SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; X64-SSE-NEXT: cvtss2si %xmm0, %rax +; X64-SSE-NEXT: movq %rax, %xmm1 +; X64-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0] +; X64-SSE-NEXT: movdqa %xmm2, %xmm0 +; X64-SSE-NEXT: retq + %a = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> %x) + ret <4 x i64> %a +} +declare <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float>) + +define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) { +; X64-SSE-LABEL: llrint_v8i64_v8f32: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: movaps %xmm0, %xmm2 +; X64-SSE-NEXT: cvtss2si %xmm0, %rax +; X64-SSE-NEXT: movq %rax, %xmm0 +; X64-SSE-NEXT: movaps %xmm2, %xmm3 +; X64-SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm2[1,1] +; X64-SSE-NEXT: cvtss2si %xmm3, %rax +; X64-SSE-NEXT: movq %rax, %xmm3 +; X64-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0] +; X64-SSE-NEXT: movaps %xmm2, %xmm3 +; X64-SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,3],xmm2[3,3] +; X64-SSE-NEXT: cvtss2si %xmm3, %rax +; X64-SSE-NEXT: movq %rax, %xmm3 +; X64-SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm2[1,1] +; X64-SSE-NEXT: cvtss2si %xmm2, %rax +; X64-SSE-NEXT: movq %rax, %xmm4 +; X64-SSE-NEXT: punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm3[0] +; X64-SSE-NEXT: cvtss2si %xmm1, %rax +; X64-SSE-NEXT: movq %rax, %xmm2 +; X64-SSE-NEXT: movaps %xmm1, %xmm3 +; X64-SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm1[1,1] +; X64-SSE-NEXT: cvtss2si %xmm3, %rax +; X64-SSE-NEXT: movq %rax, %xmm3 +; X64-SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0] +; X64-SSE-NEXT: movaps %xmm1, %xmm3 +; X64-SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,3],xmm1[3,3] +; X64-SSE-NEXT: cvtss2si %xmm3, %rax +; X64-SSE-NEXT: movq %rax, %xmm5 +; X64-SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] +; X64-SSE-NEXT: cvtss2si %xmm1, %rax +; X64-SSE-NEXT: movq %rax, %xmm3 +; X64-SSE-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm5[0] +; X64-SSE-NEXT: movdqa %xmm4, %xmm1 +; X64-SSE-NEXT: retq + %a = call <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float> %x) + ret <8 x i64> %a +} +declare <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float>) + +define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) { +; X64-SSE-LABEL: llrint_v16i64_v16f32: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: movq %rdi, %rax +; X64-SSE-NEXT: cvtss2si %xmm0, %rcx +; X64-SSE-NEXT: movq %rcx, %xmm4 +; X64-SSE-NEXT: movaps %xmm0, %xmm5 +; X64-SSE-NEXT: shufps {{.*#+}} xmm5 = xmm5[1,1],xmm0[1,1] +; X64-SSE-NEXT: cvtss2si %xmm5, %rcx +; X64-SSE-NEXT: movq %rcx, %xmm5 +; X64-SSE-NEXT: punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm5[0] +; X64-SSE-NEXT: movaps %xmm0, %xmm5 +; X64-SSE-NEXT: shufps {{.*#+}} xmm5 = xmm5[3,3],xmm0[3,3] +; X64-SSE-NEXT: cvtss2si %xmm5, %rcx +; X64-SSE-NEXT: movq %rcx, %xmm5 +; X64-SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; X64-SSE-NEXT: cvtss2si %xmm0, %rcx +; X64-SSE-NEXT: movq %rcx, %xmm0 +; X64-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm5[0] +; X64-SSE-NEXT: cvtss2si %xmm1, %rcx +; X64-SSE-NEXT: movq %rcx, %xmm5 +; X64-SSE-NEXT: movaps %xmm1, %xmm6 +; X64-SSE-NEXT: shufps {{.*#+}} xmm6 = xmm6[1,1],xmm1[1,1] +; X64-SSE-NEXT: cvtss2si %xmm6, %rcx +; X64-SSE-NEXT: movq %rcx, %xmm6 +; X64-SSE-NEXT: punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm6[0] +; X64-SSE-NEXT: movaps %xmm1, %xmm6 +; X64-SSE-NEXT: shufps {{.*#+}} xmm6 = xmm6[3,3],xmm1[3,3] +; X64-SSE-NEXT: cvtss2si %xmm6, %rcx +; X64-SSE-NEXT: movq %rcx, %xmm6 +; X64-SSE-NEXT: movhlps {{.*#+}} xmm1 = 
xmm1[1,1] +; X64-SSE-NEXT: cvtss2si %xmm1, %rcx +; X64-SSE-NEXT: movq %rcx, %xmm1 +; X64-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm6[0] +; X64-SSE-NEXT: cvtss2si %xmm2, %rcx +; X64-SSE-NEXT: movq %rcx, %xmm6 +; X64-SSE-NEXT: movaps %xmm2, %xmm7 +; X64-SSE-NEXT: shufps {{.*#+}} xmm7 = xmm7[1,1],xmm2[1,1] +; X64-SSE-NEXT: cvtss2si %xmm7, %rcx +; X64-SSE-NEXT: movq %rcx, %xmm7 +; X64-SSE-NEXT: punpcklqdq {{.*#+}} xmm6 = xmm6[0],xmm7[0] +; X64-SSE-NEXT: movaps %xmm2, %xmm7 +; X64-SSE-NEXT: shufps {{.*#+}} xmm7 = xmm7[3,3],xmm2[3,3] +; X64-SSE-NEXT: cvtss2si %xmm7, %rcx +; X64-SSE-NEXT: movq %rcx, %xmm7 +; X64-SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm2[1,1] +; X64-SSE-NEXT: cvtss2si %xmm2, %rcx +; X64-SSE-NEXT: movq %rcx, %xmm2 +; X64-SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm7[0] +; X64-SSE-NEXT: cvtss2si %xmm3, %rcx +; X64-SSE-NEXT: movq %rcx, %xmm7 +; X64-SSE-NEXT: movaps %xmm3, %xmm8 +; X64-SSE-NEXT: shufps {{.*#+}} xmm8 = xmm8[1,1],xmm3[1,1] +; X64-SSE-NEXT: cvtss2si %xmm8, %rcx +; X64-SSE-NEXT: movq %rcx, %xmm8 +; X64-SSE-NEXT: punpcklqdq {{.*#+}} xmm7 = xmm7[0],xmm8[0] +; X64-SSE-NEXT: movaps %xmm3, %xmm8 +; X64-SSE-NEXT: shufps {{.*#+}} xmm8 = xmm8[3,3],xmm3[3,3] +; X64-SSE-NEXT: cvtss2si %xmm8, %rcx +; X64-SSE-NEXT: movq %rcx, %xmm8 +; X64-SSE-NEXT: movhlps {{.*#+}} xmm3 = xmm3[1,1] +; X64-SSE-NEXT: cvtss2si %xmm3, %rcx +; X64-SSE-NEXT: movq %rcx, %xmm3 +; X64-SSE-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm8[0] +; X64-SSE-NEXT: movdqa %xmm3, 112(%rdi) +; X64-SSE-NEXT: movdqa %xmm7, 96(%rdi) +; X64-SSE-NEXT: movdqa %xmm2, 80(%rdi) +; X64-SSE-NEXT: movdqa %xmm6, 64(%rdi) +; X64-SSE-NEXT: movdqa %xmm1, 48(%rdi) +; X64-SSE-NEXT: movdqa %xmm5, 32(%rdi) +; X64-SSE-NEXT: movdqa %xmm0, 16(%rdi) +; X64-SSE-NEXT: movdqa %xmm4, (%rdi) +; X64-SSE-NEXT: retq + %a = call <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float> %x) + ret <16 x i64> %a +} +declare <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float>) + +define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) { +; X64-SSE-LABEL: llrint_v1i64_v1f64: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: cvtsd2si %xmm0, %rax +; X64-SSE-NEXT: retq +; +; X64-AVX-LABEL: llrint_v1i64_v1f64: +; X64-AVX: # %bb.0: +; X64-AVX-NEXT: vcvtsd2si %xmm0, %rax +; X64-AVX-NEXT: retq + %a = call <1 x i64> @llvm.llrint.v1i64.v1f64(<1 x double> %x) + ret <1 x i64> %a +} +declare <1 x i64> @llvm.llrint.v1i64.v1f64(<1 x double>) + +define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x) { +; X64-SSE-LABEL: llrint_v2i64_v2f64: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: cvtsd2si %xmm0, %rax +; X64-SSE-NEXT: movq %rax, %xmm1 +; X64-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1] +; X64-SSE-NEXT: cvtsd2si %xmm0, %rax +; X64-SSE-NEXT: movq %rax, %xmm0 +; X64-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; X64-SSE-NEXT: movdqa %xmm1, %xmm0 +; X64-SSE-NEXT: retq +; +; X64-AVX-LABEL: llrint_v2i64_v2f64: +; X64-AVX: # %bb.0: +; X64-AVX-NEXT: vcvtsd2si %xmm0, %rax +; X64-AVX-NEXT: vmovq %rax, %xmm1 +; X64-AVX-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0] +; X64-AVX-NEXT: vcvtsd2si %xmm0, %rax +; X64-AVX-NEXT: vmovq %rax, %xmm0 +; X64-AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; X64-AVX-NEXT: retq + %a = call <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double> %x) + ret <2 x i64> %a +} +declare <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double>) + +define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) { +; X64-SSE-LABEL: llrint_v4i64_v4f64: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: cvtsd2si %xmm0, %rax +; X64-SSE-NEXT: movq %rax, %xmm2 +; X64-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1] +; 
X64-SSE-NEXT: cvtsd2si %xmm0, %rax +; X64-SSE-NEXT: movq %rax, %xmm0 +; X64-SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0] +; X64-SSE-NEXT: cvtsd2si %xmm1, %rax +; X64-SSE-NEXT: movq %rax, %xmm3 +; X64-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1] +; X64-SSE-NEXT: cvtsd2si %xmm1, %rax +; X64-SSE-NEXT: movq %rax, %xmm0 +; X64-SSE-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm0[0] +; X64-SSE-NEXT: movdqa %xmm2, %xmm0 +; X64-SSE-NEXT: movdqa %xmm3, %xmm1 +; X64-SSE-NEXT: retq + %a = call <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double> %x) + ret <4 x i64> %a +} +declare <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double>) + +define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) { +; X64-SSE-LABEL: llrint_v8i64_v8f64: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: cvtsd2si %xmm0, %rax +; X64-SSE-NEXT: movq %rax, %xmm4 +; X64-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1] +; X64-SSE-NEXT: cvtsd2si %xmm0, %rax +; X64-SSE-NEXT: movq %rax, %xmm0 +; X64-SSE-NEXT: punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm0[0] +; X64-SSE-NEXT: cvtsd2si %xmm1, %rax +; X64-SSE-NEXT: movq %rax, %xmm5 +; X64-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1] +; X64-SSE-NEXT: cvtsd2si %xmm1, %rax +; X64-SSE-NEXT: movq %rax, %xmm0 +; X64-SSE-NEXT: punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm0[0] +; X64-SSE-NEXT: cvtsd2si %xmm2, %rax +; X64-SSE-NEXT: movq %rax, %xmm6 +; X64-SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1] +; X64-SSE-NEXT: cvtsd2si %xmm2, %rax +; X64-SSE-NEXT: movq %rax, %xmm0 +; X64-SSE-NEXT: punpcklqdq {{.*#+}} xmm6 = xmm6[0],xmm0[0] +; X64-SSE-NEXT: cvtsd2si %xmm3, %rax +; X64-SSE-NEXT: movq %rax, %xmm7 +; X64-SSE-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1,1] +; X64-SSE-NEXT: cvtsd2si %xmm3, %rax +; X64-SSE-NEXT: movq %rax, %xmm0 +; X64-SSE-NEXT: punpcklqdq {{.*#+}} xmm7 = xmm7[0],xmm0[0] +; X64-SSE-NEXT: movdqa %xmm4, %xmm0 +; X64-SSE-NEXT: movdqa %xmm5, %xmm1 +; X64-SSE-NEXT: movdqa %xmm6, %xmm2 +; X64-SSE-NEXT: movdqa %xmm7, %xmm3 +; X64-SSE-NEXT: retq + %a = call <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double> %x) + ret <8 x i64> %a +} +declare <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double>) diff --git a/llvm/test/CodeGen/X86/vector-lrint.ll b/llvm/test/CodeGen/X86/vector-lrint.ll new file mode 100644 index 0000000000000..7373cd32df98d --- /dev/null +++ b/llvm/test/CodeGen/X86/vector-lrint.ll @@ -0,0 +1,429 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=i686-unknown -mattr=sse2 | FileCheck %s --check-prefix=X86-SSE2 +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=i686-unknown -mattr=avx | FileCheck %s --check-prefix=X86-AVX +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=i686-unknown -mattr=avx512f | FileCheck %s --check-prefix=X86-AVX +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=x86_64-unknown | FileCheck %s --check-prefix=X64-SSE +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=x86_64-unknown -mattr=avx | FileCheck %s --check-prefix=X64-AVX +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=x86_64-unknown -mattr=avx512f | FileCheck %s --check-prefix=X64-AVX +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=x86_64-unknown | FileCheck %s --check-prefix=X64-SSE +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=x86_64-unknown -mattr=avx | FileCheck %s --check-prefix=X64-AVX +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=x86_64-unknown -mattr=avx512f | FileCheck %s --check-prefix=X64-AVX + +define <1 x iXLen> @lrint_v1f32(<1 x float> %x) { +; X86-SSE2-LABEL: lrint_v1f32: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: cvtss2si {{[0-9]+}}(%esp), %eax +; X86-SSE2-NEXT: retl +; +; 
X86-AVX-LABEL: lrint_v1f32: +; X86-AVX: # %bb.0: +; X86-AVX-NEXT: vcvtss2si {{[0-9]+}}(%esp), %eax +; X86-AVX-NEXT: retl + %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1f32(<1 x float> %x) + ret <1 x iXLen> %a +} +declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f32(<1 x float>) + +define <2 x iXLen> @lrint_v2f32(<2 x float> %x) { +; X86-SSE2-LABEL: lrint_v2f32: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: movaps %xmm0, %xmm1 +; X86-SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,3],xmm0[3,3] +; X86-SSE2-NEXT: cvtss2si %xmm1, %eax +; X86-SSE2-NEXT: movd %eax, %xmm1 +; X86-SSE2-NEXT: movaps %xmm0, %xmm2 +; X86-SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1] +; X86-SSE2-NEXT: cvtss2si %xmm2, %eax +; X86-SSE2-NEXT: movd %eax, %xmm2 +; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; X86-SSE2-NEXT: cvtss2si %xmm0, %eax +; X86-SSE2-NEXT: movd %eax, %xmm1 +; X86-SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] +; X86-SSE2-NEXT: cvtss2si %xmm0, %eax +; X86-SSE2-NEXT: movd %eax, %xmm0 +; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; X86-SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 +; X86-SSE2-NEXT: retl +; +; X86-AVX-LABEL: lrint_v2f32: +; X86-AVX: # %bb.0: +; X86-AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] +; X86-AVX-NEXT: vcvtss2si %xmm1, %eax +; X86-AVX-NEXT: vcvtss2si %xmm0, %ecx +; X86-AVX-NEXT: vmovd %ecx, %xmm1 +; X86-AVX-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1 +; X86-AVX-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0] +; X86-AVX-NEXT: vcvtss2si %xmm2, %eax +; X86-AVX-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1 +; X86-AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3] +; X86-AVX-NEXT: vcvtss2si %xmm0, %eax +; X86-AVX-NEXT: vpinsrd $3, %eax, %xmm1, %xmm0 +; X86-AVX-NEXT: retl + %a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2f32(<2 x float> %x) + ret <2 x iXLen> %a +} +declare <2 x iXLen> @llvm.lrint.v2iXLen.v2f32(<2 x float>) + +define <4 x iXLen> @lrint_v4f32(<4 x float> %x) { +; X86-SSE2-LABEL: lrint_v4f32: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: movaps %xmm0, %xmm1 +; X86-SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,3],xmm0[3,3] +; X86-SSE2-NEXT: cvtss2si %xmm1, %eax +; X86-SSE2-NEXT: movd %eax, %xmm1 +; X86-SSE2-NEXT: movaps %xmm0, %xmm2 +; X86-SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1] +; X86-SSE2-NEXT: cvtss2si %xmm2, %eax +; X86-SSE2-NEXT: movd %eax, %xmm2 +; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; X86-SSE2-NEXT: cvtss2si %xmm0, %eax +; X86-SSE2-NEXT: movd %eax, %xmm1 +; X86-SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] +; X86-SSE2-NEXT: cvtss2si %xmm0, %eax +; X86-SSE2-NEXT: movd %eax, %xmm0 +; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; X86-SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 +; X86-SSE2-NEXT: retl +; +; X86-AVX-LABEL: lrint_v4f32: +; X86-AVX: # %bb.0: +; X86-AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] +; X86-AVX-NEXT: vcvtss2si %xmm1, %eax +; X86-AVX-NEXT: vcvtss2si %xmm0, %ecx +; X86-AVX-NEXT: vmovd %ecx, %xmm1 +; X86-AVX-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1 +; X86-AVX-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0] +; X86-AVX-NEXT: vcvtss2si %xmm2, %eax +; X86-AVX-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1 +; X86-AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3] +; X86-AVX-NEXT: vcvtss2si %xmm0, %eax +; X86-AVX-NEXT: vpinsrd $3, %eax, %xmm1, %xmm0 +; X86-AVX-NEXT: retl + %a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4f32(<4 x float> %x) + ret <4 x iXLen> %a +} +declare <4 x iXLen> 
@llvm.lrint.v4iXLen.v4f32(<4 x float>) + +define <8 x iXLen> @lrint_v8f32(<8 x float> %x) { +; X86-SSE2-LABEL: lrint_v8f32: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: movaps %xmm0, %xmm2 +; X86-SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3] +; X86-SSE2-NEXT: cvtss2si %xmm0, %eax +; X86-SSE2-NEXT: movd %eax, %xmm0 +; X86-SSE2-NEXT: movaps %xmm2, %xmm3 +; X86-SSE2-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm2[1] +; X86-SSE2-NEXT: cvtss2si %xmm3, %eax +; X86-SSE2-NEXT: movd %eax, %xmm3 +; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] +; X86-SSE2-NEXT: cvtss2si %xmm2, %eax +; X86-SSE2-NEXT: movd %eax, %xmm0 +; X86-SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1,1,1] +; X86-SSE2-NEXT: cvtss2si %xmm2, %eax +; X86-SSE2-NEXT: movd %eax, %xmm2 +; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; X86-SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0] +; X86-SSE2-NEXT: movaps %xmm1, %xmm2 +; X86-SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,3],xmm1[3,3] +; X86-SSE2-NEXT: cvtss2si %xmm2, %eax +; X86-SSE2-NEXT: movd %eax, %xmm2 +; X86-SSE2-NEXT: movaps %xmm1, %xmm3 +; X86-SSE2-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm1[1] +; X86-SSE2-NEXT: cvtss2si %xmm3, %eax +; X86-SSE2-NEXT: movd %eax, %xmm3 +; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1] +; X86-SSE2-NEXT: cvtss2si %xmm1, %eax +; X86-SSE2-NEXT: movd %eax, %xmm2 +; X86-SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1] +; X86-SSE2-NEXT: cvtss2si %xmm1, %eax +; X86-SSE2-NEXT: movd %eax, %xmm1 +; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; X86-SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0] +; X86-SSE2-NEXT: movdqa %xmm2, %xmm1 +; X86-SSE2-NEXT: retl + %a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8f32(<8 x float> %x) + ret <8 x iXLen> %a +} +declare <8 x iXLen> @llvm.lrint.v8iXLen.v8f32(<8 x float>) + +define <16 x iXLen> @lrint_v16iXLen_v16f32(<16 x float> %x) { + %a = call <16 x iXLen> @llvm.lrint.v16iXLen.v16f32(<16 x float> %x) + ret <16 x iXLen> %a +} +declare <16 x iXLen> @llvm.lrint.v16iXLen.v16f32(<16 x float>) + +define <1 x i64> @lrint_v1f64(<1 x double> %x) { +; X86-SSE2-LABEL: lrint_v1f64: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: pushl %ebp +; X86-SSE2-NEXT: .cfi_def_cfa_offset 8 +; X86-SSE2-NEXT: .cfi_offset %ebp, -8 +; X86-SSE2-NEXT: movl %esp, %ebp +; X86-SSE2-NEXT: .cfi_def_cfa_register %ebp +; X86-SSE2-NEXT: andl $-8, %esp +; X86-SSE2-NEXT: subl $8, %esp +; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X86-SSE2-NEXT: movsd %xmm0, (%esp) +; X86-SSE2-NEXT: fldl (%esp) +; X86-SSE2-NEXT: fistpll (%esp) +; X86-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero +; X86-SSE2-NEXT: movd %xmm0, %eax +; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] +; X86-SSE2-NEXT: movd %xmm0, %edx +; X86-SSE2-NEXT: movl %ebp, %esp +; X86-SSE2-NEXT: popl %ebp +; X86-SSE2-NEXT: .cfi_def_cfa %esp, 4 +; X86-SSE2-NEXT: retl +; +; X86-AVX-LABEL: lrint_v1f64: +; X86-AVX: # %bb.0: +; X86-AVX-NEXT: pushl %ebp +; X86-AVX-NEXT: .cfi_def_cfa_offset 8 +; X86-AVX-NEXT: .cfi_offset %ebp, -8 +; X86-AVX-NEXT: movl %esp, %ebp +; X86-AVX-NEXT: .cfi_def_cfa_register %ebp +; X86-AVX-NEXT: andl $-8, %esp +; X86-AVX-NEXT: subl $8, %esp +; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; X86-AVX-NEXT: vmovsd %xmm0, (%esp) +; X86-AVX-NEXT: fldl (%esp) +; X86-AVX-NEXT: fistpll (%esp) +; X86-AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero +; X86-AVX-NEXT: vmovd %xmm0, %eax +; X86-AVX-NEXT: vpextrd $1, %xmm0, %edx +; X86-AVX-NEXT: movl %ebp, %esp +; X86-AVX-NEXT: popl %ebp +; 
X86-AVX-NEXT: .cfi_def_cfa %esp, 4 +; X86-AVX-NEXT: retl +; +; X64-SSE-LABEL: lrint_v1f64: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: cvtsd2si %xmm0, %rax +; X64-SSE-NEXT: retq +; +; X64-AVX-LABEL: lrint_v1f64: +; X64-AVX: # %bb.0: +; X64-AVX-NEXT: vcvtsd2si %xmm0, %rax +; X64-AVX-NEXT: retq + %a = call <1 x i64> @llvm.lrint.v1i64.v1f64(<1 x double> %x) + ret <1 x i64> %a +} +declare <1 x i64> @llvm.lrint.v1i64.v1f64(<1 x double>) + +define <2 x i64> @lrint_v2f64(<2 x double> %x) { +; X86-SSE2-LABEL: lrint_v2f64: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: pushl %ebp +; X86-SSE2-NEXT: .cfi_def_cfa_offset 8 +; X86-SSE2-NEXT: .cfi_offset %ebp, -8 +; X86-SSE2-NEXT: movl %esp, %ebp +; X86-SSE2-NEXT: .cfi_def_cfa_register %ebp +; X86-SSE2-NEXT: andl $-8, %esp +; X86-SSE2-NEXT: subl $16, %esp +; X86-SSE2-NEXT: movhps %xmm0, (%esp) +; X86-SSE2-NEXT: movlps %xmm0, {{[0-9]+}}(%esp) +; X86-SSE2-NEXT: fldl (%esp) +; X86-SSE2-NEXT: fistpll (%esp) +; X86-SSE2-NEXT: fldl {{[0-9]+}}(%esp) +; X86-SSE2-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-SSE2-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X86-SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; X86-SSE2-NEXT: movl %ebp, %esp +; X86-SSE2-NEXT: popl %ebp +; X86-SSE2-NEXT: .cfi_def_cfa %esp, 4 +; X86-SSE2-NEXT: retl +; +; X86-AVX-LABEL: lrint_v2f64: +; X86-AVX: # %bb.0: +; X86-AVX-NEXT: pushl %ebp +; X86-AVX-NEXT: .cfi_def_cfa_offset 8 +; X86-AVX-NEXT: .cfi_offset %ebp, -8 +; X86-AVX-NEXT: movl %esp, %ebp +; X86-AVX-NEXT: .cfi_def_cfa_register %ebp +; X86-AVX-NEXT: andl $-8, %esp +; X86-AVX-NEXT: subl $16, %esp +; X86-AVX-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp) +; X86-AVX-NEXT: vmovhps %xmm0, (%esp) +; X86-AVX-NEXT: fldl {{[0-9]+}}(%esp) +; X86-AVX-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-AVX-NEXT: fldl (%esp) +; X86-AVX-NEXT: fistpll (%esp) +; X86-AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 +; X86-AVX-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0 +; X86-AVX-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 +; X86-AVX-NEXT: movl %ebp, %esp +; X86-AVX-NEXT: popl %ebp +; X86-AVX-NEXT: .cfi_def_cfa %esp, 4 +; X86-AVX-NEXT: retl +; +; X64-SSE-LABEL: lrint_v2f64: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: cvtsd2si %xmm0, %rax +; X64-SSE-NEXT: movq %rax, %xmm1 +; X64-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1] +; X64-SSE-NEXT: cvtsd2si %xmm0, %rax +; X64-SSE-NEXT: movq %rax, %xmm0 +; X64-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; X64-SSE-NEXT: movdqa %xmm1, %xmm0 +; X64-SSE-NEXT: retq +; +; X64-AVX-LABEL: lrint_v2f64: +; X64-AVX: # %bb.0: +; X64-AVX-NEXT: vcvtsd2si %xmm0, %rax +; X64-AVX-NEXT: vmovq %rax, %xmm1 +; X64-AVX-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0] +; X64-AVX-NEXT: vcvtsd2si %xmm0, %rax +; X64-AVX-NEXT: vmovq %rax, %xmm0 +; X64-AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; X64-AVX-NEXT: retq + %a = call <2 x i64> @llvm.lrint.v2i64.v2f64(<2 x double> %x) + ret <2 x i64> %a +} +declare <2 x i64> @llvm.lrint.v2i64.v2f64(<2 x double>) + +define <4 x i64> @lrint_v4f64(<4 x double> %x) { +; X86-SSE2-LABEL: lrint_v4f64: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: pushl %ebp +; X86-SSE2-NEXT: .cfi_def_cfa_offset 8 +; X86-SSE2-NEXT: .cfi_offset %ebp, -8 +; X86-SSE2-NEXT: movl %esp, %ebp +; X86-SSE2-NEXT: .cfi_def_cfa_register %ebp +; X86-SSE2-NEXT: andl $-8, %esp +; X86-SSE2-NEXT: subl $32, %esp +; X86-SSE2-NEXT: movhps %xmm0, {{[0-9]+}}(%esp) +; X86-SSE2-NEXT: movlps %xmm0, {{[0-9]+}}(%esp) +; X86-SSE2-NEXT: movhps %xmm1, (%esp) +; X86-SSE2-NEXT: 
movlps %xmm1, {{[0-9]+}}(%esp) +; X86-SSE2-NEXT: fldl {{[0-9]+}}(%esp) +; X86-SSE2-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-SSE2-NEXT: fldl {{[0-9]+}}(%esp) +; X86-SSE2-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-SSE2-NEXT: fldl (%esp) +; X86-SSE2-NEXT: fistpll (%esp) +; X86-SSE2-NEXT: fldl {{[0-9]+}}(%esp) +; X86-SSE2-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-SSE2-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X86-SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; X86-SSE2-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero +; X86-SSE2-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; X86-SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; X86-SSE2-NEXT: movl %ebp, %esp +; X86-SSE2-NEXT: popl %ebp +; X86-SSE2-NEXT: .cfi_def_cfa %esp, 4 +; X86-SSE2-NEXT: retl +; +; X64-SSE-LABEL: lrint_v4f64: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: cvtsd2si %xmm0, %rax +; X64-SSE-NEXT: movq %rax, %xmm2 +; X64-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1] +; X64-SSE-NEXT: cvtsd2si %xmm0, %rax +; X64-SSE-NEXT: movq %rax, %xmm0 +; X64-SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0] +; X64-SSE-NEXT: cvtsd2si %xmm1, %rax +; X64-SSE-NEXT: movq %rax, %xmm3 +; X64-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1] +; X64-SSE-NEXT: cvtsd2si %xmm1, %rax +; X64-SSE-NEXT: movq %rax, %xmm0 +; X64-SSE-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm0[0] +; X64-SSE-NEXT: movdqa %xmm2, %xmm0 +; X64-SSE-NEXT: movdqa %xmm3, %xmm1 +; X64-SSE-NEXT: retq + %a = call <4 x i64> @llvm.lrint.v4i64.v4f64(<4 x double> %x) + ret <4 x i64> %a +} +declare <4 x i64> @llvm.lrint.v4i64.v4f64(<4 x double>) + +define <8 x i64> @lrint_v8f64(<8 x double> %x) { +; X86-SSE2-LABEL: lrint_v8f64: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: pushl %ebp +; X86-SSE2-NEXT: .cfi_def_cfa_offset 8 +; X86-SSE2-NEXT: .cfi_offset %ebp, -8 +; X86-SSE2-NEXT: movl %esp, %ebp +; X86-SSE2-NEXT: .cfi_def_cfa_register %ebp +; X86-SSE2-NEXT: andl $-16, %esp +; X86-SSE2-NEXT: subl $80, %esp +; X86-SSE2-NEXT: movaps 8(%ebp), %xmm3 +; X86-SSE2-NEXT: movhps %xmm0, {{[0-9]+}}(%esp) +; X86-SSE2-NEXT: movlps %xmm0, {{[0-9]+}}(%esp) +; X86-SSE2-NEXT: movhps %xmm1, {{[0-9]+}}(%esp) +; X86-SSE2-NEXT: movlps %xmm1, {{[0-9]+}}(%esp) +; X86-SSE2-NEXT: movhps %xmm2, {{[0-9]+}}(%esp) +; X86-SSE2-NEXT: movlps %xmm2, {{[0-9]+}}(%esp) +; X86-SSE2-NEXT: movhps %xmm3, {{[0-9]+}}(%esp) +; X86-SSE2-NEXT: movlps %xmm3, {{[0-9]+}}(%esp) +; X86-SSE2-NEXT: fldl {{[0-9]+}}(%esp) +; X86-SSE2-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-SSE2-NEXT: fldl {{[0-9]+}}(%esp) +; X86-SSE2-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-SSE2-NEXT: fldl {{[0-9]+}}(%esp) +; X86-SSE2-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-SSE2-NEXT: fldl {{[0-9]+}}(%esp) +; X86-SSE2-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-SSE2-NEXT: fldl {{[0-9]+}}(%esp) +; X86-SSE2-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-SSE2-NEXT: fldl {{[0-9]+}}(%esp) +; X86-SSE2-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-SSE2-NEXT: fldl {{[0-9]+}}(%esp) +; X86-SSE2-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-SSE2-NEXT: fldl {{[0-9]+}}(%esp) +; X86-SSE2-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-SSE2-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X86-SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; X86-SSE2-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero +; X86-SSE2-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; X86-SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; X86-SSE2-NEXT: movsd {{.*#+}} xmm3 = mem[0],zero +; X86-SSE2-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero +; X86-SSE2-NEXT: movlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0] +; 
X86-SSE2-NEXT: movsd {{.*#+}} xmm4 = mem[0],zero +; X86-SSE2-NEXT: movsd {{.*#+}} xmm3 = mem[0],zero +; X86-SSE2-NEXT: movlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0] +; X86-SSE2-NEXT: movl %ebp, %esp +; X86-SSE2-NEXT: popl %ebp +; X86-SSE2-NEXT: .cfi_def_cfa %esp, 4 +; X86-SSE2-NEXT: retl +; +; X64-SSE-LABEL: lrint_v8f64: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: cvtsd2si %xmm0, %rax +; X64-SSE-NEXT: movq %rax, %xmm4 +; X64-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1] +; X64-SSE-NEXT: cvtsd2si %xmm0, %rax +; X64-SSE-NEXT: movq %rax, %xmm0 +; X64-SSE-NEXT: punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm0[0] +; X64-SSE-NEXT: cvtsd2si %xmm1, %rax +; X64-SSE-NEXT: movq %rax, %xmm5 +; X64-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1] +; X64-SSE-NEXT: cvtsd2si %xmm1, %rax +; X64-SSE-NEXT: movq %rax, %xmm0 +; X64-SSE-NEXT: punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm0[0] +; X64-SSE-NEXT: cvtsd2si %xmm2, %rax +; X64-SSE-NEXT: movq %rax, %xmm6 +; X64-SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1] +; X64-SSE-NEXT: cvtsd2si %xmm2, %rax +; X64-SSE-NEXT: movq %rax, %xmm0 +; X64-SSE-NEXT: punpcklqdq {{.*#+}} xmm6 = xmm6[0],xmm0[0] +; X64-SSE-NEXT: cvtsd2si %xmm3, %rax +; X64-SSE-NEXT: movq %rax, %xmm7 +; X64-SSE-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1,1] +; X64-SSE-NEXT: cvtsd2si %xmm3, %rax +; X64-SSE-NEXT: movq %rax, %xmm0 +; X64-SSE-NEXT: punpcklqdq {{.*#+}} xmm7 = xmm7[0],xmm0[0] +; X64-SSE-NEXT: movdqa %xmm4, %xmm0 +; X64-SSE-NEXT: movdqa %xmm5, %xmm1 +; X64-SSE-NEXT: movdqa %xmm6, %xmm2 +; X64-SSE-NEXT: movdqa %xmm7, %xmm3 +; X64-SSE-NEXT: retq + %a = call <8 x i64> @llvm.lrint.v8i64.v8f64(<8 x double> %x) + ret <8 x i64> %a +} +declare <8 x i64> @llvm.lrint.v8i64.v8f64(<8 x double>)