diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h
index e82bdb6906163..73c5df8ac47f5 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -1599,6 +1599,12 @@ class SelectionDAG {
   SDValue getPartialReduceAdd(SDLoc DL, EVT ReducedTy, SDValue Op1,
                               SDValue Op2);
 
+  /// Expand the specified \c ISD::FSINCOS node as the Legalize pass would.
+  /// On success, the sin and cos results are appended to \p Results (in that
+  /// order) and true is returned. Returns false, leaving \p Results untouched,
+  /// if no suitable libcall (or vector library mapping) is available.
+  bool expandFSINCOS(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+
   /// Expand the specified \c ISD::VAARG node as the Legalize pass would.
   SDValue expandVAArg(SDNode *Node);
 
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 47a9ae12248cc..b4846f3239191 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -2348,75 +2348,6 @@ static bool useSinCos(SDNode *Node) {
   return false;
 }
 
-/// Issue libcalls to sincos to compute sin / cos pairs.
-void SelectionDAGLegalize::ExpandSinCosLibCall(
-    SDNode *Node, SmallVectorImpl<SDValue> &Results) {
-  EVT VT = Node->getValueType(0);
-  Type *Ty = VT.getTypeForEVT(*DAG.getContext());
-  RTLIB::Libcall LC = RTLIB::getFSINCOS(VT);
-
-  // Find users of the node that store the results (and share input chains). The
-  // destination pointers can be used instead of creating stack allocations.
-  SDValue StoresInChain{};
-  std::array<StoreSDNode *, 2> ResultStores = {nullptr};
-  for (SDNode *User : Node->uses()) {
-    if (!ISD::isNormalStore(User))
-      continue;
-    auto *ST = cast<StoreSDNode>(User);
-    if (!ST->isSimple() || ST->getAddressSpace() != 0 ||
-        ST->getAlign() < DAG.getDataLayout().getABITypeAlign(Ty) ||
-        (StoresInChain && ST->getChain() != StoresInChain) ||
-        Node->isPredecessorOf(ST->getChain().getNode()))
-      continue;
-    ResultStores[ST->getValue().getResNo()] = ST;
-    StoresInChain = ST->getChain();
-  }
-
-  TargetLowering::ArgListTy Args;
-  TargetLowering::ArgListEntry Entry{};
-
-  // Pass the argument.
-  Entry.Node = Node->getOperand(0);
-  Entry.Ty = Ty;
-  Args.push_back(Entry);
-
-  // Pass the output pointers for sin and cos.
-  SmallVector<SDValue, 2> ResultPtrs{};
-  for (StoreSDNode *ST : ResultStores) {
-    SDValue ResultPtr = ST ? ST->getBasePtr() : DAG.CreateStackTemporary(VT);
-    Entry.Node = ResultPtr;
-    Entry.Ty = PointerType::getUnqual(Ty->getContext());
-    Args.push_back(Entry);
-    ResultPtrs.push_back(ResultPtr);
-  }
-
-  SDLoc DL(Node);
-  SDValue InChain = StoresInChain ? StoresInChain : DAG.getEntryNode();
-  SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
-                                         TLI.getPointerTy(DAG.getDataLayout()));
-  TargetLowering::CallLoweringInfo CLI(DAG);
-  CLI.setDebugLoc(DL).setChain(InChain).setLibCallee(
-      TLI.getLibcallCallingConv(LC), Type::getVoidTy(*DAG.getContext()), Callee,
-      std::move(Args));
-
-  auto [Call, OutChain] = TLI.LowerCallTo(CLI);
-
-  for (auto [ResNo, ResultPtr] : llvm::enumerate(ResultPtrs)) {
-    MachinePointerInfo PtrInfo;
-    if (StoreSDNode *ST = ResultStores[ResNo]) {
-      // Replace store with the library call.
-      DAG.ReplaceAllUsesOfValueWith(SDValue(ST, 0), OutChain);
-      PtrInfo = ST->getPointerInfo();
-    } else {
-      PtrInfo = MachinePointerInfo::getFixedStack(
-          DAG.getMachineFunction(),
-          cast<FrameIndexSDNode>(ResultPtr)->getIndex());
-    }
-    SDValue LoadResult = DAG.getLoad(VT, DL, OutChain, ResultPtr, PtrInfo);
-    Results.push_back(LoadResult);
-  }
-}
-
 SDValue SelectionDAGLegalize::expandLdexp(SDNode *Node) const {
   SDLoc dl(Node);
   EVT VT = Node->getValueType(0);
@@ -4633,7 +4564,7 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
     break;
   case ISD::FSINCOS:
     // Expand into sincos libcall.
-    ExpandSinCosLibCall(Node, Results);
+    (void)DAG.expandFSINCOS(Node, Results);
     break;
   case ISD::FLOG:
   case ISD::STRICT_FLOG:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index c80da28b3dc34..e31fcd7a57b7c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -1191,6 +1191,11 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
                              RTLIB::REM_PPCF128, Results))
       return;
 
+    break;
+  case ISD::FSINCOS:
+    if (DAG.expandFSINCOS(Node, Results))
+      return;
+
     break;
   case ISD::VECTOR_COMPRESS:
     Results.push_back(TLI.expandVECTOR_COMPRESS(Node, DAG));
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 8f255cce1fe15..1781abd0e6b03 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -25,6 +25,7 @@
 #include "llvm/ADT/Twine.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Analysis/VectorUtils.h"
 #include "llvm/BinaryFormat/Dwarf.h"
@@ -2483,6 +2484,111 @@ SDValue SelectionDAG::getPartialReduceAdd(SDLoc DL, EVT ReducedTy, SDValue Op1,
   return Subvectors[0];
 }
 
+bool SelectionDAG::expandFSINCOS(SDNode *Node,
+                                 SmallVectorImpl<SDValue> &Results) {
+  EVT VT = Node->getValueType(0);
+  LLVMContext *Ctx = getContext();
+  Type *Ty = VT.getTypeForEVT(*Ctx);
+  // For vector types, look up the libcall for the element type; the vector
+  // function itself is found through the vector library mappings below.
+  RTLIB::Libcall LC =
+      RTLIB::getFSINCOS(VT.isVector() ? VT.getVectorElementType() : VT);
+
+  // Give up when no sincos libcall exists (or is named) for this type.
+  const char *LCName = TLI->getLibcallName(LC);
+  if (LC == RTLIB::UNKNOWN_LIBCALL || !LCName)
+    return false;
+
+  // Find a vector library mapping for the scalar libcall, preferring an
+  // unmasked variant over a masked one.
+  auto getVecDesc = [&]() -> VecDesc const * {
+    for (bool Masked : {false, true}) {
+      if (VecDesc const *VD = getLibInfo().getVectorMappingInfo(
+              LCName, VT.getVectorElementCount(), Masked)) {
+        return VD;
+      }
+    }
+    return nullptr;
+  };
+
+  // Vector types can only be expanded when such a mapping is available.
+  VecDesc const *VD = nullptr;
+  if (VT.isVector() && !(VD = getVecDesc()))
+    return false;
+
+  // Find users of the node that store the results (and share input chains). The
+  // destination pointers can be used instead of creating stack allocations.
+  SDValue StoresInChain{};
+  std::array<StoreSDNode *, 2> ResultStores = {nullptr};
+  for (SDNode *User : Node->uses()) {
+    if (!ISD::isNormalStore(User))
+      continue;
+    auto *ST = cast<StoreSDNode>(User);
+    if (!ST->isSimple() || ST->getAddressSpace() != 0 ||
+        ST->getAlign() < getDataLayout().getABITypeAlign(Ty->getScalarType()) ||
+        (StoresInChain && ST->getChain() != StoresInChain) ||
+        Node->isPredecessorOf(ST->getChain().getNode()))
+      continue;
+    ResultStores[ST->getValue().getResNo()] = ST;
+    StoresInChain = ST->getChain();
+  }
+
+  TargetLowering::ArgListTy Args;
+  TargetLowering::ArgListEntry Entry{};
+
+  // Pass the argument.
+  Entry.Node = Node->getOperand(0);
+  Entry.Ty = Ty;
+  Args.push_back(Entry);
+
+  // Pass the output pointers for sin and cos.
+  SmallVector<SDValue, 2> ResultPtrs{};
+  for (StoreSDNode *ST : ResultStores) {
+    SDValue ResultPtr = ST ? ST->getBasePtr() : CreateStackTemporary(VT);
+    Entry.Node = ResultPtr;
+    Entry.Ty = PointerType::getUnqual(Ty->getContext());
+    Args.push_back(Entry);
+    ResultPtrs.push_back(ResultPtr);
+  }
+
+  SDLoc DL(Node);
+
+  // Masked vector variants take a trailing mask argument; pass all-true.
+  if (VD && VD->isMasked()) {
+    EVT MaskVT = TLI->getSetCCResultType(getDataLayout(), *Ctx, VT);
+    Entry.Node = getBoolConstant(true, DL, MaskVT, VT);
+    Entry.Ty = MaskVT.getTypeForEVT(*Ctx);
+    Args.push_back(Entry);
+  }
+
+  SDValue InChain = StoresInChain ? StoresInChain : getEntryNode();
+  SDValue Callee = getExternalSymbol(VD ? VD->getVectorFnName().data() : LCName,
+                                     TLI->getPointerTy(getDataLayout()));
+  TargetLowering::CallLoweringInfo CLI(*this);
+  CLI.setDebugLoc(DL).setChain(InChain).setLibCallee(
+      TLI->getLibcallCallingConv(LC), Type::getVoidTy(*Ctx), Callee,
+      std::move(Args));
+
+  auto [Call, OutChain] = TLI->LowerCallTo(CLI);
+
+  // Load the sin and cos results back from their output locations.
+  for (auto [ResNo, ResultPtr] : llvm::enumerate(ResultPtrs)) {
+    MachinePointerInfo PtrInfo;
+    if (StoreSDNode *ST = ResultStores[ResNo]) {
+      // Replace store with the library call.
+      ReplaceAllUsesOfValueWith(SDValue(ST, 0), OutChain);
+      PtrInfo = ST->getPointerInfo();
+    } else {
+      PtrInfo = MachinePointerInfo::getFixedStack(
+          getMachineFunction(), cast<FrameIndexSDNode>(ResultPtr)->getIndex());
+    }
+    SDValue LoadResult = getLoad(VT, DL, OutChain, ResultPtr, PtrInfo);
+    Results.push_back(LoadResult);
+  }
+
+  return true;
+}
+
 SDValue SelectionDAG::expandVAArg(SDNode *Node) {
   SDLoc dl(Node);
   const TargetLowering &TLI = getTargetLoweringInfo();
diff --git a/llvm/test/CodeGen/AArch64/veclib-llvm.sincos.ll b/llvm/test/CodeGen/AArch64/veclib-llvm.sincos.ll
new file mode 100644
index 0000000000000..e18ac46165d2e
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/veclib-llvm.sincos.ll
@@ -0,0 +1,61 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --filter "(bl|ptrue)" --version 5
+; RUN: llc -mtriple=aarch64-gnu-linux -mattr=+neon,+sve -vector-library=sleefgnuabi < %s | FileCheck %s -check-prefix=SLEEF
+; RUN: llc -mtriple=aarch64-gnu-linux -mattr=+neon,+sve -vector-library=ArmPL < %s | FileCheck %s -check-prefix=ARMPL
+
+define void @test_sincos_v4f32(<4 x float> %x, ptr noalias %out_sin, ptr noalias %out_cos) {
+; SLEEF-LABEL: test_sincos_v4f32:
+; SLEEF:    bl _ZGVnN4vl4l4_sincosf
+;
+; ARMPL-LABEL: test_sincos_v4f32:
+; ARMPL:    bl armpl_vsincosq_f32
+  %result = call { <4 x float>, <4 x float> } @llvm.sincos.v4f32(<4 x float> %x)
+  %result.0 = extractvalue { <4 x float>, <4 x float> } %result, 0
+  %result.1 = extractvalue { <4 x float>, <4 x float> } %result, 1
+  store <4 x float> %result.0, ptr %out_sin, align 4
+  store <4 x float> %result.1, ptr %out_cos, align 4
+  ret void
+}
+
+define void @test_sincos_v2f64(<2 x double> %x, ptr noalias %out_sin, ptr noalias %out_cos) {
+; SLEEF-LABEL: test_sincos_v2f64:
+; SLEEF:    bl _ZGVnN2vl8l8_sincos
+;
+; ARMPL-LABEL: test_sincos_v2f64:
+; ARMPL:    bl armpl_vsincosq_f64
+  %result = call { <2 x double>, <2 x double> } @llvm.sincos.v2f64(<2 x double> %x)
+  %result.0 = extractvalue { <2 x double>, <2 x double> } %result, 0
+  %result.1 = extractvalue { <2 x double>, <2 x double> } %result, 1
+  store <2 x double> %result.0, ptr %out_sin, align 8
+  store <2 x double> %result.1, ptr %out_cos, align 8
+  ret void
+}
+
+define void @test_sincos_nxv4f32(<vscale x 4 x float> %x, ptr noalias %out_sin, ptr noalias %out_cos) {
+; SLEEF-LABEL: test_sincos_nxv4f32:
+; SLEEF:    bl _ZGVsNxvl4l4_sincosf
+;
+; ARMPL-LABEL: test_sincos_nxv4f32:
+; ARMPL:    ptrue p0.s
+; ARMPL:    bl armpl_svsincos_f32_x
+  %result = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.sincos.nxv4f32(<vscale x 4 x float> %x)
+  %result.0 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } %result, 0
+  %result.1 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } %result, 1
+  store <vscale x 4 x float> %result.0, ptr %out_sin, align 4
+  store <vscale x 4 x float> %result.1, ptr %out_cos, align 4
+  ret void
+}
+
+define void @test_sincos_nxv2f64(<vscale x 2 x double> %x, ptr noalias %out_sin, ptr noalias %out_cos) {
+; SLEEF-LABEL: test_sincos_nxv2f64:
+; SLEEF:    bl _ZGVsNxvl8l8_sincos
+;
+; ARMPL-LABEL: test_sincos_nxv2f64:
+; ARMPL:    ptrue p0.d
+; ARMPL:    bl armpl_svsincos_f64_x
+  %result = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.sincos.nxv2f64(<vscale x 2 x double> %x)
+  %result.0 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %result, 0
+  %result.1 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %result, 1
+  store <vscale x 2 x double> %result.0, ptr %out_sin, align 8
+  store <vscale x 2 x double> %result.1, ptr %out_cos, align 8
+  ret void
+}