diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 0244c170a2123..b6fff948f89d5 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -132,6 +132,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
     case ISD::STRICT_FLDEXP: R = SoftenFloatRes_ExpOp(N); break;
     case ISD::FFREXP: R = SoftenFloatRes_FFREXP(N); break;
     case ISD::FSINCOS: R = SoftenFloatRes_FSINCOS(N); break;
+    case ISD::FMODF: R = SoftenFloatRes_FMODF(N); break;
     case ISD::STRICT_FREM:
     case ISD::FREM: R = SoftenFloatRes_FREM(N); break;
     case ISD::STRICT_FRINT:
@@ -791,27 +792,35 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FFREXP(SDNode *N) {
   return ReturnVal;
 }
 
-SDValue
-DAGTypeLegalizer::SoftenFloatRes_UnaryWithTwoFPResults(SDNode *N,
-                                                       RTLIB::Libcall LC) {
+SDValue DAGTypeLegalizer::SoftenFloatRes_UnaryWithTwoFPResults(
+    SDNode *N, RTLIB::Libcall LC, std::optional<unsigned> CallRetResNo) {
   assert(!N->isStrictFPOpcode() && "strictfp not implemented");
   EVT VT = N->getValueType(0);
+  assert(VT == N->getValueType(1) &&
+         "expected both return values to have the same type");
+
   if (!TLI.getLibcallName(LC))
     return SDValue();
 
   EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
-  SDValue FirstResultSlot = DAG.CreateStackTemporary(NVT);
-  SDValue SecondResultSlot = DAG.CreateStackTemporary(NVT);
   SDLoc DL(N);
-  TargetLowering::MakeLibCallOptions CallOptions;
-  std::array Ops{GetSoftenedFloat(N->getOperand(0)), FirstResultSlot,
-                 SecondResultSlot};
-  std::array OpsVT{VT, FirstResultSlot.getValueType(),
-                   SecondResultSlot.getValueType()};
+  SmallVector<SDValue, 3> Ops = {GetSoftenedFloat(N->getOperand(0))};
+  SmallVector<EVT, 3> OpsVT = {VT};
+
+  std::array<SDValue, 2> StackSlots;
+  for (auto [ResNum, _] : enumerate(N->values())) {
+    if (ResNum == CallRetResNo)
+      continue;
+    SDValue StackSlot = DAG.CreateStackTemporary(NVT);
+    Ops.push_back(StackSlot);
+    OpsVT.push_back(StackSlot.getValueType());
+    StackSlots[ResNum] = StackSlot;
+  }
+  TargetLowering::MakeLibCallOptions CallOptions;
   // TODO: setTypeListBeforeSoften can't properly express multiple return types,
   // but since both returns have the same type it should be okay.
   CallOptions.setTypeListBeforeSoften({OpsVT}, VT, true);
@@ -825,8 +834,14 @@ DAGTypeLegalizer::SoftenFloatRes_UnaryWithTwoFPResults(SDNode *N,
         MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
     return DAG.getLoad(NVT, DL, Chain, StackSlot, PtrInfo);
   };
-  SetSoftenedFloat(SDValue(N, 0), CreateStackLoad(FirstResultSlot));
-  SetSoftenedFloat(SDValue(N, 1), CreateStackLoad(SecondResultSlot));
+
+  for (auto [ResNum, StackSlot] : enumerate(StackSlots)) {
+    if (CallRetResNo == ResNum) {
+      SetSoftenedFloat(SDValue(N, ResNum), ReturnVal);
+      continue;
+    }
+    SetSoftenedFloat(SDValue(N, ResNum), CreateStackLoad(StackSlot));
+  }
 
   return SDValue();
 }
@@ -836,6 +851,11 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSINCOS(SDNode *N) {
       N, RTLIB::getSINCOS(N->getValueType(0)));
 }
 
+SDValue DAGTypeLegalizer::SoftenFloatRes_FMODF(SDNode *N) {
+  return SoftenFloatRes_UnaryWithTwoFPResults(
+      N, RTLIB::getMODF(N->getValueType(0)), /*CallRetResNo=*/0);
+}
+
 SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) {
   return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
                                                RTLIB::REM_F32,
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index b6abad830c371..3a2d69d8a8eca 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -435,8 +435,21 @@ bool DAGTypeLegalizer::run() {
 #endif
   PerformExpensiveChecks();
 
-  // If the root changed (e.g. it was a dead load) update the root.
-  DAG.setRoot(Dummy.getValue());
+  // Get the value of the original root after type legalization.
+  SDValue Root = Dummy.getValue();
+
+  // Get the current root value. If it is not null, combine it with the
+  // original root to prevent it from being removed as a dead node.
+  if (SDValue LegalRoot = DAG.getRoot()) {
+    Root = DAG.getNode(ISD::TokenFactor, SDLoc(LegalRoot), MVT::Other, Root,
+                       LegalRoot);
+    // The token_factor should not need any legalization (as both inputs have
+    // already been legalized).
+    Root->setNodeId(Processed);
+  }
+
+  // Restore the root.
+  DAG.setRoot(Root);
 
   // Remove dead nodes. This is important to do for cleanliness but also before
   // the checking loop below. Implicit folding by the DAG.getNode operators and
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index cac969f7e2185..5cbec5cf409cf 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -562,7 +562,8 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   // Convert Float Results to Integer.
   void SoftenFloatResult(SDNode *N, unsigned ResNo);
   SDValue SoftenFloatRes_Unary(SDNode *N, RTLIB::Libcall LC);
-  SDValue SoftenFloatRes_UnaryWithTwoFPResults(SDNode *N, RTLIB::Libcall LC);
+  SDValue SoftenFloatRes_UnaryWithTwoFPResults(
+      SDNode *N, RTLIB::Libcall LC, std::optional<unsigned> CallRetResNo = {});
   SDValue SoftenFloatRes_Binary(SDNode *N, RTLIB::Libcall LC);
   SDValue SoftenFloatRes_MERGE_VALUES(SDNode *N, unsigned ResNo);
   SDValue SoftenFloatRes_ARITH_FENCE(SDNode *N);
@@ -608,6 +609,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   SDValue SoftenFloatRes_ExpOp(SDNode *N);
   SDValue SoftenFloatRes_FFREXP(SDNode *N);
   SDValue SoftenFloatRes_FSINCOS(SDNode *N);
+  SDValue SoftenFloatRes_FMODF(SDNode *N);
   SDValue SoftenFloatRes_FREEZE(SDNode *N);
   SDValue SoftenFloatRes_FREM(SDNode *N);
   SDValue SoftenFloatRes_FRINT(SDNode *N);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 0a3210a10d394..8fd0de8d2fc44 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -2644,8 +2644,10 @@ bool SelectionDAG::expandMultipleResultFPLibCall(
   // optimized out. This prevents an FP stack pop from being emitted for it.
   // Setting the root like this ensures there will be a use of the
   // `CopyFromReg` chain, and ensures the FP pop will be emitted.
+  SDValue OldRoot = getRoot();
   SDValue NewRoot =
-      getNode(ISD::TokenFactor, DL, MVT::Other, getRoot(), CallChain);
+      OldRoot ? getNode(ISD::TokenFactor, DL, MVT::Other, OldRoot, CallChain)
+              : CallChain;
   setRoot(NewRoot);
   // Ensure the new root is reachable from the results.
   Results[0] = getMergeValues({Results[0], NewRoot}, DL);
diff --git a/llvm/test/CodeGen/ARM/llvm.modf.ll b/llvm/test/CodeGen/ARM/llvm.modf.ll
new file mode 100644
index 0000000000000..66f6c9b9383a7
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/llvm.modf.ll
@@ -0,0 +1,504 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=thumbv7-gnu-linux < %s | FileCheck -check-prefixes=CHECK %s
+; RUN: llc -mtriple=armv6m < %s | FileCheck %s --check-prefix=THUMB
+
+define { half, half } @test_modf_f16(half %a) {
+; CHECK-LABEL: test_modf_f16:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    push {r4, lr}
+; CHECK-NEXT:    sub sp, #8
+; CHECK-NEXT:    bl __gnu_h2f_ieee
+; CHECK-NEXT:    add r1, sp, #4
+; CHECK-NEXT:    bl modff
+; CHECK-NEXT:    bl __gnu_f2h_ieee
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    ldr r0, [sp, #4]
+; CHECK-NEXT:    bl __gnu_f2h_ieee
+; CHECK-NEXT:    mov r1, r0
+; CHECK-NEXT:    mov r0, r4
+; CHECK-NEXT:    add sp, #8
+; CHECK-NEXT:    pop {r4, pc}
+;
+; THUMB-LABEL: test_modf_f16:
+; THUMB:       @ %bb.0:
+; THUMB-NEXT:    push {r4, lr}
+; THUMB-NEXT:    sub sp, #8
+; THUMB-NEXT:    uxth r0, r0
+; THUMB-NEXT:    bl __gnu_h2f_ieee
+; THUMB-NEXT:    add r1, sp, #4
+; THUMB-NEXT:    bl modff
+; THUMB-NEXT:    bl __gnu_f2h_ieee
+; THUMB-NEXT:    mov r4, r0
+; THUMB-NEXT:    ldr r0, [sp, #4]
+; THUMB-NEXT:    bl __gnu_f2h_ieee
+; THUMB-NEXT:    mov r1, r0
+; THUMB-NEXT:    mov r0, r4
+; THUMB-NEXT:    add sp, #8
+; THUMB-NEXT:    pop {r4, pc}
+  %result = call { half, half } @llvm.modf.f16(half %a)
+  ret { half, half } %result
+}
+
+define half @test_modf_f16_only_use_fractional_part(half %a) {
+; CHECK-LABEL: test_modf_f16_only_use_fractional_part:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    push {r7, lr}
+; CHECK-NEXT:    sub sp, #8
+; CHECK-NEXT:    bl __gnu_h2f_ieee
+; CHECK-NEXT:    add r1, sp, #4
+; CHECK-NEXT:    bl modff
+; CHECK-NEXT:    bl __gnu_f2h_ieee
+; CHECK-NEXT:    add sp, #8
+; CHECK-NEXT:    pop {r7, pc}
+;
+; THUMB-LABEL: test_modf_f16_only_use_fractional_part:
+; THUMB:       @ %bb.0:
+; THUMB-NEXT:    push {r7, lr}
+; THUMB-NEXT:    sub sp, #8
+; THUMB-NEXT:    uxth r0, r0
+; THUMB-NEXT:    bl __gnu_h2f_ieee
+; THUMB-NEXT:    add r1, sp, #4
+; THUMB-NEXT:    bl modff
+; THUMB-NEXT:    bl __gnu_f2h_ieee
+; THUMB-NEXT:    add sp, #8
+; THUMB-NEXT:    pop {r7, pc}
+  %result = call { half, half } @llvm.modf.f16(half %a)
+  %result.0 = extractvalue { half, half } %result, 0
+  ret half %result.0
+}
+
+define half @test_modf_f16_only_use_integral_part(half %a) {
+; CHECK-LABEL: test_modf_f16_only_use_integral_part:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    push {r7, lr}
+; CHECK-NEXT:    sub sp, #8
+; CHECK-NEXT:    bl __gnu_h2f_ieee
+; CHECK-NEXT:    add r1, sp, #4
+; CHECK-NEXT:    bl modff
+; CHECK-NEXT:    ldr r0, [sp, #4]
+; CHECK-NEXT:    bl __gnu_f2h_ieee
+; CHECK-NEXT:    add sp, #8
+; CHECK-NEXT:    pop {r7, pc}
+;
+; THUMB-LABEL: test_modf_f16_only_use_integral_part:
+; THUMB:       @ %bb.0:
+; THUMB-NEXT:    push {r7, lr}
+; THUMB-NEXT:    sub sp, #8
+; THUMB-NEXT:    uxth r0, r0
+; THUMB-NEXT:    bl __gnu_h2f_ieee
+; THUMB-NEXT:    add r1, sp, #4
+; THUMB-NEXT:    bl modff
+; THUMB-NEXT:    ldr r0, [sp, #4]
+; THUMB-NEXT:    bl __gnu_f2h_ieee
+; THUMB-NEXT:    add sp, #8
+; THUMB-NEXT:    pop {r7, pc}
+  %result = call { half, half } @llvm.modf.f16(half %a)
+  %result.1 = extractvalue { half, half } %result, 1
+  ret half %result.1
+}
+
+define { <2 x half>, <2 x half> } @test_modf_v2f16(<2 x half> %a) {
+; CHECK-LABEL: test_modf_v2f16:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    push {r4, lr}
+; CHECK-NEXT:    vpush {d8}
+; CHECK-NEXT:    sub sp, #16
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r0, r1
+; CHECK-NEXT:    bl __gnu_h2f_ieee
+; CHECK-NEXT:    add r1, sp, #4
+; CHECK-NEXT:    bl modff
+; CHECK-NEXT:    bl __gnu_f2h_ieee
+; CHECK-NEXT:    strh.w r0, [sp, #14]
+; CHECK-NEXT:    mov r0, r4
+; CHECK-NEXT:    bl __gnu_h2f_ieee
+; CHECK-NEXT:    mov r1, sp
+; CHECK-NEXT:    bl modff
+; CHECK-NEXT:    bl __gnu_f2h_ieee
+; CHECK-NEXT:    strh.w r0, [sp, #12]
+; CHECK-NEXT:    add r0, sp, #12
+; CHECK-NEXT:    vld1.32 {d8[0]}, [r0:32]
+; CHECK-NEXT:    ldr r0, [sp, #4]
+; CHECK-NEXT:    bl __gnu_f2h_ieee
+; CHECK-NEXT:    ldr r1, [sp]
+; CHECK-NEXT:    strh.w r0, [sp, #10]
+; CHECK-NEXT:    mov r0, r1
+; CHECK-NEXT:    bl __gnu_f2h_ieee
+; CHECK-NEXT:    strh.w r0, [sp, #8]
+; CHECK-NEXT:    add r0, sp, #8
+; CHECK-NEXT:    vmovl.u16 q9, d8
+; CHECK-NEXT:    vld1.32 {d16[0]}, [r0:32]
+; CHECK-NEXT:    vmovl.u16 q8, d16
+; CHECK-NEXT:    vmov.32 r0, d18[0]
+; CHECK-NEXT:    vmov.32 r1, d18[1]
+; CHECK-NEXT:    vmov.32 r2, d16[0]
+; CHECK-NEXT:    vmov.32 r3, d16[1]
+; CHECK-NEXT:    add sp, #16
+; CHECK-NEXT:    vpop {d8}
+; CHECK-NEXT:    pop {r4, pc}
+;
+; THUMB-LABEL: test_modf_v2f16:
+; THUMB:       @ %bb.0:
+; THUMB-NEXT:    push {r4, r5, r6, lr}
+; THUMB-NEXT:    sub sp, #8
+; THUMB-NEXT:    mov r5, r1
+; THUMB-NEXT:    uxth r0, r0
+; THUMB-NEXT:    bl __gnu_h2f_ieee
+; THUMB-NEXT:    mov r1, sp
+; THUMB-NEXT:    bl modff
+; THUMB-NEXT:    bl __gnu_f2h_ieee
+; THUMB-NEXT:    mov r4, r0
+; THUMB-NEXT:    uxth r0, r5
+; THUMB-NEXT:    bl __gnu_h2f_ieee
+; THUMB-NEXT:    add r1, sp, #4
+; THUMB-NEXT:    bl modff
+; THUMB-NEXT:    bl __gnu_f2h_ieee
+; THUMB-NEXT:    mov r5, r0
+; THUMB-NEXT:    ldr r0, [sp]
+; THUMB-NEXT:    bl __gnu_f2h_ieee
+; THUMB-NEXT:    mov r6, r0
+; THUMB-NEXT:    ldr r0, [sp, #4]
+; THUMB-NEXT:    bl __gnu_f2h_ieee
+; THUMB-NEXT:    mov r3, r0
+; THUMB-NEXT:    mov r0, r4
+; THUMB-NEXT:    mov r1, r5
+; THUMB-NEXT:    mov r2, r6
+; THUMB-NEXT:    add sp, #8
+; THUMB-NEXT:    pop {r4, r5, r6, pc}
+  %result = call { <2 x half>, <2 x half> } @llvm.modf.v2f16(<2 x half> %a)
+  ret { <2 x half>, <2 x half> } %result
+}
+
+define { float, float } @test_modf_f32(float %a) {
+; CHECK-LABEL: test_modf_f32:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    push {r7, lr}
+; CHECK-NEXT:    sub sp, #8
+; CHECK-NEXT:    add r1, sp, #4
+; CHECK-NEXT:    bl modff
+; CHECK-NEXT:    ldr r1, [sp, #4]
+; CHECK-NEXT:    add sp, #8
+; CHECK-NEXT:    pop {r7, pc}
+;
+; THUMB-LABEL: test_modf_f32:
+; THUMB:       @ %bb.0:
+; THUMB-NEXT:    push {r7, lr}
+; THUMB-NEXT:    sub sp, #8
+; THUMB-NEXT:    add r1, sp, #4
+; THUMB-NEXT:    bl modff
+; THUMB-NEXT:    ldr r1, [sp, #4]
+; THUMB-NEXT:    add sp, #8
+; THUMB-NEXT:    pop {r7, pc}
+  %result = call { float, float } @llvm.modf.f32(float %a)
+  ret { float, float } %result
+}
+
+define { <3 x float>, <3 x float> } @test_modf_v3f32(<3 x float> %a) {
+; CHECK-LABEL: test_modf_v3f32:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    push {r4, r5, r6, lr}
+; CHECK-NEXT:    vpush {d8, d9}
+; CHECK-NEXT:    sub sp, #8
+; CHECK-NEXT:    vldr d9, [sp, #40]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r1, sp
+; CHECK-NEXT:    mov r0, r2
+; CHECK-NEXT:    mov r5, r3
+; CHECK-NEXT:    vmov d8, r2, r3
+; CHECK-NEXT:    bl modff
+; CHECK-NEXT:    add r1, sp, #4
+; CHECK-NEXT:    mov r6, r0
+; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    bl modff
+; CHECK-NEXT:    mov r5, r0
+; CHECK-NEXT:    vmov r0, s18
+; CHECK-NEXT:    vldmia sp, {s0, s1}
+; CHECK-NEXT:    add.w r1, r4, #16
+; CHECK-NEXT:    vst1.32 {d0}, [r1:64]!
+; CHECK-NEXT:    bl modff
+; CHECK-NEXT:    vmov s1, r5
+; CHECK-NEXT:    vmov s0, r6
+; CHECK-NEXT:    vst1.32 {d0}, [r4:64]!
+; CHECK-NEXT:    str r0, [r4]
+; CHECK-NEXT:    add sp, #8
+; CHECK-NEXT:    vpop {d8, d9}
+; CHECK-NEXT:    pop {r4, r5, r6, pc}
+;
+; THUMB-LABEL: test_modf_v3f32:
+; THUMB:       @ %bb.0:
+; THUMB-NEXT:    push {r4, r5, r6, r7, lr}
+; THUMB-NEXT:    sub sp, #12
+; THUMB-NEXT:    mov r7, r3
+; THUMB-NEXT:    mov r5, r2
+; THUMB-NEXT:    mov r4, r0
+; THUMB-NEXT:    ldr r0, [sp, #32]
+; THUMB-NEXT:    add r1, sp, #8
+; THUMB-NEXT:    bl modff
+; THUMB-NEXT:    mov r6, r0
+; THUMB-NEXT:    ldr r0, [sp, #8]
+; THUMB-NEXT:    str r0, [r4, #24]
+; THUMB-NEXT:    add r1, sp, #4
+; THUMB-NEXT:    mov r0, r7
+; THUMB-NEXT:    bl modff
+; THUMB-NEXT:    mov r7, r0
+; THUMB-NEXT:    ldr r0, [sp, #4]
+; THUMB-NEXT:    str r0, [r4, #20]
+; THUMB-NEXT:    mov r1, sp
+; THUMB-NEXT:    mov r0, r5
+; THUMB-NEXT:    bl modff
+; THUMB-NEXT:    ldr r1, [sp]
+; THUMB-NEXT:    str r1, [r4, #16]
+; THUMB-NEXT:    stm r4!, {r0, r7}
+; THUMB-NEXT:    str r6, [r4]
+; THUMB-NEXT:    add sp, #12
+; THUMB-NEXT:    pop {r4, r5, r6, r7, pc}
+  %result = call { <3 x float>, <3 x float> } @llvm.modf.v3f32(<3 x float> %a)
+  ret { <3 x float>, <3 x float> } %result
+}
+
+define { <2 x float>, <2 x float> } @test_modf_v2f32(<2 x float> %a) {
+; CHECK-LABEL: test_modf_v2f32:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    push {r4, lr}
+; CHECK-NEXT:    vpush {d8}
+; CHECK-NEXT:    sub sp, #8
+; CHECK-NEXT:    vmov d8, r0, r1
+; CHECK-NEXT:    mov r1, sp
+; CHECK-NEXT:    vmov r0, s17
+; CHECK-NEXT:    bl modff
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vmov r0, s16
+; CHECK-NEXT:    add r1, sp, #4
+; CHECK-NEXT:    bl modff
+; CHECK-NEXT:    vldr s1, [sp]
+; CHECK-NEXT:    mov r1, r4
+; CHECK-NEXT:    vldr s0, [sp, #4]
+; CHECK-NEXT:    vmov r2, r3, d0
+; CHECK-NEXT:    add sp, #8
+; CHECK-NEXT:    vpop {d8}
+; CHECK-NEXT:    pop {r4, pc}
+;
+; THUMB-LABEL: test_modf_v2f32:
+; THUMB:       @ %bb.0:
+; THUMB-NEXT:    push {r4, r5, r7, lr}
+; THUMB-NEXT:    sub sp, #8
+; THUMB-NEXT:    mov r4, r1
+; THUMB-NEXT:    mov r1, sp
+; THUMB-NEXT:    bl modff
+; THUMB-NEXT:    mov r5, r0
+; THUMB-NEXT:    add r1, sp, #4
+; THUMB-NEXT:    mov r0, r4
+; THUMB-NEXT:    bl modff
+; THUMB-NEXT:    mov r1, r0
+; THUMB-NEXT:    ldr r2, [sp]
+; THUMB-NEXT:    ldr r3, [sp, #4]
+; THUMB-NEXT:    mov r0, r5
+; THUMB-NEXT:    add sp, #8
+; THUMB-NEXT:    pop {r4, r5, r7, pc}
+  %result = call { <2 x float>, <2 x float> } @llvm.modf.v2f32(<2 x float> %a)
+  ret { <2 x float>, <2 x float> } %result
+}
+
+define { <2 x double>, <2 x double> } @test_modf_v2f64(<2 x double> %a) {
+; CHECK-LABEL: test_modf_v2f64:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    push {r4, lr}
+; CHECK-NEXT:    vpush {d8, d9}
+; CHECK-NEXT:    sub sp, #16
+; CHECK-NEXT:    mov r1, r3
+; CHECK-NEXT:    mov r3, r2
+; CHECK-NEXT:    add r2, sp, #8
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r0, r3
+; CHECK-NEXT:    bl modf
+; CHECK-NEXT:    ldrd r12, r3, [sp, #40]
+; CHECK-NEXT:    vmov d8, r0, r1
+; CHECK-NEXT:    mov r2, sp
+; CHECK-NEXT:    mov r1, r3
+; CHECK-NEXT:    mov r0, r12
+; CHECK-NEXT:    bl modf
+; CHECK-NEXT:    vmov d9, r0, r1
+; CHECK-NEXT:    vldr d17, [sp]
+; CHECK-NEXT:    vldr d16, [sp, #8]
+; CHECK-NEXT:    vst1.64 {d8, d9}, [r4]!
+; CHECK-NEXT:    vst1.64 {d16, d17}, [r4]
+; CHECK-NEXT:    add sp, #16
+; CHECK-NEXT:    vpop {d8, d9}
+; CHECK-NEXT:    pop {r4, pc}
+;
+; THUMB-LABEL: test_modf_v2f64:
+; THUMB:       @ %bb.0:
+; THUMB-NEXT:    push {r4, r5, r6, r7, lr}
+; THUMB-NEXT:    sub sp, #28
+; THUMB-NEXT:    str r3, [sp, #4] @ 4-byte Spill
+; THUMB-NEXT:    mov r7, r2
+; THUMB-NEXT:    mov r4, r0
+; THUMB-NEXT:    ldr r0, [sp, #48]
+; THUMB-NEXT:    ldr r1, [sp, #52]
+; THUMB-NEXT:    add r2, sp, #16
+; THUMB-NEXT:    bl modf
+; THUMB-NEXT:    mov r6, r0
+; THUMB-NEXT:    mov r5, r1
+; THUMB-NEXT:    ldr r0, [sp, #20]
+; THUMB-NEXT:    str r0, [r4, #28]
+; THUMB-NEXT:    ldr r0, [sp, #16]
+; THUMB-NEXT:    str r0, [r4, #24]
+; THUMB-NEXT:    add r2, sp, #8
+; THUMB-NEXT:    mov r0, r7
+; THUMB-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; THUMB-NEXT:    bl modf
+; THUMB-NEXT:    ldr r2, [sp, #12]
+; THUMB-NEXT:    str r2, [r4, #20]
+; THUMB-NEXT:    ldr r2, [sp, #8]
+; THUMB-NEXT:    str r2, [r4, #16]
+; THUMB-NEXT:    str r5, [r4, #12]
+; THUMB-NEXT:    stm r4!, {r0, r1, r6}
+; THUMB-NEXT:    add sp, #28
+; THUMB-NEXT:    pop {r4, r5, r6, r7, pc}
+  %result = call { <2 x double>, <2 x double> } @llvm.modf.v2f64(<2 x double> %a)
+  ret { <2 x double>, <2 x double> } %result
+}
+
+define { double, double } @test_modf_f64(double %a) {
+; CHECK-LABEL: test_modf_f64:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    push {r7, lr}
+; CHECK-NEXT:    sub sp, #8
+; CHECK-NEXT:    mov r2, sp
+; CHECK-NEXT:    bl modf
+; CHECK-NEXT:    ldrd r2, r3, [sp], #8
+; CHECK-NEXT:    pop {r7, pc}
+;
+; THUMB-LABEL: test_modf_f64:
+; THUMB:       @ %bb.0:
+; THUMB-NEXT:    push {r7, lr}
+; THUMB-NEXT:    sub sp, #8
+; THUMB-NEXT:    mov r2, sp
+; THUMB-NEXT:    bl modf
+; THUMB-NEXT:    ldr r2, [sp]
+; THUMB-NEXT:    ldr r3, [sp, #4]
+; THUMB-NEXT:    add sp, #8
+; THUMB-NEXT:    pop {r7, pc}
+  %result = call { double, double } @llvm.modf.f64(double %a)
+  ret { double, double } %result
+}
+
+define double @test_modf_f64_only_use_integral(double %a) {
+; CHECK-LABEL: test_modf_f64_only_use_integral:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    push {r7, lr}
+; CHECK-NEXT:    sub sp, #8
+; CHECK-NEXT:    mov r2, sp
+; CHECK-NEXT:    bl modf
+; CHECK-NEXT:    ldrd r0, r1, [sp], #8
+; CHECK-NEXT:    pop {r7, pc}
+;
+; THUMB-LABEL: test_modf_f64_only_use_integral:
+; THUMB:       @ %bb.0:
+; THUMB-NEXT:    push {r7, lr}
+; THUMB-NEXT:    sub sp, #8
+; THUMB-NEXT:    mov r2, sp
+; THUMB-NEXT:    bl modf
+; THUMB-NEXT:    ldr r0, [sp]
+; THUMB-NEXT:    ldr r1, [sp, #4]
+; THUMB-NEXT:    add sp, #8
+; THUMB-NEXT:    pop {r7, pc}
+  %result = call { double, double } @llvm.modf.f64(double %a)
+  %result.1 = extractvalue { double, double } %result, 1
+  ret double %result.1
+}
+
+define double @test_modf_f64_only_use_fractional(double %a) {
+; CHECK-LABEL: test_modf_f64_only_use_fractional:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    push {r7, lr}
+; CHECK-NEXT:    sub sp, #8
+; CHECK-NEXT:    mov r2, sp
+; CHECK-NEXT:    bl modf
+; CHECK-NEXT:    add sp, #8
+; CHECK-NEXT:    pop {r7, pc}
+;
+; THUMB-LABEL: test_modf_f64_only_use_fractional:
+; THUMB:       @ %bb.0:
+; THUMB-NEXT:    push {r7, lr}
+; THUMB-NEXT:    sub sp, #8
+; THUMB-NEXT:    mov r2, sp
+; THUMB-NEXT:    bl modf
+; THUMB-NEXT:    add sp, #8
+; THUMB-NEXT:    pop {r7, pc}
+  %result = call { double, double } @llvm.modf.f64(double %a)
+  %result.1 = extractvalue { double, double } %result, 0
+  ret double %result.1
+}
+
+define { double, double } @test_modf_f64_tail_call(double %a) {
+; CHECK-LABEL: test_modf_f64_tail_call:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    push {r7, lr}
+; CHECK-NEXT:    sub sp, #8
+; CHECK-NEXT:    mov r2, sp
+; CHECK-NEXT:    bl modf
+; CHECK-NEXT:    ldrd r2, r3, [sp], #8
+; CHECK-NEXT:    pop {r7, pc}
+;
+; THUMB-LABEL: test_modf_f64_tail_call:
+; THUMB:       @ %bb.0:
+; THUMB-NEXT:    push {r7, lr}
+; THUMB-NEXT:    sub sp, #8
+; THUMB-NEXT:    mov r2, sp
+; THUMB-NEXT:    bl modf
+; THUMB-NEXT:    ldr r2, [sp]
+; THUMB-NEXT:    ldr r3, [sp, #4]
+; THUMB-NEXT:    add sp, #8
+; THUMB-NEXT:    pop {r7, pc}
+  %result = tail call { double, double } @llvm.modf.f64(double %a)
+  ret { double, double } %result
+}
+
+define double @test_modf_f64_only_use_integral_tail_call(double %a) {
+; CHECK-LABEL: test_modf_f64_only_use_integral_tail_call:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    push {r7, lr}
+; CHECK-NEXT:    sub sp, #8
+; CHECK-NEXT:    mov r2, sp
+; CHECK-NEXT:    bl modf
+; CHECK-NEXT:    ldrd r0, r1, [sp], #8
+; CHECK-NEXT:    pop {r7, pc}
+;
+; THUMB-LABEL: test_modf_f64_only_use_integral_tail_call:
+; THUMB:       @ %bb.0:
+; THUMB-NEXT:    push {r7, lr}
+; THUMB-NEXT:    sub sp, #8
+; THUMB-NEXT:    mov r2, sp
+; THUMB-NEXT:    bl modf
+; THUMB-NEXT:    ldr r0, [sp]
+; THUMB-NEXT:    ldr r1, [sp, #4]
+; THUMB-NEXT:    add sp, #8
+; THUMB-NEXT:    pop {r7, pc}
+  %result = tail call { double, double } @llvm.modf.f64(double %a)
+  %result.1 = extractvalue { double, double } %result, 1
+  ret double %result.1
+}
+
+define double @test_modf_f64_only_use_fractional_tail_call(double %a) {
+; CHECK-LABEL: test_modf_f64_only_use_fractional_tail_call:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    push {r7, lr}
+; CHECK-NEXT:    sub sp, #8
+; CHECK-NEXT:    mov r2, sp
+; CHECK-NEXT:    bl modf
+; CHECK-NEXT:    add sp, #8
+; CHECK-NEXT:    pop {r7, pc}
+;
+; THUMB-LABEL: test_modf_f64_only_use_fractional_tail_call:
+; THUMB:       @ %bb.0:
+; THUMB-NEXT:    push {r7, lr}
+; THUMB-NEXT:    sub sp, #8
+; THUMB-NEXT:    mov r2, sp
+; THUMB-NEXT:    bl modf
+; THUMB-NEXT:    add sp, #8
+; THUMB-NEXT:    pop {r7, pc}
+  %result = tail call { double, double } @llvm.modf.f64(double %a)
+  %result.1 = extractvalue { double, double } %result, 0
+  ret double %result.1
+}
diff --git a/llvm/test/CodeGen/PowerPC/llvm.modf.ll b/llvm/test/CodeGen/PowerPC/llvm.modf.ll
index 69e3b22c7352c..1b137c786cc91 100644
--- a/llvm/test/CodeGen/PowerPC/llvm.modf.ll
+++ b/llvm/test/CodeGen/PowerPC/llvm.modf.ll
@@ -328,3 +328,108 @@ define { ppc_fp128, ppc_fp128 } @test_modf_ppcf128(ppc_fp128 %a) {
   %result = call { ppc_fp128, ppc_fp128 } @llvm.modf.ppcf128(ppc_fp128 %a)
   ret { ppc_fp128, ppc_fp128 } %result
 }
+
+define ppc_fp128 @test_modf_ppcf128_only_use_integral(ppc_fp128 %a) {
+; CHECK-LABEL: test_modf_ppcf128_only_use_integral:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    mflr r0
+; CHECK-NEXT:    stdu r1, -48(r1)
+; CHECK-NEXT:    std r0, 64(r1)
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    .cfi_offset lr, 16
+; CHECK-NEXT:    addi r5, r1, 32
+; CHECK-NEXT:    bl modfl
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    lfd f1, 32(r1)
+; CHECK-NEXT:    lfd f2, 40(r1)
+; CHECK-NEXT:    addi r1, r1, 48
+; CHECK-NEXT:    ld r0, 16(r1)
+; CHECK-NEXT:    mtlr r0
+; CHECK-NEXT:    blr
+  %result = call { ppc_fp128, ppc_fp128 } @llvm.modf.ppcf128(ppc_fp128 %a)
+  %result.1 = extractvalue { ppc_fp128, ppc_fp128 } %result, 1
+  ret ppc_fp128 %result.1
+}
+
+define ppc_fp128 @test_modf_ppcf128_only_use_fractional(ppc_fp128 %a) {
+; CHECK-LABEL: test_modf_ppcf128_only_use_fractional:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    mflr r0
+; CHECK-NEXT:    stdu r1, -48(r1)
+; CHECK-NEXT:    std r0, 64(r1)
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    .cfi_offset lr, 16
+; CHECK-NEXT:    addi r5, r1, 32
+; CHECK-NEXT:    bl modfl
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    addi r1, r1, 48
+; CHECK-NEXT:    ld r0, 16(r1)
+; CHECK-NEXT:    mtlr r0
+; CHECK-NEXT:    blr
+  %result = call { ppc_fp128, ppc_fp128 } @llvm.modf.ppcf128(ppc_fp128 %a)
+  %result.1 = extractvalue { ppc_fp128, ppc_fp128 } %result, 0
+  ret ppc_fp128 %result.1
+}
+
+define { ppc_fp128, ppc_fp128 } @test_modf_ppcf128_tail_call(ppc_fp128 %a) {
+; CHECK-LABEL: test_modf_ppcf128_tail_call:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    mflr r0
+; CHECK-NEXT:    stdu r1, -48(r1)
+; CHECK-NEXT:    std r0, 64(r1)
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    .cfi_offset lr, 16
+; CHECK-NEXT:    addi r5, r1, 32
+; CHECK-NEXT:    bl modfl
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    lfd f3, 32(r1)
+; CHECK-NEXT:    lfd f4, 40(r1)
+; CHECK-NEXT:    addi r1, r1, 48
+; CHECK-NEXT:    ld r0, 16(r1)
+; CHECK-NEXT:    mtlr r0
+; CHECK-NEXT:    blr
+  %result = tail call { ppc_fp128, ppc_fp128 } @llvm.modf.ppcf128(ppc_fp128 %a)
+  ret { ppc_fp128, ppc_fp128 } %result
+}
+
+define ppc_fp128 @test_modf_ppcf128_only_use_integral_tail_call(ppc_fp128 %a) {
+; CHECK-LABEL: test_modf_ppcf128_only_use_integral_tail_call:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    mflr r0
+; CHECK-NEXT:    stdu r1, -48(r1)
+; CHECK-NEXT:    std r0, 64(r1)
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    .cfi_offset lr, 16
+; CHECK-NEXT:    addi r5, r1, 32
+; CHECK-NEXT:    bl modfl
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    lfd f1, 32(r1)
+; CHECK-NEXT:    lfd f2, 40(r1)
+; CHECK-NEXT:    addi r1, r1, 48
+; CHECK-NEXT:    ld r0, 16(r1)
+; CHECK-NEXT:    mtlr r0
+; CHECK-NEXT:    blr
+  %result = tail call { ppc_fp128, ppc_fp128 } @llvm.modf.ppcf128(ppc_fp128 %a)
+  %result.1 = extractvalue { ppc_fp128, ppc_fp128 } %result, 1
+  ret ppc_fp128 %result.1
+}
+
+define ppc_fp128 @test_modf_ppcf128_only_use_fractional_tail_call(ppc_fp128 %a) {
+; CHECK-LABEL: test_modf_ppcf128_only_use_fractional_tail_call:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    mflr r0
+; CHECK-NEXT:    stdu r1, -48(r1)
+; CHECK-NEXT:    std r0, 64(r1)
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    .cfi_offset lr, 16
+; CHECK-NEXT:    addi r5, r1, 32
+; CHECK-NEXT:    bl modfl
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    addi r1, r1, 48
+; CHECK-NEXT:    ld r0, 16(r1)
+; CHECK-NEXT:    mtlr r0
+; CHECK-NEXT:    blr
+  %result = tail call { ppc_fp128, ppc_fp128 } @llvm.modf.ppcf128(ppc_fp128 %a)
+  %result.1 = extractvalue { ppc_fp128, ppc_fp128 } %result, 0
+  ret ppc_fp128 %result.1
+}
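
Reviewer note, not part of the patch: the `/*CallRetResNo=*/0` passed by `SoftenFloatRes_FMODF` encodes the C `modf` family's convention that the softened libcall relies on. The fractional part is the function's direct return value (result 0 of `llvm.modf`), while the integral part is written through the out-pointer (result 1), so only that result needs a stack temporary. A minimal, self-contained sketch of that contract, using only the standard `<cmath>` API (hypothetical illustration, not code from this change):

```cpp
// Hypothetical illustration of the modf contract targeted by
// SoftenFloatRes_FMODF; not part of the patch itself.
#include <cmath>
#include <cstdio>

int main() {
  double IntPart;
  // modf returns the fractional part directly (llvm.modf result 0,
  // hence CallRetResNo = 0) and writes the integral part through the
  // pointer (llvm.modf result 1, which the lowering loads back from
  // its stack slot).
  double FracPart = std::modf(3.75, &IntPart);
  std::printf("frac = %f, int = %f\n", FracPart, IntPart);
  // Prints: frac = 0.750000, int = 3.000000
  return 0;
}
```

This is also why the "only_use_fractional" tests above need no load after the call, while the "only_use_integral" tests load the result back from the stack slot.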