@@ -2325,6 +2325,59 @@ std::pair<SDValue, MachinePointerInfo> ARMTargetLowering::computeAddrForCallArg(
2325
2325
return std::make_pair(DstAddr, DstInfo);
2326
2326
}
2327
2327
2328
+ // Returns the type of copying which is required to set up a byval argument to
2329
+ // a tail-called function. This isn't needed for non-tail calls, because they
2330
+ // always need the equivalent of CopyOnce, but tail-calls sometimes need two to
2331
+ // avoid clobbering another argument (CopyViaTemp), and sometimes can be
2332
+ // optimised to zero copies when forwarding an argument from the caller's
2333
+ // caller (NoCopy).
2334
+ ARMTargetLowering::ByValCopyKind ARMTargetLowering::ByValNeedsCopyForTailCall(
2335
+ SelectionDAG &DAG, SDValue Src, SDValue Dst, ISD::ArgFlagsTy Flags) const {
2336
+ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
2337
+ ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
2338
+
2339
+ // Globals are always safe to copy from.
2340
+ if (isa<GlobalAddressSDNode>(Src) || isa<ExternalSymbolSDNode>(Src))
2341
+ return CopyOnce;
2342
+
2343
+ // Can only analyse frame index nodes, conservatively assume we need a
2344
+ // temporary.
2345
+ auto *SrcFrameIdxNode = dyn_cast<FrameIndexSDNode>(Src);
2346
+ auto *DstFrameIdxNode = dyn_cast<FrameIndexSDNode>(Dst);
2347
+ if (!SrcFrameIdxNode || !DstFrameIdxNode)
2348
+ return CopyViaTemp;
2349
+
2350
+ int SrcFI = SrcFrameIdxNode->getIndex();
2351
+ int DstFI = DstFrameIdxNode->getIndex();
2352
+ assert(MFI.isFixedObjectIndex(DstFI) &&
2353
+ "byval passed in non-fixed stack slot");
2354
+
2355
+ int64_t SrcOffset = MFI.getObjectOffset(SrcFI);
2356
+ int64_t DstOffset = MFI.getObjectOffset(DstFI);
2357
+
2358
+ // If the source is in the local frame, then the copy to the argument memory
2359
+ // is always valid.
2360
+ bool FixedSrc = MFI.isFixedObjectIndex(SrcFI);
2361
+ if (!FixedSrc ||
2362
+ (FixedSrc && SrcOffset < -(int64_t)AFI->getArgRegsSaveSize()))
2363
+ return CopyOnce;
2364
+
2365
+ // In the case of byval arguments split between registers and the stack,
2366
+ // computeAddrForCallArg returns a FrameIndex which corresponds only to the
2367
+ // stack portion, but the Src SDValue will refer to the full value, including
2368
+ // the local stack memory that the register portion gets stored into. We only
2369
+ // need to compare them for equality, so normalise on the full value version.
2370
+ uint64_t RegSize = Flags.getByValSize() - MFI.getObjectSize(DstFI);
2371
+ DstOffset -= RegSize;
2372
+
2373
+ // If the value is already in the correct location, then no copying is
2374
+ // needed. If not, then we need to copy via a temporary.
2375
+ if (SrcOffset == DstOffset)
2376
+ return NoCopy;
2377
+ else
2378
+ return CopyViaTemp;
2379
+ }
2380
+
2328
2381
void ARMTargetLowering::PassF64ArgInRegs(const SDLoc &dl, SelectionDAG &DAG,
2329
2382
SDValue Chain, SDValue &Arg,
2330
2383
RegsToPassVector &RegsToPass,
@@ -2499,37 +2552,58 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
2499
2552
// overwritten by the stores of the outgoing arguments. To avoid this, we
2500
2553
// need to make a temporary copy of them in local stack space, then copy back
2501
2554
// to the argument area.
2502
- // TODO This could be optimised to skip byvals which are already being copied
2503
- // from local stack space, or which are copied from the incoming stack at the
2504
- // exact same location.
2505
2555
DenseMap<unsigned, SDValue> ByValTemporaries;
2506
2556
SDValue ByValTempChain;
2507
2557
if (isTailCall) {
2508
- for (unsigned ArgIdx = 0, e = OutVals.size(); ArgIdx != e; ++ArgIdx) {
2509
- SDValue Arg = OutVals[ArgIdx];
2558
+ SmallVector<SDValue, 8> ByValCopyChains;
2559
+ for (const CCValAssign &VA : ArgLocs) {
2560
+ unsigned ArgIdx = VA.getValNo();
2561
+ SDValue Src = OutVals[ArgIdx];
2510
2562
ISD::ArgFlagsTy Flags = Outs[ArgIdx].Flags;
2511
2563
2512
- if (Flags.isByVal()) {
2513
- int FrameIdx = MFI.CreateStackObject(
2564
+ if (!Flags.isByVal())
2565
+ continue;
2566
+
2567
+ SDValue Dst;
2568
+ MachinePointerInfo DstInfo;
2569
+ std::tie(Dst, DstInfo) =
2570
+ computeAddrForCallArg(dl, DAG, VA, SDValue(), true, SPDiff);
2571
+ ByValCopyKind Copy = ByValNeedsCopyForTailCall(DAG, Src, Dst, Flags);
2572
+
2573
+ if (Copy == NoCopy) {
2574
+ // If the argument is already at the correct offset on the stack
2575
+ // (because we are forwarding a byval argument from our caller), we
2576
+ // don't need any copying.
2577
+ continue;
2578
+ } else if (Copy == CopyOnce) {
2579
+ // If the argument is in our local stack frame, no other argument
2580
+ // preparation can clobber it, so we can copy it to the final location
2581
+ // later.
2582
+ ByValTemporaries[ArgIdx] = Src;
2583
+ } else {
2584
+ assert(Copy == CopyViaTemp && "unexpected enum value");
2585
+ // If we might be copying this argument from the outgoing argument
2586
+ // stack area, we need to copy via a temporary in the local stack
2587
+ // frame.
2588
+ int TempFrameIdx = MFI.CreateStackObject(
2514
2589
Flags.getByValSize(), Flags.getNonZeroByValAlign(), false);
2515
- SDValue Dst =
2516
- DAG.getFrameIndex(FrameIdx , getPointerTy(DAG.getDataLayout()));
2590
+ SDValue Temp =
2591
+ DAG.getFrameIndex(TempFrameIdx , getPointerTy(DAG.getDataLayout()));
2517
2592
2518
2593
SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
2519
2594
SDValue AlignNode =
2520
2595
DAG.getConstant(Flags.getNonZeroByValAlign().value(), dl, MVT::i32);
2521
2596
2522
2597
SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
2523
- SDValue Ops[] = { Chain, Dst, Arg , SizeNode, AlignNode};
2524
- MemOpChains .push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
2525
- Ops));
2526
- ByValTemporaries[ArgIdx] = Dst ;
2598
+ SDValue Ops[] = {Chain, Temp, Src , SizeNode, AlignNode};
2599
+ ByValCopyChains .push_back(
2600
+ DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs, Ops));
2601
+ ByValTemporaries[ArgIdx] = Temp ;
2527
2602
}
2528
2603
}
2529
- if (!MemOpChains.empty()) {
2530
- ByValTempChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
2531
- MemOpChains.clear();
2532
- }
2604
+ if (!ByValCopyChains.empty())
2605
+ ByValTempChain =
2606
+ DAG.getNode(ISD::TokenFactor, dl, MVT::Other, ByValCopyChains);
2533
2607
}
2534
2608
2535
2609
// During a tail call, stores to the argument area must happen after all of
@@ -2644,13 +2718,17 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
2644
2718
unsigned CurByValIdx = CCInfo.getInRegsParamsProcessed();
2645
2719
2646
2720
SDValue ByValSrc;
2647
- if (ByValTemporaries.contains(realArgIdx))
2721
+ bool NeedsStackCopy;
2722
+ if (ByValTemporaries.contains(realArgIdx)) {
2648
2723
ByValSrc = ByValTemporaries[realArgIdx];
2649
- else
2724
+ NeedsStackCopy = true;
2725
+ } else {
2650
2726
ByValSrc = Arg;
2727
+ NeedsStackCopy = !isTailCall;
2728
+ }
2651
2729
2730
+ // If part of the argument is in registers, load them.
2652
2731
if (CurByValIdx < ByValArgsCount) {
2653
-
2654
2732
unsigned RegBegin, RegEnd;
2655
2733
CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);
2656
2734
@@ -2674,7 +2752,9 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
2674
2752
CCInfo.nextInRegsParam();
2675
2753
}
2676
2754
2677
- if (Flags.getByValSize() > 4*offset) {
2755
+ // If the memory part of the argument isn't already in the correct place
2756
+ // (which can happen with tail calls), copy it into the argument area.
2757
+ if (NeedsStackCopy && Flags.getByValSize() > 4 * offset) {
2678
2758
auto PtrVT = getPointerTy(DAG.getDataLayout());
2679
2759
SDValue Dst;
2680
2760
MachinePointerInfo DstInfo;
0 commit comments