@@ -300,7 +300,6 @@ static MachineBasicBlock::iterator insertSEH(MachineBasicBlock::iterator MBBI,
300300 break ;
301301 case ARM::t2ADDri: // add.w r11, sp, #xx
302302 case ARM::t2ADDri12: // add.w r11, sp, #xx
303- case ARM::t2SUBri: // sub.w r4, r11, #xx
304303 case ARM::t2MOVTi16: // movt r4, #xx
305304 case ARM::t2MOVi16: // movw r4, #xx
306305 case ARM::tBL: // bl __chkstk
@@ -633,15 +632,23 @@ static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI,
633632/// Unfortunately we cannot determine this value in determineCalleeSaves() yet
634633/// as assignCalleeSavedSpillSlots() hasn't run at this point. Instead we use
635634/// this to produce a conservative estimate that we check in an assert() later.
636- static int getMaxFPOffset (const ARMSubtarget &STI, const ARMFunctionInfo &AFI) {
635+ static int getMaxFPOffset (const ARMSubtarget &STI, const ARMFunctionInfo &AFI,
636+ const MachineFunction &MF) {
637637 // For Thumb1, push.w isn't available, so the first push will always push
638638 // r7 and lr onto the stack first: two 4-byte slots below the arg-regs save area.
639639 if (AFI.isThumb1OnlyFunction ())
640640 return -AFI.getArgRegsSaveSize () - (2 * 4 );
641641 // This is a conservative estimation for the default layout: assume the frame
642642 // pointer is r7 and pc("r15") down to r8 get spilled before it (= 8 registers of 4 bytes).
643- int FPCXTSaveSize = (STI.hasV8_1MMainlineOps () && AFI.isCmseNSEntryFunction ()) ? 4 : 0 ;
644- return - FPCXTSaveSize - AFI.getArgRegsSaveSize () - (8 * 4 );
643+ int MaxRegBytes = 8 * 4 ;
644+ if (STI.splitFramePointerPush (MF)) {
645+ // With a split frame pointer push, r11 can be stored below all of r4-r15
646+ // (3 registers more than above, i.e. 11 * 4 bytes), plus d8-d15 (8 registers of 8 bytes).
647+ MaxRegBytes = 11 * 4 + 8 * 8 ;
648+ }
649+ int FPCXTSaveSize = // 4 extra bytes only for v8.1-M Mainline CMSE non-secure entry functions
650+ (STI.hasV8_1MMainlineOps () && AFI.isCmseNSEntryFunction ()) ? 4 : 0 ;
651+ return -FPCXTSaveSize - AFI.getArgRegsSaveSize () - MaxRegBytes; // lower bound: emitPrologue asserts the real FP spill offset is >= this
645652}
646653
647654void ARMFrameLowering::emitPrologue (MachineFunction &MF,
@@ -704,42 +711,80 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
704711 }
705712
706713 // Determine spill area sizes.
707- for (const CalleeSavedInfo &I : CSI) {
708- Register Reg = I.getReg ();
709- int FI = I.getFrameIdx ();
710- switch (Reg) {
711- case ARM::R8:
712- case ARM::R9:
713- case ARM::R10:
714- case ARM::R11:
715- case ARM::R12:
716- if (STI.splitFramePushPop (MF)) {
714+ if (STI.splitFramePointerPush (MF)) {
715+ for (const CalleeSavedInfo &I : CSI) {
716+ Register Reg = I.getReg ();
717+ int FI = I.getFrameIdx ();
718+ switch (Reg) {
719+ case ARM::R11:
720+ case ARM::LR:
721+ if (Reg == FramePtr)
722+ FramePtrSpillFI = FI;
717723 GPRCS2Size += 4 ;
718724 break ;
725+ case ARM::R0:
726+ case ARM::R1:
727+ case ARM::R2:
728+ case ARM::R3:
729+ case ARM::R4:
730+ case ARM::R5:
731+ case ARM::R6:
732+ case ARM::R7:
733+ case ARM::R8:
734+ case ARM::R9:
735+ case ARM::R10:
736+ case ARM::R12:
737+ GPRCS1Size += 4 ;
738+ break ;
739+ case ARM::FPCXTNS:
740+ FPCXTSaveSize = 4 ;
741+ break ;
742+ default :
743+ // This is a DPR. Exclude the aligned DPRCS2 spills.
744+ if (Reg == ARM::D8)
745+ D8SpillFI = FI;
746+ if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs ())
747+ DPRCSSize += 8 ;
748+ }
749+ }
750+ } else {
751+ for (const CalleeSavedInfo &I : CSI) {
752+ Register Reg = I.getReg ();
753+ int FI = I.getFrameIdx ();
754+ switch (Reg) {
755+ case ARM::R8:
756+ case ARM::R9:
757+ case ARM::R10:
758+ case ARM::R11:
759+ case ARM::R12:
760+ if (STI.splitFramePushPop (MF)) {
761+ GPRCS2Size += 4 ;
762+ break ;
763+ }
764+ LLVM_FALLTHROUGH;
765+ case ARM::R0:
766+ case ARM::R1:
767+ case ARM::R2:
768+ case ARM::R3:
769+ case ARM::R4:
770+ case ARM::R5:
771+ case ARM::R6:
772+ case ARM::R7:
773+ case ARM::LR:
774+ if (Reg == FramePtr)
775+ FramePtrSpillFI = FI;
776+ GPRCS1Size += 4 ;
777+ break ;
778+ case ARM::FPCXTNS:
779+ FPCXTSaveSize = 4 ;
780+ break ;
781+ default :
782+ // This is a DPR. Exclude the aligned DPRCS2 spills.
783+ if (Reg == ARM::D8)
784+ D8SpillFI = FI;
785+ if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs ())
786+ DPRCSSize += 8 ;
719787 }
720- LLVM_FALLTHROUGH;
721- case ARM::R0:
722- case ARM::R1:
723- case ARM::R2:
724- case ARM::R3:
725- case ARM::R4:
726- case ARM::R5:
727- case ARM::R6:
728- case ARM::R7:
729- case ARM::LR:
730- if (Reg == FramePtr)
731- FramePtrSpillFI = FI;
732- GPRCS1Size += 4 ;
733- break ;
734- case ARM::FPCXTNS:
735- FPCXTSaveSize = 4 ;
736- break ;
737- default :
738- // This is a DPR. Exclude the aligned DPRCS2 spills.
739- if (Reg == ARM::D8)
740- D8SpillFI = FI;
741- if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs ())
742- DPRCSSize += 8 ;
743788 }
744789 }
745790
@@ -774,15 +819,23 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
774819 unsigned GPRCS1Offset = FPCXTOffset - GPRCS1Size;
775820 unsigned GPRCS2Offset = GPRCS1Offset - GPRCS2Size;
776821 Align DPRAlign = DPRCSSize ? std::min (Align (8 ), Alignment) : Align (4 );
777- unsigned DPRGapSize =
778- (GPRCS1Size + GPRCS2Size + FPCXTSaveSize + ArgRegsSaveSize) %
779- DPRAlign.value ();
822+ unsigned DPRGapSize = GPRCS1Size + FPCXTSaveSize + ArgRegsSaveSize;
823+ if (!STI.splitFramePointerPush (MF)) {
824+ DPRGapSize += GPRCS2Size;
825+ }
826+ DPRGapSize %= DPRAlign.value ();
780827
781- unsigned DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize;
828+ unsigned DPRCSOffset;
829+ if (STI.splitFramePointerPush (MF)) {
830+ DPRCSOffset = GPRCS1Offset - DPRGapSize - DPRCSSize;
831+ GPRCS2Offset = DPRCSOffset - GPRCS2Size;
832+ } else {
833+ DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize;
834+ }
782835 int FramePtrOffsetInPush = 0 ;
783836 if (HasFP) {
784837 int FPOffset = MFI.getObjectOffset (FramePtrSpillFI);
785- assert (getMaxFPOffset (STI, *AFI) <= FPOffset &&
838+ assert (getMaxFPOffset (STI, *AFI, MF ) <= FPOffset &&
786839 " Max FP estimation is wrong" );
787840 FramePtrOffsetInPush = FPOffset + ArgRegsSaveSize + FPCXTSaveSize;
788841 AFI->setFramePtrSpillOffset (MFI.getObjectOffset (FramePtrSpillFI) +
@@ -793,7 +846,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
793846 AFI->setDPRCalleeSavedAreaOffset (DPRCSOffset);
794847
795848 // Move past area 2.
796- if (GPRCS2Size > 0 ) {
849+ if (GPRCS2Size > 0 && !STI. splitFramePointerPush (MF) ) {
797850 GPRCS2Push = LastPush = MBBI++;
798851 DefCFAOffsetCandidates.addInst (LastPush, GPRCS2Size);
799852 }
@@ -833,6 +886,15 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
833886 } else
834887 NumBytes = DPRCSOffset;
835888
889+ if (GPRCS2Size > 0 && STI.splitFramePointerPush (MF)) {
890+ GPRCS2Push = LastPush = MBBI++;
891+ DefCFAOffsetCandidates.addInst (LastPush, GPRCS2Size);
892+ }
893+
894+ bool NeedsWinCFIStackAlloc = NeedsWinCFI;
895+ if (STI.splitFramePointerPush (MF) && HasFP)
896+ NeedsWinCFIStackAlloc = false ;
897+
836898 if (STI.isTargetWindows () && WindowsRequiresStackProbe (MF, NumBytes)) {
837899 uint32_t NumWords = NumBytes >> 2 ;
838900
@@ -888,7 +950,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
888950 .setMIFlags (MachineInstr::FrameSetup)
889951 .add (predOps (ARMCC::AL))
890952 .add (condCodeOp ());
891- if (NeedsWinCFI ) {
953+ if (NeedsWinCFIStackAlloc ) {
892954 SEH = BuildMI (MF, dl, TII.get (ARM::SEH_StackAlloc))
893955 .addImm (NumBytes)
894956 .addImm (/* Wide=*/ 1 )
@@ -927,13 +989,20 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
927989 // into spill area 1, including the FP in R11. In either case, it
928990 // is in area one and the adjustment needs to take place just after
929991 // that push.
992+ MachineBasicBlock::iterator AfterPush;
930993 if (HasFP) {
931- MachineBasicBlock::iterator AfterPush = std::next (GPRCS1Push);
994+ AfterPush = std::next (GPRCS1Push);
932995 unsigned PushSize = sizeOfSPAdjustment (*GPRCS1Push);
933- emitRegPlusImmediate (!AFI->isThumbFunction (), MBB, AfterPush,
934- dl, TII, FramePtr, ARM::SP,
935- PushSize + FramePtrOffsetInPush,
936- MachineInstr::FrameSetup);
996+ int FPOffset = PushSize + FramePtrOffsetInPush;
997+ if (STI.splitFramePointerPush (MF)) {
998+ AfterPush = std::next (GPRCS2Push);
999+ emitRegPlusImmediate (!AFI->isThumbFunction (), MBB, AfterPush, dl, TII,
1000+ FramePtr, ARM::SP, 0 , MachineInstr::FrameSetup);
1001+ } else {
1002+ emitRegPlusImmediate (!AFI->isThumbFunction (), MBB, AfterPush, dl, TII,
1003+ FramePtr, ARM::SP, FPOffset,
1004+ MachineInstr::FrameSetup);
1005+ }
9371006 if (!NeedsWinCFI) {
9381007 if (FramePtrOffsetInPush + PushSize != 0 ) {
9391008 unsigned CFIIndex = MF.addFrameInst (MCCFIInstruction::cfiDefCfa (
@@ -956,8 +1025,11 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
9561025 // Emit a SEH opcode indicating the prologue end. The rest of the prologue
9571026 // instructions below don't need to be replayed to unwind the stack.
9581027 if (NeedsWinCFI && MBBI != MBB.begin ()) {
959- insertSEHRange (MBB, {}, MBBI, TII, MachineInstr::FrameSetup);
960- BuildMI (MBB, MBBI, dl, TII.get (ARM::SEH_PrologEnd))
1028+ MachineBasicBlock::iterator End = MBBI;
1029+ if (HasFP && STI.splitFramePointerPush (MF))
1030+ End = AfterPush;
1031+ insertSEHRange (MBB, {}, End, TII, MachineInstr::FrameSetup);
1032+ BuildMI (MBB, End, dl, TII.get (ARM::SEH_PrologEnd))
9611033 .setMIFlag (MachineInstr::FrameSetup);
9621034 MF.setHasWinCFI (true );
9631035 }
@@ -1483,7 +1555,8 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
14831555 continue ;
14841556 if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt &&
14851557 !isCmseEntry && !isTrap && AFI->getArgumentStackToRestore () == 0 &&
1486- STI.hasV5TOps () && MBB.succ_empty () && !hasPAC) {
1558+ STI.hasV5TOps () && MBB.succ_empty () && !hasPAC &&
1559+ !STI.splitFramePointerPush (MF)) {
14871560 Reg = ARM::PC;
14881561 // Fold the return instruction into the LDM.
14891562 DeleteRet = true ;
@@ -1847,12 +1920,21 @@ bool ARMFrameLowering::spillCalleeSavedRegisters(
18471920 .addImm (-4 )
18481921 .add (predOps (ARMCC::AL));
18491922 }
1850- emitPushInst (MBB, MI, CSI, PushOpc, PushOneOpc, false , &isARMArea1Register, 0 ,
1851- MachineInstr::FrameSetup);
1852- emitPushInst (MBB, MI, CSI, PushOpc, PushOneOpc, false , &isARMArea2Register, 0 ,
1853- MachineInstr::FrameSetup);
1854- emitPushInst (MBB, MI, CSI, FltOpc, 0 , true , &isARMArea3Register,
1855- NumAlignedDPRCS2Regs, MachineInstr::FrameSetup);
1923+ if (STI.splitFramePointerPush (MF)) {
1924+ emitPushInst (MBB, MI, CSI, PushOpc, PushOneOpc, false ,
1925+ &isSplitFPArea1Register, 0 , MachineInstr::FrameSetup);
1926+ emitPushInst (MBB, MI, CSI, FltOpc, 0 , true , &isARMArea3Register,
1927+ NumAlignedDPRCS2Regs, MachineInstr::FrameSetup);
1928+ emitPushInst (MBB, MI, CSI, PushOpc, PushOneOpc, false ,
1929+ &isSplitFPArea2Register, 0 , MachineInstr::FrameSetup);
1930+ } else {
1931+ emitPushInst (MBB, MI, CSI, PushOpc, PushOneOpc, false , &isARMArea1Register,
1932+ 0 , MachineInstr::FrameSetup);
1933+ emitPushInst (MBB, MI, CSI, PushOpc, PushOneOpc, false , &isARMArea2Register,
1934+ 0 , MachineInstr::FrameSetup);
1935+ emitPushInst (MBB, MI, CSI, FltOpc, 0 , true , &isARMArea3Register,
1936+ NumAlignedDPRCS2Regs, MachineInstr::FrameSetup);
1937+ }
18561938
18571939 // The code above does not insert spill code for the aligned DPRCS2 registers.
18581940 // The stack realignment code will be inserted between the push instructions
@@ -1880,14 +1962,24 @@ bool ARMFrameLowering::restoreCalleeSavedRegisters(
18801962 emitAlignedDPRCS2Restores (MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);
18811963
18821964 unsigned PopOpc = AFI->isThumbFunction () ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
1883- unsigned LdrOpc = AFI->isThumbFunction () ? ARM::t2LDR_POST :ARM::LDR_POST_IMM;
1965+ unsigned LdrOpc =
1966+ AFI->isThumbFunction () ? ARM::t2LDR_POST : ARM::LDR_POST_IMM;
18841967 unsigned FltOpc = ARM::VLDMDIA_UPD;
1885- emitPopInst (MBB, MI, CSI, FltOpc, 0 , isVarArg, true , &isARMArea3Register,
1886- NumAlignedDPRCS2Regs);
1887- emitPopInst (MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false ,
1888- &isARMArea2Register, 0 );
1889- emitPopInst (MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false ,
1890- &isARMArea1Register, 0 );
1968+ if (STI.splitFramePointerPush (MF)) {
1969+ emitPopInst (MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false ,
1970+ &isSplitFPArea2Register, 0 );
1971+ emitPopInst (MBB, MI, CSI, FltOpc, 0 , isVarArg, true , &isARMArea3Register,
1972+ NumAlignedDPRCS2Regs);
1973+ emitPopInst (MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false ,
1974+ &isSplitFPArea1Register, 0 );
1975+ } else {
1976+ emitPopInst (MBB, MI, CSI, FltOpc, 0 , isVarArg, true , &isARMArea3Register,
1977+ NumAlignedDPRCS2Regs);
1978+ emitPopInst (MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false ,
1979+ &isARMArea2Register, 0 );
1980+ emitPopInst (MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false ,
1981+ &isARMArea1Register, 0 );
1982+ }
18911983
18921984 return true ;
18931985}
@@ -2287,7 +2379,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
22872379 //
22882380 // We could do slightly better on Thumb1; in some cases, an sp-relative
22892381 // offset would be legal even though an fp-relative offset is not.
2290- int MaxFPOffset = getMaxFPOffset (STI, *AFI);
2382+ int MaxFPOffset = getMaxFPOffset (STI, *AFI, MF );
22912383 bool HasLargeArgumentList =
22922384 HasFP && (MaxFixedOffset - MaxFPOffset) > (int )EstimatedRSFixedSizeLimit;
22932385
0 commit comments