@@ -6783,48 +6783,11 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo(
6783
6783
6784
6784
// Properties about candidate MBBs that hold for all of them.
6785
6785
unsigned FlagsSetInAll = 0xF ;
6786
-
6787
- // Compute liveness information for each candidate, and set FlagsSetInAll.
6788
6786
std::for_each (RepeatedSequenceLocs.begin (), RepeatedSequenceLocs.end (),
6789
6787
[&FlagsSetInAll](outliner::Candidate &C) {
6790
6788
FlagsSetInAll &= C.Flags ;
6791
6789
});
6792
6790
6793
- // According to the AArch64 Procedure Call Standard, the following are
6794
- // undefined on entry/exit from a function call:
6795
- //
6796
- // * Registers x16, x17, (and thus w16, w17)
6797
- // * Condition codes (and thus the NZCV register)
6798
- //
6799
- // Because of this, we can't outline any sequence of instructions where
6800
- // one
6801
- // of these registers is live into/across it. Thus, we need to delete
6802
- // those
6803
- // candidates.
6804
- auto CantGuaranteeValueAcrossCall = [&TRI](outliner::Candidate &C) {
6805
- // If the unsafe registers in this block are all dead, then we don't need
6806
- // to compute liveness here.
6807
- if (C.Flags & UnsafeRegsDead)
6808
- return false ;
6809
- return C.isAnyUnavailableAcrossOrOutOfSeq (
6810
- {AArch64::W16, AArch64::W17, AArch64::NZCV}, TRI);
6811
- };
6812
-
6813
- // Are there any candidates where those registers are live?
6814
- if (!(FlagsSetInAll & UnsafeRegsDead)) {
6815
- // Erase every candidate that violates the restrictions above. (It could be
6816
- // true that we have viable candidates, so it's not worth bailing out in
6817
- // the case that, say, 1 out of 20 candidates violate the restrictions.)
6818
- llvm::erase_if (RepeatedSequenceLocs, CantGuaranteeValueAcrossCall);
6819
-
6820
- // If the sequence doesn't have enough candidates left, then we're done.
6821
- if (RepeatedSequenceLocs.size () < 2 )
6822
- return outliner::OutlinedFunction ();
6823
- }
6824
-
6825
- // At this point, we have only "safe" candidates to outline. Figure out
6826
- // frame + call instruction information.
6827
-
6828
6791
unsigned LastInstrOpcode = RepeatedSequenceLocs[0 ].back ()->getOpcode ();
6829
6792
6830
6793
// Helper lambda which sets call information for every candidate.
@@ -6952,6 +6915,10 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo(
6952
6915
6953
6916
// Check if we have to save LR.
6954
6917
for (outliner::Candidate &C : RepeatedSequenceLocs) {
6918
+ bool LRAvailable =
6919
+ (C.Flags & MachineOutlinerMBBFlags::LRUnavailableSomewhere)
6920
+ ? C.isAvailableAcrossAndOutOfSeq (AArch64::LR, TRI)
6921
+ : true ;
6955
6922
// If we have a noreturn caller, then we're going to be conservative and
6956
6923
// say that we have to save LR. If we don't have a ret at the end of the
6957
6924
// block, then we can't reason about liveness accurately.
@@ -6962,7 +6929,7 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo(
6962
6929
C.getMF ()->getFunction ().hasFnAttribute (Attribute::NoReturn);
6963
6930
6964
6931
// Is LR available? If so, we don't need a save.
6965
- if (C. isAvailableAcrossAndOutOfSeq (AArch64::LR, TRI) && !IsNoReturn) {
6932
+ if (LRAvailable && !IsNoReturn) {
6966
6933
NumBytesNoStackCalls += 4 ;
6967
6934
C.setCallInfo (MachineOutlinerNoLRSave, 4 );
6968
6935
CandidatesWithoutStackFixups.push_back (C);
@@ -7134,72 +7101,88 @@ bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(
7134
7101
return true ;
7135
7102
}
7136
7103
7137
- bool AArch64InstrInfo::isMBBSafeToOutlineFrom (MachineBasicBlock &MBB,
7138
- unsigned &Flags) const {
7139
- if (!TargetInstrInfo::isMBBSafeToOutlineFrom (MBB, Flags))
7140
- return false ;
7141
- // Check if LR is available through all of the MBB. If it's not, then set
7142
- // a flag.
7104
+ SmallVector<std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>>
7105
+ AArch64InstrInfo::getOutlinableRanges (MachineBasicBlock &MBB,
7106
+ unsigned &Flags) const {
7143
7107
assert (MBB.getParent ()->getRegInfo ().tracksLiveness () &&
7144
- " Suitable Machine Function for outlining must track liveness" );
7145
- LiveRegUnits LRU (getRegisterInfo ());
7108
+ " Must track liveness!" );
7109
+ SmallVector<
7110
+ std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>>
7111
+ Ranges;
7146
7112
7147
- std::for_each (MBB.rbegin (), MBB.rend (),
7148
- [&LRU](MachineInstr &MI) { LRU.accumulate (MI); });
7113
+ // The range [RangeBegin, RangeEnd).
7114
+ MachineBasicBlock::instr_iterator RangeEnd = MBB.instr_end ();
7115
+ MachineBasicBlock::instr_iterator RangeBegin = RangeEnd;
7116
+ unsigned RangeLen = 0 ;
7149
7117
7150
- // Check if each of the unsafe registers are available...
7151
- bool W16AvailableInBlock = LRU.available (AArch64::W16);
7152
- bool W17AvailableInBlock = LRU.available (AArch64::W17);
7153
- bool NZCVAvailableInBlock = LRU.available (AArch64::NZCV);
7118
+ // According to the AArch64 Procedure Call Standard, the following are
7119
+ // undefined on entry/exit from a function call:
7120
+ //
7121
+ // * Registers x16, x17, (and thus w16, w17)
7122
+ // * Condition codes (and thus the NZCV register)
7123
+ //
7124
+ // If any of these registers are used inside or live across an outlined
7125
+ // function, then they may be modified later, either by the compiler or
7126
+ // some other tool (like the linker).
7127
+ //
7128
+ // To avoid outlining in these situations, partition each block into ranges
7129
+ // where these registers are dead. We will only outline from those ranges.
7130
+ LiveRegUnits LRU (getRegisterInfo ());
7131
+ auto AreAllUnsafeRegsDead = [&LRU]() {
7132
+ return LRU.available (AArch64::W16) && LRU.available (AArch64::W17) &&
7133
+ LRU.available (AArch64::NZCV);
7134
+ };
7154
7135
7155
- // If all of these are dead (and not live out), we know we don't have to check
7156
- // them later.
7157
- if (W16AvailableInBlock && W17AvailableInBlock && NZCVAvailableInBlock)
7158
- Flags |= MachineOutlinerMBBFlags::UnsafeRegsDead;
7136
+ // We need to know if LR is live across an outlining boundary later on in
7137
+ // order to decide how we'll create the outlined call, frame, etc.
7138
+ //
7139
+ // It's pretty expensive to check this for *every candidate* within a block.
7140
+ // That's some potentially n^2 behaviour, since in the worst case, we'd need
7141
+ // to compute liveness from the end of the block for O(n) candidates within
7142
+ // the block.
7143
+ //
7144
+ // So, to improve the average case, let's keep track of liveness from the end
7145
+ // of the block to the beginning of *every outlinable range*. If we know that
7146
+ // LR is available in every range we could outline from, then we know that
7147
+ // we don't need to check liveness for any candidate within that range.
7148
+ bool LRAvailableEverywhere = true ;
7159
7149
7160
- // Now, add the live outs to the set .
7150
+ // Compute liveness bottom-up .
7161
7151
LRU.addLiveOuts (MBB);
7162
-
7163
- // If any of these registers is available in the MBB, but also a live out of
7164
- // the block, then we know outlining is unsafe.
7165
- if (W16AvailableInBlock && !LRU.available (AArch64::W16))
7166
- return false ;
7167
- if (W17AvailableInBlock && !LRU.available (AArch64::W17))
7168
- return false ;
7169
- if (NZCVAvailableInBlock && !LRU.available (AArch64::NZCV))
7170
- return false ;
7171
-
7172
- // Check if there's a call inside this MachineBasicBlock. If there is, then
7173
- // set a flag.
7174
- if (any_of (MBB, [](MachineInstr &MI) { return MI.isCall (); }))
7175
- Flags |= MachineOutlinerMBBFlags::HasCalls;
7176
-
7177
- MachineFunction *MF = MBB.getParent ();
7178
-
7179
- // In the event that we outline, we may have to save LR. If there is an
7180
- // available register in the MBB, then we'll always save LR there. Check if
7181
- // this is true.
7182
- bool CanSaveLR = false ;
7183
- const AArch64RegisterInfo *ARI = static_cast <const AArch64RegisterInfo *>(
7184
- MF->getSubtarget ().getRegisterInfo ());
7185
-
7186
- // Check if there is an available register across the sequence that we can
7187
- // use.
7188
- for (unsigned Reg : AArch64::GPR64RegClass) {
7189
- if (!ARI->isReservedReg (*MF, Reg) && Reg != AArch64::LR &&
7190
- Reg != AArch64::X16 && Reg != AArch64::X17 && LRU.available (Reg)) {
7191
- CanSaveLR = true ;
7192
- break ;
7152
+ for (auto &MI : make_range (MBB.instr_rbegin (), MBB.instr_rend ())) {
7153
+ LRU.stepBackward (MI);
7154
+ // If we are in a range where all of the unsafe registers are dead, then
7155
+ // update the beginning of the range. Also try to precalculate some stuff
7156
+ // for getOutliningCandidateInfo.
7157
+ if (AreAllUnsafeRegsDead ()) {
7158
+ if (MI.isCall ())
7159
+ Flags |= MachineOutlinerMBBFlags::HasCalls;
7160
+ LRAvailableEverywhere &= LRU.available (AArch64::LR);
7161
+ RangeBegin = MI.getIterator ();
7162
+ ++RangeLen;
7163
+ continue ;
7193
7164
}
7194
- }
7195
-
7196
- // Check if we have a register we can save LR to, and if LR was used
7197
- // somewhere. If both of those things are true, then we need to evaluate the
7198
- // safety of outlining stack instructions later.
7199
- if (!CanSaveLR && !LRU.available (AArch64::LR))
7165
+ // At least one unsafe register is not dead. We do not want to outline at
7166
+ // this point. If it is long enough to outline from, save the range
7167
+ // [RangeBegin, RangeEnd).
7168
+ if (RangeLen > 1 )
7169
+ Ranges.push_back (std::make_pair (RangeBegin, RangeEnd));
7170
+ // Start a new range where RangeEnd is the first known unsafe point.
7171
+ RangeLen = 0 ;
7172
+ RangeBegin = MI.getIterator ();
7173
+ RangeEnd = MI.getIterator ();
7174
+ }
7175
+ // Above loop misses the last (or only) range.
7176
+ if (AreAllUnsafeRegsDead () && RangeLen > 1 )
7177
+ Ranges.push_back (std::make_pair (RangeBegin, RangeEnd));
7178
+ if (Ranges.empty ())
7179
+ return Ranges;
7180
+ // We found the ranges bottom-up. Mapping expects them top-down. Reverse
7181
+ // the order.
7182
+ std::reverse (Ranges.begin (), Ranges.end ());
7183
+ if (!LRAvailableEverywhere)
7200
7184
Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;
7201
-
7202
- return true ;
7185
+ return Ranges;
7203
7186
}
7204
7187
7205
7188
outliner::InstrType
0 commit comments