Skip to content

Commit b1806e6

Browse files
[AArch64] Stack probing for dynamic allocas in SelectionDAG (#66525)
Add support for probing for dynamic allocas (variable-size objects and outgoing stack arguments). Co-authored-by: Oliver Stannard <[email protected]>
1 parent a60a542 commit b1806e6

File tree

6 files changed

+528
-57
lines changed

6 files changed

+528
-57
lines changed

llvm/lib/Target/AArch64/AArch64FrameLowering.cpp

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -479,6 +479,11 @@ bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
479479
/// included as part of the stack frame.
480480
bool
481481
AArch64FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
482+
// The stack probing code for the dynamically allocated outgoing arguments
483+
// area assumes that the stack is probed at the top - either by the prologue
484+
// code, which issues a probe if `hasVarSizedObjects` returns true, or by the
485+
// most recent variable-sized object allocation. Changing the condition here
486+
// may need to be followed up by changes to the probe issuing logic.
482487
// The call frame is reported as reserved exactly when the function has no
// variable-sized stack objects.
return !MF.getFrameInfo().hasVarSizedObjects();
483488
}
484489

@@ -487,6 +492,9 @@ MachineBasicBlock::iterator AArch64FrameLowering::eliminateCallFramePseudoInstr(
487492
MachineBasicBlock::iterator I) const {
488493
const AArch64InstrInfo *TII =
489494
static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
495+
const AArch64TargetLowering *TLI =
496+
MF.getSubtarget<AArch64Subtarget>().getTargetLowering();
497+
MachineFrameInfo &MFI = MF.getFrameInfo();
490498
DebugLoc DL = I->getDebugLoc();
491499
unsigned Opc = I->getOpcode();
492500
bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
@@ -513,8 +521,24 @@ MachineBasicBlock::iterator AArch64FrameLowering::eliminateCallFramePseudoInstr(
513521
// Most call frames will be allocated at the start of a function so
514522
// this is OK, but it is a limitation that needs dealing with.
515523
assert(Amount > -0xffffff && Amount < 0xffffff && "call frame too large");
516-
emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP,
517-
StackOffset::getFixed(Amount), TII);
524+
525+
if (TLI->hasInlineStackProbe(MF) &&
526+
-Amount >= AArch64::StackProbeMaxUnprobedStack) {
527+
// When stack probing is enabled, the decrement of SP may need to be
528+
// probed. We only need to do this if the call site needs 1024 bytes of
529+
// space or more, because a region smaller than that is allowed to be
530+
// unprobed at an ABI boundary. We rely on the fact that SP has been
531+
// probed exactly at this point, either by the prologue or most recent
532+
// dynamic allocation.
533+
assert(MFI.hasVarSizedObjects() &&
534+
"non-reserved call frame without var sized objects?");
535+
Register ScratchReg =
536+
MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass);
537+
inlineStackProbeFixed(I, ScratchReg, -Amount, StackOffset::get(0, 0));
538+
} else {
539+
emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP,
540+
StackOffset::getFixed(Amount), TII);
541+
}
518542
}
519543
} else if (CalleePopAmount != 0) {
520544
// If the calling convention demands that the callee pops arguments from the

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 102 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -569,10 +569,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
569569
setOperationAction(ISD::FSHL, MVT::i32, Custom);
570570
setOperationAction(ISD::FSHL, MVT::i64, Custom);
571571

572-
if (Subtarget->isTargetWindows())
573-
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
574-
else
575-
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
572+
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
576573

577574
// Constant pool entries
578575
setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
@@ -2353,6 +2350,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
23532350
MAKE_CASE(AArch64ISD::CSINC)
23542351
MAKE_CASE(AArch64ISD::THREAD_POINTER)
23552352
MAKE_CASE(AArch64ISD::TLSDESC_CALLSEQ)
2353+
MAKE_CASE(AArch64ISD::PROBED_ALLOCA)
23562354
MAKE_CASE(AArch64ISD::ABDS_PRED)
23572355
MAKE_CASE(AArch64ISD::ABDU_PRED)
23582356
MAKE_CASE(AArch64ISD::HADDS_PRED)
@@ -2719,6 +2717,22 @@ MachineBasicBlock *AArch64TargetLowering::EmitLoweredCatchRet(
27192717
return BB;
27202718
}
27212719

2720+
// Custom inserter for the PROBED_STACKALLOC_DYN pseudo (dispatched from
// EmitInstrWithCustomInserter). Reads the target register from operand 0 of
// MI, asks the instruction info to emit the probed stack allocation at MI's
// position, erases MI, and returns the block that contains the iterator
// handed back by probedStackAlloc.
MachineBasicBlock *
2721+
AArch64TargetLowering::EmitDynamicProbedAlloc(MachineInstr &MI,
2722+
MachineBasicBlock *MBB) const {
2723+
MachineFunction &MF = *MBB->getParent();
2724+
MachineBasicBlock::iterator MBBI = MI.getIterator();
2725+
// NOTE(review): DL is computed here but not referenced again in this
// function.
DebugLoc DL = MBB->findDebugLoc(MBBI);
2726+
const AArch64InstrInfo &TII =
2727+
*MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
2728+
// Operand 0 of the pseudo is the register holding the requested SP value.
Register TargetReg = MI.getOperand(0).getReg();
2729+
// NOTE(review): the meaning of the trailing `false` argument is defined by
// probedStackAlloc, which is not visible here - confirm against its
// declaration.
MachineBasicBlock::iterator NextInst =
2730+
TII.probedStackAlloc(MBBI, TargetReg, false);
2731+
2732+
// The pseudo has been expanded; remove it.
MI.eraseFromParent();
2733+
// Return the parent of the returned iterator rather than MBB.
// NOTE(review): presumably the expansion can split MBB - confirm.
return NextInst->getParent();
2734+
}
2735+
27222736
MachineBasicBlock *
27232737
AArch64TargetLowering::EmitTileLoad(unsigned Opc, unsigned BaseReg,
27242738
MachineInstr &MI,
@@ -2863,6 +2877,10 @@ MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
28632877

28642878
case AArch64::CATCHRET:
28652879
return EmitLoweredCatchRet(MI, BB);
2880+
2881+
case AArch64::PROBED_STACKALLOC_DYN:
2882+
return EmitDynamicProbedAlloc(MI, BB);
2883+
28662884
case AArch64::LD1_MXIPXX_H_PSEUDO_B:
28672885
return EmitTileLoad(AArch64::LD1_MXIPXX_H_B, AArch64::ZAB0, MI, BB);
28682886
case AArch64::LD1_MXIPXX_H_PSEUDO_H:
@@ -14052,9 +14070,34 @@ SDValue AArch64TargetLowering::LowerATOMIC_LOAD_AND(SDValue Op,
1405214070
AN->getMemOperand());
1405314071
}
1405414072

14055-
SDValue AArch64TargetLowering::LowerWindowsDYNAMIC_STACKALLOC(
14056-
SDValue Op, SDValue Chain, SDValue &Size, SelectionDAG &DAG) const {
14073+
SDValue
14074+
AArch64TargetLowering::LowerWindowsDYNAMIC_STACKALLOC(SDValue Op,
14075+
SelectionDAG &DAG) const {
14076+
1405714077
SDLoc dl(Op);
14078+
// Get the inputs.
14079+
SDNode *Node = Op.getNode();
14080+
SDValue Chain = Op.getOperand(0);
14081+
SDValue Size = Op.getOperand(1);
14082+
MaybeAlign Align =
14083+
cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
14084+
EVT VT = Node->getValueType(0);
14085+
14086+
if (DAG.getMachineFunction().getFunction().hasFnAttribute(
14087+
"no-stack-arg-probe")) {
14088+
SDValue SP = DAG.getCopyFromReg(Chain, dl, AArch64::SP, MVT::i64);
14089+
Chain = SP.getValue(1);
14090+
SP = DAG.getNode(ISD::SUB, dl, MVT::i64, SP, Size);
14091+
if (Align)
14092+
SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
14093+
DAG.getConstant(-(uint64_t)Align->value(), dl, VT));
14094+
Chain = DAG.getCopyToReg(Chain, dl, AArch64::SP, SP);
14095+
SDValue Ops[2] = {SP, Chain};
14096+
return DAG.getMergeValues(Ops, dl);
14097+
}
14098+
14099+
Chain = DAG.getCALLSEQ_START(Chain, 0, 0, dl);
14100+
1405814101
EVT PtrVT = getPointerTy(DAG.getDataLayout());
1405914102
SDValue Callee = DAG.getTargetExternalSymbol(Subtarget->getChkStkName(),
1406014103
PtrVT, 0);
@@ -14078,7 +14121,59 @@ SDValue AArch64TargetLowering::LowerWindowsDYNAMIC_STACKALLOC(
1407814121

1407914122
Size = DAG.getNode(ISD::SHL, dl, MVT::i64, Size,
1408014123
DAG.getConstant(4, dl, MVT::i64));
14081-
return Chain;
14124+
14125+
SDValue SP = DAG.getCopyFromReg(Chain, dl, AArch64::SP, MVT::i64);
14126+
Chain = SP.getValue(1);
14127+
SP = DAG.getNode(ISD::SUB, dl, MVT::i64, SP, Size);
14128+
if (Align)
14129+
SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
14130+
DAG.getConstant(-(uint64_t)Align->value(), dl, VT));
14131+
Chain = DAG.getCopyToReg(Chain, dl, AArch64::SP, SP);
14132+
14133+
Chain = DAG.getCALLSEQ_END(Chain, 0, 0, SDValue(), dl);
14134+
14135+
SDValue Ops[2] = {SP, Chain};
14136+
return DAG.getMergeValues(Ops, dl);
14137+
}
14138+
14139+
// Lowers a DYNAMIC_STACKALLOC node for the inline stack probing case (this is
// the path LowerDYNAMIC_STACKALLOC takes when hasInlineStackProbe(MF) holds).
//
// Operands of Op: 0 = chain, 1 = allocation size, 2 = constant alignment.
// The new stack pointer is computed as SP - Size and, when an alignment is
// present, ANDed with the negated alignment value. The result is then passed
// to an AArch64ISD::PROBED_ALLOCA node instead of being copied to SP directly.
//
// Returns the merged pair {new SP value, output chain}.
SDValue
14140+
AArch64TargetLowering::LowerInlineDYNAMIC_STACKALLOC(SDValue Op,
14141+
SelectionDAG &DAG) const {
14142+
// Get the inputs.
14143+
SDNode *Node = Op.getNode();
14144+
SDValue Chain = Op.getOperand(0);
14145+
SDValue Size = Op.getOperand(1);
14146+
14147+
MaybeAlign Align =
14148+
cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
14149+
SDLoc dl(Op);
14150+
EVT VT = Node->getValueType(0);
14151+
14152+
// Construct the new SP value in a GPR.
14153+
SDValue SP = DAG.getCopyFromReg(Chain, dl, AArch64::SP, MVT::i64);
14154+
// Continue the chain from the SP read so later nodes are ordered after it.
Chain = SP.getValue(1);
14155+
SP = DAG.getNode(ISD::SUB, dl, MVT::i64, SP, Size);
14156+
// Apply the requested alignment, if any, by masking with its negation.
if (Align)
14157+
SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
14158+
DAG.getConstant(-(uint64_t)Align->value(), dl, VT));
14159+
14160+
// Set the real SP to the new value with a probing loop.
14161+
Chain = DAG.getNode(AArch64ISD::PROBED_ALLOCA, dl, MVT::Other, Chain, SP);
14162+
SDValue Ops[2] = {SP, Chain};
14163+
return DAG.getMergeValues(Ops, dl);
14164+
}
14165+
14166+
// Entry point for custom lowering of DYNAMIC_STACKALLOC. Picks one of three
// strategies:
//   - Windows targets: LowerWindowsDYNAMIC_STACKALLOC.
//   - Functions with inline stack probing: LowerInlineDYNAMIC_STACKALLOC.
//   - Otherwise: returns an empty SDValue.
// NOTE(review): an empty SDValue from a Custom lowering hook presumably makes
// the legalizer fall back to its default expansion - confirm.
SDValue
14167+
AArch64TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
14168+
SelectionDAG &DAG) const {
14169+
MachineFunction &MF = DAG.getMachineFunction();
14170+
14171+
if (Subtarget->isTargetWindows())
14172+
return LowerWindowsDYNAMIC_STACKALLOC(Op, DAG);
14173+
else if (hasInlineStackProbe(MF))
14174+
return LowerInlineDYNAMIC_STACKALLOC(Op, DAG);
14175+
else
14176+
return SDValue();
1408214177
}
1408314178

1408414179
// When x and y are extended, lower:
@@ -14132,51 +14227,6 @@ SDValue AArch64TargetLowering::LowerAVG(SDValue Op, SelectionDAG &DAG,
1413214227
return DAG.getNode(ISD::ADD, dl, VT, Add, tmp);
1413314228
}
1413414229

14135-
SDValue
14136-
AArch64TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
14137-
SelectionDAG &DAG) const {
14138-
assert(Subtarget->isTargetWindows() &&
14139-
"Only Windows alloca probing supported");
14140-
SDLoc dl(Op);
14141-
// Get the inputs.
14142-
SDNode *Node = Op.getNode();
14143-
SDValue Chain = Op.getOperand(0);
14144-
SDValue Size = Op.getOperand(1);
14145-
MaybeAlign Align =
14146-
cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
14147-
EVT VT = Node->getValueType(0);
14148-
14149-
if (DAG.getMachineFunction().getFunction().hasFnAttribute(
14150-
"no-stack-arg-probe")) {
14151-
SDValue SP = DAG.getCopyFromReg(Chain, dl, AArch64::SP, MVT::i64);
14152-
Chain = SP.getValue(1);
14153-
SP = DAG.getNode(ISD::SUB, dl, MVT::i64, SP, Size);
14154-
if (Align)
14155-
SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
14156-
DAG.getConstant(-(uint64_t)Align->value(), dl, VT));
14157-
Chain = DAG.getCopyToReg(Chain, dl, AArch64::SP, SP);
14158-
SDValue Ops[2] = {SP, Chain};
14159-
return DAG.getMergeValues(Ops, dl);
14160-
}
14161-
14162-
Chain = DAG.getCALLSEQ_START(Chain, 0, 0, dl);
14163-
14164-
Chain = LowerWindowsDYNAMIC_STACKALLOC(Op, Chain, Size, DAG);
14165-
14166-
SDValue SP = DAG.getCopyFromReg(Chain, dl, AArch64::SP, MVT::i64);
14167-
Chain = SP.getValue(1);
14168-
SP = DAG.getNode(ISD::SUB, dl, MVT::i64, SP, Size);
14169-
if (Align)
14170-
SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
14171-
DAG.getConstant(-(uint64_t)Align->value(), dl, VT));
14172-
Chain = DAG.getCopyToReg(Chain, dl, AArch64::SP, SP);
14173-
14174-
Chain = DAG.getCALLSEQ_END(Chain, 0, 0, SDValue(), dl);
14175-
14176-
SDValue Ops[2] = {SP, Chain};
14177-
return DAG.getMergeValues(Ops, dl);
14178-
}
14179-
1418014230
SDValue AArch64TargetLowering::LowerVSCALE(SDValue Op,
1418114231
SelectionDAG &DAG) const {
1418214232
EVT VT = Op.getValueType();

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,10 @@ enum NodeType : unsigned {
8383
ADC,
8484
SBC, // adc, sbc instructions
8585

86+
// To avoid stack clash, allocation is performed by block and each block is
87+
// probed.
88+
PROBED_ALLOCA,
89+
8690
// Predicated instructions where inactive lanes produce undefined results.
8791
ABDS_PRED,
8892
ABDU_PRED,
@@ -616,6 +620,9 @@ class AArch64TargetLowering : public TargetLowering {
616620
MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
617621
MachineBasicBlock *BB) const;
618622

623+
MachineBasicBlock *EmitDynamicProbedAlloc(MachineInstr &MI,
624+
MachineBasicBlock *MBB) const;
625+
619626
MachineBasicBlock *EmitTileLoad(unsigned Opc, unsigned BaseReg,
620627
MachineInstr &MI,
621628
MachineBasicBlock *BB) const;
@@ -1141,10 +1148,10 @@ class AArch64TargetLowering : public TargetLowering {
11411148
SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
11421149
SDValue LowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
11431150
SDValue LowerATOMIC_LOAD_AND(SDValue Op, SelectionDAG &DAG) const;
1151+
SDValue LowerWindowsDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
1152+
SDValue LowerInlineDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
11441153
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
1145-
SDValue LowerWindowsDYNAMIC_STACKALLOC(SDValue Op, SDValue Chain,
1146-
SDValue &Size,
1147-
SelectionDAG &DAG) const;
1154+
11481155
SDValue LowerAVG(SDValue Op, SelectionDAG &DAG, unsigned NewOp) const;
11491156

11501157
SDValue LowerFixedLengthVectorIntDivideToSVE(SDValue Op,

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -863,6 +863,12 @@ def AArch64stilp : SDNode<"AArch64ISD::STILP", SDT_AArch64stilp, [SDNPHasChain,
863863
def AArch64stnp : SDNode<"AArch64ISD::STNP", SDT_AArch64stnp, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
864864

865865
def AArch64tbl : SDNode<"AArch64ISD::TBL", SDT_AArch64TBL>;
866+
867+
// SelectionDAG node for AArch64ISD::PROBED_ALLOCA. It produces no value
// result, takes a single pointer-typed operand, carries a chain, and is
// flagged as possibly storing to memory.
def AArch64probedalloca
868+
: SDNode<"AArch64ISD::PROBED_ALLOCA",
869+
SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>,
870+
[SDNPHasChain, SDNPMayStore]>;
871+
866872
def AArch64mrs : SDNode<"AArch64ISD::MRS",
867873
SDTypeProfile<1, 1, [SDTCisVT<0, i64>, SDTCisVT<1, i32>]>,
868874
[SDNPHasChain, SDNPOutGlue]>;
@@ -963,6 +969,14 @@ def PROBED_STACKALLOC_VAR : Pseudo<(outs),
963969
[]>,
964970
Sched<[]>;
965971

972+
// Probed stack allocations of a variable size, used for allocas of unknown size
973+
// when stack-clash protection is enabled.
// Selected from the AArch64probedalloca node; $target is the register operand
// of that node. Because usesCustomInserter is set, the pseudo is expanded by
// AArch64TargetLowering::EmitInstrWithCustomInserter, which forwards it to
// EmitDynamicProbedAlloc.
974+
let usesCustomInserter = 1 in
975+
def PROBED_STACKALLOC_DYN : Pseudo<(outs),
976+
(ins GPR64common:$target),
977+
[(AArch64probedalloca GPR64common:$target)]>,
978+
Sched<[]>;
979+
966980
} // Defs = [SP, NZCV], Uses = [SP] in
967981
} // hasSideEffects = 1, isCodeGenOnly = 1
968982

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
; RUN: llc --stop-after=finalize-isel -o - %s | FileCheck %s
2+
target triple = "aarch64-linux"
3+
4+
; Check dynamic stack allocation and probing instructions do not have
5+
; the FrameSetup flag.
6+
7+
; CHECK-NOT: frame-setup
8+
; Allocates %size bytes on the stack with a runtime-determined size and stores
; the resulting pointer through %out so the allocation is not dead.
define void @no_frame_setup(i64 %size, ptr %out) #0 {
9+
%v = alloca i8, i64 %size, align 1
10+
store ptr %v, ptr %out, align 8
11+
ret void
12+
}
13+
14+
attributes #0 = { uwtable(async) "probe-stack"="inline-asm" "frame-pointer"="none" }

0 commit comments

Comments
 (0)