Skip to content

[AMDGPU] Update base addr of dyn alloca considering GrowingUp stack #119822

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Dec 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 8 additions & 5 deletions llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1204,15 +1204,18 @@ bool AMDGPURegisterBankInfo::applyMappingDynStackAlloc(
auto WaveSize = B.buildConstant(LLT::scalar(32), ST.getWavefrontSizeLog2());
auto ScaledSize = B.buildShl(IntPtrTy, AllocSize, WaveSize);

auto SPCopy = B.buildCopy(PtrTy, SPReg);
auto OldSP = B.buildCopy(PtrTy, SPReg);
if (Alignment > TFI.getStackAlign()) {
auto PtrAdd = B.buildPtrAdd(PtrTy, SPCopy, ScaledSize);
B.buildMaskLowPtrBits(Dst, PtrAdd,
auto StackAlignMask = (Alignment.value() << ST.getWavefrontSizeLog2()) - 1;
auto Tmp1 = B.buildPtrAdd(PtrTy, OldSP,
B.buildConstant(LLT::scalar(32), StackAlignMask));
B.buildMaskLowPtrBits(Dst, Tmp1,
Log2(Alignment) + ST.getWavefrontSizeLog2());
} else {
B.buildPtrAdd(Dst, SPCopy, ScaledSize);
B.buildCopy(Dst, OldSP);
}

auto PtrAdd = B.buildPtrAdd(PtrTy, Dst, ScaledSize);
B.buildCopy(SPReg, PtrAdd);
MI.eraseFromParent();
return true;
}
Expand Down
37 changes: 21 additions & 16 deletions llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4016,8 +4016,9 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
InVals, /*IsThisReturn=*/false, SDValue());
}

// This is identical to the default implementation in ExpandDYNAMIC_STACKALLOC,
// except for applying the wave size scale to the increment amount.
// This is similar to the default implementation in ExpandDYNAMIC_STACKALLOC,
// except for the stack growth direction (default: downwards, AMDGPU: upwards)
// and applying the wave size scale to the increment amount.
SDValue SITargetLowering::lowerDYNAMIC_STACKALLOCImpl(SDValue Op,
SelectionDAG &DAG) const {
const MachineFunction &MF = DAG.getMachineFunction();
Expand All @@ -4037,31 +4038,35 @@ SDValue SITargetLowering::lowerDYNAMIC_STACKALLOCImpl(SDValue Op,
Chain = DAG.getCALLSEQ_START(Chain, 0, 0, dl);

SDValue Size = Tmp2.getOperand(1);
SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
Chain = SP.getValue(1);
MaybeAlign Alignment = cast<ConstantSDNode>(Tmp3)->getMaybeAlignValue();
SDValue BaseAddr = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
Align Alignment = cast<ConstantSDNode>(Tmp3)->getAlignValue();

const TargetFrameLowering *TFL = Subtarget->getFrameLowering();
assert(TFL->getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp &&
"Stack grows upwards for AMDGPU");

Chain = BaseAddr.getValue(1);
Align StackAlign = TFL->getStackAlign();
if (Alignment > StackAlign) {
uint64_t ScaledAlignment = (uint64_t)Alignment.value()
<< Subtarget->getWavefrontSizeLog2();
uint64_t StackAlignMask = ScaledAlignment - 1;
SDValue TmpAddr = DAG.getNode(ISD::ADD, dl, VT, BaseAddr,
DAG.getConstant(StackAlignMask, dl, VT));
BaseAddr = DAG.getNode(ISD::AND, dl, VT, TmpAddr,
DAG.getSignedConstant(-ScaledAlignment, dl, VT));
}

SDValue ScaledSize = DAG.getNode(
ISD::SHL, dl, VT, Size,
DAG.getConstant(Subtarget->getWavefrontSizeLog2(), dl, MVT::i32));

Align StackAlign = TFL->getStackAlign();
Tmp1 = DAG.getNode(ISD::ADD, dl, VT, SP, ScaledSize); // Value
if (Alignment && *Alignment > StackAlign) {
Tmp1 = DAG.getNode(
ISD::AND, dl, VT, Tmp1,
DAG.getSignedConstant(-(uint64_t)Alignment->value()
<< Subtarget->getWavefrontSizeLog2(),
dl, VT));
}
SDValue NewSP = DAG.getNode(ISD::ADD, dl, VT, BaseAddr, ScaledSize); // Value

Chain = DAG.getCopyToReg(Chain, dl, SPReg, Tmp1); // Output chain
Chain = DAG.getCopyToReg(Chain, dl, SPReg, NewSP); // Output chain
Tmp2 = DAG.getCALLSEQ_END(Chain, 0, 0, SDValue(), dl);

return DAG.getMergeValues({Tmp1, Tmp2}, dl);
return DAG.getMergeValues({BaseAddr, Tmp2}, dl);
}

SDValue SITargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
Expand Down
Loading
Loading