-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[ARM] musttail fixes #102896
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[ARM] musttail fixes #102896
Conversation
Thank you for submitting a Pull Request (PR) to the LLVM Project! This PR will be automatically labeled and the relevant teams will be notified. If you wish to, you can add reviewers by using the "Reviewers" section on this page. If this is not working for you, it is probably because you do not have write permissions for the repository. If you have received no comments on your PR for a week, you can request a review. If you have further questions, they may be answered by the LLVM GitHub User Guide. You can also ask questions in a comment on this PR, on the LLVM Discord or on the forums. |
@llvm/pr-subscribers-lldb @llvm/pr-subscribers-clang Author: Kiran (kiran-isaac) Changes Backend:
Frontend (clang):
Patch is 39.90 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/102896.diff 10 Files Affected:
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index e4f221ae55eef..b035c7443ac1c 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -5085,7 +5085,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
RawAddress SRetAlloca = RawAddress::invalid();
llvm::Value *UnusedReturnSizePtr = nullptr;
if (RetAI.isIndirect() || RetAI.isInAlloca() || RetAI.isCoerceAndExpand()) {
- if (IsVirtualFunctionPointerThunk && RetAI.isIndirect()) {
+ if ((IsVirtualFunctionPointerThunk || IsMustTail) && RetAI.isIndirect()) {
SRetPtr = makeNaturalAddressForPointer(CurFn->arg_begin() +
IRFunctionArgs.getSRetArgNo(),
RetTy, CharUnits::fromQuantity(1));
diff --git a/llvm/include/llvm/CodeGen/CallingConvLower.h b/llvm/include/llvm/CodeGen/CallingConvLower.h
index 932a2a94ab1f1..fdb5982cb2042 100644
--- a/llvm/include/llvm/CodeGen/CallingConvLower.h
+++ b/llvm/include/llvm/CodeGen/CallingConvLower.h
@@ -540,6 +540,8 @@ class CCState {
});
}
+ void dump() const;
+
private:
/// MarkAllocated - Mark a register and all of its aliases as allocated.
void MarkAllocated(MCPhysReg Reg);
diff --git a/llvm/lib/CodeGen/CallingConvLower.cpp b/llvm/lib/CodeGen/CallingConvLower.cpp
index b7152587a9fa0..7ba3ea83115db 100644
--- a/llvm/lib/CodeGen/CallingConvLower.cpp
+++ b/llvm/lib/CodeGen/CallingConvLower.cpp
@@ -290,3 +290,64 @@ bool CCState::resultsCompatible(CallingConv::ID CalleeCC,
return std::equal(RVLocs1.begin(), RVLocs1.end(), RVLocs2.begin(),
RVLocs2.end(), AreCompatible);
}
+
+void CCState::dump() const {
+ dbgs() << "CCState:\n";
+ for (const CCValAssign &Loc : Locs) {
+ if (Loc.isRegLoc()) {
+ dbgs() << " Reg " << TRI.getName(Loc.getLocReg());
+ } else if (Loc.isMemLoc()) {
+ dbgs() << " Mem " << Loc.getLocMemOffset();
+ } else {
+ assert(Loc.isPendingLoc());
+ dbgs() << " Pend " << Loc.getExtraInfo();
+ }
+
+ dbgs() << " ValVT:" << Loc.getValVT();
+ dbgs() << " LocVT:" << Loc.getLocVT();
+
+ if (Loc.needsCustom())
+ dbgs() << " custom";
+
+ switch (Loc.getLocInfo()) {
+ case CCValAssign::Full:
+ dbgs() << " Full";
+ break;
+ case CCValAssign::SExt:
+ dbgs() << " SExt";
+ break;
+ case CCValAssign::ZExt:
+ dbgs() << " ZExt";
+ break;
+ case CCValAssign::AExt:
+ dbgs() << " AExt";
+ break;
+ case CCValAssign::SExtUpper:
+ dbgs() << " SExtUpper";
+ break;
+ case CCValAssign::ZExtUpper:
+ dbgs() << " ZExtUpper";
+ break;
+ case CCValAssign::AExtUpper:
+ dbgs() << " AExtUpper";
+ break;
+ case CCValAssign::BCvt:
+ dbgs() << " BCvt";
+ break;
+ case CCValAssign::Trunc:
+ dbgs() << " Trunc";
+ break;
+ case CCValAssign::VExt:
+ dbgs() << " VExt";
+ break;
+ case CCValAssign::FPExt:
+ dbgs() << " FPExt";
+ break;
+ case CCValAssign::Indirect:
+ dbgs() << " Indirect";
+ break;
+ }
+
+ dbgs() << "\n";
+ }
+}
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 1fab30a0b8550..242dfe9cb44ba 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -2407,8 +2407,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
isTailCall = false;
// For both the non-secure calls and the returns from a CMSE entry function,
- // the function needs to do some extra work afte r the call, or before the
- // return, respectively, thus it cannot end with atail call
+ // the function needs to do some extra work after the call, or before the
+ // return, respectively, thus it cannot end with a tail call
if (isCmseNSCall || AFI->isCmseNSEntryFunction())
isTailCall = false;
@@ -2959,50 +2959,6 @@ void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size,
Size = std::max<int>(Size - Excess, 0);
}
-/// MatchingStackOffset - Return true if the given stack call argument is
-/// already available in the same position (relatively) of the caller's
-/// incoming argument stack.
-static
-bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
- MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
- const TargetInstrInfo *TII) {
- unsigned Bytes = Arg.getValueSizeInBits() / 8;
- int FI = std::numeric_limits<int>::max();
- if (Arg.getOpcode() == ISD::CopyFromReg) {
- Register VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
- if (!VR.isVirtual())
- return false;
- MachineInstr *Def = MRI->getVRegDef(VR);
- if (!Def)
- return false;
- if (!Flags.isByVal()) {
- if (!TII->isLoadFromStackSlot(*Def, FI))
- return false;
- } else {
- return false;
- }
- } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
- if (Flags.isByVal())
- // ByVal argument is passed in as a pointer but it's now being
- // dereferenced. e.g.
- // define @foo(%struct.X* %A) {
- // tail call @bar(%struct.X* byval %A)
- // }
- return false;
- SDValue Ptr = Ld->getBasePtr();
- FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
- if (!FINode)
- return false;
- FI = FINode->getIndex();
- } else
- return false;
-
- assert(FI != std::numeric_limits<int>::max());
- if (!MFI.isFixedObjectIndex(FI))
- return false;
- return Offset == MFI.getObjectOffset(FI) && Bytes == MFI.getObjectSize(FI);
-}
-
/// IsEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization. Targets which want to do tail call
/// optimization should implement this function. Note that this function also
@@ -3044,8 +3000,10 @@ bool ARMTargetLowering::IsEligibleForTailCallOptimization(
for (const CCValAssign &AL : ArgLocs)
if (AL.isRegLoc())
AddressRegisters.erase(AL.getLocReg());
- if (AddressRegisters.empty())
+ if (AddressRegisters.empty()) {
+ LLVM_DEBUG(dbgs() << "false (no space for target address)\n");
return false;
+ }
}
// Look for obvious safe cases to perform tail call optimization that do not
@@ -3054,18 +3012,26 @@ bool ARMTargetLowering::IsEligibleForTailCallOptimization(
// Exception-handling functions need a special set of instructions to indicate
// a return to the hardware. Tail-calling another function would probably
// break this.
- if (CallerF.hasFnAttribute("interrupt"))
+ if (CallerF.hasFnAttribute("interrupt")) {
+ LLVM_DEBUG(dbgs() << "false (interrupt attribute)\n");
return false;
+ }
- if (canGuaranteeTCO(CalleeCC, getTargetMachine().Options.GuaranteedTailCallOpt))
+ if (canGuaranteeTCO(CalleeCC,
+ getTargetMachine().Options.GuaranteedTailCallOpt)) {
+ LLVM_DEBUG(dbgs() << (CalleeCC == CallerCC ? "true" : "false")
+ << " (guaranteed tail-call CC)\n");
return CalleeCC == CallerCC;
+ }
- // Also avoid sibcall optimization if either caller or callee uses struct
- // return semantics.
+ // Also avoid sibcall optimization if only one of caller or callee uses
+ // struct return semantics.
bool isCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
bool isCallerStructRet = MF.getFunction().hasStructRetAttr();
- if (isCalleeStructRet || isCallerStructRet)
+ if (isCalleeStructRet != isCallerStructRet) {
+ LLVM_DEBUG(dbgs() << "false (struct-ret)\n");
return false;
+ }
// Externally-defined functions with weak linkage should not be
// tail-called on ARM when the OS does not support dynamic
@@ -3078,8 +3044,11 @@ bool ARMTargetLowering::IsEligibleForTailCallOptimization(
const GlobalValue *GV = G->getGlobal();
const Triple &TT = getTargetMachine().getTargetTriple();
if (GV->hasExternalWeakLinkage() &&
- (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
+ (!TT.isOSWindows() || TT.isOSBinFormatELF() ||
+ TT.isOSBinFormatMachO())) {
+ LLVM_DEBUG(dbgs() << "false (external weak linkage)\n");
return false;
+ }
}
// Check that the call results are passed in the same way.
@@ -3088,70 +3057,44 @@ bool ARMTargetLowering::IsEligibleForTailCallOptimization(
getEffectiveCallingConv(CalleeCC, isVarArg),
getEffectiveCallingConv(CallerCC, CallerF.isVarArg()), MF, C, Ins,
CCAssignFnForReturn(CalleeCC, isVarArg),
- CCAssignFnForReturn(CallerCC, CallerF.isVarArg())))
+ CCAssignFnForReturn(CallerCC, CallerF.isVarArg()))) {
+ LLVM_DEBUG(dbgs() << "false (incompatible results)\n");
return false;
+ }
// The callee has to preserve all registers the caller needs to preserve.
const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
if (CalleeCC != CallerCC) {
const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
- if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
+ if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved)) {
+ LLVM_DEBUG(dbgs() << "false (not all registers preserved)\n");
return false;
+ }
}
- // If Caller's vararg or byval argument has been split between registers and
+ // If Caller's vararg argument has been split between registers and
// stack, do not perform tail call, since part of the argument is in caller's
// local frame.
const ARMFunctionInfo *AFI_Caller = MF.getInfo<ARMFunctionInfo>();
- if (AFI_Caller->getArgRegsSaveSize())
+ if (CLI.IsVarArg && AFI_Caller->getArgRegsSaveSize()) {
+ LLVM_DEBUG(dbgs() << "false (vararg arg reg save area)\n");
return false;
+ }
// If the callee takes no arguments then go on to check the results of the
// call.
- if (!Outs.empty()) {
- if (CCInfo.getStackSize()) {
- // Check if the arguments are already laid out in the right way as
- // the caller's fixed stack objects.
- MachineFrameInfo &MFI = MF.getFrameInfo();
- const MachineRegisterInfo *MRI = &MF.getRegInfo();
- const TargetInstrInfo *TII = Subtarget->getInstrInfo();
- for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
- i != e;
- ++i, ++realArgIdx) {
- CCValAssign &VA = ArgLocs[i];
- EVT RegVT = VA.getLocVT();
- SDValue Arg = OutVals[realArgIdx];
- ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
- if (VA.getLocInfo() == CCValAssign::Indirect)
- return false;
- if (VA.needsCustom() && (RegVT == MVT::f64 || RegVT == MVT::v2f64)) {
- // f64 and vector types are split into multiple registers or
- // register/stack-slot combinations. The types will not match
- // the registers; give up on memory f64 refs until we figure
- // out what to do about this.
- if (!VA.isRegLoc())
- return false;
- if (!ArgLocs[++i].isRegLoc())
- return false;
- if (RegVT == MVT::v2f64) {
- if (!ArgLocs[++i].isRegLoc())
- return false;
- if (!ArgLocs[++i].isRegLoc())
- return false;
- }
- } else if (!VA.isRegLoc()) {
- if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
- MFI, MRI, TII))
- return false;
- }
- }
- }
-
- const MachineRegisterInfo &MRI = MF.getRegInfo();
- if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
- return false;
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals)) {
+ LLVM_DEBUG(dbgs() << "false (parameters in CSRs do not match)\n");
+ return false;
}
+ // If the stack arguments for this call do not fit into our own save area then
+ // the call cannot be made tail.
+ if (CCInfo.getStackSize() > AFI_Caller->getArgumentStackSize())
+ return false;
+
+ LLVM_DEBUG(dbgs() << "true\n");
return true;
}
diff --git a/llvm/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding.ll b/llvm/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding.ll
index d8e22f4f5312a..e186ae3a96150 100644
--- a/llvm/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding.ll
+++ b/llvm/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding.ll
@@ -12,17 +12,11 @@ define void @check227(
; arg1 --> SP+188
entry:
-
-;CHECK: sub sp, sp, #12
-;CHECK: push {r11, lr}
-;CHECK: sub sp, sp, #4
-;CHECK: add r0, sp, #12
-;CHECK: stm r0, {r1, r2, r3}
-;CHECK: ldr r0, [sp, #212]
-;CHECK: bl useInt
-;CHECK: add sp, sp, #4
-;CHECK: pop {r11, lr}
-;CHECK: add sp, sp, #12
+; CHECK: sub sp, sp, #12
+; CHECK: stm sp, {r1, r2, r3}
+; CHECK: ldr r0, [sp, #200]
+; CHECK: add sp, sp, #12
+; CHECK: b useInt
%0 = ptrtoint ptr %arg1 to i32
tail call void @useInt(i32 %0)
diff --git a/llvm/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding2.ll b/llvm/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding2.ll
index 0c5d22984b99e..efdecce9ae723 100644
--- a/llvm/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding2.ll
+++ b/llvm/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding2.ll
@@ -7,14 +7,11 @@
define void @foo(ptr byval(%struct4bytes) %p0, ; --> R0
ptr byval(%struct20bytes) %p1 ; --> R1,R2,R3, [SP+0 .. SP+8)
) {
-;CHECK: sub sp, sp, #16
-;CHECK: push {r11, lr}
-;CHECK: add r12, sp, #8
-;CHECK: stm r12, {r0, r1, r2, r3}
-;CHECK: add r0, sp, #12
-;CHECK: bl useInt
-;CHECK: pop {r11, lr}
-;CHECK: add sp, sp, #16
+;CHECK: sub sp, sp, #16
+;CHECK: stm sp, {r0, r1, r2, r3}
+;CHECK: add r0, sp, #4
+;CHECK: add sp, sp, #16
+;CHECK: b useInt
%1 = ptrtoint ptr %p1 to i32
tail call void @useInt(i32 %1)
diff --git a/llvm/test/CodeGen/ARM/fp-arg-shuffle.ll b/llvm/test/CodeGen/ARM/fp-arg-shuffle.ll
index 4996cc8ecbf02..2ceb7a7b97a1f 100644
--- a/llvm/test/CodeGen/ARM/fp-arg-shuffle.ll
+++ b/llvm/test/CodeGen/ARM/fp-arg-shuffle.ll
@@ -3,6 +3,28 @@
; CHECK: function1
; CHECK-NOT: vmov
define double @function1(double %a, double %b, double %c, double %d, double %e, double %f) nounwind noinline ssp {
+; CHECK-LABEL: function1:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r4, r5, r11, lr}
+; CHECK-NEXT: push {r4, r5, r11, lr}
+; CHECK-NEXT: vldr d16, [sp, #40]
+; CHECK-NEXT: vldr d17, [sp, #32]
+; CHECK-NEXT: vmov r12, lr, d16
+; CHECK-NEXT: vldr d16, [sp, #16]
+; CHECK-NEXT: vmov r4, r5, d17
+; CHECK-NEXT: vldr d17, [sp, #24]
+; CHECK-NEXT: str r3, [sp, #36]
+; CHECK-NEXT: str r2, [sp, #32]
+; CHECK-NEXT: str r1, [sp, #44]
+; CHECK-NEXT: str r0, [sp, #40]
+; CHECK-NEXT: vstr d17, [sp, #16]
+; CHECK-NEXT: vstr d16, [sp, #24]
+; CHECK-NEXT: mov r0, r12
+; CHECK-NEXT: mov r1, lr
+; CHECK-NEXT: mov r2, r4
+; CHECK-NEXT: mov r3, r5
+; CHECK-NEXT: pop {r4, r5, r11, lr}
+; CHECK-NEXT: b function2
entry:
%call = tail call double @function2(double %f, double %e, double %d, double %c, double %b, double %a) nounwind
ret double %call
diff --git a/llvm/test/CodeGen/ARM/fp16-vector-argument.ll b/llvm/test/CodeGen/ARM/fp16-vector-argument.ll
index 6fc56967bc7aa..65aff46658fd1 100644
--- a/llvm/test/CodeGen/ARM/fp16-vector-argument.ll
+++ b/llvm/test/CodeGen/ARM/fp16-vector-argument.ll
@@ -145,26 +145,21 @@ entry:
define void @many_args_test(double, float, i16, <4 x half>, <8 x half>, <8 x half>, <8 x half>) {
; SOFT-LABEL: many_args_test:
; SOFT: @ %bb.0: @ %entry
-; SOFT-NEXT: push {r11, lr}
-; SOFT-NEXT: sub sp, sp, #32
-; SOFT-NEXT: add r12, sp, #80
+; SOFT-NEXT: add r12, sp, #40
; SOFT-NEXT: vld1.64 {d16, d17}, [r12]
-; SOFT-NEXT: add r12, sp, #48
+; SOFT-NEXT: add r12, sp, #8
; SOFT-NEXT: vabs.f16 q8, q8
; SOFT-NEXT: vld1.64 {d18, d19}, [r12]
-; SOFT-NEXT: add r12, sp, #64
+; SOFT-NEXT: add r12, sp, #24
; SOFT-NEXT: vadd.f16 q8, q8, q9
; SOFT-NEXT: vld1.64 {d18, d19}, [r12]
; SOFT-NEXT: add r12, sp, #16
; SOFT-NEXT: vmul.f16 q8, q9, q8
; SOFT-NEXT: vst1.64 {d16, d17}, [r12]
-; SOFT-NEXT: mov r12, sp
-; SOFT-NEXT: vldr d16, [sp, #40]
-; SOFT-NEXT: vst1.16 {d16}, [r12:64]!
-; SOFT-NEXT: str r3, [r12]
-; SOFT-NEXT: bl use
-; SOFT-NEXT: add sp, sp, #32
-; SOFT-NEXT: pop {r11, pc}
+; SOFT-NEXT: vldr d16, [sp]
+; SOFT-NEXT: vstr d16, [sp]
+; SOFT-NEXT: str r3, [sp, #8]
+; SOFT-NEXT: b use
;
; HARD-LABEL: many_args_test:
; HARD: @ %bb.0: @ %entry
@@ -177,33 +172,25 @@ define void @many_args_test(double, float, i16, <4 x half>, <8 x half>, <8 x hal
;
; SOFTEB-LABEL: many_args_test:
; SOFTEB: @ %bb.0: @ %entry
-; SOFTEB-NEXT: .save {r11, lr}
-; SOFTEB-NEXT: push {r11, lr}
-; SOFTEB-NEXT: .pad #32
-; SOFTEB-NEXT: sub sp, sp, #32
-; SOFTEB-NEXT: add r12, sp, #80
-; SOFTEB-NEXT: mov lr, sp
+; SOFTEB-NEXT: add r12, sp, #40
; SOFTEB-NEXT: vld1.64 {d16, d17}, [r12]
-; SOFTEB-NEXT: add r12, sp, #48
+; SOFTEB-NEXT: add r12, sp, #8
; SOFTEB-NEXT: vrev64.16 q8, q8
; SOFTEB-NEXT: vabs.f16 q8, q8
; SOFTEB-NEXT: vld1.64 {d18, d19}, [r12]
-; SOFTEB-NEXT: add r12, sp, #64
+; SOFTEB-NEXT: add r12, sp, #24
; SOFTEB-NEXT: vrev64.16 q9, q9
; SOFTEB-NEXT: vadd.f16 q8, q8, q9
; SOFTEB-NEXT: vld1.64 {d18, d19}, [r12]
; SOFTEB-NEXT: add r12, sp, #16
; SOFTEB-NEXT: vrev64.16 q9, q9
; SOFTEB-NEXT: vmul.f16 q8, q9, q8
-; SOFTEB-NEXT: vldr d18, [sp, #40]
-; SOFTEB-NEXT: vrev64.16 d18, d18
-; SOFTEB-NEXT: vst1.16 {d18}, [lr:64]!
-; SOFTEB-NEXT: str r3, [lr]
+; SOFTEB-NEXT: vldr d18, [sp]
; SOFTEB-NEXT: vrev64.16 q8, q8
; SOFTEB-NEXT: vst1.64 {d16, d17}, [r12]
-; SOFTEB-NEXT: bl use
-; SOFTEB-NEXT: add sp, sp, #32
-; SOFTEB-NEXT: pop {r11, pc}
+; SOFTEB-NEXT: vstr d18, [sp]
+; SOFTEB-NEXT: str r3, [sp, #8]
+; SOFTEB-NEXT: b use
;
; HARDEB-LABEL: many_args_test:
; HARDEB: @ %bb.0: @ %entry
diff --git a/llvm/test/CodeGen/ARM/struct_byval.ll b/llvm/test/CodeGen/ARM/struct_byval.ll
index 73a1b5ee33bca..5564f254c9e74 100644
--- a/llvm/test/CodeGen/ARM/struct_byval.ll
+++ b/llvm/test/CodeGen/ARM/struct_byval.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=armv7-apple-ios6.0 | FileCheck %s
; RUN: llc < %s -mtriple=thumbv7-apple-ios6.0 | FileCheck %s
; RUN: llc < %s -mtriple=armv7-unknown-nacl-gnueabi | FileCheck %s -check-prefix=NACL
@@ -10,11 +11,122 @@
%struct.LargeStruct = type { i32, [1001 x i8], [300 x i32] }
define i32 @f() nounwind ssp {
+; NACL-LABEL: f:
+; NACL: @ %bb.0: @ %entry
+; NACL-NEXT: .save {r4, lr}
+; NACL-NEXT: push {r4, lr}
+; NACL-NEXT: .pad #152
+; NACL-NEXT: sub sp, sp, #152
+; NACL-NEXT: movw r0, :lower16:__stack_chk_guard
+; NACL-NEXT: add r3, sp, #72
+; NACL-NEXT: movt r0, :upper16:__stack_chk_guard
+; NACL-NEXT: mov lr, sp
+; NACL-NEXT: ldr r0, [r0]
+; NACL-NEXT: str r0, [sp, #148]
+; NACL-NEXT: add r0, sp, #72
+; NACL-NEXT: add r12, r0, #16
+; NACL-NEXT: ldm r3, {r0, r1, r2, r3}
+; NACL-NEXT: ldr r4, [r12], #4
+; NACL-NEXT: str r4, [lr], #4
+; NACL-NEXT: ldr r4, [r12], #4
+; NACL-NEXT: str r4, [lr], #4
+; NACL-NEXT: ldr r4, [r12], #4
+; NACL-NEXT: str r4, [lr], #4
+; NACL-NEXT: ldr r4, [r12], #4
+; NACL-NEXT: str r4, [lr], #4
+; NACL-NEXT: ldr r4, [r12], #4
+; NACL-NEXT: str r4, [lr], #4
+; NACL-NEXT: ldr r4, [r12], #4
+; NACL-NEXT: str r4, [lr], #4
+; NACL-NEXT: ldr r4, [r12], #4
+; NACL-NEXT: str r4, [lr], #4
+; NACL-NEXT: ldr r4, [r12], #4
+; NACL-NEXT: str r4, [lr], #4
+; NACL-NEXT: ldr r4, [r12], #4
+; NACL-NEXT: str r4, [lr], #4
+; NACL-NEXT: ldr r4, [r12], #4
+; NACL-NEXT: str r4, [lr], #4
+; NACL-NEXT: ldr r4, [r12], #4
+; NACL-NEXT: str r4, [lr], #4
+; NACL-NEXT: ldr r4, [r12], #4
+; NACL-NEXT: str r4, [lr], #4
+; NACL-NEXT: ldr r4, [r12], #4
+; NACL-NEXT: str r4, [lr], #4
+; NACL-NEXT: ldr r4, [r12], #4
+; NACL-NEXT: str r4, [lr], #4
+; NACL-NEXT: ldr r4, [r12], #4
+; NACL-NEXT: str r4, [lr], #4
+; NACL-N...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please also add a testcase for something like:
struct Large { int x[60]; };
void f(long long a, long long b, Large c, Large d);
void g(long long a, long long b, Large c, Large d) {
[[clang::musttail]] return f(a,b,d,c);
}
9c8c441
to
03d4cc0
Compare
Accidentally pushed changes to this PR to main. Have reverted. see 1a908c6. I meant to push to my fork. Have now done so. Whoops, having a bit of a git nightmare today |
03d4cc0
to
39371e0
Compare
Force push to remove revert commit, rather than reverting the revert. |
349e5ee
to
39371e0
Compare
Really made a mess of it here. For anyone trying to understand what on earth happened:
Apologies for any confusion. |
Backend: - Caller and callee arguments no longer have to match, just to take up the same space, as they can be changed before the call - Allowed tail calls if caller and callee both (or neither) use sret, whereas before it would be disallowed if either used sret - Allowed tail calls if byval args are used - Added debug trace for IsEligibleForTailCallOptimization Frontend (clang): - Do not generate extra alloca if sret is used with musttail, as the space for the sret is allocated already Change-Id: Ic7f246a7eca43c06874922d642d7dc44bdfc98ec
2e4db4c
to
082c980
Compare
never let me use git again. Forgot to revert one of my commits to main, causing merge conflicts. |
fixes #57069, and adds a test reflecting this |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What's the state of byval handling with the current version of the patch?
byval arguments to a tail call are allowed as long as they are not split between register and stack, same as before. My change to this check was incorrect |
Added this testcase (compiled to IR); it works with my patch and fails without it. |
@@ -0,0 +1,43 @@ | |||
; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please generate checks with update_llc_test_checks.py
define dso_local void @large_caller(i64 noundef %0, i64 noundef %1, %struct.Large* noundef byval(%struct.Large) align 4 %2, %struct.Large* noundef byval(%struct.Large) align 4 %3) #0 { | ||
entry: | ||
; CHECK: b large_callee | ||
musttail call void @large_callee(i64 noundef %0, i64 noundef %1, %struct.Large* noundef byval(%struct.Large) align 4 %2, %struct.Large* noundef byval(%struct.Large) align 4 %3) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please add tests for both "%0, %1, %2, %3" and "%0, %1, %3, %2".
Superseded by #109943. |
Backend:
Frontend (clang):