diff --git a/llvm/lib/Target/AArch64/AArch64CallingConvention.td b/llvm/lib/Target/AArch64/AArch64CallingConvention.td
index 6f885f4588c4b..2bbb4997d56a5 100644
--- a/llvm/lib/Target/AArch64/AArch64CallingConvention.td
+++ b/llvm/lib/Target/AArch64/AArch64CallingConvention.td
@@ -17,6 +17,11 @@ class CCIfBigEndian<CCAction A> :
 class CCIfILP32<CCAction A> :
   CCIf<"State.getMachineFunction().getDataLayout().getPointerSize() == 4", A>;
 
+/// CCIfSubtarget - Match if the current subtarget has a feature F.
+class CCIfSubtarget<string F, CCAction A>
+    : CCIf<!strconcat("State.getMachineFunction().getSubtarget<AArch64Subtarget>().", F),
+           A>;
+
 //===----------------------------------------------------------------------===//
 // ARM AAPCS64 Calling Convention
 //===----------------------------------------------------------------------===//
@@ -496,36 +501,44 @@ def CC_AArch64_GHC : CallingConv<[
 
 let Entry = 1 in
 def CC_AArch64_Preserve_None : CallingConv<[
-  // We can pass arguments in all general registers, except:
-  // - X8, used for sret
-  // - X16/X17, used by the linker as IP0/IP1
-  // - X18, the platform register
-  // - X19, the base pointer
-  // - X29, the frame pointer
-  // - X30, the link register
-  // General registers are not preserved with the exception of
-  // FP, LR, and X18
-  // Non-volatile registers are used first, so functions may call
-  // normal functions without saving and reloading arguments.
-  // X9 is assigned last as it is used in FrameLowering as the first
-  // choice for a scratch register.
-  CCIfType<[i32], CCAssignToReg<[W20, W21, W22, W23,
-                                 W24, W25, W26, W27, W28,
-                                 W0, W1, W2, W3, W4, W5,
-                                 W6, W7, W10, W11,
-                                 W12, W13, W14, W9]>>,
-  CCIfType<[i64], CCAssignToReg<[X20, X21, X22, X23,
-                                 X24, X25, X26, X27, X28,
-                                 X0, X1, X2, X3, X4, X5,
-                                 X6, X7, X10, X11,
-                                 X12, X13, X14, X9]>>,
-
-  // Windows uses X15 for stack allocation
-  CCIf<"!State.getMachineFunction().getSubtarget<AArch64Subtarget>().isTargetWindows()",
-       CCIfType<[i32], CCAssignToReg<[W15]>>>,
-  CCIf<"!State.getMachineFunction().getSubtarget<AArch64Subtarget>().isTargetWindows()",
-       CCIfType<[i64], CCAssignToReg<[X15]>>>,
-  CCDelegateTo<CC_AArch64_AAPCS>
+  // VarArgs are only supported using the C calling convention.
+  // This handles the non-variadic parameter case. Variadic parameters
+  // are handled in CCAssignFnForCall.
+  CCIfVarArg<CCIfSubtarget<"isTargetDarwin()", CCDelegateTo<CC_AArch64_DarwinPCS>>>,
+  CCIfVarArg<CCIfSubtarget<"isTargetWindows()", CCDelegateTo<CC_AArch64_Win64PCS>>>,
+  CCIfVarArg<CCDelegateTo<CC_AArch64_AAPCS>>,
+
+  // We can pass arguments in all general registers, except:
+  // - X8, used for sret
+  // - X16/X17, used by the linker as IP0/IP1
+  // - X18, the platform register
+  // - X19, the base pointer
+  // - X29, the frame pointer
+  // - X30, the link register
+  // General registers are not preserved with the exception of
+  // FP, LR, and X18
+  // Non-volatile registers are used first, so functions may call
+  // normal functions without saving and reloading arguments.
+  // X9 is assigned last as it is used in FrameLowering as the first
+  // choice for a scratch register.
+  CCIfType<[i32], CCAssignToReg<[W20, W21, W22, W23,
+                                 W24, W25, W26, W27, W28,
+                                 W0, W1, W2, W3, W4, W5,
+                                 W6, W7, W10, W11,
+                                 W12, W13, W14, W9]>>,
+  CCIfType<[i64], CCAssignToReg<[X20, X21, X22, X23,
+                                 X24, X25, X26, X27, X28,
+                                 X0, X1, X2, X3, X4, X5,
+                                 X6, X7, X10, X11,
+                                 X12, X13, X14, X9]>>,
+
+  // Windows uses X15 for stack allocation
+  CCIf<"!State.getMachineFunction().getSubtarget<AArch64Subtarget>().isTargetWindows()",
+       CCIfType<[i32], CCAssignToReg<[W15]>>>,
+  CCIf<"!State.getMachineFunction().getSubtarget<AArch64Subtarget>().isTargetWindows()",
+       CCIfType<[i64], CCAssignToReg<[X15]>>>,
+
+  CCDelegateTo<CC_AArch64_AAPCS>
 ]>;
 
 // The order of the callee-saves in this file is important, because the
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index dac03bc3c1d9e..65461bf86e0c5 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -1901,8 +1901,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
     return;
   }
 
-  bool IsWin64 =
-      Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv());
+  bool IsWin64 = Subtarget.isCallingConvWin64(F.getCallingConv(), F.isVarArg());
   unsigned FixedObject = getFixedObjectSize(MF, AFI, IsWin64, IsFunclet);
 
   auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
@@ -2308,8 +2307,8 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
   // How much of the stack used by incoming arguments this function is expected
   // to restore in this particular epilogue.
   int64_t ArgumentStackToRestore = getArgumentStackToRestore(MF, MBB);
-  bool IsWin64 =
-      Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv());
+  bool IsWin64 = Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv(),
+                                              MF.getFunction().isVarArg());
   unsigned FixedObject = getFixedObjectSize(MF, AFI, IsWin64, IsFunclet);
 
   int64_t AfterCSRPopSize = ArgumentStackToRestore;
@@ -2615,8 +2614,8 @@ static StackOffset getFPOffset(const MachineFunction &MF,
                                int64_t ObjectOffset) {
   const auto *AFI = MF.getInfo<AArch64FunctionInfo>();
   const auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
-  bool IsWin64 =
-      Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv());
+  const Function &F = MF.getFunction();
+  bool IsWin64 = Subtarget.isCallingConvWin64(F.getCallingConv(), F.isVarArg());
   unsigned FixedObject =
       getFixedObjectSize(MF, AFI, IsWin64, /*IsFunclet=*/false);
   int64_t CalleeSaveSize = AFI->getCalleeSavedStackSize(MF.getFrameInfo());
@@ -2722,9 +2721,9 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
       // via the frame pointer, so we have to use the FP in the parent
       // function.
       (void) Subtarget;
-      assert(
-          Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv()) &&
-          "Funclets should only be present on Win64");
+      assert(Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv(),
+                                          MF.getFunction().isVarArg()) &&
+             "Funclets should only be present on Win64");
       UseFP = true;
     } else {
       // We have the choice between FP and (SP or BP).
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index dbaf31166229b..339ff5b8ce03b 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -7109,7 +7109,13 @@ CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
   case CallingConv::GHC:
     return CC_AArch64_GHC;
   case CallingConv::PreserveNone:
-    return CC_AArch64_Preserve_None;
+    // The VarArg implementation makes assumptions about register
+    // argument passing that do not hold for preserve_none, so we
+    // instead fall back to C argument passing.
+    // The non-vararg case is handled in the CC function itself.
+    if (!IsVarArg)
+      return CC_AArch64_Preserve_None;
+    [[fallthrough]];
   case CallingConv::C:
   case CallingConv::Fast:
   case CallingConv::PreserveMost:
@@ -7182,7 +7188,8 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
   MachineFunction &MF = DAG.getMachineFunction();
   const Function &F = MF.getFunction();
   MachineFrameInfo &MFI = MF.getFrameInfo();
-  bool IsWin64 = Subtarget->isCallingConvWin64(F.getCallingConv());
+  bool IsWin64 =
+      Subtarget->isCallingConvWin64(F.getCallingConv(), F.isVarArg());
   bool StackViaX4 = CallConv == CallingConv::ARM64EC_Thunk_X64 ||
                     (isVarArg && Subtarget->isWindowsArm64EC());
   AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
@@ -7634,7 +7641,9 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
   MachineFrameInfo &MFI = MF.getFrameInfo();
   AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
   auto PtrVT = getPointerTy(DAG.getDataLayout());
-  bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv());
+  Function &F = MF.getFunction();
+  bool IsWin64 =
+      Subtarget->isCallingConvWin64(F.getCallingConv(), F.isVarArg());
 
   SmallVector<SDValue, 8> MemOps;
 
@@ -7805,6 +7814,21 @@ static bool mayTailCallThisCC(CallingConv::ID CC) {
   }
 }
 
+/// Return true if the call convention supports varargs
+/// Currently only those that pass varargs like the C
+/// calling convention does are eligible
+/// Calling conventions listed in this function must also
+/// be properly handled in AArch64Subtarget::isCallingConvWin64
+static bool callConvSupportsVarArgs(CallingConv::ID CC) {
+  switch (CC) {
+  case CallingConv::C:
+  case CallingConv::PreserveNone:
+    return true;
+  default:
+    return false;
+  }
+}
+
 static void analyzeCallOperands(const AArch64TargetLowering &TLI,
                                 const AArch64Subtarget *Subtarget,
                                 const TargetLowering::CallLoweringInfo &CLI,
@@ -7813,7 +7837,7 @@ static void analyzeCallOperands(const AArch64TargetLowering &TLI,
   CallingConv::ID CalleeCC = CLI.CallConv;
   bool IsVarArg = CLI.IsVarArg;
   const SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
-  bool IsCalleeWin64 = Subtarget->isCallingConvWin64(CalleeCC);
+  bool IsCalleeWin64 = Subtarget->isCallingConvWin64(CalleeCC, IsVarArg);
 
   // For Arm64EC thunks, allocate 32 extra bytes at the bottom of the stack
   // for the shadow store.
@@ -7941,8 +7965,8 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization(
 
   // I want anyone implementing a new calling convention to think long and hard
   // about this assert.
-  assert((!IsVarArg || CalleeCC == CallingConv::C) &&
-         "Unexpected variadic calling convention");
+  if (IsVarArg && !callConvSupportsVarArgs(CalleeCC))
+    report_fatal_error("Unsupported variadic calling convention");
   LLVMContext &C = *DAG.getContext();
 
   // Check that the call results are passed in the same way.
@@ -10872,8 +10896,9 @@ SDValue AArch64TargetLowering::LowerAAPCS_VASTART(SDValue Op,
 SDValue AArch64TargetLowering::LowerVASTART(SDValue Op,
                                             SelectionDAG &DAG) const {
   MachineFunction &MF = DAG.getMachineFunction();
+  Function &F = MF.getFunction();
 
-  if (Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv()))
+  if (Subtarget->isCallingConvWin64(F.getCallingConv(), F.isVarArg()))
     return LowerWin64_VASTART(Op, DAG);
   else if (Subtarget->isTargetDarwin())
     return LowerDarwin_VASTART(Op, DAG);
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
index 1e069f4790c53..435cc18cdea62 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -611,7 +611,8 @@ bool AArch64RegisterInfo::isArgumentRegister(const MachineFunction &MF,
                                              MCRegister Reg) const {
   CallingConv::ID CC = MF.getFunction().getCallingConv();
   const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>();
-  bool IsVarArg = STI.isCallingConvWin64(MF.getFunction().getCallingConv());
+  bool IsVarArg = STI.isCallingConvWin64(MF.getFunction().getCallingConv(),
+                                         MF.getFunction().isVarArg());
 
   auto HasReg = [](ArrayRef<MCRegister> RegList, MCRegister Reg) {
     return llvm::is_contained(RegList, Reg);
@@ -623,7 +624,9 @@ bool AArch64RegisterInfo::isArgumentRegister(const MachineFunction &MF,
   case CallingConv::GHC:
     return HasReg(CC_AArch64_GHC_ArgRegs, Reg);
   case CallingConv::PreserveNone:
-    return HasReg(CC_AArch64_Preserve_None_ArgRegs, Reg);
+    if (!MF.getFunction().isVarArg())
+      return HasReg(CC_AArch64_Preserve_None_ArgRegs, Reg);
+    [[fallthrough]];
   case CallingConv::C:
   case CallingConv::Fast:
   case CallingConv::PreserveMost:
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
index 4b840b24ba134..12c3d25d32ee7 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -322,13 +322,15 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
 
   std::unique_ptr<PBQPRAConstraint> getCustomPBQPConstraints() const override;
 
-  bool isCallingConvWin64(CallingConv::ID CC) const {
+  bool isCallingConvWin64(CallingConv::ID CC, bool IsVarArg) const {
     switch (CC) {
     case CallingConv::C:
    case CallingConv::Fast:
     case CallingConv::Swift:
     case CallingConv::SwiftTail:
       return isTargetWindows();
+    case CallingConv::PreserveNone:
+      return IsVarArg && isTargetWindows();
     case CallingConv::Win64:
       return true;
     default:
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
index 5206ba46260ed..b4d2a3388c1df 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
@@ -117,7 +117,9 @@ struct AArch64OutgoingValueAssigner
                  CCValAssign::LocInfo LocInfo,
                  const CallLowering::ArgInfo &Info, ISD::ArgFlagsTy Flags,
                  CCState &State) override {
-    bool IsCalleeWin = Subtarget.isCallingConvWin64(State.getCallingConv());
+    const Function &F = State.getMachineFunction().getFunction();
+    bool IsCalleeWin =
+        Subtarget.isCallingConvWin64(State.getCallingConv(), F.isVarArg());
     bool UseVarArgsCCForFixed = IsCalleeWin && State.isVarArg();
 
     bool Res;
@@ -557,8 +559,8 @@ void AArch64CallLowering::saveVarArgRegisters(
   MachineFrameInfo &MFI = MF.getFrameInfo();
   AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
   auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
-  bool IsWin64CC =
-      Subtarget.isCallingConvWin64(CCInfo.getCallingConv());
+  bool IsWin64CC = Subtarget.isCallingConvWin64(CCInfo.getCallingConv(),
+                                                MF.getFunction().isVarArg());
 
   const LLT p0 = LLT::pointer(0, 64);
   const LLT s64 = LLT::scalar(64);
@@ -653,7 +655,9 @@ bool AArch64CallLowering::lowerFormalArguments(
       F.getCallingConv() == CallingConv::ARM64EC_Thunk_X64)
     return false;
 
-  bool IsWin64 = Subtarget.isCallingConvWin64(F.getCallingConv()) && !Subtarget.isWindowsArm64EC();
+  bool IsWin64 =
+      Subtarget.isCallingConvWin64(F.getCallingConv(), F.isVarArg()) &&
+      !Subtarget.isWindowsArm64EC();
 
   SmallVector<ArgInfo, 8> SplitArgs;
   SmallVector<std::pair<Register, Register>> BoolArgs;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 0d3f6d9e353ba..009928a8a7488 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -2006,7 +2006,7 @@ bool AArch64InstructionSelector::selectVaStartDarwin(
 
   int FrameIdx = FuncInfo->getVarArgsStackIndex();
   if (MF.getSubtarget<AArch64Subtarget>().isCallingConvWin64(
-          MF.getFunction().getCallingConv())) {
+          MF.getFunction().getCallingConv(), MF.getFunction().isVarArg())) {
     FrameIdx = FuncInfo->getVarArgsGPRSize() > 0
                    ? FuncInfo->getVarArgsGPRIndex()
                    : FuncInfo->getVarArgsStackIndex();
diff --git a/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_aapcs.ll b/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_aapcs.ll
new file mode 100644
index 0000000000000..48898719f40ce
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_aapcs.ll
@@ -0,0 +1,123 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64 < %s | FileCheck %s
+
+%va_list = type { ptr, ptr, ptr, i32, i32 }
+
+define preserve_nonecc i32 @callee(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, ...) nounwind noinline ssp {
+; CHECK-LABEL: callee:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sub sp, sp, #192
+; CHECK-NEXT:    mov x8, #-24 // =0xffffffffffffffe8
+; CHECK-NEXT:    mov x9, sp
+; CHECK-NEXT:    add x10, sp, #136
+; CHECK-NEXT:    movk x8, #65408, lsl #32
+; CHECK-NEXT:    add x9, x9, #128
+; CHECK-NEXT:    stp x6, x7, [sp, #144]
+; CHECK-NEXT:    stp x9, x8, [sp, #176]
+; CHECK-NEXT:    add x9, x10, #24
+; CHECK-NEXT:    add x10, sp, #192
+; CHECK-NEXT:    mov w8, #-24 // =0xffffffe8
+; CHECK-NEXT:    str x5, [sp, #136]
+; CHECK-NEXT:    stp q0, q1, [sp]
+; CHECK-NEXT:    stp q2, q3, [sp, #32]
+; CHECK-NEXT:    stp q4, q5, [sp, #64]
+; CHECK-NEXT:    stp q6, q7, [sp, #96]
+; CHECK-NEXT:    stp x10, x9, [sp, #160]
+; CHECK-NEXT:    tbz w8, #31, .LBB0_3
+; CHECK-NEXT:  // %bb.1: // %maybe_reg
+; CHECK-NEXT:    add w9, w8, #8
+; CHECK-NEXT:    cmp w9, #0
+; CHECK-NEXT:    str w9, [sp, #184]
+; CHECK-NEXT:    b.gt .LBB0_3
+; CHECK-NEXT:  // %bb.2: // %in_reg
+; CHECK-NEXT:    ldr x9, [sp, #168]
+; CHECK-NEXT:    add x8, x9, w8, sxtw
+; CHECK-NEXT:    b .LBB0_4
+; CHECK-NEXT:  .LBB0_3: // %on_stack
+; CHECK-NEXT:    ldr x8, [sp, #160]
+; CHECK-NEXT:    add x9, x8, #8
+; CHECK-NEXT:    str x9, [sp, #160]
+; CHECK-NEXT:  .LBB0_4: // %end
+; CHECK-NEXT:    ldr w0, [x8]
+; CHECK-NEXT:    add sp, sp, #192
+; CHECK-NEXT:    ret
+entry:
+  %args = alloca %va_list, align 8
+  call void @llvm.va_start(ptr %args)
+  %gr_offs_p = getelementptr inbounds %va_list, ptr %args, i32 0, i32 3
+  %gr_offs = load i32, ptr %gr_offs_p, align 8
+  %0 = icmp sge i32 %gr_offs, 0
+  br i1 %0, label %on_stack, label %maybe_reg
+
+maybe_reg:
+  %new_reg_offs = add i32 %gr_offs, 8
+  store i32 %new_reg_offs, ptr %gr_offs_p, align 8
+  %inreg = icmp sle i32 %new_reg_offs, 0
+  br i1 %inreg, label %in_reg, label %on_stack
+
+in_reg:
+  %reg_top_p = getelementptr inbounds %va_list, ptr %args, i32 0, i32 1
+  %reg_top = load ptr, ptr %reg_top_p, align 8
+  %reg = getelementptr inbounds i8, ptr %reg_top, i32 %gr_offs
+  br label %end
+
+on_stack:
+  %stack_p = getelementptr inbounds %va_list, ptr %args, i32 0, i32 0
+  %stack = load ptr, ptr %stack_p, align 8
+  %new_stack = getelementptr inbounds i8, ptr %stack, i64 8
+  store ptr %new_stack, ptr %stack_p, align 8
+  br label %end
+
+end:
+  %p = phi ptr [ %reg, %in_reg ], [ %stack, %on_stack ]
+  %10 = load i32, ptr %p, align 8
+  call void @llvm.va_end.p0(ptr %args)
+  ret i32 %10
+}
+
+declare void @llvm.va_start(ptr) nounwind
+declare void @llvm.va_end(ptr) nounwind
+
+define i32 @caller() nounwind ssp {
+; CHECK-LABEL: caller:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #176
+; CHECK-NEXT:    mov w8, #10 // =0xa
+; CHECK-NEXT:    mov w9, #9 // =0x9
+; CHECK-NEXT:    mov w0, #1 // =0x1
+; CHECK-NEXT:    mov w1, #2 // =0x2
+; CHECK-NEXT:    mov w2, #3 // =0x3
+; CHECK-NEXT:    mov w3, #4 // =0x4
+; CHECK-NEXT:    mov w4, #5 // =0x5
+; CHECK-NEXT:    mov w5, #6 // =0x6
+; CHECK-NEXT:    mov w6, #7 // =0x7
+; CHECK-NEXT:    mov w7, #8 // =0x8
+; CHECK-NEXT:    stp d15, d14, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT:    stp d13, d12, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT:    stp d11, d10, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT:    stp d9, d8, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT:    str x30, [sp, #80] // 8-byte Folded Spill
+; CHECK-NEXT:    stp x28, x27, [sp, #96] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x26, x25, [sp, #112] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x24, x23, [sp, #128] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x22, x21, [sp, #144] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x20, x19, [sp, #160] // 16-byte Folded Spill
+; CHECK-NEXT:    str w8, [sp, #8]
+; CHECK-NEXT:    str w9, [sp]
+; CHECK-NEXT:    bl callee
+; CHECK-NEXT:    ldp x20, x19, [sp, #160] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr x30, [sp, #80] // 8-byte Folded Reload
+; CHECK-NEXT:    ldp x22, x21, [sp, #144] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp x24, x23, [sp, #128] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp x26, x25, [sp, #112] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp x28, x27, [sp, #96] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp d11, d10, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp d13, d12, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp d15, d14, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #176
+; CHECK-NEXT:    ret
+  %r = tail call preserve_nonecc i32 (i32, i32, i32, i32, i32, ...) @callee(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10)
+  ret i32 %r
+}
+
diff --git a/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_darwin.ll b/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_darwin.ll
new file mode 100644
index 0000000000000..e227f14542cc1
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_darwin.ll
@@ -0,0 +1,67 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64-apple-darwin < %s | FileCheck %s
+
+define preserve_nonecc i32 @callee(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, ...) nounwind noinline ssp {
+; CHECK-LABEL: callee:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    sub sp, sp, #16
+; CHECK-NEXT:    add x8, sp, #16
+; CHECK-NEXT:    ldr w0, [sp, #16]
+; CHECK-NEXT:    orr x8, x8, #0x8
+; CHECK-NEXT:    str x8, [sp, #8]
+; CHECK-NEXT:    add sp, sp, #16
+; CHECK-NEXT:    ret
+  %args = alloca ptr, align 8
+  call void @llvm.va_start(ptr %args)
+  %10 = va_arg ptr %args, i32
+  call void @llvm.va_end(ptr %args)
+  ret i32 %10
+}
+
+declare void @llvm.va_start(ptr) nounwind
+declare void @llvm.va_end(ptr) nounwind
+
+define i32 @caller() nounwind ssp {
+; CHECK-LABEL: caller:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    sub sp, sp, #208
+; CHECK-NEXT:    mov w8, #10 ; =0xa
+; CHECK-NEXT:    mov w9, #9 ; =0x9
+; CHECK-NEXT:    mov w0, #1 ; =0x1
+; CHECK-NEXT:    stp x9, x8, [sp, #24]
+; CHECK-NEXT:    mov w8, #8 ; =0x8
+; CHECK-NEXT:    mov w9, #6 ; =0x6
+; CHECK-NEXT:    str x8, [sp, #16]
+; CHECK-NEXT:    mov w8, #7 ; =0x7
+; CHECK-NEXT:    mov w1, #2 ; =0x2
+; CHECK-NEXT:    mov w2, #3 ; =0x3
+; CHECK-NEXT:    mov w3, #4 ; =0x4
+; CHECK-NEXT:    mov w4, #5 ; =0x5
+; CHECK-NEXT:    stp d15, d14, [sp, #48] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp d13, d12, [sp, #64] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp d11, d10, [sp, #80] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp d9, d8, [sp, #96] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp x28, x27, [sp, #112] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp x26, x25, [sp, #128] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp x24, x23, [sp, #144] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp x22, x21, [sp, #160] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp x20, x19, [sp, #176] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp x29, x30, [sp, #192] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp x9, x8, [sp]
+; CHECK-NEXT:    bl _callee
+; CHECK-NEXT:    ldp x29, x30, [sp, #192] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp x20, x19, [sp, #176] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp x22, x21, [sp, #160] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp x24, x23, [sp, #144] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp x26, x25, [sp, #128] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp x28, x27, [sp, #112] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp d9, d8, [sp, #96] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp d11, d10, [sp, #80] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp d13, d12, [sp, #64] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp d15, d14, [sp, #48] ; 16-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #208
+; CHECK-NEXT:    ret
+  %r = tail call preserve_nonecc i32 (i32, i32, i32, i32, i32, ...) @callee(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10)
+  ret i32 %r
+}
+
diff --git a/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_win64.ll b/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_win64.ll
new file mode 100644
index 0000000000000..83dd240a6540f
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_win64.ll
@@ -0,0 +1,69 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64-pc-windows < %s | FileCheck %s
+
+define preserve_nonecc i32 @callee(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, ...) nounwind noinline ssp {
+; CHECK-LABEL: callee:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #48
+; CHECK-NEXT:    mov x0, x5
+; CHECK-NEXT:    add x8, sp, #24
+; CHECK-NEXT:    stp x6, x7, [sp, #32]
+; CHECK-NEXT:    str x5, [sp, #24]
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT:    str x8, [sp, #8]
+; CHECK-NEXT:    add sp, sp, #48
+; CHECK-NEXT:    ret
+  %args = alloca ptr, align 8
+  call void @llvm.va_start(ptr %args)
+  %p = load ptr, ptr %args, align 8
+  %10 = load i32, ptr %p, align 8
+  call void @llvm.va_end(ptr %args)
+  ret i32 %10
+}
+
+declare void @llvm.va_start(ptr) nounwind
+declare void @llvm.va_end(ptr) nounwind
+
+define i32 @caller() nounwind ssp {
+; CHECK-LABEL: caller:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #176
+; CHECK-NEXT:    mov w8, #10 // =0xa
+; CHECK-NEXT:    mov w9, #9 // =0x9
+; CHECK-NEXT:    mov w0, #1 // =0x1
+; CHECK-NEXT:    mov w1, #2 // =0x2
+; CHECK-NEXT:    mov w2, #3 // =0x3
+; CHECK-NEXT:    mov w3, #4 // =0x4
+; CHECK-NEXT:    mov w4, #5 // =0x5
+; CHECK-NEXT:    mov w5, #6 // =0x6
+; CHECK-NEXT:    mov w6, #7 // =0x7
+; CHECK-NEXT:    mov w7, #8 // =0x8
+; CHECK-NEXT:    stp d15, d14, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT:    stp d13, d12, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT:    stp d11, d10, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT:    stp d9, d8, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT:    str x30, [sp, #80] // 8-byte Folded Spill
+; CHECK-NEXT:    stp x28, x27, [sp, #96] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x26, x25, [sp, #112] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x24, x23, [sp, #128] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x22, x21, [sp, #144] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x20, x19, [sp, #160] // 16-byte Folded Spill
+; CHECK-NEXT:    str w8, [sp, #8]
+; CHECK-NEXT:    str w9, [sp]
+; CHECK-NEXT:    bl callee
+; CHECK-NEXT:    ldp x20, x19, [sp, #160] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr x30, [sp, #80] // 8-byte Folded Reload
+; CHECK-NEXT:    ldp x22, x21, [sp, #144] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp x24, x23, [sp, #128] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp x26, x25, [sp, #112] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp x28, x27, [sp, #96] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp d11, d10, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp d13, d12, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp d15, d14, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #176
+; CHECK-NEXT:    ret
+  %r = tail call preserve_nonecc i32 (i32, i32, i32, i32, i32, ...) @callee(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10)
+  ret i32 %r
+}
+
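As a usage illustration (not part of the patch, and with made-up function names): after this change, a variadic preserve_nonecc function has its arguments assigned by the target's C convention (AAPCS, DarwinPCS, or Win64PCS, as selected by the new CCIfVarArg/CCIfSubtarget lines and by CCAssignFnForCall), while a non-variadic preserve_nonecc function keeps the CC_AArch64_Preserve_None register order. A minimal IR sketch, assuming an aarch64 triple:

; Hypothetical sketch, not taken from the patch or its tests.
; @trace is variadic and therefore lowers with the C argument-passing rules;
; @notify is non-variadic and keeps the preserve_none register assignment.
declare preserve_nonecc void @trace(ptr, ...)
declare preserve_nonecc void @notify(i64)

define void @use(ptr %fmt, i64 %v) {
  call preserve_nonecc void (ptr, ...) @trace(ptr %fmt, i64 %v)
  call preserve_nonecc void @notify(i64 %v)
  ret void
}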