From d9788281b37748e41e804f5fdbb777f79d555a49 Mon Sep 17 00:00:00 2001 From: Katelyn Gadd Date: Tue, 19 Aug 2025 11:46:10 -0700 Subject: [PATCH 1/2] Add dedicated interpreter opcodes for tailcalls and generate them --- src/coreclr/interpreter/compiler.cpp | 15 ++++++++++----- src/coreclr/interpreter/intops.def | 5 +++++ 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/src/coreclr/interpreter/compiler.cpp b/src/coreclr/interpreter/compiler.cpp index 6a8671899c85a3..f8ae1b22aa7dbf 100644 --- a/src/coreclr/interpreter/compiler.cpp +++ b/src/coreclr/interpreter/compiler.cpp @@ -3074,7 +3074,7 @@ void InterpCompiler::EmitCall(CORINFO_RESOLVED_TOKEN* pConstrainedToken, bool re } else if (isCalli) { - AddIns(INTOP_CALLI); + AddIns(tailcall ? INTOP_CALLI_TAIL : INTOP_CALLI); m_pLastNewIns->data[0] = GetDataItemIndex(calliCookie); m_pLastNewIns->SetSVars2(CALL_ARGS_SVAR, callIFunctionPointerVar); } @@ -3092,6 +3092,11 @@ void InterpCompiler::EmitCall(CORINFO_RESOLVED_TOKEN* pConstrainedToken, bool re assert(!isPInvoke && !isMarshaledPInvoke); AddIns(INTOP_CALLDELEGATE); } + else if (tailcall) + { + assert(!isPInvoke && !isMarshaledPInvoke); + AddIns(INTOP_CALL_TAIL); + } else { AddIns((isPInvoke && !isMarshaledPInvoke) ? INTOP_CALL_PINVOKE : INTOP_CALL); @@ -3128,14 +3133,14 @@ void InterpCompiler::EmitCall(CORINFO_RESOLVED_TOKEN* pConstrainedToken, bool re calliCookie = m_compHnd->GetCookieForInterpreterCalliSig(&callInfo.sig); - AddIns(INTOP_CALLI); + AddIns(tailcall ? INTOP_CALLI_TAIL : INTOP_CALLI); m_pLastNewIns->data[0] = GetDataItemIndex(calliCookie); m_pLastNewIns->SetSVars2(CALL_ARGS_SVAR, codePointerLookupResult); break; } case CORINFO_VIRTUALCALL_VTABLE: // Traditional virtual call. In theory we could optimize this to using the vtable - AddIns(INTOP_CALLVIRT); + AddIns(tailcall ? 
INTOP_CALLVIRT_TAIL : INTOP_CALLVIRT); m_pLastNewIns->data[0] = GetDataItemIndex(callInfo.hMethod); break; @@ -3163,13 +3168,13 @@ void InterpCompiler::EmitCall(CORINFO_RESOLVED_TOKEN* pConstrainedToken, bool re calliCookie = m_compHnd->GetCookieForInterpreterCalliSig(&callInfo.sig); - AddIns(INTOP_CALLI); + AddIns(tailcall ? INTOP_CALLI_TAIL : INTOP_CALLI); m_pLastNewIns->data[0] = GetDataItemIndex(calliCookie); m_pLastNewIns->SetSVars2(CALL_ARGS_SVAR, synthesizedLdvirtftnPtrVar); } else { - AddIns(INTOP_CALLVIRT); + AddIns(tailcall ? INTOP_CALLVIRT_TAIL : INTOP_CALLVIRT); m_pLastNewIns->data[0] = GetDataItemIndex(callInfo.hMethod); } break; diff --git a/src/coreclr/interpreter/intops.def b/src/coreclr/interpreter/intops.def index 95ea7a745e66f6..5850997ec29cc9 100644 --- a/src/coreclr/interpreter/intops.def +++ b/src/coreclr/interpreter/intops.def @@ -365,6 +365,11 @@ OPDEF(INTOP_NEWOBJ, "newobj", 5, 1, 1, InterpOpMethodHandle) OPDEF(INTOP_NEWOBJ_GENERIC, "newobj.generic", 6, 1, 2, InterpOpMethodHandle) OPDEF(INTOP_NEWOBJ_VT, "newobj.vt", 5, 1, 1, InterpOpMethodHandle) +// Tail calls +OPDEF(INTOP_CALL_TAIL, "call.tail", 4, 1, 1, InterpOpMethodHandle) +OPDEF(INTOP_CALLI_TAIL, "calli.tail", 5, 1, 2, InterpOpLdPtr) +OPDEF(INTOP_CALLVIRT_TAIL, "callvirt.tail", 4, 1, 1, InterpOpMethodHandle) + // The following helper call instructions exist in 2 variants, one for normal methods, and one for cases where a shared generic lookup is needed. // In the case where a shared generic lookup is needed an extra argument is passed as an svar, which is a pointer to the generic context. // If there is a generic context argument it is always the first SVar to the instruction. 
From 977d1bf079d5e666a1681e6475f09279b9efcd7c Mon Sep 17 00:00:00 2001 From: Katelyn Gadd Date: Tue, 19 Aug 2025 13:06:19 -0700 Subject: [PATCH 2/2] Implement tailcalls Don't assert on tail calli, just do a non-tail call for now so the application will at least run (but potentially run out of stack) Cleanup fixme comments --- src/coreclr/interpreter/compiler.cpp | 2 +- src/coreclr/interpreter/interpretershared.h | 5 +- src/coreclr/vm/interpexec.cpp | 51 +++++++++++++++++---- 3 files changed, 46 insertions(+), 12 deletions(-) diff --git a/src/coreclr/interpreter/compiler.cpp b/src/coreclr/interpreter/compiler.cpp index f8ae1b22aa7dbf..5c493d92b64274 100644 --- a/src/coreclr/interpreter/compiler.cpp +++ b/src/coreclr/interpreter/compiler.cpp @@ -1241,7 +1241,7 @@ InterpMethod* InterpCompiler::CreateInterpMethod() bool unmanagedCallersOnly = corJitFlags.IsSet(CORJIT_FLAGS::CORJIT_FLAG_REVERSE_PINVOKE); - InterpMethod *pMethod = new InterpMethod(m_methodHnd, m_totalVarsStackSize, pDataItems, initLocals, unmanagedCallersOnly); + InterpMethod *pMethod = new InterpMethod(m_methodHnd, m_ILLocalsOffset, m_totalVarsStackSize, pDataItems, initLocals, unmanagedCallersOnly); return pMethod; } diff --git a/src/coreclr/interpreter/interpretershared.h b/src/coreclr/interpreter/interpretershared.h index d9338543d8ea24..aaa90045dd9f29 100644 --- a/src/coreclr/interpreter/interpretershared.h +++ b/src/coreclr/interpreter/interpretershared.h @@ -30,19 +30,20 @@ struct InterpMethod InterpMethod *self; #endif CORINFO_METHOD_HANDLE methodHnd; - int32_t allocaSize; + int32_t argsSize, allocaSize; void** pDataItems; // This stub is used for calling the interpreted method from JITted/AOTed code CallStubHeader *pCallStub; bool initLocals; bool unmanagedCallersOnly; - InterpMethod(CORINFO_METHOD_HANDLE methodHnd, int32_t allocaSize, void** pDataItems, bool initLocals, bool unmanagedCallersOnly) + InterpMethod(CORINFO_METHOD_HANDLE methodHnd, int32_t argsSize, int32_t allocaSize, void** 
pDataItems, bool initLocals, bool unmanagedCallersOnly) { #if DEBUG this->self = this; #endif this->methodHnd = methodHnd; + this->argsSize = argsSize; this->allocaSize = allocaSize; this->pDataItems = pDataItems; this->initLocals = initLocals; diff --git a/src/coreclr/vm/interpexec.cpp b/src/coreclr/vm/interpexec.cpp index 63a013cd311734..5c82f07e4ec820 100644 --- a/src/coreclr/vm/interpexec.cpp +++ b/src/coreclr/vm/interpexec.cpp @@ -543,6 +543,7 @@ void InterpExecMethod(InterpreterFrame *pInterpreterFrame, InterpMethodContextFr } int32_t returnOffset, callArgsOffset, methodSlot; + bool isTailcall = false; MethodDesc* targetMethod; MAIN_LOOP: @@ -1895,8 +1896,10 @@ void InterpExecMethod(InterpreterFrame *pInterpreterFrame, InterpMethodContextFr break; } + case INTOP_CALLVIRT_TAIL: case INTOP_CALLVIRT: { + isTailcall = (*ip == INTOP_CALLVIRT_TAIL); returnOffset = ip[1]; callArgsOffset = ip[2]; methodSlot = ip[3]; @@ -1914,8 +1917,10 @@ void InterpExecMethod(InterpreterFrame *pInterpreterFrame, InterpMethodContextFr goto CALL_INTERP_METHOD; } + case INTOP_CALLI_TAIL: case INTOP_CALLI: { + isTailcall = (*ip == INTOP_CALLI_TAIL); returnOffset = ip[1]; callArgsOffset = ip[2]; int32_t calliFunctionPointerVar = ip[3]; @@ -1927,6 +1932,7 @@ void InterpExecMethod(InterpreterFrame *pInterpreterFrame, InterpMethodContextFr // Save current execution state for when we return from called method pFrame->ip = ip; + // Interpreter-FIXME: isTailcall InvokeCalliStub(LOCAL_VAR(calliFunctionPointerVar, PCODE), pCallStub, stack + callArgsOffset, stack + returnOffset); break; } @@ -1936,6 +1942,7 @@ void InterpExecMethod(InterpreterFrame *pInterpreterFrame, InterpMethodContextFr // This opcode handles p/invokes that don't use a managed wrapper for marshaling. 
These // calls are special in that they need an InlinedCallFrame in order for proper EH to happen + isTailcall = false; returnOffset = ip[1]; callArgsOffset = ip[2]; methodSlot = ip[3]; @@ -1971,6 +1978,7 @@ void InterpExecMethod(InterpreterFrame *pInterpreterFrame, InterpMethodContextFr case INTOP_CALLDELEGATE: { + isTailcall = false; returnOffset = ip[1]; callArgsOffset = ip[2]; methodSlot = ip[3]; @@ -1996,8 +2004,10 @@ void InterpExecMethod(InterpreterFrame *pInterpreterFrame, InterpMethodContextFr break; } + case INTOP_CALL_TAIL: case INTOP_CALL: { + isTailcall = (*ip == INTOP_CALL_TAIL); returnOffset = ip[1]; callArgsOffset = ip[2]; methodSlot = ip[3]; @@ -2032,24 +2042,44 @@ void InterpExecMethod(InterpreterFrame *pInterpreterFrame, InterpMethodContextFr if (targetIp == NULL) { // If we didn't get the interpreter code pointer setup, then this is a method we need to invoke as a compiled method. + // Interpreter-FIXME: Implement tailcall via helpers, see https://github.com/dotnet/runtime/blob/main/docs/design/features/tailcalls-with-helpers.md InvokeCompiledMethod(targetMethod, stack + callArgsOffset, stack + returnOffset, targetMethod->GetMultiCallableAddrOfCode(CORINFO_ACCESS_ANY)); break; } } - // Allocate child frame. + if (isTailcall) { - InterpMethodContextFrame *pChildFrame = pFrame->pNext; - if (!pChildFrame) + // Move args from callArgsOffset to start of stack frame. + InterpMethod* pTargetMethod = targetIp->Method; + assert(pTargetMethod->CheckIntegrity()); + // A raw byte copy is safe because the source and destination are both on the interp stack, not in the GC heap; it must be memmove (not memcpy) because the two ranges overlap whenever callArgsOffset is smaller than the target's argsSize. + // We need to use the target method's argsSize, not our argsSize, because tail calls (unlike CEE_JMP) can have a + // different signature from the caller. + memmove(pFrame->pStack, stack + callArgsOffset, pTargetMethod->argsSize); + // Reuse current stack frame. 
We discard the call insn's returnOffset because it's not important and tail calls are + // required to be followed by a ret, so we know nothing is going to read from stack[returnOffset] after the call. + pFrame->ReInit(pFrame->pParent, targetIp, pFrame->pRetVal, pFrame->pStack); + } + else + { + // Save current execution state for when we return from called method + pFrame->ip = ip; + + // Allocate child frame. { - pChildFrame = (InterpMethodContextFrame*)alloca(sizeof(InterpMethodContextFrame)); - pChildFrame->pNext = NULL; - pFrame->pNext = pChildFrame; + InterpMethodContextFrame *pChildFrame = pFrame->pNext; + if (!pChildFrame) + { + pChildFrame = (InterpMethodContextFrame*)alloca(sizeof(InterpMethodContextFrame)); + pChildFrame->pNext = NULL; + pFrame->pNext = pChildFrame; + } + pChildFrame->ReInit(pFrame, targetIp, stack + returnOffset, stack + callArgsOffset); + pFrame = pChildFrame; } - pChildFrame->ReInit(pFrame, targetIp, stack + returnOffset, stack + callArgsOffset); - pFrame = pChildFrame; + assert (((size_t)pFrame->pStack % INTERP_STACK_ALIGNMENT) == 0); } - assert (((size_t)pFrame->pStack % INTERP_STACK_ALIGNMENT) == 0); // Set execution state for the new frame pMethod = pFrame->startIp->Method; @@ -2061,6 +2091,7 @@ void InterpExecMethod(InterpreterFrame *pInterpreterFrame, InterpMethodContextFr } case INTOP_NEWOBJ_GENERIC: { + isTailcall = false; returnOffset = ip[1]; callArgsOffset = ip[2]; methodSlot = ip[4]; @@ -2079,6 +2110,7 @@ void InterpExecMethod(InterpreterFrame *pInterpreterFrame, InterpMethodContextFr } case INTOP_NEWOBJ: { + isTailcall = false; returnOffset = ip[1]; callArgsOffset = ip[2]; methodSlot = ip[3]; @@ -2110,6 +2142,7 @@ void InterpExecMethod(InterpreterFrame *pInterpreterFrame, InterpMethodContextFr } case INTOP_NEWOBJ_VT: { + isTailcall = false; returnOffset = ip[1]; callArgsOffset = ip[2]; methodSlot = ip[3];