Skip to content

Commit 691e950

Browse files
[clr-interp] Implement tail-calling for delegates and support for delegates in wasm (#120205)
- Disable shuffle thunk creation for portable entrypoints and enable ILDelegateShuffle thunks instead.
- Also implement the shuffle thunk directly in the interpreter so that, on WASM or wherever we have pregenerated code, most cases do not even need to use those thunks.
- Add a new opcode for tail-calling a delegate method.
1 parent d47e24c commit 691e950

File tree

4 files changed

+72
-9
lines changed

4 files changed

+72
-9
lines changed

src/coreclr/interpreter/compiler.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4112,7 +4112,10 @@ void InterpCompiler::EmitCall(CORINFO_RESOLVED_TOKEN* pConstrainedToken, bool re
41124112
if (isDelegateInvoke)
41134113
{
41144114
assert(!isPInvoke && !isMarshaledPInvoke);
4115-
opcode = INTOP_CALLDELEGATE;
4115+
if (tailcall)
4116+
opcode = INTOP_CALLDELEGATE_TAIL;
4117+
else
4118+
opcode = INTOP_CALLDELEGATE;
41164119
}
41174120
else if (tailcall)
41184121
{

src/coreclr/interpreter/inc/intops.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -362,6 +362,7 @@ OPDEF(INTOP_LDFLDA, "ldflda", 4, 1, 1, InterpOpInt)
362362
OPDEF(INTOP_CALL, "call", 4, 1, 1, InterpOpMethodHandle)
363363
OPDEF(INTOP_CALL_NULLCHECK, "call.nullcheck", 4, 1, 1, InterpOpMethodHandle)
364364
OPDEF(INTOP_CALLDELEGATE, "call.delegate", 4, 1, 1, InterpOpMethodHandle)
365+
OPDEF(INTOP_CALLDELEGATE_TAIL, "call.delegate.tail", 4, 1, 1, InterpOpMethodHandle)
365366
OPDEF(INTOP_CALLI, "calli", 6, 1, 2, InterpOpLdPtr)
366367
OPDEF(INTOP_CALLVIRT, "callvirt", 4, 1, 1, InterpOpMethodHandle)
367368
OPDEF(INTOP_CALL_PINVOKE, "call.pinvoke", 6, 1, 1, InterpOpMethodHandle) // inlined (no marshaling wrapper) pinvokes only

src/coreclr/vm/comdelegate.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -829,7 +829,7 @@ LoaderHeap *DelegateEEClass::GetStubHeap()
829829
return GetInvokeMethod()->GetLoaderAllocator()->GetStubHeap();
830830
}
831831

832-
#if defined(TARGET_RISCV64) || defined(TARGET_LOONGARCH64)
832+
#if defined(TARGET_RISCV64) || defined(TARGET_LOONGARCH64) || defined(FEATURE_PORTABLE_ENTRYPOINTS)
833833
static Stub* CreateILDelegateShuffleThunk(MethodDesc* pDelegateMD, bool callTargetWithThis)
834834
{
835835
SigTypeContext typeContext(pDelegateMD);
@@ -933,7 +933,7 @@ static PCODE SetupShuffleThunk(MethodTable * pDelMT, MethodDesc *pTargetMeth)
933933
else
934934
#endif // !FEATURE_PORTABLE_ENTRYPOINTS
935935
{
936-
#if defined(TARGET_RISCV64) || defined(TARGET_LOONGARCH64)
936+
#if defined(TARGET_RISCV64) || defined(TARGET_LOONGARCH64) || defined(FEATURE_PORTABLE_ENTRYPOINTS)
937937
pShuffleThunk = CreateILDelegateShuffleThunk(pMD, isInstRetBuff);
938938
#else
939939
_ASSERTE(FALSE);

src/coreclr/vm/interpexec.cpp

Lines changed: 65 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2403,32 +2403,91 @@ void InterpExecMethod(InterpreterFrame *pInterpreterFrame, InterpMethodContextFr
24032403
break;
24042404
}
24052405

2406+
case INTOP_CALLDELEGATE_TAIL:
24062407
case INTOP_CALLDELEGATE:
24072408
{
2408-
isTailcall = false;
2409+
isTailcall = (*ip == INTOP_CALLDELEGATE_TAIL);
24092410
returnOffset = ip[1];
24102411
callArgsOffset = ip[2];
24112412
methodSlot = ip[3];
24122413

24132414
int8_t* returnValueAddress = LOCAL_VAR_ADDR(returnOffset, int8_t);
2414-
// targetMethod holds a pointer to the Invoke method of the delegate, not the final actual target.
2415-
targetMethod = (MethodDesc*)pMethod->pDataItems[methodSlot];
24162415

24172416
ip += 4;
24182417

24192418
DELEGATEREF delegateObj = LOCAL_VAR(callArgsOffset, DELEGATEREF);
24202419
NULL_CHECK(delegateObj);
24212420
PCODE targetAddress = delegateObj->GetMethodPtr();
2421+
DelegateEEClass *pDelClass = (DelegateEEClass*)delegateObj->GetMethodTable()->GetClass();
2422+
if ((pDelClass->m_pInstRetBuffCallStub != NULL && pDelClass->m_pInstRetBuffCallStub->GetEntryPoint() == targetAddress) ||
2423+
(pDelClass->m_pStaticCallStub != NULL && pDelClass->m_pStaticCallStub->GetEntryPoint() == targetAddress))
2424+
{
2425+
// This implies that we're using a delegate shuffle thunk to strip off the first parameter to the method
2426+
// and call the actual underlying method. We allow for tail-calls to work and for greater efficiency in the
2427+
// interpreter by skipping the shuffle thunk and calling the actual target method directly.
2428+
PCODE actualTarget = delegateObj->GetMethodPtrAux();
2429+
PTR_InterpByteCodeStart targetIp;
2430+
if ((targetMethod = NonVirtualEntry2MethodDesc(actualTarget)) && (targetIp = targetMethod->GetInterpreterCode()) != NULL)
2431+
{
2432+
pFrame->ip = ip;
2433+
InterpMethod* pTargetMethod = targetIp->Method;
2434+
if (isTailcall)
2435+
{
2436+
// Move args from callArgsOffset to start of stack frame.
2437+
assert(pTargetMethod->CheckIntegrity());
2438+
// It is safe to use memcpy because the source and destination are both on the interp stack, not in the GC heap.
2439+
// We need to use the target method's argsSize, not our argsSize, because tail calls (unlike CEE_JMP) can have a
2440+
// different signature from the caller.
2441+
memcpy(pFrame->pStack, LOCAL_VAR_ADDR(callArgsOffset + INTERP_STACK_SLOT_SIZE, int8_t), pTargetMethod->argsSize);
2442+
// Reuse current stack frame. We discard the call insn's returnOffset because it's not important and tail calls are
2443+
// required to be followed by a ret, so we know nothing is going to read from stack[returnOffset] after the call.
2444+
pFrame->ReInit(pFrame->pParent, targetIp, pFrame->pRetVal, pFrame->pStack);
2445+
}
2446+
else
2447+
{
2448+
// Shift args down by one slot to remove the delegate obj pointer
2449+
memmove(LOCAL_VAR_ADDR(callArgsOffset, int8_t), LOCAL_VAR_ADDR(callArgsOffset + INTERP_STACK_SLOT_SIZE, int8_t), pTargetMethod->argsSize);
2450+
// Allocate child frame.
2451+
InterpMethodContextFrame *pChildFrame = pFrame->pNext;
2452+
if (!pChildFrame)
2453+
{
2454+
pChildFrame = (InterpMethodContextFrame*)alloca(sizeof(InterpMethodContextFrame));
2455+
pChildFrame->pNext = NULL;
2456+
pFrame->pNext = pChildFrame;
2457+
// Save the lowest SP in the current method so that we can identify it by that during stackwalk
2458+
pInterpreterFrame->SetInterpExecMethodSP((TADDR)GetCurrentSP());
2459+
}
2460+
pChildFrame->ReInit(pFrame, targetIp, returnValueAddress, LOCAL_VAR_ADDR(callArgsOffset, int8_t));
2461+
pFrame = pChildFrame;
2462+
}
2463+
// Set execution state for the new frame
2464+
pMethod = pFrame->startIp->Method;
2465+
assert(pMethod->CheckIntegrity());
2466+
stack = pFrame->pStack;
2467+
ip = pFrame->startIp->GetByteCodes();
2468+
pThreadContext->pStackPointer = stack + pMethod->allocaSize;
2469+
break;
2470+
}
2471+
}
2472+
24222473
OBJECTREF targetMethodObj = delegateObj->GetTarget();
24232474
LOCAL_VAR(callArgsOffset, OBJECTREF) = targetMethodObj;
2475+
2476+
if ((targetMethod = NonVirtualEntry2MethodDesc(targetAddress)) != NULL)
2477+
{
2478+
// In this case targetMethod holds a pointer to the MethodDesc that will be called by using targetMethodObj as
2479+
// the this pointer. This may be the final method (in the case of instance method delegates), or it may be a
2480+
// shuffle thunk, or multicast invoke method.
2481+
goto CALL_INTERP_METHOD;
2482+
}
2483+
2484+
// targetMethod holds a pointer to the Invoke method of the delegate, not the final actual target.
2485+
targetMethod = (MethodDesc*)pMethod->pDataItems[methodSlot];
24242486
int8_t* callArgsAddress = LOCAL_VAR_ADDR(callArgsOffset, int8_t);
24252487

24262488
// Save current execution state for when we return from called method
24272489
pFrame->ip = ip;
24282490

2429-
// TODO! Once we are investigating performance here, we may want to optimize this so that
2430-
// delegate calls to interpeted methods don't have to go through the native invoke here, but for
2431-
// now this should work well.
24322491
InvokeDelegateInvokeMethod(targetMethod, callArgsAddress, returnValueAddress, targetAddress);
24332492
break;
24342493
}

0 commit comments

Comments
 (0)