diff --git a/src/coreclr/interpreter/compiler.cpp b/src/coreclr/interpreter/compiler.cpp index 6f77ed87313242..95b75b13c2d51d 100644 --- a/src/coreclr/interpreter/compiler.cpp +++ b/src/coreclr/interpreter/compiler.cpp @@ -4112,7 +4112,10 @@ void InterpCompiler::EmitCall(CORINFO_RESOLVED_TOKEN* pConstrainedToken, bool re if (isDelegateInvoke) { assert(!isPInvoke && !isMarshaledPInvoke); - opcode = INTOP_CALLDELEGATE; + if (tailcall) + opcode = INTOP_CALLDELEGATE_TAIL; + else + opcode = INTOP_CALLDELEGATE; } else if (tailcall) { diff --git a/src/coreclr/interpreter/inc/intops.def b/src/coreclr/interpreter/inc/intops.def index 4ca778ea9c7705..33a481b0a9d533 100644 --- a/src/coreclr/interpreter/inc/intops.def +++ b/src/coreclr/interpreter/inc/intops.def @@ -362,6 +362,7 @@ OPDEF(INTOP_LDFLDA, "ldflda", 4, 1, 1, InterpOpInt) OPDEF(INTOP_CALL, "call", 4, 1, 1, InterpOpMethodHandle) OPDEF(INTOP_CALL_NULLCHECK, "call.nullcheck", 4, 1, 1, InterpOpMethodHandle) OPDEF(INTOP_CALLDELEGATE, "call.delegate", 4, 1, 1, InterpOpMethodHandle) +OPDEF(INTOP_CALLDELEGATE_TAIL, "call.delegate.tail", 4, 1, 1, InterpOpMethodHandle) OPDEF(INTOP_CALLI, "calli", 6, 1, 2, InterpOpLdPtr) OPDEF(INTOP_CALLVIRT, "callvirt", 4, 1, 1, InterpOpMethodHandle) OPDEF(INTOP_CALL_PINVOKE, "call.pinvoke", 6, 1, 1, InterpOpMethodHandle) // inlined (no marshaling wrapper) pinvokes only diff --git a/src/coreclr/vm/comdelegate.cpp b/src/coreclr/vm/comdelegate.cpp index db965f43b48259..5d59f3298e9a18 100644 --- a/src/coreclr/vm/comdelegate.cpp +++ b/src/coreclr/vm/comdelegate.cpp @@ -829,7 +829,7 @@ LoaderHeap *DelegateEEClass::GetStubHeap() return GetInvokeMethod()->GetLoaderAllocator()->GetStubHeap(); } -#if defined(TARGET_RISCV64) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_RISCV64) || defined(TARGET_LOONGARCH64) || defined(FEATURE_PORTABLE_ENTRYPOINTS) static Stub* CreateILDelegateShuffleThunk(MethodDesc* pDelegateMD, bool callTargetWithThis) { SigTypeContext typeContext(pDelegateMD); @@ -933,7 
+933,7 @@ static PCODE SetupShuffleThunk(MethodTable * pDelMT, MethodDesc *pTargetMeth) else #endif // !FEATURE_PORTABLE_ENTRYPOINTS { -#if defined(TARGET_RISCV64) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_RISCV64) || defined(TARGET_LOONGARCH64) || defined(FEATURE_PORTABLE_ENTRYPOINTS) pShuffleThunk = CreateILDelegateShuffleThunk(pMD, isInstRetBuff); #else _ASSERTE(FALSE); diff --git a/src/coreclr/vm/interpexec.cpp b/src/coreclr/vm/interpexec.cpp index 04ca27e472807e..a26fc453f8de1c 100644 --- a/src/coreclr/vm/interpexec.cpp +++ b/src/coreclr/vm/interpexec.cpp @@ -2403,32 +2403,91 @@ void InterpExecMethod(InterpreterFrame *pInterpreterFrame, InterpMethodContextFr break; } + case INTOP_CALLDELEGATE_TAIL: case INTOP_CALLDELEGATE: { - isTailcall = false; + isTailcall = (*ip == INTOP_CALLDELEGATE_TAIL); returnOffset = ip[1]; callArgsOffset = ip[2]; methodSlot = ip[3]; int8_t* returnValueAddress = LOCAL_VAR_ADDR(returnOffset, int8_t); - // targetMethod holds a pointer to the Invoke method of the delegate, not the final actual target. - targetMethod = (MethodDesc*)pMethod->pDataItems[methodSlot]; ip += 4; DELEGATEREF delegateObj = LOCAL_VAR(callArgsOffset, DELEGATEREF); NULL_CHECK(delegateObj); PCODE targetAddress = delegateObj->GetMethodPtr(); + DelegateEEClass *pDelClass = (DelegateEEClass*)delegateObj->GetMethodTable()->GetClass(); + if ((pDelClass->m_pInstRetBuffCallStub != NULL && pDelClass->m_pInstRetBuffCallStub->GetEntryPoint() == targetAddress) || + (pDelClass->m_pStaticCallStub != NULL && pDelClass->m_pStaticCallStub->GetEntryPoint() == targetAddress)) + { + // This implies that we're using a delegate shuffle thunk to strip off the first parameter to the method + // and call the actual underlying method. We allow for tail-calls to work and for greater efficiency in the + // interpreter by skipping the shuffle thunk and calling the actual target method directly.
+ PCODE actualTarget = delegateObj->GetMethodPtrAux(); + PTR_InterpByteCodeStart targetIp; + if ((targetMethod = NonVirtualEntry2MethodDesc(actualTarget)) && (targetIp = targetMethod->GetInterpreterCode()) != NULL) + { + pFrame->ip = ip; + InterpMethod* pTargetMethod = targetIp->Method; + if (isTailcall) + { + // Move args (skipping the delegate object in the first call-arg slot) to start of stack frame. + assert(pTargetMethod->CheckIntegrity()); + // A raw byte copy is fine (both regions are on the interp stack, not in the GC heap), but they may overlap, so use memmove, not memcpy. + // We need to use the target method's argsSize, not our argsSize, because tail calls (unlike CEE_JMP) can have a + // different signature from the caller. + memmove(pFrame->pStack, LOCAL_VAR_ADDR(callArgsOffset + INTERP_STACK_SLOT_SIZE, int8_t), pTargetMethod->argsSize); + // Reuse current stack frame. We discard the call insn's returnOffset because it's not important and tail calls are + // required to be followed by a ret, so we know nothing is going to read from stack[returnOffset] after the call. + pFrame->ReInit(pFrame->pParent, targetIp, pFrame->pRetVal, pFrame->pStack); + } + else + { + // Shift args down by one slot to remove the delegate obj pointer + memmove(LOCAL_VAR_ADDR(callArgsOffset, int8_t), LOCAL_VAR_ADDR(callArgsOffset + INTERP_STACK_SLOT_SIZE, int8_t), pTargetMethod->argsSize); + // Allocate child frame.
+ InterpMethodContextFrame *pChildFrame = pFrame->pNext; + if (!pChildFrame) + { + pChildFrame = (InterpMethodContextFrame*)alloca(sizeof(InterpMethodContextFrame)); + pChildFrame->pNext = NULL; + pFrame->pNext = pChildFrame; + // Save the lowest SP in the current method so that we can identify it by that during stackwalk + pInterpreterFrame->SetInterpExecMethodSP((TADDR)GetCurrentSP()); + } + pChildFrame->ReInit(pFrame, targetIp, returnValueAddress, LOCAL_VAR_ADDR(callArgsOffset, int8_t)); + pFrame = pChildFrame; + } + // Set execution state for the new frame + pMethod = pFrame->startIp->Method; + assert(pMethod->CheckIntegrity()); + stack = pFrame->pStack; + ip = pFrame->startIp->GetByteCodes(); + pThreadContext->pStackPointer = stack + pMethod->allocaSize; + break; + } + } + OBJECTREF targetMethodObj = delegateObj->GetTarget(); LOCAL_VAR(callArgsOffset, OBJECTREF) = targetMethodObj; + + if ((targetMethod = NonVirtualEntry2MethodDesc(targetAddress)) != NULL) + { + // In this case targetMethod holds a pointer to the MethodDesc that will be called by using targetMethodObj as + // the this pointer. This may be the final method (in the case of instance method delegates), or it may be a + // shuffle thunk, or multicast invoke method. + goto CALL_INTERP_METHOD; + } + + // targetMethod holds a pointer to the Invoke method of the delegate, not the final actual target. + targetMethod = (MethodDesc*)pMethod->pDataItems[methodSlot]; int8_t* callArgsAddress = LOCAL_VAR_ADDR(callArgsOffset, int8_t); // Save current execution state for when we return from called method pFrame->ip = ip; - // TODO! Once we are investigating performance here, we may want to optimize this so that - // delegate calls to interpeted methods don't have to go through the native invoke here, but for - // now this should work well. InvokeDelegateInvokeMethod(targetMethod, callArgsAddress, returnValueAddress, targetAddress); break; }