@@ -2403,32 +2403,91 @@ void InterpExecMethod(InterpreterFrame *pInterpreterFrame, InterpMethodContextFr
24032403 break ;
24042404 }
24052405
2406+ case INTOP_CALLDELEGATE_TAIL:
24062407 case INTOP_CALLDELEGATE:
24072408 {
2408- isTailcall = false ;
2409+ isTailcall = (*ip == INTOP_CALLDELEGATE_TAIL) ;
24092410 returnOffset = ip[1 ];
24102411 callArgsOffset = ip[2 ];
24112412 methodSlot = ip[3 ];
24122413
24132414 int8_t * returnValueAddress = LOCAL_VAR_ADDR (returnOffset, int8_t );
2414- // targetMethod holds a pointer to the Invoke method of the delegate, not the final actual target.
2415- targetMethod = (MethodDesc*)pMethod->pDataItems [methodSlot];
24162415
24172416 ip += 4 ;
24182417
24192418 DELEGATEREF delegateObj = LOCAL_VAR (callArgsOffset, DELEGATEREF);
24202419 NULL_CHECK (delegateObj);
24212420 PCODE targetAddress = delegateObj->GetMethodPtr ();
2421+ DelegateEEClass *pDelClass = (DelegateEEClass*)delegateObj->GetMethodTable ()->GetClass ();
2422+ if ((pDelClass->m_pInstRetBuffCallStub != NULL && pDelClass->m_pInstRetBuffCallStub ->GetEntryPoint () == targetAddress) ||
2423+ (pDelClass->m_pStaticCallStub != NULL && pDelClass->m_pStaticCallStub ->GetEntryPoint () == targetAddress))
2424+ {
2425+ // This implies that we're using a delegate shuffle thunk to strip off the first parameter to the method
2426+ // and call the actual underlying method. We allow for tail-calls to work and for greater efficiency in the
2427+ // interpreter by skipping the shuffle thunk and calling the actual target method directly.
2428+ PCODE actualTarget = delegateObj->GetMethodPtrAux ();
2429+ PTR_InterpByteCodeStart targetIp;
2430+ if ((targetMethod = NonVirtualEntry2MethodDesc (actualTarget)) && (targetIp = targetMethod->GetInterpreterCode ()) != NULL )
2431+ {
2432+ pFrame->ip = ip;
2433+ InterpMethod* pTargetMethod = targetIp->Method ;
2434+ if (isTailcall)
2435+ {
2436+ // Move args from callArgsOffset to start of stack frame.
2437+ assert (pTargetMethod->CheckIntegrity ());
2438+ // It is safe to use memcpy because the source and destination are both on the interp stack, not in the GC heap.
2439+ // We need to use the target method's argsSize, not our argsSize, because tail calls (unlike CEE_JMP) can have a
2440+ // different signature from the caller.
2441+ memcpy (pFrame->pStack , LOCAL_VAR_ADDR (callArgsOffset + INTERP_STACK_SLOT_SIZE, int8_t ), pTargetMethod->argsSize );
2442+ // Reuse current stack frame. We discard the call insn's returnOffset because it's not important and tail calls are
2443+ // required to be followed by a ret, so we know nothing is going to read from stack[returnOffset] after the call.
2444+ pFrame->ReInit (pFrame->pParent , targetIp, pFrame->pRetVal , pFrame->pStack );
2445+ }
2446+ else
2447+ {
2448+ // Shift args down by one slot to remove the delegate obj pointer
2449+ memmove (LOCAL_VAR_ADDR (callArgsOffset, int8_t ), LOCAL_VAR_ADDR (callArgsOffset + INTERP_STACK_SLOT_SIZE, int8_t ), pTargetMethod->argsSize );
2450+ // Allocate child frame.
2451+ InterpMethodContextFrame *pChildFrame = pFrame->pNext ;
2452+ if (!pChildFrame)
2453+ {
2454+ pChildFrame = (InterpMethodContextFrame*)alloca (sizeof (InterpMethodContextFrame));
2455+ pChildFrame->pNext = NULL ;
2456+ pFrame->pNext = pChildFrame;
2457+ // Save the lowest SP in the current method so that we can identify it by that during stackwalk
2458+ pInterpreterFrame->SetInterpExecMethodSP ((TADDR)GetCurrentSP ());
2459+ }
2460+ pChildFrame->ReInit (pFrame, targetIp, returnValueAddress, LOCAL_VAR_ADDR (callArgsOffset, int8_t ));
2461+ pFrame = pChildFrame;
2462+ }
2463+ // Set execution state for the new frame
2464+ pMethod = pFrame->startIp ->Method ;
2465+ assert (pMethod->CheckIntegrity ());
2466+ stack = pFrame->pStack ;
2467+ ip = pFrame->startIp ->GetByteCodes ();
2468+ pThreadContext->pStackPointer = stack + pMethod->allocaSize ;
2469+ break ;
2470+ }
2471+ }
2472+
24222473 OBJECTREF targetMethodObj = delegateObj->GetTarget ();
24232474 LOCAL_VAR (callArgsOffset, OBJECTREF) = targetMethodObj;
2475+
2476+ if ((targetMethod = NonVirtualEntry2MethodDesc (targetAddress)) != NULL )
2477+ {
2478+ // In this case targetMethod holds a pointer to the MethodDesc that will be called by using targetMethodObj as
2479+ // the this pointer. This may be the final method (in the case of instance method delegates), or it may be a
2480+ // shuffle thunk, or multicast invoke method.
2481+ goto CALL_INTERP_METHOD;
2482+ }
2483+
2484+ // targetMethod holds a pointer to the Invoke method of the delegate, not the final actual target.
2485+ targetMethod = (MethodDesc*)pMethod->pDataItems [methodSlot];
24242486 int8_t * callArgsAddress = LOCAL_VAR_ADDR (callArgsOffset, int8_t );
24252487
24262488 // Save current execution state for when we return from called method
24272489 pFrame->ip = ip;
24282490
2429- // TODO! Once we are investigating performance here, we may want to optimize this so that
2430- // delegate calls to interpreted methods don't have to go through the native invoke here, but for
2431- // now this should work well.
24322491 InvokeDelegateInvokeMethod (targetMethod, callArgsAddress, returnValueAddress, targetAddress);
24332492 break ;
24342493 }
0 commit comments