diff --git a/docs/design/coreclr/botr/clr-abi.md b/docs/design/coreclr/botr/clr-abi.md index 0eed53cd415e58..9b69c69e67d82b 100644 --- a/docs/design/coreclr/botr/clr-abi.md +++ b/docs/design/coreclr/botr/clr-abi.md @@ -114,7 +114,7 @@ ARM64-only: When a method returns a structure that is larger than 16 bytes the c ## Hidden parameters -*Stub dispatch* - when a virtual call uses a VSD stub, rather than back-patching the calling code (or disassembling it), the JIT must place the address of the stub used to load the call target, the "stub indirection cell", in (x86) `EAX` / (AMD64) `R11` / (AMD64 NativeAOT ABI) `R10` / (ARM) `R4` / (ARM NativeAOT ABI) `R12` / (ARM64) `R11`. In the JIT, this is encapsulated in the `VirtualStubParamInfo` class. +*Stub dispatch* - when a virtual call uses a VSD stub, rather than back-patching the calling code (or disassembling it), the JIT must place the address of the stub used to load the call target, the "stub indirection cell", in (x86) `EAX` / (AMD64) `R11` / (ARM) `R4` / (ARM NativeAOT ABI) `R12` / (ARM64) `R11`. In the JIT, this is encapsulated in the `VirtualStubParamInfo` class. *Calli Pinvoke* - The VM wants the address of the PInvoke in (AMD64) `R10` / (ARM) `R12` / (ARM64) `R14` (In the JIT: `REG_PINVOKE_TARGET_PARAM`), and the signature (the pinvoke cookie) in (AMD64) `R11` / (ARM) `R4` / (ARM64) `R15` (in the JIT: `REG_PINVOKE_COOKIE_PARAM`). @@ -812,7 +812,7 @@ Therefore it will expand all indirect calls via the validation helper and a manu ## CFG details for x64 On x64, `CORINFO_HELP_VALIDATE_INDIRECT_CALL` takes the call address in `rcx`. -In addition to the usual registers it also preserves all float registers and `rcx` and `r10`; furthermore, shadow stack space is not required to be allocated. +In addition to the usual registers it also preserves all float registers, `rcx`, and `r10`; furthermore, shadow stack space is not required to be allocated. `CORINFO_HELP_DISPATCH_INDIRECT_CALL` takes the call address in `rax` and it reserves the right to use and trash `r10` and `r11`. The JIT uses the dispatch helper on x64 whenever possible as it is expected that the code size benefits outweighs the less accurate branch prediction. 
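To make the hidden stub-dispatch parameter described above concrete, the fragment below is a minimal illustrative sketch only (it is not part of this patch, and the `IndirectionCell` label is hypothetical, standing in for the JIT-emitted cell); it shows the AMD64 convention documented above, where `R11` carries the stub indirection cell address into the dispatch stub.

    ; Hypothetical MASM-style AMD64 VSD call site, assuming a data cell named IndirectionCell
    lea     r11, IndirectionCell        ; hidden argument: address of the stub indirection cell in R11
    call    qword ptr [r11]             ; call through the cell to the current dispatch/resolve stub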
diff --git a/src/coreclr/clrfeatures.cmake b/src/coreclr/clrfeatures.cmake index 6bfb717125bbd2..d6a8965843e585 100644 --- a/src/coreclr/clrfeatures.cmake +++ b/src/coreclr/clrfeatures.cmake @@ -49,4 +49,24 @@ endif() if (CLR_CMAKE_TARGET_WIN32) set(FEATURE_TYPEEQUIVALENCE 1) -endif(CLR_CMAKE_TARGET_WIN32) \ No newline at end of file +endif(CLR_CMAKE_TARGET_WIN32) + + +if (CLR_CMAKE_TARGET_MACCATALYST OR CLR_CMAKE_TARGET_IOS OR CLR_CMAKE_TARGET_TVOS) + set(FEATURE_CORECLR_CACHED_INTERFACE_DISPATCH 1) + set(FEATURE_CORECLR_VIRTUAL_STUB_DISPATCH 0) +else() + # Enable cached interface dispatch so that we can test/debug it more easily on non-embedded scenarios (set DOTNET_UseCachedInterfaceDispatch=1) + # Only enable in chk/debug builds as this support isn't intended for retail use elsewhere + if (CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64) + set(FEATURE_CORECLR_CACHED_INTERFACE_DISPATCH $,1,0>) + else() + set(FEATURE_CORECLR_CACHED_INTERFACE_DISPATCH 0) + endif() + set(FEATURE_CORECLR_VIRTUAL_STUB_DISPATCH 1) +endif() + +if (CLR_CMAKE_HOST_UNIX AND CLR_CMAKE_HOST_ARCH_AMD64) + # Allow 16 byte compare-exchange (cmpxchg16b) + add_compile_options($<${FEATURE_CORECLR_CACHED_INTERFACE_DISPATCH}:-mcx16>) +endif() diff --git a/src/coreclr/crossgen-corelib.proj b/src/coreclr/crossgen-corelib.proj index 7e93e2fcf9b24b..b95536f174a5b6 100644 --- a/src/coreclr/crossgen-corelib.proj +++ b/src/coreclr/crossgen-corelib.proj @@ -118,7 +118,10 @@ $(CrossGenDllCmd) --targetos:linux $(CrossGenDllCmd) -m:$(MergedMibcPath) --embed-pgo-data $(CrossGenDllCmd) -O + $(CrossGenDllCmd) --verify-type-and-field-layout + + $(CrossGenDllCmd) --enable-cached-interface-dispatch-support $(CrossGenDllCmd) @(CoreLib) diff --git a/src/coreclr/debug/CMakeLists.txt b/src/coreclr/debug/CMakeLists.txt index d0a999f65c1485..0d52fa77527ea3 100644 --- a/src/coreclr/debug/CMakeLists.txt +++ b/src/coreclr/debug/CMakeLists.txt @@ -1,3 +1,7 @@ + +add_compile_definitions($<${FEATURE_CORECLR_CACHED_INTERFACE_DISPATCH}:FEATURE_CACHED_INTERFACE_DISPATCH>) +add_compile_definitions($<${FEATURE_CORECLR_VIRTUAL_STUB_DISPATCH}:FEATURE_VIRTUAL_STUB_DISPATCH>) + add_subdirectory(daccess) add_subdirectory(ee) add_subdirectory(di) diff --git a/src/coreclr/debug/daccess/dacdbiimpl.cpp b/src/coreclr/debug/daccess/dacdbiimpl.cpp index eef3dc127b9206..a5f5f7e7653a57 100644 --- a/src/coreclr/debug/daccess/dacdbiimpl.cpp +++ b/src/coreclr/debug/daccess/dacdbiimpl.cpp @@ -3544,7 +3544,9 @@ void DacDbiInterfaceImpl::EnumerateMemRangesForLoaderAllocator(PTR_LoaderAllocat if (pVcsMgr) { if (pVcsMgr->indcell_heap != NULL) heapsToEnumerate.Push(pVcsMgr->indcell_heap); +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH if (pVcsMgr->cache_entry_heap != NULL) heapsToEnumerate.Push(pVcsMgr->cache_entry_heap); +#endif // FEATURE_VIRTUAL_STUB_DISPATCH } TADDR rangeAccumAsTaddr = TO_TADDR(rangeAcummulator); diff --git a/src/coreclr/debug/daccess/request.cpp b/src/coreclr/debug/daccess/request.cpp index d743d164d119c0..d43689d6dba9b4 100644 --- a/src/coreclr/debug/daccess/request.cpp +++ b/src/coreclr/debug/daccess/request.cpp @@ -3620,14 +3620,19 @@ ClrDataAccess::TraverseVirtCallStubHeap(CLRDATA_ADDRESS pAppDomain, VCSHeapType break; case CacheEntryHeap: +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH + // The existence of the CacheEntryHeap is part of the SOS api surface, but currently + // when FEATURE_VIRTUAL_STUB_DISPATCH is not defined, the CacheEntryHeap is not created + // so its commented out in that situation, but is not considered to be a E_INVALIDARG. 
pLoaderHeap = pVcsMgr->cache_entry_heap; +#endif // FEATURE_VIRTUAL_STUB_DISPATCH break; default: hr = E_INVALIDARG; } - if (SUCCEEDED(hr)) + if (SUCCEEDED(hr) && (pLoaderHeap != NULL)) { hr = TraverseLoaderHeapBlock(pLoaderHeap->m_pFirstBlock, pFunc); } @@ -3670,7 +3675,9 @@ static const char *LoaderAllocatorLoaderHeapNames[] = "FixupPrecodeHeap", "NewStubPrecodeHeap", "IndcellHeap", +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH "CacheEntryHeap", +#endif // FEATURE_VIRTUAL_STUB_DISPATCH }; @@ -3714,7 +3721,9 @@ HRESULT ClrDataAccess::GetLoaderAllocatorHeaps(CLRDATA_ADDRESS loaderAllocatorAd else { pLoaderHeaps[i++] = HOST_CDADDR(pVcsMgr->indcell_heap); +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH pLoaderHeaps[i++] = HOST_CDADDR(pVcsMgr->cache_entry_heap); +#endif // FEATURE_VIRTUAL_STUB_DISPATCH } // All of the above are "LoaderHeap" and not the ExplicitControl version. diff --git a/src/coreclr/inc/CrstTypes.def b/src/coreclr/inc/CrstTypes.def index a63679a9fe71e8..2b60d49026b597 100644 --- a/src/coreclr/inc/CrstTypes.def +++ b/src/coreclr/inc/CrstTypes.def @@ -529,3 +529,6 @@ End Crst PerfMap AcquiredAfter CodeVersioning AssemblyList End + +Crst InterfaceDispatchGlobalLists +End \ No newline at end of file diff --git a/src/coreclr/inc/clrconfigvalues.h b/src/coreclr/inc/clrconfigvalues.h index ffc68923d7ddc9..6ffb0cb4f0a2dd 100644 --- a/src/coreclr/inc/clrconfigvalues.h +++ b/src/coreclr/inc/clrconfigvalues.h @@ -581,6 +581,7 @@ RETAIL_CONFIG_DWORD_INFO(EXTERNAL_VirtualCallStubLogging, W("VirtualCallStubLogg CONFIG_DWORD_INFO(INTERNAL_VirtualCallStubMissCount, W("VirtualCallStubMissCount"), 100, "Used only when STUB_LOGGING is defined, which by default is not.") CONFIG_DWORD_INFO(INTERNAL_VirtualCallStubResetCacheCounter, W("VirtualCallStubResetCacheCounter"), 0, "Used only when STUB_LOGGING is defined, which by default is not.") CONFIG_DWORD_INFO(INTERNAL_VirtualCallStubResetCacheIncr, W("VirtualCallStubResetCacheIncr"), 0, "Used only when STUB_LOGGING is defined, which by default is not.") +CONFIG_DWORD_INFO(INTERNAL_UseCachedInterfaceDispatch, W("UseCachedInterfaceDispatch"), 0, "If cached interface dispatch is compiled in, use that instead of virtual stub dispatch") /// /// Watson diff --git a/src/coreclr/inc/crsttypes_generated.h b/src/coreclr/inc/crsttypes_generated.h index 410fb884de27d9..f5bd7a83bb89b5 100644 --- a/src/coreclr/inc/crsttypes_generated.h +++ b/src/coreclr/inc/crsttypes_generated.h @@ -59,66 +59,67 @@ enum CrstType CrstILStubGen = 41, CrstInlineTrackingMap = 42, CrstInstMethodHashTable = 43, - CrstInterop = 44, - CrstInteropData = 45, - CrstIsJMCMethod = 46, - CrstISymUnmanagedReader = 47, - CrstJit = 48, - CrstJitInlineTrackingMap = 49, - CrstJitPatchpoint = 50, - CrstJumpStubCache = 51, - CrstLeafLock = 52, - CrstListLock = 53, - CrstLoaderAllocator = 54, - CrstLoaderAllocatorReferences = 55, - CrstLoaderHeap = 56, - CrstManagedObjectWrapperMap = 57, - CrstMethodDescBackpatchInfoTracker = 58, - CrstMethodTableExposedObject = 59, - CrstModule = 60, - CrstModuleLookupTable = 61, - CrstMulticoreJitHash = 62, - CrstMulticoreJitManager = 63, - CrstNativeImageEagerFixups = 64, - CrstNativeImageLoad = 65, - CrstNotifyGdb = 66, - CrstPEImage = 67, - CrstPendingTypeLoadEntry = 68, - CrstPerfMap = 69, - CrstPgoData = 70, - CrstPinnedByrefValidation = 71, - CrstPinnedHeapHandleTable = 72, - CrstProfilerGCRefDataFreeList = 73, - CrstProfilingAPIStatus = 74, - CrstRCWCache = 75, - CrstRCWCleanupList = 76, - CrstReadyToRunEntryPointToMethodDescMap = 77, - CrstReflection = 78, - 
CrstReJITGlobalRequest = 79, - CrstRetThunkCache = 80, - CrstSigConvert = 81, - CrstSingleUseLock = 82, - CrstStressLog = 83, - CrstStubCache = 84, - CrstStubDispatchCache = 85, - CrstSyncBlockCache = 86, - CrstSyncHashLock = 87, - CrstSystemDomain = 88, - CrstSystemDomainDelayedUnloadList = 89, - CrstThreadIdDispenser = 90, - CrstThreadLocalStorageLock = 91, - CrstThreadStore = 92, - CrstTieredCompilation = 93, - CrstTypeEquivalenceMap = 94, - CrstTypeIDMap = 95, - CrstUMEntryThunkCache = 96, - CrstUMEntryThunkFreeListLock = 97, - CrstUniqueStack = 98, - CrstUnresolvedClassLock = 99, - CrstUnwindInfoTableLock = 100, - CrstVSDIndirectionCellLock = 101, - CrstWrapperTemplate = 102, - kNumberOfCrstTypes = 103 + CrstInterfaceDispatchGlobalLists = 44, + CrstInterop = 45, + CrstInteropData = 46, + CrstIsJMCMethod = 47, + CrstISymUnmanagedReader = 48, + CrstJit = 49, + CrstJitInlineTrackingMap = 50, + CrstJitPatchpoint = 51, + CrstJumpStubCache = 52, + CrstLeafLock = 53, + CrstListLock = 54, + CrstLoaderAllocator = 55, + CrstLoaderAllocatorReferences = 56, + CrstLoaderHeap = 57, + CrstManagedObjectWrapperMap = 58, + CrstMethodDescBackpatchInfoTracker = 59, + CrstMethodTableExposedObject = 60, + CrstModule = 61, + CrstModuleLookupTable = 62, + CrstMulticoreJitHash = 63, + CrstMulticoreJitManager = 64, + CrstNativeImageEagerFixups = 65, + CrstNativeImageLoad = 66, + CrstNotifyGdb = 67, + CrstPEImage = 68, + CrstPendingTypeLoadEntry = 69, + CrstPerfMap = 70, + CrstPgoData = 71, + CrstPinnedByrefValidation = 72, + CrstPinnedHeapHandleTable = 73, + CrstProfilerGCRefDataFreeList = 74, + CrstProfilingAPIStatus = 75, + CrstRCWCache = 76, + CrstRCWCleanupList = 77, + CrstReadyToRunEntryPointToMethodDescMap = 78, + CrstReflection = 79, + CrstReJITGlobalRequest = 80, + CrstRetThunkCache = 81, + CrstSigConvert = 82, + CrstSingleUseLock = 83, + CrstStressLog = 84, + CrstStubCache = 85, + CrstStubDispatchCache = 86, + CrstSyncBlockCache = 87, + CrstSyncHashLock = 88, + CrstSystemDomain = 89, + CrstSystemDomainDelayedUnloadList = 90, + CrstThreadIdDispenser = 91, + CrstThreadLocalStorageLock = 92, + CrstThreadStore = 93, + CrstTieredCompilation = 94, + CrstTypeEquivalenceMap = 95, + CrstTypeIDMap = 96, + CrstUMEntryThunkCache = 97, + CrstUMEntryThunkFreeListLock = 98, + CrstUniqueStack = 99, + CrstUnresolvedClassLock = 100, + CrstUnwindInfoTableLock = 101, + CrstVSDIndirectionCellLock = 102, + CrstWrapperTemplate = 103, + kNumberOfCrstTypes = 104 }; #endif // __CRST_TYPES_INCLUDED @@ -173,6 +174,7 @@ int g_rgCrstLevelMap[] = 6, // CrstILStubGen 2, // CrstInlineTrackingMap 18, // CrstInstMethodHashTable + 0, // CrstInterfaceDispatchGlobalLists 21, // CrstInterop 9, // CrstInteropData 0, // CrstIsJMCMethod @@ -281,6 +283,7 @@ LPCSTR g_rgCrstNameMap[] = "CrstILStubGen", "CrstInlineTrackingMap", "CrstInstMethodHashTable", + "CrstInterfaceDispatchGlobalLists", "CrstInterop", "CrstInteropData", "CrstIsJMCMethod", diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 43b23d1372a84c..e0e6561d819021 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -8408,16 +8408,8 @@ class Compiler reg = REG_EAX; regMask = RBM_EAX; #elif defined(TARGET_AMD64) - if (isNativeAOT) - { - reg = REG_R10; - regMask = RBM_R10; - } - else - { - reg = REG_R11; - regMask = RBM_R11; - } + reg = REG_R11; + regMask = RBM_R11; #elif defined(TARGET_ARM) if (isNativeAOT) { diff --git a/src/coreclr/minipal/minipal.h b/src/coreclr/minipal/minipal.h index 38ab07ec63c54d..afecd9ce74dc72 100644 --- 
a/src/coreclr/minipal/minipal.h +++ b/src/coreclr/minipal/minipal.h @@ -76,3 +76,16 @@ class VMToOSInterface // true if it succeeded, false if it failed static bool ReleaseRWMapping(void* pStart, size_t size); }; + +#if defined(HOST_64BIT) && defined(FEATURE_CACHED_INTERFACE_DISPATCH) +EXTERN_C uint8_t _InterlockedCompareExchange128(int64_t volatile *, int64_t, int64_t, int64_t *); + +#if defined(HOST_WINDOWS) +#pragma intrinsic(_InterlockedCompareExchange128) +#endif + +FORCEINLINE uint8_t PalInterlockedCompareExchange128(_Inout_ int64_t volatile *pDst, int64_t iValueHigh, int64_t iValueLow, int64_t *pComparandAndResult) +{ + return _InterlockedCompareExchange128(pDst, iValueHigh, iValueLow, pComparandAndResult); +} +#endif // defined(HOST_64BIT) && defined(FEATURE_CACHED_INTERFACE_DISPATCH) \ No newline at end of file diff --git a/src/coreclr/nativeaot/CMakeLists.txt b/src/coreclr/nativeaot/CMakeLists.txt index 71e9567b91e54b..e1c43480500970 100644 --- a/src/coreclr/nativeaot/CMakeLists.txt +++ b/src/coreclr/nativeaot/CMakeLists.txt @@ -23,7 +23,7 @@ if(CLR_CMAKE_HOST_UNIX) endif(CLR_CMAKE_TARGET_APPLE) if(CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_I386) - # Allow 16 byte compare-exchange + # Allow 16 byte compare-exchange (cmpxchg16b) add_compile_options(-mcx16) endif(CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_I386) endif (CLR_CMAKE_HOST_UNIX) diff --git a/src/coreclr/nativeaot/Runtime/CMakeLists.txt b/src/coreclr/nativeaot/Runtime/CMakeLists.txt index ccf197b08f6602..43e0ead275931c 100644 --- a/src/coreclr/nativeaot/Runtime/CMakeLists.txt +++ b/src/coreclr/nativeaot/Runtime/CMakeLists.txt @@ -1,9 +1,11 @@ set(GC_DIR ../../gc) +set(RUNTIME_DIR ../../runtime) set(COMMON_RUNTIME_SOURCES allocheap.cpp rhassert.cpp - CachedInterfaceDispatch.cpp + ${RUNTIME_DIR}/CachedInterfaceDispatch.cpp + CachedInterfaceDispatchAot.cpp Crst.cpp DebugHeader.cpp MethodTable.cpp @@ -76,6 +78,7 @@ include_directories(.) include_directories(${GC_DIR}) include_directories(${GC_DIR}/env) include_directories(${CMAKE_CURRENT_BINARY_DIR}/eventpipe/inc) +include_directories(${RUNTIME_DIR}) if (WIN32) set(GC_HEADERS @@ -208,11 +211,17 @@ list(APPEND RUNTIME_SOURCES_ARCH_ASM ${ARCH_SOURCES_DIR}/MiscStubs.${ASM_SUFFIX} ${ARCH_SOURCES_DIR}/PInvoke.${ASM_SUFFIX} ${ARCH_SOURCES_DIR}/InteropThunksHelpers.${ASM_SUFFIX} - ${ARCH_SOURCES_DIR}/StubDispatch.${ASM_SUFFIX} + ${RUNTIME_DIR}/${ARCH_SOURCES_DIR}/StubDispatch.${ASM_SUFFIX} ${ARCH_SOURCES_DIR}/UniversalTransition.${ASM_SUFFIX} ${ARCH_SOURCES_DIR}/WriteBarriers.${ASM_SUFFIX} ) +if (CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64) + list(APPEND RUNTIME_SOURCES_ARCH_ASM + ${ARCH_SOURCES_DIR}/CachedInterfaceDispatchAot.${ASM_SUFFIX} + ) +endif () + # Add architecture specific folder for looking up headers. convert_to_absolute_path(ARCH_SOURCES_DIR ${ARCH_SOURCES_DIR}) include_directories(${ARCH_SOURCES_DIR}) @@ -289,7 +298,7 @@ if (CLR_CMAKE_TARGET_UNIX) endif(CLR_CMAKE_TARGET_UNIX) -set(RUNTIME_DIR ${CMAKE_CURRENT_SOURCE_DIR}) +set(NATIVEAOT_RUNTIME_DIR ${CMAKE_CURRENT_SOURCE_DIR}) list(APPEND COMMON_RUNTIME_SOURCES ${GC_HEADERS}) diff --git a/src/coreclr/nativeaot/Runtime/CachedInterfaceDispatchAot.cpp b/src/coreclr/nativeaot/Runtime/CachedInterfaceDispatchAot.cpp new file mode 100644 index 00000000000000..8eb16e8b630983 --- /dev/null +++ b/src/coreclr/nativeaot/Runtime/CachedInterfaceDispatchAot.cpp @@ -0,0 +1,55 @@ +// Licensed to the .NET Foundation under one or more agreements. 
+// The .NET Foundation licenses this file to you under the MIT license. + +#include "common.h" +#include "CachedInterfaceDispatchPal.h" +#include "CachedInterfaceDispatch.h" + +// The base memory allocator. +static AllocHeap * g_pAllocHeap = NULL; + +bool InterfaceDispatch_InitializePal() +{ + g_pAllocHeap = new (nothrow) AllocHeap(); + if (g_pAllocHeap == NULL) + return false; + + if (!g_pAllocHeap->Init()) + return false; + + return true; +} + +// Allocate memory aligned at sizeof(void*)*2 boundaries +void *InterfaceDispatch_AllocDoublePointerAligned(size_t size) +{ + return g_pAllocHeap->AllocAligned(size, sizeof(void*) * 2); +} + +// Allocate memory aligned at sizeof(void*) boundaries + +void *InterfaceDispatch_AllocPointerAligned(size_t size) +{ + return g_pAllocHeap->AllocAligned(size, sizeof(void*)); +} + +FCIMPL4(PCODE, RhpUpdateDispatchCellCache, InterfaceDispatchCell * pCell, PCODE pTargetCode, MethodTable* pInstanceType, DispatchCellInfo *pNewCellInfo) +{ + return InterfaceDispatch_UpdateDispatchCellCache(pCell, pTargetCode, pInstanceType, pNewCellInfo); +} +FCIMPLEND + +FCIMPL2(PCODE, RhpSearchDispatchCellCache, InterfaceDispatchCell * pCell, MethodTable* pInstanceType) +{ + return InterfaceDispatch_SearchDispatchCellCache(pCell, pInstanceType); +} +FCIMPLEND + +// Given a dispatch cell, get the type and slot associated with it. This function MUST be implemented +// in cooperative native code, as the m_pCache field on the cell is unsafe to access from managed +// code due to its use of the GC state as a lock, and as lifetime control +FCIMPL2(void, RhpGetDispatchCellInfo, InterfaceDispatchCell * pCell, DispatchCellInfo* pDispatchCellInfo) +{ + *pDispatchCellInfo = pCell->GetDispatchCellInfo(); +} +FCIMPLEND diff --git a/src/coreclr/nativeaot/Runtime/CachedInterfaceDispatchPal.h b/src/coreclr/nativeaot/Runtime/CachedInterfaceDispatchPal.h new file mode 100644 index 00000000000000..7edc8347b28848 --- /dev/null +++ b/src/coreclr/nativeaot/Runtime/CachedInterfaceDispatchPal.h @@ -0,0 +1,38 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +#ifndef __CACHEDINTERFACEDISPATCHPAL_H__ +#define __CACHEDINTERFACEDISPATCHPAL_H__ + +#include "CommonTypes.h" +#include "CommonMacros.h" +#include "daccess.h" +#include "DebugMacrosExt.h" +#include "PalRedhawkCommon.h" +#include "PalRedhawk.h" +#include "rhassert.h" +#include "slist.h" +#include "holder.h" +#include "Crst.h" +#include "RedhawkWarnings.h" +#include "TargetPtrs.h" +#include "MethodTable.h" +#include "Range.h" +#include "allocheap.h" +#include "rhbinder.h" +#include "ObjectLayout.h" +#include "shash.h" +#include "TypeManager.h" +#include "RuntimeInstance.h" +#include "MethodTable.inl" +#include "CommonMacros.inl" + +bool InterfaceDispatch_InitializePal(); + +// Allocate memory aligned at sizeof(void*)*2 boundaries +void *InterfaceDispatch_AllocDoublePointerAligned(size_t size); +// Allocate memory aligned at sizeof(void*) boundaries + +void *InterfaceDispatch_AllocPointerAligned(size_t size); + +#endif // __CACHEDINTERFACEDISPATCHPAL_H__ \ No newline at end of file diff --git a/src/coreclr/nativeaot/Runtime/EHHelpers.cpp b/src/coreclr/nativeaot/Runtime/EHHelpers.cpp index a3643e32f5eaf7..aff815a48170f5 100644 --- a/src/coreclr/nativeaot/Runtime/EHHelpers.cpp +++ b/src/coreclr/nativeaot/Runtime/EHHelpers.cpp @@ -266,6 +266,7 @@ EXTERN_C CODE_LOCATION RhpInterfaceDispatchAVLocation8; EXTERN_C CODE_LOCATION RhpInterfaceDispatchAVLocation16; EXTERN_C CODE_LOCATION RhpInterfaceDispatchAVLocation32; EXTERN_C CODE_LOCATION RhpInterfaceDispatchAVLocation64; +EXTERN_C CODE_LOCATION RhpVTableOffsetDispatchAVLocation; static bool InInterfaceDispatchHelper(uintptr_t faultingIP) { @@ -280,6 +281,7 @@ static bool InInterfaceDispatchHelper(uintptr_t faultingIP) (uintptr_t)&RhpInterfaceDispatchAVLocation16, (uintptr_t)&RhpInterfaceDispatchAVLocation32, (uintptr_t)&RhpInterfaceDispatchAVLocation64, + (uintptr_t)&RhpVTableOffsetDispatchAVLocation, }; // compare the IP against the list of known possible AV locations in the interface dispatch helpers diff --git a/src/coreclr/nativeaot/Runtime/Full/CMakeLists.txt b/src/coreclr/nativeaot/Runtime/Full/CMakeLists.txt index 11618fd78edc0a..821b4fe8ca9e2e 100644 --- a/src/coreclr/nativeaot/Runtime/Full/CMakeLists.txt +++ b/src/coreclr/nativeaot/Runtime/Full/CMakeLists.txt @@ -59,16 +59,16 @@ endif (CLR_CMAKE_TARGET_WIN32) # Get the current list of definitions get_compile_definitions(DEFINITIONS) -set(ASM_OFFSETS_CSPP ${RUNTIME_DIR}/../Runtime.Base/src/AsmOffsets.cspp) +set(ASM_OFFSETS_CSPP ${NATIVEAOT_RUNTIME_DIR}/../Runtime.Base/src/AsmOffsets.cspp) if(WIN32) set(COMPILER_LANGUAGE "") set(PREPROCESSOR_FLAGS -EP -nologo) - set(ASM_OFFSETS_CPP ${RUNTIME_DIR}/windows/AsmOffsets.cpp) + set(ASM_OFFSETS_CPP ${NATIVEAOT_RUNTIME_DIR}/windows/AsmOffsets.cpp) else() set(COMPILER_LANGUAGE -x c++) set(PREPROCESSOR_FLAGS -E -P) - set(ASM_OFFSETS_CPP ${RUNTIME_DIR}/unix/AsmOffsets.cpp) + set(ASM_OFFSETS_CPP ${NATIVEAOT_RUNTIME_DIR}/unix/AsmOffsets.cpp) endif() add_custom_command( @@ -80,9 +80,9 @@ add_custom_command( ) add_custom_command( - COMMAND ${CMAKE_CXX_COMPILER} ${DEFINITIONS} ${PREPROCESSOR_FLAGS} -I"${RUNTIME_DIR}" -I"${ARCH_SOURCES_DIR}" "${ASM_OFFSETS_CPP}" >"${CMAKE_CURRENT_BINARY_DIR}/AsmOffsets.inc" + COMMAND ${CMAKE_CXX_COMPILER} ${DEFINITIONS} ${PREPROCESSOR_FLAGS} -I"${NATIVEAOT_RUNTIME_DIR}" -I"${ARCH_SOURCES_DIR}" "${ASM_OFFSETS_CPP}" >"${CMAKE_CURRENT_BINARY_DIR}/AsmOffsets.inc" OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/AsmOffsets.inc" - DEPENDS "${ASM_OFFSETS_CPP}" "${RUNTIME_DIR}/AsmOffsets.h" + DEPENDS "${ASM_OFFSETS_CPP}" 
"${NATIVEAOT_RUNTIME_DIR}/AsmOffsets.h" COMMENT "Generating AsmOffsets.inc" ) diff --git a/src/coreclr/nativeaot/Runtime/Portable/CMakeLists.txt b/src/coreclr/nativeaot/Runtime/Portable/CMakeLists.txt index 46a85046ca1f2b..8a33f1d14056c4 100644 --- a/src/coreclr/nativeaot/Runtime/Portable/CMakeLists.txt +++ b/src/coreclr/nativeaot/Runtime/Portable/CMakeLists.txt @@ -10,12 +10,12 @@ target_link_libraries(PortableRuntime PRIVATE aotminipal) # Get the current list of definitions get_compile_definitions(DEFINITIONS) -set(ASM_OFFSETS_CSPP ${RUNTIME_DIR}/../Runtime.Base/src/AsmOffsets.cspp) +set(ASM_OFFSETS_CSPP ${NATIVEAOT_RUNTIME_DIR}/../Runtime.Base/src/AsmOffsets.cspp) if(WIN32) set(COMPILER_LANGUAGE "") set(PREPROCESSOR_FLAGS -EP -nologo) - set(ASM_OFFSETS_CPP ${RUNTIME_DIR}/windows/AsmOffsets.cpp) + set(ASM_OFFSETS_CPP ${NATIVEAOT_RUNTIME_DIR}/windows/AsmOffsets.cpp) set_target_properties(aotminipal PROPERTIES COMPILE_PDB_NAME "aotminipal" @@ -23,14 +23,14 @@ if(WIN32) else() set(COMPILER_LANGUAGE -x c++) set(PREPROCESSOR_FLAGS -E -P) - set(ASM_OFFSETS_CPP ${RUNTIME_DIR}/unix/AsmOffsets.cpp) + set(ASM_OFFSETS_CPP ${NATIVEAOT_RUNTIME_DIR}/unix/AsmOffsets.cpp) endif() add_custom_command( # The AsmOffsetsPortable.cs is consumed later by the managed build TARGET PortableRuntime COMMAND ${CMAKE_CXX_COMPILER} ${COMPILER_LANGUAGE} ${DEFINITIONS} ${PREPROCESSOR_FLAGS} -I"${ARCH_SOURCES_DIR}" "${ASM_OFFSETS_CSPP}" >"${CMAKE_CURRENT_BINARY_DIR}/AsmOffsetsPortable.cs" - DEPENDS "${RUNTIME_DIR}/AsmOffsets.cpp" "${RUNTIME_DIR}/AsmOffsets.h" + DEPENDS "${NATIVEAOT_RUNTIME_DIR}/AsmOffsets.cpp" "${NATIVEAOT_RUNTIME_DIR}/AsmOffsets.h" ) install_static_library(PortableRuntime aotsdk nativeaot) diff --git a/src/coreclr/nativeaot/Runtime/SyncClean.cpp b/src/coreclr/nativeaot/Runtime/SyncClean.cpp index 8204193f50f140..a856a584744acd 100644 --- a/src/coreclr/nativeaot/Runtime/SyncClean.cpp +++ b/src/coreclr/nativeaot/Runtime/SyncClean.cpp @@ -23,6 +23,6 @@ void SyncClean::CleanUp () { #ifdef FEATURE_CACHED_INTERFACE_DISPATCH // Update any interface dispatch caches that were unsafe to modify outside of this GC. - ReclaimUnusedInterfaceDispatchCaches(); + InterfaceDispatch_ReclaimUnusedInterfaceDispatchCaches(); #endif } diff --git a/src/coreclr/nativeaot/Runtime/amd64/AsmMacros_Shared.h b/src/coreclr/nativeaot/Runtime/amd64/AsmMacros_Shared.h new file mode 100644 index 00000000000000..eb9905ffca6383 --- /dev/null +++ b/src/coreclr/nativeaot/Runtime/amd64/AsmMacros_Shared.h @@ -0,0 +1,7 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// This file is used to allow sharing of assembly code between NativeAOT and CoreCLR, which have different conventions about how to ensure that constants offsets are accessible + +#include // generated by the build from AsmOffsets.cpp +#include diff --git a/src/coreclr/nativeaot/Runtime/amd64/AsmMacros_Shared.inc b/src/coreclr/nativeaot/Runtime/amd64/AsmMacros_Shared.inc new file mode 100644 index 00000000000000..956d4d22e38313 --- /dev/null +++ b/src/coreclr/nativeaot/Runtime/amd64/AsmMacros_Shared.inc @@ -0,0 +1,6 @@ +; Licensed to the .NET Foundation under one or more agreements. +; The .NET Foundation licenses this file to you under the MIT license. 
+ +; This file is used to allow sharing of assembly code between NativeAOT and CoreCLR, which have different conventions about how to ensure that constants offsets are accessible + +include AsmMacros.inc \ No newline at end of file diff --git a/src/coreclr/nativeaot/Runtime/amd64/CachedInterfaceDispatchAot.S b/src/coreclr/nativeaot/Runtime/amd64/CachedInterfaceDispatchAot.S new file mode 100644 index 00000000000000..2500ea41767266 --- /dev/null +++ b/src/coreclr/nativeaot/Runtime/amd64/CachedInterfaceDispatchAot.S @@ -0,0 +1,27 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +.intel_syntax noprefix +#include // generated by the build from AsmOffsets.cpp +#include + +// trick to avoid PLT relocation at runtime which corrupts registers +#define REL_C_FUNC(name) C_FUNC(name)@gotpcrel + +// Stub dispatch routine for dispatch to a vtable slot +LEAF_ENTRY RhpVTableOffsetDispatch, _TEXT + int 3 + // UNIXTODO: Implement this function + ALTERNATE_ENTRY RhpVTableOffsetDispatchAVLocation + int 3 +LEAF_END RhpVTableOffsetDispatch, _TEXT + +// Cache miss case, call the runtime to resolve the target and update the cache. +// Use universal transition helper to allow an exception to flow out of resolution +LEAF_ENTRY RhpInterfaceDispatchSlow, _TEXT + // r11 already contains the indirection cell address, so it will naturally be passed by + // the universal transition thunk as an argument to RhpCidResolve + mov r10, [rip + REL_C_FUNC(RhpCidResolve)] + jmp qword ptr [rip + REL_C_FUNC(RhpUniversalTransition_DebugStepTailCall)] + +LEAF_END RhpInterfaceDispatchSlow, _TEXT diff --git a/src/coreclr/nativeaot/Runtime/amd64/CachedInterfaceDispatchAot.asm b/src/coreclr/nativeaot/Runtime/amd64/CachedInterfaceDispatchAot.asm new file mode 100644 index 00000000000000..e1caae3adedbc0 --- /dev/null +++ b/src/coreclr/nativeaot/Runtime/amd64/CachedInterfaceDispatchAot.asm @@ -0,0 +1,36 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + +include AsmMacros.inc + +EXTERN RhpCidResolve : PROC +EXTERN RhpUniversalTransition_DebugStepTailCall : PROC + + +;; Stub dispatch routine for dispatch to a vtable slot +LEAF_ENTRY RhpVTableOffsetDispatch, _TEXT + ;; r11 currently contains the indirection cell address. + ;; load rax to point to the vtable offset (which is stored in the m_pCache field). + mov rax, [r11 + OFFSETOF__InterfaceDispatchCell__m_pCache] + + ;; Load the MethodTable from the object instance in rcx, and add it to the vtable offset + ;; to get the address in the vtable of what we want to dereference + ALTERNATE_ENTRY RhpVTableOffsetDispatchAVLocation + add rax, [rcx] + + ;; Load the target address of the vtable into rax + mov rax, [rax] + + TAILJMP_RAX +LEAF_END RhpVTableOffsetDispatch, _TEXT + +;; Cache miss case, call the runtime to resolve the target and update the cache.
+;; Use universal transition helper to allow an exception to flow out of resolution +LEAF_ENTRY RhpInterfaceDispatchSlow, _TEXT + ;; r11 contains indirection cell address + lea r10, RhpCidResolve + jmp RhpUniversalTransition_DebugStepTailCall + +LEAF_END RhpInterfaceDispatchSlow, _TEXT + +end diff --git a/src/coreclr/nativeaot/Runtime/arm64/AsmMacros_Shared.h b/src/coreclr/nativeaot/Runtime/arm64/AsmMacros_Shared.h new file mode 100644 index 00000000000000..f67496574352de --- /dev/null +++ b/src/coreclr/nativeaot/Runtime/arm64/AsmMacros_Shared.h @@ -0,0 +1,11 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// This file is used to allow sharing of assembly code between NativeAOT and CoreCLR, which have different conventions about how to ensure that constants offsets are accessible + +#ifdef TARGET_WINDOWS +#include "AsmMacros.h" +#else +#include +#include "AsmOffsets.inc" +#endif diff --git a/src/coreclr/nativeaot/Runtime/arm64/CachedInterfaceDispatchAot.S b/src/coreclr/nativeaot/Runtime/arm64/CachedInterfaceDispatchAot.S new file mode 100644 index 00000000000000..d2a1131c2c8686 --- /dev/null +++ b/src/coreclr/nativeaot/Runtime/arm64/CachedInterfaceDispatchAot.S @@ -0,0 +1,46 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include +#include "AsmOffsets.inc" + +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH + + .extern RhpCidResolve + .extern RhpUniversalTransition_DebugStepTailCall + +// +// Stub dispatch routine for dispatch to a vtable slot +// + LEAF_ENTRY RhpVTableOffsetDispatch, _TEXT + // x11 contains the interface dispatch cell address. + // load x12 to point to the vtable offset (which is stored in the m_pCache field). + ldr x12, [x11, #OFFSETOF__InterfaceDispatchCell__m_pCache] + + // Load the MethodTable from the object instance in x0, and add it to the vtable offset + // to get the address in the vtable of what we want to dereference + ALTERNATE_ENTRY RhpVTableOffsetDispatchAVLocation + ldr x13, [x0] + add x12, x12, x13 + + // Load the target address of the vtable into x12 + ldr x12, [x12] + + br x12 + LEAF_END RhpVTableOffsetDispatch, _TEXT + +// +// Cache miss case, call the runtime to resolve the target and update the cache. +// Use universal transition helper to allow an exception to flow out of resolution. +// + LEAF_ENTRY RhpInterfaceDispatchSlow, _TEXT + // x11 contains the interface dispatch cell address. + // Calling convention of the universal thunk is: + // xip0: target address for the thunk to call + // xip1: parameter of the thunk's target + PREPARE_EXTERNAL_VAR RhpCidResolve, xip0 + mov xip1, x11 + b C_FUNC(RhpUniversalTransition_DebugStepTailCall) + LEAF_END RhpInterfaceDispatchSlow, _TEXT + +#endif // FEATURE_CACHED_INTERFACE_DISPATCH diff --git a/src/coreclr/nativeaot/Runtime/arm64/CachedInterfaceDispatchAot.asm b/src/coreclr/nativeaot/Runtime/arm64/CachedInterfaceDispatchAot.asm new file mode 100644 index 00000000000000..5b97f041f75f1d --- /dev/null +++ b/src/coreclr/nativeaot/Runtime/arm64/CachedInterfaceDispatchAot.asm @@ -0,0 +1,49 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. 
+ +#include "AsmMacros.h" + + TEXTAREA + +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH + + EXTERN RhpCidResolve + EXTERN RhpUniversalTransition_DebugStepTailCall + +;; +;; Stub dispatch routine for dispatch to a vtable slot +;; + LEAF_ENTRY RhpVTableOffsetDispatch + ;; x11 contains the interface dispatch cell address. + ;; load x12 to point to the vtable offset (which is stored in the m_pCache field). + ldr x12, [x11, #OFFSETOF__InterfaceDispatchCell__m_pCache] + + ;; Load the MethodTable from the object instance in x0, and add it to the vtable offset + ;; to get the address in the vtable of what we want to dereference + ALTERNATE_ENTRY RhpVTableOffsetDispatchAVLocation + ldr x13, [x0] + add x12, x12, x13 + + ;; Load the target address of the vtable into x12 + ldr x12, [x12] + + br x12 + LEAF_END RhpVTableOffsetDispatch + +;; +;; Cache miss case, call the runtime to resolve the target and update the cache. +;; Use universal transition helper to allow an exception to flow out of resolution. +;; + LEAF_ENTRY RhpInterfaceDispatchSlow + ;; x11 contains the interface dispatch cell address. + ;; Calling convention of the universal thunk is: + ;; xip0: target address for the thunk to call + ;; xip1: parameter of the thunk's target + ldr xip0, =RhpCidResolve + mov xip1, x11 + b RhpUniversalTransition_DebugStepTailCall + LEAF_END RhpInterfaceDispatchSlow + +#endif // FEATURE_CACHED_INTERFACE_DISPATCH + + END diff --git a/src/coreclr/nativeaot/Runtime/eventpipe/CMakeLists.txt b/src/coreclr/nativeaot/Runtime/eventpipe/CMakeLists.txt index 6b55b13b76ae19..172d388a0b23a5 100644 --- a/src/coreclr/nativeaot/Runtime/eventpipe/CMakeLists.txt +++ b/src/coreclr/nativeaot/Runtime/eventpipe/CMakeLists.txt @@ -128,15 +128,15 @@ list(APPEND AOT_EVENTPIPE_SHIM_HEADERS list(APPEND AOT_EVENTPIPE_MANAGED_TO_NATIVE_SOURCES - ${RUNTIME_DIR}/eventpipeinternal.cpp - ${RUNTIME_DIR}/EnabledEventPipeInterface.cpp - ${RUNTIME_DIR}/runtimeeventinternal.cpp + ${NATIVEAOT_RUNTIME_DIR}/eventpipeinternal.cpp + ${NATIVEAOT_RUNTIME_DIR}/EnabledEventPipeInterface.cpp + ${NATIVEAOT_RUNTIME_DIR}/runtimeeventinternal.cpp ) if (FEATURE_EVENT_TRACE) list(APPEND AOT_EVENTTRACE_SOURCES - ${RUNTIME_DIR}/eventtrace.cpp - ${RUNTIME_DIR}/profheapwalkhelper.cpp + ${NATIVEAOT_RUNTIME_DIR}/eventtrace.cpp + ${NATIVEAOT_RUNTIME_DIR}/profheapwalkhelper.cpp ) # These are carry-overs from .NET Native and only included for ETW currently @@ -144,15 +144,15 @@ if (FEATURE_EVENT_TRACE) # gcheap : GCHeapDump, GCHeapSurvivalAndMovement - not prioritizing for nativeaot yet if (FEATURE_ETW) list(APPEND AOT_EVENTTRACE_SOURCES - ${RUNTIME_DIR}/eventtrace_bulktype.cpp - ${RUNTIME_DIR}/eventtrace_gcheap.cpp + ${NATIVEAOT_RUNTIME_DIR}/eventtrace_bulktype.cpp + ${NATIVEAOT_RUNTIME_DIR}/eventtrace_gcheap.cpp ) endif() if(CLR_CMAKE_TARGET_WIN32) - set_source_files_properties(${GEN_EVENTPIPE_PROVIDER_SOURCES} PROPERTIES COMPILE_FLAGS "/FI\"${RUNTIME_DIR}/eventpipe/NativeaotEventPipeSupport.h\"") - set_source_files_properties(${GEN_EVENTPIPE_PLAT_AGNOSTIC_SOURCES} PROPERTIES COMPILE_FLAGS "/FI\"${RUNTIME_DIR}/eventpipe/NativeaotEventPipeSupport.h\"") - set_source_files_properties(${AOT_EVENTTRACE_SOURCES} PROPERTIES COMPILE_FLAGS "/FI\"${RUNTIME_DIR}/eventpipe/NativeaotEventPipeSupport.h\"") + set_source_files_properties(${GEN_EVENTPIPE_PROVIDER_SOURCES} PROPERTIES COMPILE_FLAGS "/FI\"${NATIVEAOT_RUNTIME_DIR}/eventpipe/NativeaotEventPipeSupport.h\"") + set_source_files_properties(${GEN_EVENTPIPE_PLAT_AGNOSTIC_SOURCES} PROPERTIES COMPILE_FLAGS 
"/FI\"${NATIVEAOT_RUNTIME_DIR}/eventpipe/NativeaotEventPipeSupport.h\"") + set_source_files_properties(${AOT_EVENTTRACE_SOURCES} PROPERTIES COMPILE_FLAGS "/FI\"${NATIVEAOT_RUNTIME_DIR}/eventpipe/NativeaotEventPipeSupport.h\"") endif() endif() @@ -174,7 +174,7 @@ set_target_properties(eventpipe-shared-objects PROPERTIES ) if (CLR_CMAKE_TARGET_WIN32) target_compile_options(eventpipe-shared-objects PRIVATE - "/FI${RUNTIME_DIR}/eventpipe/NativeaotEventPipeSupport.h") + "/FI${NATIVEAOT_RUNTIME_DIR}/eventpipe/NativeaotEventPipeSupport.h") # Install the compile PDB for the eventpipe unity builds. install(FILES "${CMAKE_CURRENT_BINARY_DIR}/$/eventpipe-shared-objects.pdb" DESTINATION aotsdk COMPONENT nativeaot) @@ -194,10 +194,10 @@ list(APPEND EVENTPIPE_SOURCES ) list(APPEND AOT_EVENTPIPE_DISABLED_SOURCES - ${RUNTIME_DIR}/DisabledEventPipeInterface.cpp - ${RUNTIME_DIR}/disabledeventpipeinternal.cpp - ${RUNTIME_DIR}/disabledeventtrace.cpp - ${RUNTIME_DIR}/disabledruntimeeventinternal.cpp + ${NATIVEAOT_RUNTIME_DIR}/DisabledEventPipeInterface.cpp + ${NATIVEAOT_RUNTIME_DIR}/disabledeventpipeinternal.cpp + ${NATIVEAOT_RUNTIME_DIR}/disabledeventtrace.cpp + ${NATIVEAOT_RUNTIME_DIR}/disabledruntimeeventinternal.cpp ${GEN_EVENTPIPE_PLAT_AGNOSTIC_DISABLED_SOURCES} ) diff --git a/src/coreclr/nativeaot/Runtime/inc/rhbinder.h b/src/coreclr/nativeaot/Runtime/inc/rhbinder.h index b33f4376cbd893..563117596da1f1 100644 --- a/src/coreclr/nativeaot/Runtime/inc/rhbinder.h +++ b/src/coreclr/nativeaot/Runtime/inc/rhbinder.h @@ -28,6 +28,11 @@ struct DispatchCellInfo uint8_t HasCache = 0; uint32_t MetadataToken = 0; uint32_t VTableOffset = 0; + + uint32_t GetVTableOffset() const + { + return VTableOffset; + } }; struct InterfaceDispatchCacheHeader diff --git a/src/coreclr/nativeaot/Runtime/startup.cpp b/src/coreclr/nativeaot/Runtime/startup.cpp index 0c97a5e312436e..dffebe8bbde3c9 100644 --- a/src/coreclr/nativeaot/Runtime/startup.cpp +++ b/src/coreclr/nativeaot/Runtime/startup.cpp @@ -95,7 +95,7 @@ static bool InitDLL(HANDLE hPalInstance) // // Initialize interface dispatch. // - if (!InitializeInterfaceDispatch()) + if (!InterfaceDispatch_Initialize()) return false; #endif diff --git a/src/coreclr/pal/inc/pal.h b/src/coreclr/pal/inc/pal.h index 7c6384ee655729..47f73c3214a6f7 100644 --- a/src/coreclr/pal/inc/pal.h +++ b/src/coreclr/pal/inc/pal.h @@ -3639,6 +3639,27 @@ Define_InterlockMethod( ((PVOID)(UINT_PTR)InterlockedCompareExchange((PLONG)(UINT_PTR)(Destination), (LONG)(UINT_PTR)(ExChange), (LONG)(UINT_PTR)(Comperand))) #endif +#if defined(HOST_64BIT) && defined(FEATURE_CACHED_INTERFACE_DISPATCH) +FORCEINLINE uint8_t _InterlockedCompareExchange128(int64_t volatile *pDst, int64_t iValueHigh, int64_t iValueLow, int64_t *pComparandAndResult) +{ + __int128_t iComparand = ((__int128_t)pComparandAndResult[1] << 64) + (uint64_t)pComparandAndResult[0]; + // TODO-LOONGARCH64: the 128-bit CAS is supported starting from the 3A6000 CPU (ISA1.1). + // When running on older hardware that doesn't support native CAS-128, the system falls back + // to a mutex-based approach via libatomic, which is not suitable for runtime requirements. + // + // TODO-RISCV64: double-check if libatomic's emulated CAS-128 works as expected once AOT applications are + // functional on linux-riscv64: https://github.com/dotnet/runtime/issues/106223. + // CAS-128 is natively supported starting with the Zacas extension in Linux 6.8; however, hardware support + // for RVA23 profile is not available at the time of writing. 
+ // + // See https://github.com/dotnet/runtime/issues/109276. + __int128_t iResult = __sync_val_compare_and_swap((__int128_t volatile*)pDst, iComparand, ((__int128_t)iValueHigh << 64) + (uint64_t)iValueLow); + PAL_InterlockedOperationBarrier(); + pComparandAndResult[0] = (int64_t)iResult; pComparandAndResult[1] = (int64_t)(iResult >> 64); + return iComparand == iResult; +} +#endif + /*++ Function: MemoryBarrier diff --git a/src/coreclr/pal/inc/unixasmmacrosamd64.inc b/src/coreclr/pal/inc/unixasmmacrosamd64.inc index 31093a4073d2ed..a70aa048938abf 100644 --- a/src/coreclr/pal/inc/unixasmmacrosamd64.inc +++ b/src/coreclr/pal/inc/unixasmmacrosamd64.inc @@ -23,6 +23,16 @@ C_FUNC(\Name) = . .endm +.macro ALTERNATE_ENTRY Name +#if defined(__APPLE__) + .alt_entry C_FUNC(\Name) + .private_extern C_FUNC(\Name) +#else + .global C_FUNC(\Name) +#endif +C_FUNC(\Name): +.endm + .macro LEAF_ENTRY Name, Section .global C_FUNC(\Name) #if defined(__APPLE__) diff --git a/src/coreclr/pal/inc/unixasmmacrosarm64.inc b/src/coreclr/pal/inc/unixasmmacrosarm64.inc index 9e86779d4511bc..1e9a8a1e2bba7e 100644 --- a/src/coreclr/pal/inc/unixasmmacrosarm64.inc +++ b/src/coreclr/pal/inc/unixasmmacrosarm64.inc @@ -26,6 +26,17 @@ C_FUNC(\Name): .endm +.macro ALTERNATE_ENTRY Name +#if defined(__APPLE__) + .alt_entry C_FUNC(\Name) + .private_extern C_FUNC(\Name) +#else + .global C_FUNC(\Name) + .hidden C_FUNC(\Name) +#endif +C_FUNC(\Name): +.endm + .macro LEAF_ENTRY Name, Section .global C_FUNC(\Name) #if defined(__APPLE__) diff --git a/src/coreclr/nativeaot/Runtime/CachedInterfaceDispatch.cpp b/src/coreclr/runtime/CachedInterfaceDispatch.cpp similarity index 87% rename from src/coreclr/nativeaot/Runtime/CachedInterfaceDispatch.cpp rename to src/coreclr/runtime/CachedInterfaceDispatch.cpp index 2938ee70974073..891c2b94c0baa2 100644 --- a/src/coreclr/nativeaot/Runtime/CachedInterfaceDispatch.cpp +++ b/src/coreclr/runtime/CachedInterfaceDispatch.cpp @@ -8,30 +8,7 @@ // ============================================================================ #include "common.h" #ifdef FEATURE_CACHED_INTERFACE_DISPATCH - -#include "CommonTypes.h" -#include "CommonMacros.h" -#include "daccess.h" -#include "DebugMacrosExt.h" -#include "PalRedhawkCommon.h" -#include "PalRedhawk.h" -#include "rhassert.h" -#include "slist.h" -#include "holder.h" -#include "Crst.h" -#include "RedhawkWarnings.h" -#include "TargetPtrs.h" -#include "MethodTable.h" -#include "Range.h" -#include "allocheap.h" -#include "rhbinder.h" -#include "ObjectLayout.h" -#include "shash.h" -#include "TypeManager.h" -#include "RuntimeInstance.h" -#include "MethodTable.inl" -#include "CommonMacros.inl" - +#include "CachedInterfaceDispatchPal.h" #include "CachedInterfaceDispatch.h" // We always allocate cache sizes with a power of 2 number of entries. We have a maximum size we support, @@ -212,9 +189,6 @@ static InterfaceDispatchCache * g_rgFreeLists[CID_MAX_CACHE_SIZE_LOG2 + 1]; // it imposes too much space overhead on list entries on 64-bit (each is actually 16 bytes). static CrstStatic g_sListLock; -// The base memory allocator. -static AllocHeap * g_pAllocHeap = NULL; - // Each cache size has an associated stub used to perform lookup over that cache. 
extern "C" void RhpInterfaceDispatch1(); extern "C" void RhpInterfaceDispatch2(); @@ -269,10 +243,9 @@ static uintptr_t AllocateCache(uint32_t cCacheEntries, InterfaceDispatchCache * { if (pNewCellInfo->CellType == DispatchCellType::VTableOffset) { - ASSERT(pNewCellInfo->VTableOffset < InterfaceDispatchCell::IDC_MaxVTableOffsetPlusOne); *ppStub = (void *)&RhpVTableOffsetDispatch; - ASSERT(!InterfaceDispatchCell::IsCache(pNewCellInfo->VTableOffset)); - return pNewCellInfo->VTableOffset; + ASSERT(!InterfaceDispatchCell::IsCache(pNewCellInfo->GetVTableOffset())); + return pNewCellInfo->GetVTableOffset(); } ASSERT((cCacheEntries >= 1) && (cCacheEntries <= CID_MAX_CACHE_SIZE)); @@ -299,9 +272,8 @@ static uintptr_t AllocateCache(uint32_t cCacheEntries, InterfaceDispatchCache * if (pCache == NULL) { // No luck with the free list, allocate the cache from via the AllocHeap. - pCache = (InterfaceDispatchCache*)g_pAllocHeap->AllocAligned(sizeof(InterfaceDispatchCache) + - (sizeof(InterfaceDispatchCacheEntry) * cCacheEntries), - sizeof(void*) * 2); + pCache = (InterfaceDispatchCache*)InterfaceDispatch_AllocDoublePointerAligned(sizeof(InterfaceDispatchCache) + + (sizeof(InterfaceDispatchCacheEntry) * cCacheEntries)); if (pCache == NULL) return (uintptr_t)NULL; @@ -342,7 +314,7 @@ static uintptr_t AllocateCache(uint32_t cCacheEntries, InterfaceDispatchCache * // Discards a cache by adding it to a list of caches that may still be in use but will be made available for // re-allocation at the next GC. -static void DiscardCache(InterfaceDispatchCache * pCache) +void InterfaceDispatch_DiscardCache(InterfaceDispatchCache * pCache) { CID_COUNTER_INC(CacheDiscards); @@ -365,7 +337,7 @@ static void DiscardCache(InterfaceDispatchCache * pCache) if (pDiscardedCacheBlock != NULL) g_pDiscardedCacheFree = pDiscardedCacheBlock->m_pNext; else - pDiscardedCacheBlock = (DiscardedCacheBlock *)g_pAllocHeap->Alloc(sizeof(DiscardedCacheBlock)); + pDiscardedCacheBlock = (DiscardedCacheBlock *)InterfaceDispatch_AllocPointerAligned(sizeof(DiscardedCacheBlock)); if (pDiscardedCacheBlock != NULL) // if we did NOT get the memory, we leak the discarded block { @@ -379,7 +351,7 @@ static void DiscardCache(InterfaceDispatchCache * pCache) // Called during a GC to empty the list of discarded caches (which we can now guarantee aren't being accessed) // and sort the results into the free lists we maintain for each cache size. -void ReclaimUnusedInterfaceDispatchCaches() +void InterfaceDispatch_ReclaimUnusedInterfaceDispatchCaches() { // No need for any locks, we're not racing with any other threads any more. @@ -431,13 +403,9 @@ void ReclaimUnusedInterfaceDispatchCaches() } // One time initialization of interface dispatch. -bool InitializeInterfaceDispatch() +bool InterfaceDispatch_Initialize() { - g_pAllocHeap = new (nothrow) AllocHeap(); - if (g_pAllocHeap == NULL) - return false; - - if (!g_pAllocHeap->Init()) + if (!InterfaceDispatch_InitializePal()) return false; g_sListLock.Init(CrstInterfaceDispatchGlobalLists, CRST_DEFAULT); @@ -445,7 +413,7 @@ bool InitializeInterfaceDispatch() return true; } -FCIMPL4(PCODE, RhpUpdateDispatchCellCache, InterfaceDispatchCell * pCell, PCODE pTargetCode, MethodTable* pInstanceType, DispatchCellInfo *pNewCellInfo) +PCODE InterfaceDispatch_UpdateDispatchCellCache(InterfaceDispatchCell * pCell, PCODE pTargetCode, MethodTable* pInstanceType, DispatchCellInfo *pNewCellInfo) { // Attempt to update the cache with this new mapping (if we have any cache at all, the initial state // is none). 
@@ -511,35 +479,9 @@ FCIMPL4(PCODE, RhpUpdateDispatchCellCache, InterfaceDispatchCell * pCell, PCODE // value or the cache we just allocated (another thread performed an update first). InterfaceDispatchCache * pDiscardedCache = UpdateCellStubAndCache(pCell, pStub, newCacheValue); if (pDiscardedCache) - DiscardCache(pDiscardedCache); + InterfaceDispatch_DiscardCache(pDiscardedCache); return (PCODE)pTargetCode; } -FCIMPLEND - -FCIMPL2(PCODE, RhpSearchDispatchCellCache, InterfaceDispatchCell * pCell, MethodTable* pInstanceType) -{ - // This function must be implemented in native code so that we do not take a GC while walking the cache - InterfaceDispatchCache * pCache = (InterfaceDispatchCache*)pCell->GetCache(); - if (pCache != NULL) - { - InterfaceDispatchCacheEntry * pCacheEntry = pCache->m_rgEntries; - for (uint32_t i = 0; i < pCache->m_cEntries; i++, pCacheEntry++) - if (pCacheEntry->m_pInstanceType == pInstanceType) - return pCacheEntry->m_pTargetCode; - } - - return (PCODE)nullptr; -} -FCIMPLEND - -// Given a dispatch cell, get the type and slot associated with it. This function MUST be implemented -// in cooperative native code, as the m_pCache field on the cell is unsafe to access from managed -// code due to its use of the GC state as a lock, and as lifetime control -FCIMPL2(void, RhpGetDispatchCellInfo, InterfaceDispatchCell * pCell, DispatchCellInfo* pDispatchCellInfo) -{ - *pDispatchCellInfo = pCell->GetDispatchCellInfo(); -} -FCIMPLEND #endif // FEATURE_CACHED_INTERFACE_DISPATCH diff --git a/src/coreclr/nativeaot/Runtime/CachedInterfaceDispatch.h b/src/coreclr/runtime/CachedInterfaceDispatch.h similarity index 63% rename from src/coreclr/nativeaot/Runtime/CachedInterfaceDispatch.h rename to src/coreclr/runtime/CachedInterfaceDispatch.h index ea0f7841164be1..690b1ebaf86be2 100644 --- a/src/coreclr/nativeaot/Runtime/CachedInterfaceDispatch.h +++ b/src/coreclr/runtime/CachedInterfaceDispatch.h @@ -7,10 +7,10 @@ // // ============================================================================ -#ifdef FEATURE_CACHED_INTERFACE_DISPATCH +#ifndef __CACHEDINTERFACEDISPATCH_H__ +#define __CACHEDINTERFACEDISPATCH_H__ -bool InitializeInterfaceDispatch(); -void ReclaimUnusedInterfaceDispatchCaches(); +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH // Interface dispatch caches contain an array of these entries. An instance of a cache is paired with a stub // that implicitly knows how many entries are contained. 
These entries must be aligned to twice the alignment @@ -46,4 +46,30 @@ struct InterfaceDispatchCache }; #pragma warning(pop) +bool InterfaceDispatch_Initialize(); +PCODE InterfaceDispatch_UpdateDispatchCellCache(InterfaceDispatchCell * pCell, PCODE pTargetCode, MethodTable* pInstanceType, DispatchCellInfo *pNewCellInfo); +void InterfaceDispatch_ReclaimUnusedInterfaceDispatchCaches(); +void InterfaceDispatch_DiscardCache(InterfaceDispatchCache * pCache); +inline void InterfaceDispatch_DiscardCacheHeader(InterfaceDispatchCacheHeader * pCache) +{ + return InterfaceDispatch_DiscardCache((InterfaceDispatchCache*)pCache); +} + +inline PCODE InterfaceDispatch_SearchDispatchCellCache(InterfaceDispatchCell * pCell, MethodTable* pInstanceType) +{ + // This function must be implemented in native code so that we do not take a GC while walking the cache + InterfaceDispatchCache * pCache = (InterfaceDispatchCache*)pCell->GetCache(); + if (pCache != NULL) + { + InterfaceDispatchCacheEntry * pCacheEntry = pCache->m_rgEntries; + for (uint32_t i = 0; i < pCache->m_cEntries; i++, pCacheEntry++) + if (pCacheEntry->m_pInstanceType == pInstanceType) + return pCacheEntry->m_pTargetCode; + } + + return (PCODE)nullptr; +} + #endif // FEATURE_CACHED_INTERFACE_DISPATCH + +#endif // __CACHEDINTERFACEDISPATCH_H__ \ No newline at end of file diff --git a/src/coreclr/nativeaot/Runtime/amd64/StubDispatch.S b/src/coreclr/runtime/amd64/StubDispatch.S similarity index 68% rename from src/coreclr/nativeaot/Runtime/amd64/StubDispatch.S rename to src/coreclr/runtime/amd64/StubDispatch.S index 9e1239d1de0635..3af2bc6ac019b6 100644 --- a/src/coreclr/nativeaot/Runtime/amd64/StubDispatch.S +++ b/src/coreclr/runtime/amd64/StubDispatch.S @@ -2,8 +2,9 @@ // The .NET Foundation licenses this file to you under the MIT license. .intel_syntax noprefix -#include // generated by the build from AsmOffsets.cpp -#include +#include "AsmMacros_Shared.h" + +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH // trick to avoid PLT relocation at runtime which corrupts registers #define REL_C_FUNC(name) C_FUNC(name)@gotpcrel @@ -14,9 +15,9 @@ LEAF_ENTRY RhpInterfaceDispatch\entries, _TEXT - // r10 currently contains the indirection cell address. - // load r11 to point to the cache block. - mov r11, [r10 + OFFSETOF__InterfaceDispatchCell__m_pCache] + // r11 currently contains the indirection cell address. + // load r10 to point to the cache block. + mov r10, [r11 + OFFSETOF__InterfaceDispatchCell__m_pCache] // Load the MethodTable from the object instance in rdi. ALTERNATE_ENTRY RhpInterfaceDispatchAVLocation\entries @@ -27,14 +28,14 @@ LEAF_ENTRY RhpInterfaceDispatch\entries, _TEXT // For each entry in the cache, see if its MethodTable type matches the MethodTable in rax. // If so, call the second cache entry. If not, skip the InterfaceDispatchCacheEntry. .rept \entries - cmp rax, [r11 + CurrentOffset] + cmp rax, [r10 + CurrentOffset] jne 0f - jmp [r11 + CurrentOffset + 8] + jmp [r10 + CurrentOffset + 8] 0: CurrentOffset = CurrentOffset + 16 .endr - // r10 still contains the indirection cell address. + // r11 still contains the indirection cell address. 
jmp C_FUNC(RhpInterfaceDispatchSlow) LEAF_END RhpInterfaceDispatch\entries, _TEXT @@ -60,12 +61,6 @@ DEFINE_INTERFACE_DISPATCH_STUB 16 DEFINE_INTERFACE_DISPATCH_STUB 32 DEFINE_INTERFACE_DISPATCH_STUB 64 -// Stub dispatch routine for dispatch to a vtable slot -LEAF_ENTRY RhpVTableOffsetDispatch, _TEXT - // UNIXTODO: Implement this function - int 3 -LEAF_END RhpVTableOffsetDispatch, _TEXT - // Initial dispatch on an interface when we don't have a cache yet. LEAF_ENTRY RhpInitialInterfaceDispatch, _TEXT ALTERNATE_ENTRY RhpInitialDynamicInterfaceDispatch @@ -80,13 +75,5 @@ ALTERNATE_ENTRY RhpInitialDynamicInterfaceDispatch LEAF_END RhpInitialInterfaceDispatch, _TEXT -// Cache miss case, call the runtime to resolve the target and update the cache. -// Use universal transition helper to allow an exception to flow out of resolution -LEAF_ENTRY RhpInterfaceDispatchSlow, _TEXT - // r10 contains indirection cell address, move to r11 where it will be passed by - // the universal transition thunk as an argument to RhpCidResolve - mov r11, r10 - mov r10, [rip + REL_C_FUNC(RhpCidResolve)] - jmp qword ptr [rip + REL_C_FUNC(RhpUniversalTransition_DebugStepTailCall)] -LEAF_END RhpInterfaceDispatchSlow, _TEXT +#endif // FEATURE_CACHED_INTERFACE_DISPATCH \ No newline at end of file diff --git a/src/coreclr/nativeaot/Runtime/amd64/StubDispatch.asm b/src/coreclr/runtime/amd64/StubDispatch.asm similarity index 61% rename from src/coreclr/nativeaot/Runtime/amd64/StubDispatch.asm rename to src/coreclr/runtime/amd64/StubDispatch.asm index b93d948ad5d1e1..1863a43c14720d 100644 --- a/src/coreclr/nativeaot/Runtime/amd64/StubDispatch.asm +++ b/src/coreclr/runtime/amd64/StubDispatch.asm @@ -1,21 +1,19 @@ ;; Licensed to the .NET Foundation under one or more agreements. ;; The .NET Foundation licenses this file to you under the MIT license. -include AsmMacros.inc +include AsmMacros_Shared.inc ifdef FEATURE_CACHED_INTERFACE_DISPATCH - -EXTERN RhpCidResolve : PROC -EXTERN RhpUniversalTransition_DebugStepTailCall : PROC +EXTERN RhpInterfaceDispatchSlow : PROC ;; Macro that generates code to check a single cache entry. CHECK_CACHE_ENTRY macro entry NextLabel textequ @CatStr( Attempt, %entry+1 ) - cmp rax, [r11 + OFFSETOF__InterfaceDispatchCache__m_rgEntries + (entry * 16)] + cmp rax, [r10 + OFFSETOF__InterfaceDispatchCache__m_rgEntries + (entry * 16)] jne NextLabel - jmp qword ptr [r11 + OFFSETOF__InterfaceDispatchCache__m_rgEntries + (entry * 16) + 8] + jmp qword ptr [r10 + OFFSETOF__InterfaceDispatchCache__m_rgEntries + (entry * 16) + 8] NextLabel: endm @@ -31,9 +29,9 @@ LEAF_ENTRY StubName, _TEXT ;EXTERN CID_g_cInterfaceDispatches : DWORD ;inc [CID_g_cInterfaceDispatches] - ;; r10 currently contains the indirection cell address. - ;; load r11 to point to the cache block. - mov r11, [r10 + OFFSETOF__InterfaceDispatchCell__m_pCache] + ;; r11 currently contains the indirection cell address. + ;; load r10 to point to the cache block. + mov r10, [r11 + OFFSETOF__InterfaceDispatchCell__m_pCache] ;; Load the MethodTable from the object instance in rcx. ALTERNATE_ENTRY StubAVLocation @@ -45,7 +43,7 @@ CurrentEntry = 0 CurrentEntry = CurrentEntry + 1 endm - ;; r10 still contains the indirection cell address. + ;; r11 still contains the indirection cell address. 
jmp RhpInterfaceDispatchSlow @@ -71,23 +69,6 @@ DEFINE_INTERFACE_DISPATCH_STUB 16 DEFINE_INTERFACE_DISPATCH_STUB 32 DEFINE_INTERFACE_DISPATCH_STUB 64 -;; Stub dispatch routine for dispatch to a vtable slot -LEAF_ENTRY RhpVTableOffsetDispatch, _TEXT - ;; r10 currently contains the indirection cell address. - ;; load rax to point to the vtable offset (which is stored in the m_pCache field). - mov rax, [r10 + OFFSETOF__InterfaceDispatchCell__m_pCache] - - ;; Load the MethodTable from the object instance in rcx, and add it to the vtable offset - ;; to get the address in the vtable of what we want to dereference - add rax, [rcx] - - ;; Load the target address of the vtable into rax - mov rax, [rax] - - TAILJMP_RAX -LEAF_END RhpVTableOffsetDispatch, _TEXT - - ;; Initial dispatch on an interface when we don't have a cache yet. LEAF_ENTRY RhpInitialInterfaceDispatch, _TEXT ALTERNATE_ENTRY RhpInitialDynamicInterfaceDispatch @@ -102,18 +83,6 @@ ALTERNATE_ENTRY RhpInitialDynamicInterfaceDispatch LEAF_END RhpInitialInterfaceDispatch, _TEXT -;; Cache miss case, call the runtime to resolve the target and update the cache. -;; Use universal transition helper to allow an exception to flow out of resolution -LEAF_ENTRY RhpInterfaceDispatchSlow, _TEXT - ;; r10 contains indirection cell address, move to r11 where it will be passed by - ;; the universal transition thunk as an argument to RhpCidResolve - mov r11, r10 - lea r10, RhpCidResolve - jmp RhpUniversalTransition_DebugStepTailCall - -LEAF_END RhpInterfaceDispatchSlow, _TEXT - - endif ;; FEATURE_CACHED_INTERFACE_DISPATCH end diff --git a/src/coreclr/nativeaot/Runtime/arm/StubDispatch.S b/src/coreclr/runtime/arm/StubDispatch.S similarity index 98% rename from src/coreclr/nativeaot/Runtime/arm/StubDispatch.S rename to src/coreclr/runtime/arm/StubDispatch.S index 7c2f0bef20afdc..6b9344d3d748e2 100644 --- a/src/coreclr/nativeaot/Runtime/arm/StubDispatch.S +++ b/src/coreclr/runtime/arm/StubDispatch.S @@ -88,6 +88,7 @@ LEAF_ENTRY RhpVTableOffsetDispatch, _TEXT ldr r12, [r12, #OFFSETOF__InterfaceDispatchCell__m_pCache] // Load the MethodTable from the object instance in r0. + ALTERNATE_ENTRY RhpVTableOffsetDispatchAVLocation ldr r1, [r0] // add the vtable offset to the MethodTable pointer diff --git a/src/coreclr/nativeaot/Runtime/arm64/StubDispatch.S b/src/coreclr/runtime/arm64/StubDispatch.S similarity index 66% rename from src/coreclr/nativeaot/Runtime/arm64/StubDispatch.S rename to src/coreclr/runtime/arm64/StubDispatch.S index 5d3d11cf4108f8..1155e6ac257a1a 100644 --- a/src/coreclr/nativeaot/Runtime/arm64/StubDispatch.S +++ b/src/coreclr/runtime/arm64/StubDispatch.S @@ -1,14 +1,10 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. -#include -#include "AsmOffsets.inc" +#include "AsmMacros_Shared.h" #ifdef FEATURE_CACHED_INTERFACE_DISPATCH - .extern RhpCidResolve - .extern RhpUniversalTransition_DebugStepTailCall - // Macro that generates code to check a single cache entry. .macro CHECK_CACHE_ENTRY entry // Check a single entry in the cache. @@ -83,37 +79,4 @@ b C_FUNC(RhpInterfaceDispatchSlow) LEAF_END RhpInitialInterfaceDispatch, _TEXT -// -// Stub dispatch routine for dispatch to a vtable slot -// - LEAF_ENTRY RhpVTableOffsetDispatch, _TEXT - // x11 contains the interface dispatch cell address. - // load x12 to point to the vtable offset (which is stored in the m_pCache field). 
- ldr x12, [x11, #OFFSETOF__InterfaceDispatchCell__m_pCache] - - // Load the MethodTable from the object instance in x0, and add it to the vtable offset - // to get the address in the vtable of what we want to dereference - ldr x13, [x0] - add x12, x12, x13 - - // Load the target address of the vtable into x12 - ldr x12, [x12] - - br x12 - LEAF_END RhpVTableOffsetDispatch, _TEXT - -// -// Cache miss case, call the runtime to resolve the target and update the cache. -// Use universal transition helper to allow an exception to flow out of resolution. -// - LEAF_ENTRY RhpInterfaceDispatchSlow, _TEXT - // x11 contains the interface dispatch cell address. - // Calling convention of the universal thunk is: - // xip0: target address for the thunk to call - // xip1: parameter of the thunk's target - PREPARE_EXTERNAL_VAR RhpCidResolve, xip0 - mov xip1, x11 - b C_FUNC(RhpUniversalTransition_DebugStepTailCall) - LEAF_END RhpInterfaceDispatchSlow, _TEXT - #endif // FEATURE_CACHED_INTERFACE_DISPATCH diff --git a/src/coreclr/nativeaot/Runtime/arm64/StubDispatch.asm b/src/coreclr/runtime/arm64/StubDispatch.asm similarity index 67% rename from src/coreclr/nativeaot/Runtime/arm64/StubDispatch.asm rename to src/coreclr/runtime/arm64/StubDispatch.asm index 93e6038f1047a0..697d3a10f52e01 100644 --- a/src/coreclr/nativeaot/Runtime/arm64/StubDispatch.asm +++ b/src/coreclr/runtime/arm64/StubDispatch.asm @@ -1,14 +1,13 @@ ;; Licensed to the .NET Foundation under one or more agreements. ;; The .NET Foundation licenses this file to you under the MIT license. -#include "AsmMacros.h" +#include "AsmMacros_Shared.h" TEXTAREA #ifdef FEATURE_CACHED_INTERFACE_DISPATCH - EXTERN RhpCidResolve - EXTERN RhpUniversalTransition_DebugStepTailCall + EXTERN RhpInterfaceDispatchSlow ;; Macro that generates code to check a single cache entry. MACRO @@ -88,39 +87,6 @@ CurrentEntry SETA CurrentEntry + 1 b RhpInterfaceDispatchSlow LEAF_END RhpInitialInterfaceDispatch -;; -;; Stub dispatch routine for dispatch to a vtable slot -;; - LEAF_ENTRY RhpVTableOffsetDispatch - ;; x11 contains the interface dispatch cell address. - ;; load x12 to point to the vtable offset (which is stored in the m_pCache field). - ldr x12, [x11, #OFFSETOF__InterfaceDispatchCell__m_pCache] - - ;; Load the MethodTable from the object instance in x0, and add it to the vtable offset - ;; to get the address in the vtable of what we want to dereference - ldr x13, [x0] - add x12, x12, x13 - - ;; Load the target address of the vtable into x12 - ldr x12, [x12] - - br x12 - LEAF_END RhpVTableOffsetDispatch - -;; -;; Cache miss case, call the runtime to resolve the target and update the cache. -;; Use universal transition helper to allow an exception to flow out of resolution. -;; - LEAF_ENTRY RhpInterfaceDispatchSlow - ;; x11 contains the interface dispatch cell address. 
- ;; Calling convention of the universal thunk is: - ;; xip0: target address for the thunk to call - ;; xip1: parameter of the thunk's target - ldr xip0, =RhpCidResolve - mov xip1, x11 - b RhpUniversalTransition_DebugStepTailCall - LEAF_END RhpInterfaceDispatchSlow - #endif // FEATURE_CACHED_INTERFACE_DISPATCH END diff --git a/src/coreclr/nativeaot/Runtime/i386/StubDispatch.S b/src/coreclr/runtime/i386/StubDispatch.S similarity index 100% rename from src/coreclr/nativeaot/Runtime/i386/StubDispatch.S rename to src/coreclr/runtime/i386/StubDispatch.S diff --git a/src/coreclr/nativeaot/Runtime/i386/StubDispatch.asm b/src/coreclr/runtime/i386/StubDispatch.asm similarity index 100% rename from src/coreclr/nativeaot/Runtime/i386/StubDispatch.asm rename to src/coreclr/runtime/i386/StubDispatch.asm diff --git a/src/coreclr/nativeaot/Runtime/loongarch64/StubDispatch.S b/src/coreclr/runtime/loongarch64/StubDispatch.S similarity index 98% rename from src/coreclr/nativeaot/Runtime/loongarch64/StubDispatch.S rename to src/coreclr/runtime/loongarch64/StubDispatch.S index 138992ef1a3294..a85cafa3389e00 100644 --- a/src/coreclr/nativeaot/Runtime/loongarch64/StubDispatch.S +++ b/src/coreclr/runtime/loongarch64/StubDispatch.S @@ -92,6 +92,7 @@ // Load the MethodTable from the object instance in a0, and add it to the vtable offset // to get the address in the vtable of what we want to dereference + ALTERNATE_ENTRY RhpVTableOffsetDispatchAVLocation ld.d $t4, $a0, 0 add.d $t3, $t3, $t4 diff --git a/src/coreclr/nativeaot/Runtime/riscv64/StubDispatch.S b/src/coreclr/runtime/riscv64/StubDispatch.S similarity index 100% rename from src/coreclr/nativeaot/Runtime/riscv64/StubDispatch.S rename to src/coreclr/runtime/riscv64/StubDispatch.S diff --git a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunConstants.cs b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunConstants.cs index 593f0158536a5e..73fc45b06e2806 100644 --- a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunConstants.cs +++ b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunConstants.cs @@ -140,7 +140,7 @@ public enum ReadyToRunFixupKind VirtualEntry = 0x16, // For invoking a virtual method VirtualEntry_DefToken = 0x17, // Smaller version of VirtualEntry - method is def token VirtualEntry_RefToken = 0x18, // Smaller version of VirtualEntry - method is ref token - VirtualEntry_Slot = 0x19, // Smaller version of VirtualEntry - type & slot + VirtualEntry_Slot = 0x19, // Smaller version of VirtualEntry - type & slot - OBSOLETE, not currently used, and hasn't ever been used in R2R codegen since crossgen2 was introduced, and may not have ever been used. 
Helper = 0x1A, // Helper StringHandle = 0x1B, // String handle diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ArrayOfEmbeddedDataNode.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ArrayOfEmbeddedDataNode.cs index 801bd7dbe2ccbd..8fadcc5f3beb0f 100644 --- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ArrayOfEmbeddedDataNode.cs +++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ArrayOfEmbeddedDataNode.cs @@ -75,10 +75,12 @@ protected virtual void GetElementDataForNodes(ref ObjectDataBuilder builder, Nod } } + protected virtual int GetAlignmentRequirement(NodeFactory factory) { return factory.Target.PointerSize; } + public override ObjectData GetData(NodeFactory factory, bool relocsOnly) { ObjectDataBuilder builder = new ObjectDataBuilder(factory, relocsOnly); - builder.RequireInitialPointerAlignment(); + builder.RequireInitialAlignment(GetAlignmentRequirement(factory)); if (_sorter != null) _nestedNodesList.MergeSort(_sorter); diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/DelayLoadHelperImport.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/DelayLoadHelperImport.cs index 54a85ff42e2cca..4e4e00e9fad763 100644 --- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/DelayLoadHelperImport.cs +++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/DelayLoadHelperImport.cs @@ -2,6 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. using System.Collections.Generic; +using System.Diagnostics; using Internal.Text; using Internal.TypeSystem; @@ -66,6 +67,15 @@ public override void EncodeData(ref ObjectDataBuilder dataBuilder, NodeFactory f // when loaded by CoreCLR dataBuilder.EmitReloc(_delayLoadHelper, factory.Target.PointerSize == 4 ? 
RelocType.IMAGE_REL_BASED_HIGHLOW : RelocType.IMAGE_REL_BASED_DIR64, factory.Target.CodeDelta); + + if (Table.EntrySize == (factory.Target.PointerSize * 2)) + { + dataBuilder.EmitNaturalInt(0); + } + else + { + Debug.Assert(Table.EntrySize == factory.Target.PointerSize); + } } public override IEnumerable GetStaticDependencies(NodeFactory factory) diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/DelayLoadHelperMethodImport.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/DelayLoadHelperMethodImport.cs index eb996de2ac2b0a..4c73a0ab08bcab 100644 --- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/DelayLoadHelperMethodImport.cs +++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/DelayLoadHelperMethodImport.cs @@ -28,7 +28,7 @@ public DelayLoadHelperMethodImport( MethodWithToken method, bool useVirtualCall, bool useInstantiatingStub, - Signature instanceSignature, + Signature instanceSignature, MethodDesc callingMethod = null) : base(factory, importSectionNode, helper, instanceSignature, useVirtualCall, useJumpableStub: false, callingMethod) { diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/ImportSectionNode.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/ImportSectionNode.cs index 45dc3fb4e240ab..e2356ea2816f84 100644 --- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/ImportSectionNode.cs +++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/ImportSectionNode.cs @@ -12,11 +12,21 @@ public class ImportSectionNode : EmbeddedObjectNode { private class ImportTable : ArrayOfEmbeddedDataNode { - public ImportTable(string symbol) : base(symbol, nodeSorter: new EmbeddedObjectNodeComparer(CompilerComparer.Instance)) {} + private byte _alignment; + + public ImportTable(string symbol, byte alignment) : base(symbol, nodeSorter: new EmbeddedObjectNodeComparer(CompilerComparer.Instance)) + { + _alignment = alignment; + } public override bool ShouldSkipEmittingObjectNode(NodeFactory factory) => false; public override int ClassCode => (int)ObjectNodeOrder.ImportSectionNode; + + protected override int GetAlignmentRequirement(NodeFactory factory) + { + return _alignment; + } } private readonly ImportTable _imports; @@ -44,7 +54,7 @@ public ImportSectionNode(string name, ReadyToRunImportSectionType importType, Re _emitPrecode = emitPrecode; _emitGCRefMap = emitGCRefMap; - _imports = new ImportTable(_name + "_ImportBegin"); + _imports = new ImportTable(_name + "_ImportBegin", entrySize); _signatures = new ArrayOfEmbeddedPointersNode(_name + "_SigBegin", new EmbeddedObjectNodeComparer(CompilerComparer.Instance)); _signatureList = new List(); _gcRefMap = _emitGCRefMap ? 
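With cached interface dispatch enabled, the import cells backing interface call sites become two pointers wide: EncodeData above emits the usual reloc to the delay-load helper and then a pointer-sized zero whenever the owning table's EntrySize is double the pointer size, and ImportTable now takes an explicit alignment for those wider cells. A rough sketch of the resulting cell shape, with illustrative field names only; the second slot is what the runtime later treats as the InterfaceDispatchCell cache/token word.

```cpp
#include <cstdint>

// Illustrative shape of one DispatchImports entry when cached-interface-dispatch
// support is enabled (EntrySize == 2 * pointer size). Field names are made up;
// at run time the runtime overlays its InterfaceDispatchCell on this pair.
struct DispatchImportCell
{
    uintptr_t Code;          // reloc to the delay-load helper thunk (emitted above)
    uintptr_t CacheOrToken;  // emitted as 0; populated lazily by the runtime
};

static_assert(sizeof(DispatchImportCell) == 2 * sizeof(uintptr_t),
              "must match the EntrySize the ImportSectionNode declares");

// Without the feature the table keeps its historical one-pointer entries:
struct ClassicImportCell
{
    uintptr_t Code;          // EntrySize == pointer size, no padding emitted
};
```

Passing the entry size through as the table's initial alignment keeps every wide cell naturally (16-byte) aligned on 64-bit targets, which the runtime's double-wide interlocked update of these cells depends on.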
new GCRefMapNode(this) : null; @@ -154,5 +164,7 @@ public override int CompareToImpl(ISortableNode other, CompilerComparer comparer { return _name.CompareTo(((ImportSectionNode)other)._name); } + + public int EntrySize => _entrySize; } } diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRunCodegenNodeFactory.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRunCodegenNodeFactory.cs index 507c34b6f899c0..e644439b2bec67 100644 --- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRunCodegenNodeFactory.cs +++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRunCodegenNodeFactory.cs @@ -58,6 +58,7 @@ public sealed class NodeFactoryOptimizationFlags public TypeValidationRule TypeValidation; public int DeterminismStress; public bool PrintReproArgs; + public bool EnableCachedInterfaceDispatchSupport; } // To make the code future compatible to the composite R2R story @@ -307,7 +308,7 @@ private void CreateNodeCaches() { return new DelayLoadHelperMethodImport( this, - DispatchImports, + HelperImports, ReadyToRunHelper.DelayLoad_Helper_Obj, key.Method, useVirtualCall: false, @@ -867,7 +868,7 @@ bool HasAnyProfileDataForInput() "DispatchImports", ReadyToRunImportSectionType.StubDispatch, ReadyToRunImportSectionFlags.PCode, - (byte)Target.PointerSize, + this.OptimizationFlags.EnableCachedInterfaceDispatchSupport ? (byte)(2 * Target.PointerSize) : (byte)Target.PointerSize, emitPrecode: false, emitGCRefMap: true); ImportSectionsTable.AddEmbeddedObject(DispatchImports); diff --git a/src/coreclr/tools/aot/crossgen2/Crossgen2RootCommand.cs b/src/coreclr/tools/aot/crossgen2/Crossgen2RootCommand.cs index 6265d0df45b65b..64b78d35500ac5 100644 --- a/src/coreclr/tools/aot/crossgen2/Crossgen2RootCommand.cs +++ b/src/coreclr/tools/aot/crossgen2/Crossgen2RootCommand.cs @@ -39,6 +39,8 @@ internal class Crossgen2RootCommand : CliRootCommand new("--optimize-space", "--Os") { Description = SR.OptimizeSpaceOption }; public CliOption OptimizeTime { get; } = new("--optimize-time", "--Ot") { Description = SR.OptimizeSpeedOption }; + public CliOption EnableCachedInterfaceDispatchSupport { get; } = + new("--enable-cached-interface-dispatch-support", "--CID") { Description = SR.EnableCachedInterfaceDispatchSupport }; public CliOption TypeValidation { get; } = new("--type-validation") { DefaultValueFactory = _ => TypeValidationRule.Automatic, Description = SR.TypeValidation, HelpName = "arg" }; public CliOption InputBubble { get; } = @@ -163,6 +165,7 @@ public Crossgen2RootCommand(string[] args) : base(SR.Crossgen2BannerText) Options.Add(OptimizeDisabled); Options.Add(OptimizeSpace); Options.Add(OptimizeTime); + Options.Add(EnableCachedInterfaceDispatchSupport); Options.Add(TypeValidation); Options.Add(InputBubble); Options.Add(InputBubbleReferenceFilePaths); diff --git a/src/coreclr/tools/aot/crossgen2/Program.cs b/src/coreclr/tools/aot/crossgen2/Program.cs index 1453a6cf177521..0b5c00c95c6717 100644 --- a/src/coreclr/tools/aot/crossgen2/Program.cs +++ b/src/coreclr/tools/aot/crossgen2/Program.cs @@ -595,6 +595,7 @@ private void RunSingleCompilation(Dictionary inFilePaths, Instru nodeFactoryFlags.TypeValidation = Get(_command.TypeValidation); nodeFactoryFlags.DeterminismStress = Get(_command.DeterminismStress); nodeFactoryFlags.PrintReproArgs = Get(_command.PrintReproInstructions); + nodeFactoryFlags.EnableCachedInterfaceDispatchSupport = 
Get(_command.EnableCachedInterfaceDispatchSupport); builder .UseMapFile(Get(_command.Map)) diff --git a/src/coreclr/tools/aot/crossgen2/Properties/Resources.resx b/src/coreclr/tools/aot/crossgen2/Properties/Resources.resx index 59bfc796f397cb..04334beb795018 100644 --- a/src/coreclr/tools/aot/crossgen2/Properties/Resources.resx +++ b/src/coreclr/tools/aot/crossgen2/Properties/Resources.resx @@ -423,4 +423,7 @@ Number of nested occurrences of a potentially cyclic generic type to cut off - + + Enable support for cached interface dispatch + + \ No newline at end of file diff --git a/src/coreclr/vm/CMakeLists.txt b/src/coreclr/vm/CMakeLists.txt index 88fa4ac9f88414..61a210eb37ef36 100644 --- a/src/coreclr/vm/CMakeLists.txt +++ b/src/coreclr/vm/CMakeLists.txt @@ -1,10 +1,13 @@ set(CMAKE_INCLUDE_CURRENT_DIR ON) +set(RUNTIME_DIR ../runtime) + # Needed due to the cmunged files being in the binary folders, the set(CMAKE_INCLUDE_CURRENT_DIR ON) is not enough include_directories(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}) include_directories(${ARCH_SOURCES_DIR}) include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../interop/inc) include_directories(${CLR_SRC_NATIVE_DIR}) +include_directories(${RUNTIME_DIR}) # needed when zLib compression is used include_directories(${CLR_SRC_NATIVE_DIR}/libs/System.IO.Compression.Native) @@ -41,6 +44,9 @@ if(FEATURE_PERFTRACING) include_directories(${CORECLR_USEREVENTS_SHIM_DIR}) endif(FEATURE_PERFTRACING) +add_compile_definitions($<${FEATURE_CORECLR_CACHED_INTERFACE_DISPATCH}:FEATURE_CACHED_INTERFACE_DISPATCH>) +add_compile_definitions($<${FEATURE_CORECLR_VIRTUAL_STUB_DISPATCH}:FEATURE_VIRTUAL_STUB_DISPATCH>) + set(VM_SOURCES_DAC_AND_WKS_COMMON appdomain.cpp array.cpp @@ -289,6 +295,8 @@ set(VM_SOURCES_WKS assemblynative.cpp assemblyspec.cpp baseassemblyspec.cpp + ${RUNTIME_DIR}/CachedInterfaceDispatch.cpp + CachedInterfaceDispatchCoreclr.cpp cachelinealloc.cpp callconvbuilder.cpp callhelpers.cpp @@ -611,6 +619,7 @@ if(CLR_CMAKE_TARGET_WIN32) if(CLR_CMAKE_TARGET_ARCH_AMD64) set(VM_SOURCES_WKS_ARCH_ASM ${ARCH_SOURCES_DIR}/AsmHelpers.asm + ${ARCH_SOURCES_DIR}/CachedInterfaceDispatchCoreCLR.asm ${ARCH_SOURCES_DIR}/CallDescrWorkerAMD64.asm ${ARCH_SOURCES_DIR}/ComCallPreStub.asm ${ARCH_SOURCES_DIR}/GenericComCallStubs.asm @@ -623,6 +632,7 @@ if(CLR_CMAKE_TARGET_ARCH_AMD64) ${ARCH_SOURCES_DIR}/patchedcode.asm ${ARCH_SOURCES_DIR}/PInvokeStubs.asm ${ARCH_SOURCES_DIR}/RedirectedHandledJITCase.asm + ${RUNTIME_DIR}/${ARCH_SOURCES_DIR}/StubDispatch.asm ${ARCH_SOURCES_DIR}/ThePreStubAMD64.asm ${ARCH_SOURCES_DIR}/thunktemplates.asm ${ARCH_SOURCES_DIR}/Context.asm @@ -653,7 +663,9 @@ elseif(CLR_CMAKE_TARGET_ARCH_ARM64) ${ARCH_SOURCES_DIR}/CallDescrWorkerARM64.asm ${ARCH_SOURCES_DIR}/patchedcode.asm ${ARCH_SOURCES_DIR}/PInvokeStubs.asm + ${RUNTIME_DIR}/${ARCH_SOURCES_DIR}/StubDispatch.asm ${ARCH_SOURCES_DIR}/thunktemplates.asm + ${ARCH_SOURCES_DIR}/CachedInterfaceDispatchCoreCLR.asm ) set(VM_HEADERS_WKS_ARCH_ASM @@ -661,11 +673,13 @@ elseif(CLR_CMAKE_TARGET_ARCH_ARM64) ) endif() + set(ASM_SUFFIX asm) else(CLR_CMAKE_TARGET_WIN32) if(CLR_CMAKE_TARGET_ARCH_AMD64) set(VM_SOURCES_WKS_ARCH_ASM ${ARCH_SOURCES_DIR}/asmhelpers.S + ${ARCH_SOURCES_DIR}/CachedInterfaceDispatchCoreCLR.S ${ARCH_SOURCES_DIR}/calldescrworkeramd64.S ${ARCH_SOURCES_DIR}/externalmethodfixupthunk.S ${ARCH_SOURCES_DIR}/getstate.S @@ -676,6 +690,7 @@ else(CLR_CMAKE_TARGET_WIN32) ${ARCH_SOURCES_DIR}/patchedcode.S ${ARCH_SOURCES_DIR}/pinvokestubs.S ${ARCH_SOURCES_DIR}/redirectedhandledjitcase.S + 
${RUNTIME_DIR}/${ARCH_SOURCES_DIR}/StubDispatch.S ${ARCH_SOURCES_DIR}/theprestubamd64.S ${ARCH_SOURCES_DIR}/thunktemplates.S ${ARCH_SOURCES_DIR}/Context.S @@ -704,9 +719,11 @@ else(CLR_CMAKE_TARGET_WIN32) elseif(CLR_CMAKE_TARGET_ARCH_ARM64) set(VM_SOURCES_WKS_ARCH_ASM ${ARCH_SOURCES_DIR}/asmhelpers.S + ${ARCH_SOURCES_DIR}/CachedInterfaceDispatchCoreCLR.S ${ARCH_SOURCES_DIR}/calldescrworkerarm64.S ${ARCH_SOURCES_DIR}/patchedcode.S ${ARCH_SOURCES_DIR}/pinvokestubs.S + ${RUNTIME_DIR}/${ARCH_SOURCES_DIR}/StubDispatch.S ${ARCH_SOURCES_DIR}/thunktemplates.S ) elseif(CLR_CMAKE_TARGET_ARCH_LOONGARCH64) @@ -725,9 +742,9 @@ else(CLR_CMAKE_TARGET_WIN32) ) endif() + set(ASM_SUFFIX S) endif(CLR_CMAKE_TARGET_WIN32) - if(CLR_CMAKE_TARGET_ARCH_AMD64) set(VM_SOURCES_DAC_AND_WKS_ARCH ${ARCH_SOURCES_DIR}/cgenamd64.cpp diff --git a/src/coreclr/vm/CachedInterfaceDispatchCoreclr.cpp b/src/coreclr/vm/CachedInterfaceDispatchCoreclr.cpp new file mode 100644 index 00000000000000..66a359ffbd3b63 --- /dev/null +++ b/src/coreclr/vm/CachedInterfaceDispatchCoreclr.cpp @@ -0,0 +1,22 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include "common.h" + +bool InterfaceDispatch_InitializePal() +{ + return true; +} + +// Allocate memory aligned at sizeof(void*)*2 boundaries +void *InterfaceDispatch_AllocDoublePointerAligned(size_t size) +{ + return (void*)SystemDomain::GetGlobalLoaderAllocator()->GetHighFrequencyHeap()->AllocAlignedMem(size, sizeof(TADDR) * 2); +} + +// Allocate memory aligned at sizeof(void*) boundaries + +void *InterfaceDispatch_AllocPointerAligned(size_t size) +{ + return (void*)SystemDomain::GetGlobalLoaderAllocator()->GetHighFrequencyHeap()->AllocAlignedMem(size, sizeof(TADDR)); +} diff --git a/src/coreclr/vm/CachedInterfaceDispatchPal.h b/src/coreclr/vm/CachedInterfaceDispatchPal.h new file mode 100644 index 00000000000000..b5fe783229271b --- /dev/null +++ b/src/coreclr/vm/CachedInterfaceDispatchPal.h @@ -0,0 +1,195 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#ifndef __CACHEDINTERFACEDISPATCHPAL_H__ +#define __CACHEDINTERFACEDISPATCHPAL_H__ + +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH + +extern "C" void RhpInitialInterfaceDispatch(); + + +bool InterfaceDispatch_InitializePal(); + +// Allocate memory aligned at sizeof(void*)*2 boundaries +void *InterfaceDispatch_AllocDoublePointerAligned(size_t size); +// Allocate memory aligned at sizeof(void*) boundaries + +void *InterfaceDispatch_AllocPointerAligned(size_t size); + +enum Flags +{ + // The low 2 bits of the m_pCache pointer are treated specially so that we can avoid the need for + // extra fields on this type. + // OR if the m_pCache value is less than 0x1000 then this is a vtable offset and should be used as such + IDC_CachePointerPointsIsVTableOffset = 0x2, + IDC_CachePointerPointsAtCache = 0x0, + IDC_CachePointerMask = 0x3, + IDC_CachePointerMaskShift = 0x2, +}; + +enum class DispatchCellType +{ + InterfaceAndSlot = 0x0, + VTableOffset = 0x2, +}; + +struct DispatchCellInfo +{ +private: + static DispatchCellType CellTypeFromToken(DispatchToken token) + { + if (token.IsThisToken()) + { + return DispatchCellType::VTableOffset; + } + return DispatchCellType::InterfaceAndSlot; + } +public: + + DispatchCellInfo(DispatchToken token, bool hasCache) : + CellType(CellTypeFromToken(token)), + Token(token), + HasCache(hasCache ? 
1 : 0) + { + + } + const DispatchCellType CellType; + const DispatchToken Token; + + uintptr_t GetVTableOffset() const + { + if (CellType == DispatchCellType::VTableOffset) + { + // The vtable offset is stored in a pointer sized field, but actually represents 2 values. + // 1. The offset of the first indirection to use. which is stored in the upper half of the + // pointer sized field (bits 16-31 of a 32 bit pointer, or bits 32-63 of a 64 bit pointer). + // + // 2. The offset of the second indirection, which is a stored is the upper half of the lower + // half of the pointer size field (bits 8-15 of a 32 bit pointer, or bits 16-31 of a 64 + // bit pointer) This second offset is always less than 255, so we only really need a single + // byte, and the assembly code on some architectures may take a dependency on that + // so the VTableOffsetToSlot function has a mask to ensure that it is only ever a single byte. + uint32_t slot = Token.GetSlotNumber(); + unsigned offsetOfIndirection = MethodTable::GetVtableOffset() + MethodTable::GetIndexOfVtableIndirection(slot) * TARGET_POINTER_SIZE; + unsigned offsetAfterIndirection = MethodTable::GetIndexAfterVtableIndirection(slot) * TARGET_POINTER_SIZE; + + uintptr_t offsetOfIndirectionPortion = (((uintptr_t)offsetOfIndirection) << ((TARGET_POINTER_SIZE * 8) / 2)); + uintptr_t offsetAfterIndirectionPortion = (((uintptr_t)offsetAfterIndirection) << ((TARGET_POINTER_SIZE * 8) / 4)); + uintptr_t flagPortion = (uintptr_t)IDC_CachePointerPointsIsVTableOffset; + + uintptr_t result = offsetOfIndirectionPortion | offsetAfterIndirectionPortion | flagPortion; + _ASSERTE(slot == VTableOffsetToSlot(result)); + return result; + } + return 0; + } + + static unsigned VTableOffsetToSlot(uintptr_t vtableOffset) + { + // See comment in GetVTableOffset() for what we're doing here. + unsigned offsetOfIndirection = (unsigned)(vtableOffset >> ((TARGET_POINTER_SIZE * 8) / 2)); + unsigned offsetAfterIndirection = (unsigned)(vtableOffset >> ((TARGET_POINTER_SIZE * 8) / 4)) & 0xFF; + unsigned slotGroupPerChunk = (offsetOfIndirection - MethodTable::GetVtableOffset()) / TARGET_POINTER_SIZE; + unsigned slot = (slotGroupPerChunk * VTABLE_SLOTS_PER_CHUNK) + (offsetAfterIndirection / TARGET_POINTER_SIZE); + return slot; + } + + const uint8_t HasCache = 0; +}; + +struct InterfaceDispatchCacheHeader +{ +private: + enum Flags + { + CH_TypeAndSlotIndex = 0x0, + CH_MetadataToken = 0x1, + CH_Mask = 0x3, + CH_Shift = 0x2, + }; + +public: + void Initialize(DispatchToken token) + { + m_token = token; + } + + void Initialize(const DispatchCellInfo *pNewCellInfo) + { + m_token = pNewCellInfo->Token; + } + + DispatchCellInfo GetDispatchCellInfo() + { + DispatchCellInfo cellInfo(m_token, true); + return cellInfo; + } + +private: + DispatchToken m_token; + TADDR padding; // Ensure that the size of this structure is a multiple of 2 pointers +}; + +// One of these is allocated per interface call site. It holds the stub to call, data to pass to that stub +// (cache information) and the interface contract, i.e. the interface type and slot being called. +struct InterfaceDispatchCell +{ + // The first two fields must remain together and at the beginning of the structure. This is due to the + // synchronization requirements of the code that updates these at runtime and the instructions generated + // by the binder for interface call sites. 
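GetVTableOffset above packs the two vtable indirection offsets and a tag bit into the single pointer-sized m_pCache word, and VTableOffsetToSlot reverses the encoding (the stubs later extract the same two fields with shifts). A minimal round-trip sketch for a 64-bit target, using assumed values for the MethodTable vtable start and chunk size purely to make the arithmetic concrete:

```cpp
#include <cassert>
#include <cstdint>

// Assumed values, used only to make the round trip concrete on a 64-bit target;
// the real numbers come from MethodTable (GetVtableOffset, VTABLE_SLOTS_PER_CHUNK).
constexpr uintptr_t kPointerSize        = 8;     // TARGET_POINTER_SIZE
constexpr uintptr_t kVtableOffset       = 0x40;  // hypothetical MethodTable::GetVtableOffset()
constexpr uintptr_t kSlotsPerChunk      = 8;     // hypothetical VTABLE_SLOTS_PER_CHUNK
constexpr uintptr_t kIsVTableOffsetFlag = 0x2;   // IDC_CachePointerPointsIsVTableOffset

// Pack: mirrors DispatchCellInfo::GetVTableOffset.
uintptr_t PackVTableOffset(uint32_t slot)
{
    uintptr_t offsetOfIndirection    = kVtableOffset + (slot / kSlotsPerChunk) * kPointerSize;
    uintptr_t offsetAfterIndirection = (slot % kSlotsPerChunk) * kPointerSize;

    return (offsetOfIndirection << 32)      // upper half: offset of the vtable chunk pointer
         | (offsetAfterIndirection << 16)   // placed at bit 16; the value always fits in a byte
         | kIsVTableOffsetFlag;             // low bits tag the word as a vtable offset
}

// Unpack: mirrors DispatchCellInfo::VTableOffsetToSlot (and the shifts in the stubs).
uint32_t UnpackSlot(uintptr_t packed)
{
    uint32_t offsetOfIndirection    = (uint32_t)(packed >> 32);
    uint32_t offsetAfterIndirection = (uint32_t)(packed >> 16) & 0xFF;
    uint32_t chunk = (offsetOfIndirection - (uint32_t)kVtableOffset) / (uint32_t)kPointerSize;
    return chunk * (uint32_t)kSlotsPerChunk + offsetAfterIndirection / (uint32_t)kPointerSize;
}

int main()
{
    for (uint32_t slot = 0; slot < 64; slot++)
        assert(UnpackSlot(PackVTableOffset(slot)) == slot);
    return 0;
}
```

The within-chunk offset always fits in a byte, which is why the decode masks with 0xFF and why the assembly can read it with a 16-bit shift or bit-field extract.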
+ TADDR m_pStub; // Call this code to execute the interface dispatch + Volatile m_pCache; // Context used by the stub above (one or both of the low two bits are set + // for initial dispatch, and if not set, using this as a cache pointer or + // as a vtable offset.) + DispatchCellInfo GetDispatchCellInfo() + { + // Capture m_pCache into a local for safe access (this is a volatile read of a value that may be + // modified on another thread while this function is executing.) + TADDR cachePointerValue = m_pCache; + + if (IsCache(cachePointerValue)) + { + return ((InterfaceDispatchCacheHeader*)cachePointerValue)->GetDispatchCellInfo(); + } + else if (DispatchToken::IsCachedInterfaceDispatchToken(cachePointerValue)) + { + return DispatchCellInfo(DispatchToken::FromCachedInterfaceDispatchToken(cachePointerValue), false); + } + else + { + _ASSERTE(IsVTableOffset(cachePointerValue)); + unsigned slot = DispatchCellInfo::VTableOffsetToSlot(cachePointerValue); + return DispatchCellInfo(DispatchToken::CreateDispatchToken(slot), false); + } + } + + static bool IsCache(TADDR value) + { + return (value & IDC_CachePointerMask) == 0; + } + + static bool IsVTableOffset(TADDR value) + { + return (value & IDC_CachePointerPointsIsVTableOffset) == IDC_CachePointerPointsIsVTableOffset; + } + + InterfaceDispatchCacheHeader* GetCache() const + { + // Capture m_pCache into a local for safe access (this is a volatile read of a value that may be + // modified on another thread while this function is executing.) + TADDR cachePointerValue = m_pCache; + if (IsCache(cachePointerValue)) + { + return (InterfaceDispatchCacheHeader*)cachePointerValue; + } + else + { + return nullptr; + } + } +}; + +#endif // FEATURE_CACHED_INTERFACE_DISPATCH + +#endif // __CACHEDINTERFACEDISPATCHPAL_H__ \ No newline at end of file diff --git a/src/coreclr/vm/amd64/AsmMacros_Shared.h b/src/coreclr/vm/amd64/AsmMacros_Shared.h new file mode 100644 index 00000000000000..87920d58b2ac65 --- /dev/null +++ b/src/coreclr/vm/amd64/AsmMacros_Shared.h @@ -0,0 +1,8 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// This file is used to allow sharing of assembly code between NativeAOT and CoreCLR, which have different conventions about how to ensure that constants offsets are accessible + +#include "unixasmmacros.inc" +#include "asmconstants.h" + diff --git a/src/coreclr/vm/amd64/AsmMacros_Shared.inc b/src/coreclr/vm/amd64/AsmMacros_Shared.inc new file mode 100644 index 00000000000000..c7e7ce2f562fbb --- /dev/null +++ b/src/coreclr/vm/amd64/AsmMacros_Shared.inc @@ -0,0 +1,7 @@ +; Licensed to the .NET Foundation under one or more agreements. +; The .NET Foundation licenses this file to you under the MIT license. + +; This file is used to allow sharing of assembly code between NativeAOT and CoreCLR, which have different conventions about how to ensure that constants offsets are accessible + +include AsmConstants.inc +include AsmMacros.inc \ No newline at end of file diff --git a/src/coreclr/vm/amd64/CachedInterfaceDispatchCoreCLR.S b/src/coreclr/vm/amd64/CachedInterfaceDispatchCoreCLR.S new file mode 100644 index 00000000000000..a3a45be29ddb75 --- /dev/null +++ b/src/coreclr/vm/amd64/CachedInterfaceDispatchCoreCLR.S @@ -0,0 +1,71 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
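GetDispatchCellInfo discriminates the three things a cell's m_pCache word can hold, and the order of the checks matters: a tagged DispatchToken has its low bit set (and may also have bit 1 set), so it must be tested before the vtable-offset flag. A compact sketch of that classification, with the flag values copied from the enum above:

```cpp
#include <cstdint>

// The three states of InterfaceDispatchCell::m_pCache, discriminated by its low
// two bits; this mirrors the check order used by GetDispatchCellInfo.
enum class CellState { CachePointer, CachedToken, VTableOffset };

constexpr uintptr_t kCachePointerMask = 0x3;   // IDC_CachePointerMask
constexpr uintptr_t kCachedTokenFlag  = 0x1;   // low bit set => tagged DispatchToken

inline CellState ClassifyCacheWord(uintptr_t value)
{
    if ((value & kCachePointerMask) == 0)
        return CellState::CachePointer;   // double-pointer-aligned InterfaceDispatchCacheHeader*
    if ((value & kCachedTokenFlag) != 0)
        return CellState::CachedToken;    // DispatchToken shifted left one with the low bit set
    return CellState::VTableOffset;       // flag bit 0x2 set, low bit clear: packed vtable offsets
}
```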
+ +.intel_syntax noprefix +#include "unixasmmacros.inc" +#include "asmconstants.h" + +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH + +// Stub dispatch routine for dispatch to a vtable slot +LEAF_ENTRY RhpVTableOffsetDispatch, _TEXT + // r11 currently contains the indirection cell address. + // load r11 to point to the vtable offset (which is stored in the m_pCache field). + mov r11, [r11 + OFFSETOF__InterfaceDispatchCell__m_pCache] + + // r11 now contains the VTableOffset where the upper 32 bits are the offset to adjust + // to get to the VTable chunk + mov rax, r11 + shr rax, 32 + + // Load the MethodTable from the object instance in rdi, and add it to the vtable offset + // to get the address in the vtable chunk list of what we want to dereference +ALTERNATE_ENTRY RhpVTableOffsetDispatchAVLocation + add rax, [rdi] + + // Load the target address of the vtable chunk into rax + mov rax, [rax] + + // Compute the chunk offset + shr r11d, 16 + + // Load the target address of the virtual function into rax + mov rax, [rax + r11] + + TAILJMP_RAX +LEAF_END RhpVTableOffsetDispatch, _TEXT + +// On Input: +// r11 contains the address of the indirection cell +// [rsp+0] m_ReturnAddress: contains the return address of caller to stub +NESTED_ENTRY RhpInterfaceDispatchSlow, _TEXT, NoHandler + + PROLOG_WITH_TRANSITION_BLOCK + + lea rdi, [rsp + __PWTB_TransitionBlock] // pTransitionBlock + mov rsi, r11 // indirection cell + + call C_FUNC(CID_ResolveWorker) + + EPILOG_WITH_TRANSITION_BLOCK_TAILCALL + TAILJMP_RAX + +NESTED_END RhpInterfaceDispatchSlow, _TEXT + +// On Input: +// r11 contains the address of the indirection cell (which is the MethodPtrAux field of the delegate) +NESTED_ENTRY CID_VirtualOpenDelegateDispatch, _TEXT, NoHandler + + PROLOG_WITH_TRANSITION_BLOCK + + lea rdi, [rsp + __PWTB_TransitionBlock] // pTransitionBlock + mov rsi, r11 // indirection cell + + call C_FUNC(CID_VirtualOpenDelegateDispatchWorker) + + EPILOG_WITH_TRANSITION_BLOCK_TAILCALL + TAILJMP_RAX + +NESTED_END CID_VirtualOpenDelegateDispatch, _TEXT + +#endif // FEATURE_CACHED_INTERFACE_DISPATCH diff --git a/src/coreclr/vm/amd64/CachedInterfaceDispatchCoreCLR.asm b/src/coreclr/vm/amd64/CachedInterfaceDispatchCoreCLR.asm new file mode 100644 index 00000000000000..f5cacb3207e150 --- /dev/null +++ b/src/coreclr/vm/amd64/CachedInterfaceDispatchCoreCLR.asm @@ -0,0 +1,76 @@ +; Licensed to the .NET Foundation under one or more agreements. +; The .NET Foundation licenses this file to you under the MIT license. + +include +include AsmConstants.inc + +ifdef FEATURE_CACHED_INTERFACE_DISPATCH + + extern CID_ResolveWorker:proc + extern CID_VirtualOpenDelegateDispatchWorker:proc + +;; Stub dispatch routine for dispatch to a vtable slot +LEAF_ENTRY RhpVTableOffsetDispatch, _TEXT + ;; r11 currently contains the indirection cell address. + ;; load r11 to point to the vtable offset (which is stored in the m_pCache field). 
+ mov r11, [r11 + OFFSETOF__InterfaceDispatchCell__m_pCache] + + ;; r11 now contains the VTableOffset where the upper 32 bits are the offset to adjust + ;; to get to the VTable chunk + mov rax, r11 + shr rax, 32 + + ;; Load the MethodTable from the object instance in rcx, and add it to the vtable offset + ;; to get the address in the vtable chunk list of what we want to dereference +ALTERNATE_ENTRY RhpVTableOffsetDispatchAVLocation + add rax, [rcx] + + ;; Load the target address of the vtable chunk into rax + mov rax, [rax] + + ;; Compute the chunk offset + shr r11d, 16 + + ;; Load the target address of the virtual function into rax + mov rax, [rax + r11] + + TAILJMP_RAX +LEAF_END RhpVTableOffsetDispatch, _TEXT + +;; On Input: +;; r11 contains the address of the indirection cell +;; [rsp+0] m_ReturnAddress: contains the return address of caller to stub +NESTED_ENTRY RhpInterfaceDispatchSlow, _TEXT + + PROLOG_WITH_TRANSITION_BLOCK + + lea rcx, [rsp + __PWTB_TransitionBlock] ; pTransitionBlock + mov rdx, r11 ; indirection cell + + call CID_ResolveWorker + + EPILOG_WITH_TRANSITION_BLOCK_TAILCALL + TAILJMP_RAX + +NESTED_END RhpInterfaceDispatchSlow, _TEXT + +;; On Input: +;; r11 contains the address of the indirection cell (which is the MethodPtrAux field of the delegate) +;; [rsp+0] m_ReturnAddress: contains the return address of caller to stub +NESTED_ENTRY CID_VirtualOpenDelegateDispatch, _TEXT + + PROLOG_WITH_TRANSITION_BLOCK + + lea rcx, [rsp + __PWTB_TransitionBlock] ; pTransitionBlock + mov rdx, r11 ; indirection cell + + call CID_VirtualOpenDelegateDispatchWorker + + EPILOG_WITH_TRANSITION_BLOCK_TAILCALL + TAILJMP_RAX + +NESTED_END CID_VirtualOpenDelegateDispatch, _TEXT + +endif ;; FEATURE_CACHED_INTERFACE_DISPATCH + + end \ No newline at end of file diff --git a/src/coreclr/vm/amd64/VirtualCallStubAMD64.asm b/src/coreclr/vm/amd64/VirtualCallStubAMD64.asm index b533789980c510..bbb19107e40715 100644 --- a/src/coreclr/vm/amd64/VirtualCallStubAMD64.asm +++ b/src/coreclr/vm/amd64/VirtualCallStubAMD64.asm @@ -4,6 +4,8 @@ include include AsmConstants.inc +ifdef FEATURE_VIRTUAL_STUB_DISPATCH + CHAIN_SUCCESS_COUNTER equ ?g_dispatch_cache_chain_success_counter@@3_KA extern VSD_ResolveWorker:proc @@ -83,4 +85,5 @@ Fail: LEAF_END ResolveWorkerChainLookupAsmStub, _TEXT +endif ;; FEATURE_VIRTUAL_STUB_DISPATCH end diff --git a/src/coreclr/vm/amd64/asmconstants.h b/src/coreclr/vm/amd64/asmconstants.h index a0dd103ee52b8c..07e3199ff67f85 100644 --- a/src/coreclr/vm/amd64/asmconstants.h +++ b/src/coreclr/vm/amd64/asmconstants.h @@ -538,6 +538,14 @@ ASMCONSTANTS_C_ASSERT(CallCountingStubData__TargetForMethod == offsetof(CallCoun #define CallCountingStubData__TargetForThresholdReached 0x10 ASMCONSTANTS_C_ASSERT(CallCountingStubData__TargetForThresholdReached == offsetof(CallCountingStubData, TargetForThresholdReached)) +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH +#define OFFSETOF__InterfaceDispatchCache__m_rgEntries 0x20 +ASMCONSTANTS_C_ASSERT(OFFSETOF__InterfaceDispatchCache__m_rgEntries == offsetof(InterfaceDispatchCache, m_rgEntries)) + +#define OFFSETOF__InterfaceDispatchCell__m_pCache 0x08 +ASMCONSTANTS_C_ASSERT(OFFSETOF__InterfaceDispatchCell__m_pCache == offsetof(InterfaceDispatchCell, m_pCache)) +#endif // FEATURE_CACHED_INTERFACE_DISPATCH + #ifdef PROFILING_SUPPORTED #define PROFILE_ENTER 0x1 #define PROFILE_LEAVE 0x2 diff --git a/src/coreclr/vm/amd64/excepamd64.cpp b/src/coreclr/vm/amd64/excepamd64.cpp index c679a67b996fc2..b25dd5c7f09fc8 100644 --- a/src/coreclr/vm/amd64/excepamd64.cpp +++ 
b/src/coreclr/vm/amd64/excepamd64.cpp @@ -599,26 +599,44 @@ AdjustContextForVirtualStub( PCODE f_IP = GetIP(pContext); - StubCodeBlockKind sk = RangeSectionStubManager::GetStubKind(f_IP); - - if (sk == STUB_CODE_BLOCK_VSD_DISPATCH_STUB) + bool isVirtualStubNullCheck = false; +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH + if (VirtualCallStubManager::isCachedInterfaceDispatchStubAVLocation(f_IP)) { - if ((*PTR_DWORD(f_IP) & 0xffffff) != X64_INSTR_CMP_IND_THIS_REG_RAX) // cmp [THIS_REG], rax - { - _ASSERTE(!"AV in DispatchStub at unknown instruction"); - return FALSE; - } + isVirtualStubNullCheck = true; } - else - if (sk == STUB_CODE_BLOCK_VSD_RESOLVE_STUB) +#endif // FEATURE_CACHED_INTERFACE_DISPATCH +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH + if (!isVirtualStubNullCheck) { - if ((*PTR_DWORD(f_IP) & 0xffffff) != X64_INSTR_MOV_RAX_IND_THIS_REG) // mov rax, [THIS_REG] + StubCodeBlockKind sk = RangeSectionStubManager::GetStubKind(f_IP); + + if (sk == STUB_CODE_BLOCK_VSD_DISPATCH_STUB) { - _ASSERTE(!"AV in ResolveStub at unknown instruction"); - return FALSE; + if ((*PTR_DWORD(f_IP) & 0xffffff) != X64_INSTR_CMP_IND_THIS_REG_RAX) // cmp [THIS_REG], rax + { + _ASSERTE(!"AV in DispatchStub at unknown instruction"); + } + else + { + isVirtualStubNullCheck = true; + } + } + else + if (sk == STUB_CODE_BLOCK_VSD_RESOLVE_STUB) + { + if ((*PTR_DWORD(f_IP) & 0xffffff) != X64_INSTR_MOV_RAX_IND_THIS_REG) // mov rax, [THIS_REG] + { + _ASSERTE(!"AV in ResolveStub at unknown instruction"); + } + else + { + isVirtualStubNullCheck = true; + } } } - else +#endif // FEATURE_VIRTUAL_STUB_DISPATCH + if (!isVirtualStubNullCheck) { return FALSE; } diff --git a/src/coreclr/vm/amd64/virtualcallstubamd64.S b/src/coreclr/vm/amd64/virtualcallstubamd64.S index 09c2d608442564..822eaaf2718f6e 100644 --- a/src/coreclr/vm/amd64/virtualcallstubamd64.S +++ b/src/coreclr/vm/amd64/virtualcallstubamd64.S @@ -4,6 +4,8 @@ .intel_syntax noprefix #include "unixasmmacros.inc" +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH + // This is the number of times a successful chain lookup will occur before the // entry is promoted to the front of the chain. This is declared as extern because // the default value (CALL_STUB_CACHE_INITIAL_SUCCESS_COUNT) is defined in the header. @@ -87,3 +89,4 @@ Fail_RWCLAS: LEAF_END ResolveWorkerChainLookupAsmStub, _TEXT +#endif // FEATURE_VIRTUAL_STUB_DISPATCH \ No newline at end of file diff --git a/src/coreclr/vm/arm64/AsmMacros_Shared.h b/src/coreclr/vm/arm64/AsmMacros_Shared.h new file mode 100644 index 00000000000000..06a05595cb977e --- /dev/null +++ b/src/coreclr/vm/arm64/AsmMacros_Shared.h @@ -0,0 +1,13 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// This file is used to allow sharing of assembly code between NativeAOT and CoreCLR, which have different conventions about how to ensure that constants offsets are accessible + +#ifdef TARGET_WINDOWS +#include "ksarm64.h" +#include "asmconstants.h" +#include "asmmacros.h" +#else +#include "asmconstants.h" +#include "unixasmmacros.inc" +#endif diff --git a/src/coreclr/vm/arm64/CachedInterfaceDispatchCoreCLR.S b/src/coreclr/vm/arm64/CachedInterfaceDispatchCoreCLR.S new file mode 100644 index 00000000000000..f0d7f3bf433017 --- /dev/null +++ b/src/coreclr/vm/arm64/CachedInterfaceDispatchCoreCLR.S @@ -0,0 +1,75 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +#include "asmconstants.h" +#include "unixasmmacros.inc" + +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH + +// +// Stub dispatch routine for dispatch to a vtable slot +// + LEAF_ENTRY RhpVTableOffsetDispatch, _TEXT + + // x11 currently contains the indirection cell address. + // load x11 to point to the vtable offset (which is stored in the m_pCache field). + ldr x11, [x11, #OFFSETOF__InterfaceDispatchCell__m_pCache] + + // x11 now contains the VTableOffset where the upper 32 bits are the offset to adjust + // to get to the VTable chunk + lsr x10, x11, #32 + + // Load the MethodTable from the object instance in x0, and add it to the vtable offset + // to get the address in the vtable chunk list of what we want to dereference + ALTERNATE_ENTRY RhpVTableOffsetDispatchAVLocation + ldr x9, [x0] + add x9, x10, x9 + + // Load the target address of the vtable chunk into x9 + ldr x9, [x9] + + // Compute the chunk offset + ubfx x10, x11, #16, #16 + + // Load the target address of the virtual function into x9 + ldr x9, [x9, x10] + + EPILOG_BRANCH_REG x9 + LEAF_END RhpVTableOffsetDispatch, _TEXT + +// +// Cache miss case, call the runtime to resolve the target and update the cache. +// x11 contains the interface dispatch cell address. +// + NESTED_ENTRY RhpInterfaceDispatchSlow, _TEXT, NoHandler + + PROLOG_WITH_TRANSITION_BLOCK + + add x0, sp, #__PWTB_TransitionBlock // pTransitionBlock + mov x1, x11 // indirection cell + + bl C_FUNC(CID_ResolveWorker) + + mov x9, x0 + + EPILOG_WITH_TRANSITION_BLOCK_TAILCALL + EPILOG_BRANCH_REG x9 + NESTED_END RhpInterfaceDispatchSlow, _TEXT + +// x11 contains the address of the indirection cell (which is the MethodPtrAux field of the delegate) + NESTED_ENTRY CID_VirtualOpenDelegateDispatch, _TEXT, NoHandler + + PROLOG_WITH_TRANSITION_BLOCK + + add x0, sp, #__PWTB_TransitionBlock // pTransitionBlock + mov x1, x11 // indirection cell + + bl C_FUNC(CID_VirtualOpenDelegateDispatchWorker) + + mov x9, x0 + + EPILOG_WITH_TRANSITION_BLOCK_TAILCALL + EPILOG_BRANCH_REG x9 + NESTED_END CID_VirtualOpenDelegateDispatch, _TEXT + +#endif // FEATURE_CACHED_INTERFACE_DISPATCH diff --git a/src/coreclr/vm/arm64/CachedInterfaceDispatchCoreCLR.asm b/src/coreclr/vm/arm64/CachedInterfaceDispatchCoreCLR.asm new file mode 100644 index 00000000000000..4b117a0336e6b2 --- /dev/null +++ b/src/coreclr/vm/arm64/CachedInterfaceDispatchCoreCLR.asm @@ -0,0 +1,83 @@ +; Licensed to the .NET Foundation under one or more agreements. +; The .NET Foundation licenses this file to you under the MIT license. + +#include "ksarm64.h" +#include "asmconstants.h" +#include "asmmacros.h" + +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH + + TEXTAREA + + EXTERN CID_ResolveWorker + EXTERN CID_VirtualOpenDelegateDispatchWorker + +;; +;; Stub dispatch routine for dispatch to a vtable slot +;; + LEAF_ENTRY RhpVTableOffsetDispatch + + ;; x11 currently contains the indirection cell address. + ;; load x11 to point to the vtable offset (which is stored in the m_pCache field). 
+ ldr x11, [x11, #OFFSETOF__InterfaceDispatchCell__m_pCache] + + ;; x11 now contains the VTableOffset where the upper 32 bits are the offset to adjust + ;; to get to the VTable chunk + lsr x10, x11, #32 + + ;; Load the MethodTable from the object instance in x0, and add it to the vtable offset + ;; to get the address in the vtable chunk list of what we want to dereference + ALTERNATE_ENTRY RhpVTableOffsetDispatchAVLocation + ldr x9, [x0] + add x9, x10, x9 + + ;; Load the target address of the vtable chunk into x9 + ldr x9, [x9] + + ;; Compute the chunk offset + ubfx x10, x11, #16, #16 + + ;; Load the target address of the virtual function into x9 + ldr x9, [x9, x10] + + EPILOG_BRANCH_REG x9 + LEAF_END RhpVTableOffsetDispatch + +;; +;; Cache miss case, call the runtime to resolve the target and update the cache. +;; x11 contains the interface dispatch cell address. +;; + NESTED_ENTRY RhpInterfaceDispatchSlow + + PROLOG_WITH_TRANSITION_BLOCK + + add x0, sp, #__PWTB_TransitionBlock ; pTransitionBlock + mov x1, x11 ; indirection cell + + bl CID_ResolveWorker + + mov x9, x0 + + EPILOG_WITH_TRANSITION_BLOCK_TAILCALL + EPILOG_BRANCH_REG x9 + NESTED_END RhpInterfaceDispatchSlow + +;; x11 contains the address of the indirection cell (which is the MethodPtrAux field of the delegate) + NESTED_ENTRY CID_VirtualOpenDelegateDispatch + + PROLOG_WITH_TRANSITION_BLOCK + + add x0, sp, #__PWTB_TransitionBlock ; pTransitionBlock + mov x1, x11 ; indirection cell + + bl CID_VirtualOpenDelegateDispatchWorker + + mov x9, x0 + + EPILOG_WITH_TRANSITION_BLOCK_TAILCALL + EPILOG_BRANCH_REG x9 + NESTED_END CID_VirtualOpenDelegateDispatch + +#endif ;; FEATURE_CACHED_INTERFACE_DISPATCH + + END diff --git a/src/coreclr/vm/arm64/asmconstants.h b/src/coreclr/vm/arm64/asmconstants.h index d71c71984a76bc..476bd6f7f0b3c9 100644 --- a/src/coreclr/vm/arm64/asmconstants.h +++ b/src/coreclr/vm/arm64/asmconstants.h @@ -173,12 +173,12 @@ ASMCONSTANTS_C_ASSERT(SIZEOF__FixupPrecode == sizeof(FixupPrecode)); ASMCONSTANTS_C_ASSERT(MethodDesc_ALIGNMENT_SHIFT == MethodDesc::ALIGNMENT_SHIFT); //ASMCONSTANTS_C_ASSERT((1<GetEntryPoint(); } +extern "C" PCODE CID_VirtualOpenDelegateDispatch(TransitionBlock * pTransitionBlock); + static PCODE GetVirtualCallStub(MethodDesc *method, TypeHandle scopeType) { CONTRACTL @@ -967,12 +969,16 @@ static PCODE GetVirtualCallStub(MethodDesc *method, TypeHandle scopeType) COMPlusThrow(kNotSupportedException); } - // need to grab a virtual dispatch stub - // method can be on a canonical MethodTable, we need to allocate the stub on the loader allocator associated with the exact type instantiation. - VirtualCallStubManager *pVirtualStubManager = scopeType.GetMethodTable()->GetLoaderAllocator()->GetVirtualCallStubManager(); - PCODE pTargetCall = pVirtualStubManager->GetCallStub(scopeType, method); - _ASSERTE(pTargetCall); - return pTargetCall; + INTERFACE_DISPATCH_CACHED_OR_VSD( + return (PCODE)CID_VirtualOpenDelegateDispatch; + , + // need to grab a virtual dispatch stub + // method can be on a canonical MethodTable, we need to allocate the stub on the loader allocator associated with the exact type instantiation. 
+ VirtualCallStubManager *pVirtualStubManager = scopeType.GetMethodTable()->GetLoaderAllocator()->GetVirtualCallStubManager(); + PCODE pTargetCall = pVirtualStubManager->GetCallStub(scopeType, method); + _ASSERTE(pTargetCall); + return pTargetCall; + ); } extern "C" BOOL QCALLTYPE Delegate_BindToMethodName(QCall::ObjectHandleOnStack d, QCall::ObjectHandleOnStack target, @@ -1781,6 +1787,19 @@ extern "C" void QCALLTYPE Delegate_Construct(QCall::ObjectHandleOnStack _this, Q END_QCALL; } +MethodDesc *COMDelegate::GetMethodDescForOpenVirtualDelegate(OBJECTREF orDelegate) +{ + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + MODE_COOPERATIVE; + } + CONTRACTL_END; + + return (MethodDesc*)((DELEGATEREF)orDelegate)->GetInvocationCount(); +} + MethodDesc *COMDelegate::GetMethodDesc(OBJECTREF orDelegate) { CONTRACTL @@ -1834,7 +1853,7 @@ MethodDesc *COMDelegate::GetMethodDesc(OBJECTREF orDelegate) } if (fOpenVirtualDelegate) - pMethodHandle = (MethodDesc*)thisDel->GetInvocationCount(); + pMethodHandle = GetMethodDescForOpenVirtualDelegate(thisDel); else pMethodHandle = FindDelegateInvokeMethod(thisDel->GetMethodTable()); } diff --git a/src/coreclr/vm/comdelegate.h b/src/coreclr/vm/comdelegate.h index 1b6ba80b7adb27..e7eca166258c94 100644 --- a/src/coreclr/vm/comdelegate.h +++ b/src/coreclr/vm/comdelegate.h @@ -67,6 +67,7 @@ class COMDelegate static Stub* GetInvokeMethodStub(EEImplMethodDesc* pMD); static MethodDesc * __fastcall GetMethodDesc(OBJECTREF obj); + static MethodDesc* GetMethodDescForOpenVirtualDelegate(OBJECTREF orDelegate); static OBJECTREF GetTargetObject(OBJECTREF obj); static BOOL IsTrueMulticastDelegate(OBJECTREF delegate); diff --git a/src/coreclr/vm/contractimpl.h b/src/coreclr/vm/contractimpl.h index f8d7d81856f467..c1784c4226a871 100644 --- a/src/coreclr/vm/contractimpl.h +++ b/src/coreclr/vm/contractimpl.h @@ -159,31 +159,58 @@ struct DispatchToken // token is really a DispatchTokenFat*, and to recover the pointer // we just shift left by 1; correspondingly, when storing a // DispatchTokenFat* in a DispatchToken, we shift right by 1. 
+#ifdef FEATURE_CACHED_INTERFACE_DISPATCH + static const UINT_PTR MASK_TYPE_ID = 0x00003FFF; +#else // FEATURE_CACHED_INTERFACE_DISPATCH static const UINT_PTR MASK_TYPE_ID = 0x00007FFF; +#endif // FEATURE_CACHED_INTERFACE_DISPATCH static const UINT_PTR MASK_SLOT_NUMBER = 0x0000FFFF; static const UINT_PTR SHIFT_TYPE_ID = 0x10; static const UINT_PTR SHIFT_SLOT_NUMBER = 0x0; #ifdef FAT_DISPATCH_TOKENS +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH + static const UINT_PTR FAT_TOKEN_FLAG = 0x40000000; +#else static const UINT_PTR FAT_TOKEN_FLAG = 0x80000000; +#endif //FEATURE_CACHED_INTERFACE_DISPATCH #endif // FAT_DISPATCH_TOKENS +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH + static const UINT_PTR INVALID_TOKEN = 0x3FFFFFFF; +#else // FEATURE_CACHED_INTERFACE_DISPATCH static const UINT_PTR INVALID_TOKEN = 0x7FFFFFFF; +#endif // FEATURE_CACHED_INTERFACE_DISPATCH #else //TARGET_64BIT static const UINT_PTR MASK_SLOT_NUMBER = UI64(0x000000000000FFFF); static const UINT_PTR SHIFT_TYPE_ID = 0x20; static const UINT_PTR SHIFT_SLOT_NUMBER = 0x0; +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH +#ifdef FAT_DISPATCH_TOKENS + static const UINT_PTR MASK_TYPE_ID = UI64(0x000000003FFFFFFF); + static const UINT_PTR FAT_TOKEN_FLAG = UI64(0x4000000000000000); + static const UINT_PTR DISPATCH_TOKEN_FLAG= UI64(0x8000000000000000); +#else + static const UINT_PTR MASK_TYPE_ID = UI64(0x000000007FFFFFFF); + static const UINT_PTR DISPATCH_TOKEN_FLAG= UI64(0x8000000000000000); +#endif // FAT_DISPATCH_TOKENS +#else // FEATURE_CACHED_INTERFACE_DISPATCH #ifdef FAT_DISPATCH_TOKENS static const UINT_PTR MASK_TYPE_ID = UI64(0x000000007FFFFFFF); static const UINT_PTR FAT_TOKEN_FLAG = UI64(0x8000000000000000); #else static const UINT_PTR MASK_TYPE_ID = UI64(0x00000000FFFFFFFF); #endif // FAT_DISPATCH_TOKENS +#endif // FEATURE_CACHED_INTERFACE_DISPATCH +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH + static const UINT_PTR INVALID_TOKEN = 0x3FFFFFFFFFFFFFFF; +#else // FEATURE_CACHED_INTERFACE_DISPATCH static const UINT_PTR INVALID_TOKEN = 0x7FFFFFFFFFFFFFFF; +#endif // FEATURE_CACHED_INTERFACE_DISPATCH #endif //TARGET_64BIT #ifdef FAT_DISPATCH_TOKENS @@ -242,13 +269,27 @@ struct DispatchToken public: #ifdef FAT_DISPATCH_TOKENS -#if !defined(TARGET_64BIT) - static const UINT32 MAX_TYPE_ID_SMALL = 0x00007FFF; -#else - static const UINT32 MAX_TYPE_ID_SMALL = 0x7FFFFFFF; -#endif + static const UINT32 MAX_TYPE_ID_SMALL = MASK_TYPE_ID; #endif // FAT_DISPATCH_TOKENS +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH + //------------------------------------------------------------------------ + // A Cached Interface DispatchToken uses the low bit to indicate that it is a dispatch token, and not a cache entry + static inline BOOL IsCachedInterfaceDispatchToken(UINT_PTR maybeToken) + { + return maybeToken & 0x1; + } + static inline DispatchToken FromCachedInterfaceDispatchToken(UINT_PTR token) + { + return DispatchToken(token >> 1); + } + static inline UINT_PTR ToCachedInterfaceDispatchToken(DispatchToken token) + { + return (token.m_token << 1) | 0x1; + } +#endif + + //------------------------------------------------------------------------ DispatchToken() { diff --git a/src/coreclr/vm/dynamicmethod.cpp b/src/coreclr/vm/dynamicmethod.cpp index 7f51aa3690ef24..5285bc76e7af1c 100644 --- a/src/coreclr/vm/dynamicmethod.cpp +++ b/src/coreclr/vm/dynamicmethod.cpp @@ -15,6 +15,8 @@ #include "nibblemapmacros.h" #include "stringliteralmap.h" #include "virtualcallstub.h" +#include "CachedInterfaceDispatchPal.h" +#include "CachedInterfaceDispatch.h" #ifndef DACCESS_COMPILE @@ 
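The new DispatchToken helpers let a token share InterfaceDispatchCell::m_pCache with real cache pointers: the token is shifted left one bit and tagged with the low bit, which is also why the token masks and the fat-token flag each give up a bit under FEATURE_CACHED_INTERFACE_DISPATCH. A small round-trip illustration (the token value here is just an arbitrary integer, not a real encoded type-id/slot pair):

```cpp
#include <cassert>
#include <cstdint>

// Sketch of the tagging added to DispatchToken. A real token is opaque; here it is
// just an integer small enough to survive the one-bit shift.
using TokenValue = uintptr_t;

inline bool       IsCachedInterfaceDispatchToken(uintptr_t maybeToken) { return (maybeToken & 0x1) != 0; }
inline uintptr_t  ToCachedInterfaceDispatchToken(TokenValue token)     { return (token << 1) | 0x1; }
inline TokenValue FromCachedInterfaceDispatchToken(uintptr_t tagged)   { return tagged >> 1; }

int main()
{
    TokenValue token  = 0x20005;                               // arbitrary illustrative value
    uintptr_t  stored = ToCachedInterfaceDispatchToken(token); // what goes into m_pCache

    // Cache pointers are at least pointer-aligned, so their low bit is always 0
    // and the two kinds of value stored in m_pCache can never collide.
    assert(IsCachedInterfaceDispatchToken(stored));
    assert(FromCachedInterfaceDispatchToken(stored) == token);
    return 0;
}
```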
-954,6 +956,19 @@ void LCGMethodResolver::RecycleIndCells() cellcurr = list->indcell; _ASSERTE(cellcurr != NULL); +#if defined (FEATURE_CACHED_INTERFACE_DISPATCH) + // Cached dispatch dispatch uses dynamically allocated caches that need to be freed individually + if (UseCachedInterfaceDispatch()) + { + InterfaceDispatchCell *pDispatchCell = (InterfaceDispatchCell*)cellcurr; + InterfaceDispatchCacheHeader* cellCacheHeader = pDispatchCell->GetCache(); + if (cellCacheHeader != NULL) + { + InterfaceDispatch_DiscardCacheHeader(cellCacheHeader); + pDispatchCell->m_pCache = 0; + } + } +#endif if (cellprev) *((BYTE**)cellprev) = cellcurr; diff --git a/src/coreclr/vm/eeconfig.cpp b/src/coreclr/vm/eeconfig.cpp index bd07afadb861ff..967d0d4f94b23f 100644 --- a/src/coreclr/vm/eeconfig.cpp +++ b/src/coreclr/vm/eeconfig.cpp @@ -769,6 +769,11 @@ HRESULT EEConfig::sync() #if defined(FEATURE_GDBJIT_FRAME) fGDBJitEmitDebugFrame = CLRConfig::GetConfigValue(CLRConfig::INTERNAL_GDBJitEmitDebugFrame) != 0; #endif + +#if defined(FEATURE_CACHED_INTERFACE_DISPATCH) && defined(FEATURE_VIRTUAL_STUB_DISPATCH) + fUseCachedInterfaceDispatch = CLRConfig::GetConfigValue(CLRConfig::INTERNAL_UseCachedInterfaceDispatch) != 0; +#endif // defined(FEATURE_CACHED_INTERFACE_DISPATCH) && defined(FEATURE_VIRTUAL_STUB_DISPATCH) + return hr; } diff --git a/src/coreclr/vm/eeconfig.h b/src/coreclr/vm/eeconfig.h index 1f66a86fec1269..d46b4b5db4cfa3 100644 --- a/src/coreclr/vm/eeconfig.h +++ b/src/coreclr/vm/eeconfig.h @@ -124,6 +124,10 @@ class EEConfig } #endif // FEATURE_GDBJIT && _DEBUG +#if defined(FEATURE_CACHED_INTERFACE_DISPATCH) && defined(FEATURE_VIRTUAL_STUB_DISPATCH) + bool UseCachedInterfaceDispatch() const { LIMITED_METHOD_CONTRACT; return fUseCachedInterfaceDispatch; } +#endif // defined(FEATURE_CACHED_INTERFACE_DISPATCH) && defined(FEATURE_VIRTUAL_STUB_DISPATCH) + #if defined(FEATURE_GDBJIT_FRAME) inline bool ShouldEmitDebugFrame(void) const {LIMITED_METHOD_CONTRACT; return fGDBJitEmitDebugFrame;} #endif // FEATURE_GDBJIT_FRAME @@ -642,6 +646,11 @@ class EEConfig #if defined(FEATURE_GDBJIT_FRAME) bool fGDBJitEmitDebugFrame; #endif + +#if defined(FEATURE_CACHED_INTERFACE_DISPATCH) && defined(FEATURE_VIRTUAL_STUB_DISPATCH) + bool fUseCachedInterfaceDispatch; +#endif // defined(FEATURE_CACHED_INTERFACE_DISPATCH) && defined(FEATURE_VIRTUAL_STUB_DISPATCH) + public: enum BitForMask { diff --git a/src/coreclr/vm/excep.cpp b/src/coreclr/vm/excep.cpp index 5734d184610eee..769c142e25888a 100644 --- a/src/coreclr/vm/excep.cpp +++ b/src/coreclr/vm/excep.cpp @@ -6025,6 +6025,12 @@ BOOL IsIPinVirtualStub(PCODE f_IP) return FALSE; } +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH + if (VirtualCallStubManager::isCachedInterfaceDispatchStubAVLocation(f_IP)) + return TRUE; +#endif + +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH StubCodeBlockKind sk = RangeSectionStubManager::GetStubKind(f_IP); if (sk == STUB_CODE_BLOCK_VSD_DISPATCH_STUB) @@ -6039,6 +6045,9 @@ BOOL IsIPinVirtualStub(PCODE f_IP) else { return FALSE; } +#else // FEATURE_VIRTUAL_STUB_DISPATCH + return FALSE; +#endif // FEATURE_VIRTUAL_STUB_DISPATCH } // Check if the passed in instruction pointer is in one of the diff --git a/src/coreclr/vm/genericdict.cpp b/src/coreclr/vm/genericdict.cpp index 9d335522ca1eeb..eea8def4f4ed9c 100644 --- a/src/coreclr/vm/genericdict.cpp +++ b/src/coreclr/vm/genericdict.cpp @@ -1041,7 +1041,8 @@ Dictionary::PopulateEntry( if (fRequiresDispatchStub) { - // Generate a dispatch stub and store it in the dictionary. 
+ LoaderAllocator * pDictLoaderAllocator = (pMT != NULL) ? pMT->GetLoaderAllocator() : pMD->GetLoaderAllocator(); + // Generate a dispatch stub and gather a slot. // // We generate an indirection so we don't have to write to the dictionary // when we do updates, and to simplify stub indirect callsites. Stubs stored in @@ -1053,16 +1054,12 @@ Dictionary::PopulateEntry( // dictionary entry to the caller, still using "call [eax]", and then the // stub dispatch mechanism can update the dictitonary itself and we don't // need an indirection. - LoaderAllocator * pDictLoaderAllocator = (pMT != NULL) ? pMT->GetLoaderAllocator() : pMD->GetLoaderAllocator(); - - VirtualCallStubManager * pMgr = pDictLoaderAllocator->GetVirtualCallStubManager(); - + // // We indirect through a cell so that updates can take place atomically. // The call stub and the indirection cell have the same lifetime as the dictionary itself, i.e. - // are allocated in the domain of the dicitonary. - PCODE addr = pMgr->GetCallStub(ownerType, methodSlot); + // are allocated in the domain of the dictionary. - result = (CORINFO_GENERIC_HANDLE)pMgr->GenerateStubIndirection(addr); + result = (CORINFO_GENERIC_HANDLE)GenerateDispatchStubCellEntrySlot(pDictLoaderAllocator, ownerType, methodSlot, NULL); break; } diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index 87c0b89e593d04..5d85d98fc4acbc 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -5283,31 +5283,17 @@ void CEEInfo::getCallInfo( // We shouldn't be using GetLoaderAllocator here because for LCG, we need to get the // VirtualCallStubManager from where the stub will be used. // For normal methods there is no difference. - LoaderAllocator *pLoaderAllocator = m_pMethodBeingCompiled->GetLoaderAllocator(); - VirtualCallStubManager *pMgr = pLoaderAllocator->GetVirtualCallStubManager(); - - PCODE addr = pMgr->GetCallStub(exactType, pTargetMD); - // Now we want to indirect through a cell so that updates can take place atomically. + LoaderAllocator *pLoaderAllocator = m_pMethodBeingCompiled->GetLoaderAllocator(); + LCGMethodResolver *pResolver = NULL; if (m_pMethodBeingCompiled->IsLCGMethod()) { - // LCG methods should use recycled indcells to prevent leaks. - indcell = pMgr->GenerateStubIndirection(addr, TRUE); - - // Add it to the per DM list so that we can recycle them when the resolver is finalized - LCGMethodResolver *pResolver = m_pMethodBeingCompiled->AsDynamicMethodDesc()->GetLCGMethodResolver(); - pResolver->AddToUsedIndCellList(indcell); - } - else - { - // Normal methods should avoid recycled cells to preserve the locality of all indcells - // used by one method. 
- indcell = pMgr->GenerateStubIndirection(addr, FALSE); + pResolver = m_pMethodBeingCompiled->AsDynamicMethodDesc()->GetLCGMethodResolver(); } // We use an indirect call pResult->stubLookup.constLookup.accessType = IAT_PVALUE; - pResult->stubLookup.constLookup.addr = indcell; + pResult->stubLookup.constLookup.addr = GenerateDispatchStubCellEntryMethodDesc(m_pMethodBeingCompiled->GetLoaderAllocator(), exactType, pTargetMD, pResolver); } #endif // STUB_DISPATCH_PORTABLE } @@ -13677,25 +13663,6 @@ BOOL LoadDynamicInfoEntry(Module *currentModule, } break; - case ENCODE_VIRTUAL_ENTRY_SLOT: - { - DWORD slot = CorSigUncompressData(pBlob); - - TypeHandle ownerType = ZapSig::DecodeType(currentModule, pInfoModule, pBlob); - - LOG((LF_ZAP, LL_INFO100000, " Fixup stub dispatch\n")); - - VirtualCallStubManager * pMgr = currentModule->GetLoaderAllocator()->GetVirtualCallStubManager(); - - // - // We should be generating a stub indirection here, but the zapper already uses one level - // of indirection, i.e. we would have to return IAT_PPVALUE to the JIT, and on the whole the JITs - // aren't quite set up to accept that. Furthermore the call sequences would be different - at - // the moment an indirection cell uses "call [cell-addr]" on x86, and instead we would want the - // euqivalent of "call [[call-addr]]". This could perhaps be implemented as "call [eax]" - result = pMgr->GetCallStub(ownerType, slot); - } - break; #ifdef FEATURE_READYTORUN case ENCODE_READYTORUN_HELPER: { diff --git a/src/coreclr/vm/method.hpp b/src/coreclr/vm/method.hpp index 11e369e35e95f6..73db7083022bd9 100644 --- a/src/coreclr/vm/method.hpp +++ b/src/coreclr/vm/method.hpp @@ -2389,7 +2389,6 @@ inline MethodDescChunk *MethodDesc::GetMethodDescChunk() const MethodDesc* NonVirtualEntry2MethodDesc(PCODE entryPoint); - typedef DPTR(class StoredSigMethodDesc) PTR_StoredSigMethodDesc; class StoredSigMethodDesc : public MethodDesc { diff --git a/src/coreclr/vm/methodtable.h b/src/coreclr/vm/methodtable.h index 0149bc37fa6e5a..5bd6d77a70f807 100644 --- a/src/coreclr/vm/methodtable.h +++ b/src/coreclr/vm/methodtable.h @@ -1625,9 +1625,11 @@ class MethodTable typedef DPTR(VTableIndir2_t) VTableIndir_t; static DWORD GetIndexOfVtableIndirection(DWORD slotNum); + static DWORD GetStartSlotForVtableIndirection(UINT32 indirectionIndex, DWORD wNumVirtuals); static DWORD GetEndSlotForVtableIndirection(UINT32 indirectionIndex, DWORD wNumVirtuals); static UINT32 GetIndexAfterVtableIndirection(UINT32 slotNum); + static UINT32 IndexAfterVtableIndirectionToSlot(UINT32 slotNum); static DWORD GetNumVtableIndirections(DWORD wNumVirtuals); DPTR(VTableIndir_t) GetVtableIndirections(); DWORD GetNumVtableIndirections(); diff --git a/src/coreclr/vm/prestub.cpp b/src/coreclr/vm/prestub.cpp index cb7b0cea405f56..6458e86998ce71 100644 --- a/src/coreclr/vm/prestub.cpp +++ b/src/coreclr/vm/prestub.cpp @@ -23,6 +23,7 @@ #include "virtualcallstub.h" #include "../debug/ee/debugger.h" +#include "CachedInterfaceDispatchPal.h" #ifdef FEATURE_INTERPRETER #include "interpexec.h" #endif @@ -3222,8 +3223,10 @@ EXTERN_C PCODE STDCALL ExternalMethodFixupWorker(TransitionBlock * pTransitionBl } _ASSERTE(pImportSection != NULL); - _ASSERTE(pImportSection->EntrySize == sizeof(TADDR)); - COUNT_T index = (rva - pImportSection->Section.VirtualAddress) / sizeof(TADDR); + COUNT_T index; + + index = (rva - pImportSection->Section.VirtualAddress) / pImportSection->EntrySize; + _ASSERTE((pImportSection->EntrySize == sizeof(TADDR)) || (pImportSection->EntrySize == 2*sizeof(TADDR))); 
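ExternalMethodFixupWorker can no longer assume one-pointer import entries, so the cell index is now derived from the section's declared EntrySize, with the assert above allowing either one or two pointers. The arithmetic, written out as a tiny helper over an illustrative slice of the import-section header:

```cpp
#include <cstddef>
#include <cstdint>

// Illustrative subset of a ReadyToRun import section header; only the two fields
// the index computation needs are shown.
struct ImportSectionView
{
    uint32_t SectionVirtualAddress;
    uint32_t EntrySize;   // sizeof(TADDR) for classic cells, 2 * sizeof(TADDR) for dispatch cells
};

// Index into the parallel signature table for the fixup cell at 'cellRva'.
inline size_t SignatureIndexForCell(uint32_t cellRva, const ImportSectionView& section)
{
    return (cellRva - section.SectionVirtualAddress) / section.EntrySize;
}
```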
PTR_DWORD pSignatures = dac_cast(pNativeImage->GetRvaData(pImportSection->Signatures)); @@ -3342,15 +3345,6 @@ EXTERN_C PCODE STDCALL ExternalMethodFixupWorker(TransitionBlock * pTransitionBl goto VirtualEntry; } - case ENCODE_VIRTUAL_ENTRY_SLOT: - { - slot = CorSigUncompressData(pBlob); - pMT = ZapSig::DecodeType(pModule, pInfoModule, pBlob).GetMethodTable(); - - fVirtual = true; - break; - } - default: _ASSERTE(!"Unexpected CORCOMPILE_FIXUP_BLOB_KIND"); ThrowHR(COR_E_BADIMAGEFORMAT); @@ -3369,22 +3363,75 @@ EXTERN_C PCODE STDCALL ExternalMethodFixupWorker(TransitionBlock * pTransitionBl COMPlusThrow(kNullReferenceException); } - DispatchToken token; - if (pMT->IsInterface()) +#if defined(FEATURE_VIRTUAL_STUB_DISPATCH) && defined(FEATURE_CACHED_INTERFACE_DISPATCH) + if (UseCachedInterfaceDispatch()) +#endif +#if defined(FEATURE_CACHED_INTERFACE_DISPATCH) { + if (ALIGN_UP(rva, sizeof(TADDR) * 2) == rva && pImportSection->EntrySize == sizeof(TADDR) * 2) + { + // The entry is aligned and the size is correct, so we can use the cached interface dispatch mechanism + // to speed up further uses of this interface dispatch slot + DispatchToken token = VirtualCallStubManager::GetTokenFromOwnerAndSlot(pMT, slot); + + uintptr_t addr = (uintptr_t)RhpInitialInterfaceDispatch; + uintptr_t pCache = (uintptr_t)DispatchToken::ToCachedInterfaceDispatchToken(token); +#ifdef TARGET_64BIT + int64_t rgComparand[2] = { *(volatile int64_t*)pIndirection , *(((volatile int64_t*)pIndirection) + 1) }; + // We need to only update if the indirection cell is still pointing to the initial R2R stub + // But we don't have the address of the initial R2R stub, as that is part of the R2R image + // However, we can rely on the detail that the cache value will never be 0 once it is updated + // So we read the indirection cell data, and if the cache portion is 0, we attempt to update the complete cell + if (rgComparand[1] == 0 && PalInterlockedCompareExchange128((int64_t*)pIndirection, rgComparand[1], rgComparand[0], rgComparand) && rgComparand[1] == 0) + { + PalInterlockedCompareExchange128((int64_t*)pIndirection, pCache, addr, rgComparand); + } +#else + // Stuff the two pointers into a 64-bit value as the proposed new value for the CompareExchange64 below. 
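            // Illustrative sketch of the 32-bit cell packing assumed just below: the stub entry
            // point occupies the low 32 bits and the cache/token word the high 32 bits, i.e. roughly
            //     newValue = (uint64_t)(uint32_t)addr | ((uint64_t)(uint32_t)pCache << 32);
            // The exchange is only attempted while the high half is still 0, which means the cell
            // still points at the initial R2R stub and no other thread has published a cache yet.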
+ uint64_t oldValue = *(volatile uint64_t*)pIndirection; + if ((oldValue >> 32) == 0) + { + // The cache portion is 0, so we attempt to update the complete cell + int64_t iNewValue = (int64_t)((uint64_t)(uintptr_t)addr | ((uint64_t)(uintptr_t)pCache << 32)); + PalInterlockedCompareExchange64((int64_t*)pIndirection, iNewValue, oldValue); + } +#endif + } + + // We lost the race or the R2R image was generated without cached interface dispatch support, simply do the resolution in pure C++ + DispatchToken token; if (pMT->IsInterface()) + { token = pMT->GetLoaderAllocator()->GetDispatchToken(pMT->GetTypeID(), slot); + MethodTable* objectType = (*protectedObj)->GetMethodTable(); + VirtualCallStubManager::Resolver(objectType, token, protectedObj, &pCode, TRUE /* throwOnConflict */); + } else - token = DispatchToken::CreateDispatchToken(slot); - - StubCallSite callSite(pIndirection, pEMFrame->GetReturnAddress()); - pCode = pMgr->ResolveWorker(&callSite, protectedObj, token, STUB_CODE_BLOCK_VSD_LOOKUP_STUB); + { + pCode = (*protectedObj)->GetMethodTable()->GetRestoredSlot(slot); // Ensure that the target slot has an entrypoint + } } +#endif // FEATURE_CACHED_INTERFACE_DISPATCH +#if defined(FEATURE_VIRTUAL_STUB_DISPATCH) && defined(FEATURE_CACHED_INTERFACE_DISPATCH) else +#endif +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH { - pCode = pMgr->GetVTableCallStub(slot); - *(TADDR *)pIndirection = pCode; + DispatchToken token; + if (pMT->IsInterface()) + { + token = pMT->GetLoaderAllocator()->GetDispatchToken(pMT->GetTypeID(), slot); + + StubCallSite callSite(pIndirection, pEMFrame->GetReturnAddress()); + pCode = pMgr->ResolveWorker(&callSite, protectedObj, token, STUB_CODE_BLOCK_VSD_LOOKUP_STUB); + } + else + { + pCode = pMgr->GetVTableCallStub(slot); + *(TADDR *)pIndirection = pCode; + } } +#endif // FEATURE_VIRTUAL_STUB_DISPATCH _ASSERTE(pCode != (PCODE)NULL); } else @@ -3781,7 +3828,7 @@ PCODE DynamicHelperFixup(TransitionBlock * pTransitionBlock, TADDR * pCell, DWOR _ASSERTE(pImportSection->EntrySize == sizeof(TADDR)); - COUNT_T index = (rva - pImportSection->Section.VirtualAddress) / sizeof(TADDR); + COUNT_T index = (rva - pImportSection->Section.VirtualAddress) / pImportSection->EntrySize; PTR_DWORD pSignatures = dac_cast(pNativeImage->GetRvaData(pImportSection->Signatures)); diff --git a/src/coreclr/vm/readytoruninfo.cpp b/src/coreclr/vm/readytoruninfo.cpp index 34cd10559aa1ba..bbc5948412df14 100644 --- a/src/coreclr/vm/readytoruninfo.cpp +++ b/src/coreclr/vm/readytoruninfo.cpp @@ -485,7 +485,7 @@ static bool AcquireImage(Module * pModule, PEImageLayout * pLayout, READYTORUN_H // Found an eager fixup section. Check the signature of each fixup in this section. 
PVOID *pFixups = (PVOID *)((PBYTE)pLayout->GetBase() + pCurSection->Section.VirtualAddress); - DWORD nFixups = pCurSection->Section.Size / TARGET_POINTER_SIZE; + DWORD nFixups = pCurSection->Section.Size / pCurSection->EntrySize; DWORD *pSignatures = (DWORD *)((PBYTE)pLayout->GetBase() + pCurSection->Signatures); for (DWORD i = 0; i < nFixups; i++) { diff --git a/src/coreclr/vm/riscv64/asmconstants.h b/src/coreclr/vm/riscv64/asmconstants.h index 692e2901da3778..f817791062aba2 100644 --- a/src/coreclr/vm/riscv64/asmconstants.h +++ b/src/coreclr/vm/riscv64/asmconstants.h @@ -152,8 +152,6 @@ ASMCONSTANTS_C_ASSERT(FaultingExceptionFrame__m_fFilterExecuted == offsetof(Faul ASMCONSTANTS_C_ASSERT(SIZEOF__FixupPrecode == sizeof(FixupPrecode)); ASMCONSTANTS_C_ASSERT(MethodDesc_ALIGNMENT_SHIFT == MethodDesc::ALIGNMENT_SHIFT); -#define ResolveCacheElem__pMT 0x00 -#define ResolveCacheElem__token 0x08 #define ResolveCacheElem__target 0x10 #define ResolveCacheElem__pNext 0x18 ASMCONSTANTS_C_ASSERT(ResolveCacheElem__target == offsetof(ResolveCacheElem, target)); diff --git a/src/coreclr/vm/stubmgr.cpp b/src/coreclr/vm/stubmgr.cpp index df6b385d30ccd8..4b652513b6aade 100644 --- a/src/coreclr/vm/stubmgr.cpp +++ b/src/coreclr/vm/stubmgr.cpp @@ -1505,10 +1505,12 @@ BOOL RangeSectionStubManager::CheckIsStub_Internal(PCODE stubStartAddress) case STUB_CODE_BLOCK_JUMPSTUB: case STUB_CODE_BLOCK_STUBLINK: case STUB_CODE_BLOCK_METHOD_CALL_THUNK: +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH case STUB_CODE_BLOCK_VSD_DISPATCH_STUB: case STUB_CODE_BLOCK_VSD_RESOLVE_STUB: case STUB_CODE_BLOCK_VSD_LOOKUP_STUB: case STUB_CODE_BLOCK_VSD_VTABLE_STUB: +#endif // FEATURE_VIRTUAL_STUB_DISPATCH return TRUE; default: break; @@ -1540,11 +1542,13 @@ BOOL RangeSectionStubManager::DoTraceStub(PCODE stubStartAddress, TraceDestinati case STUB_CODE_BLOCK_STUBLINK: return StubLinkStubManager::g_pManager->DoTraceStub(stubStartAddress, trace); +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH case STUB_CODE_BLOCK_VSD_DISPATCH_STUB: case STUB_CODE_BLOCK_VSD_RESOLVE_STUB: case STUB_CODE_BLOCK_VSD_LOOKUP_STUB: case STUB_CODE_BLOCK_VSD_VTABLE_STUB: return VirtualCallStubManagerManager::GlobalManager()->DoTraceStub(stubStartAddress, trace); +#endif // FEATURE_VIRTUAL_STUB_DISPATCH case STUB_CODE_BLOCK_METHOD_CALL_THUNK: #ifdef DACCESS_COMPILE @@ -1580,6 +1584,7 @@ LPCWSTR RangeSectionStubManager::GetStubManagerName(PCODE addr) case STUB_CODE_BLOCK_METHOD_CALL_THUNK: return W("MethodCallThunk"); +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH case STUB_CODE_BLOCK_VSD_DISPATCH_STUB: return W("VSD_DispatchStub"); @@ -1591,6 +1596,7 @@ LPCWSTR RangeSectionStubManager::GetStubManagerName(PCODE addr) case STUB_CODE_BLOCK_VSD_VTABLE_STUB: return W("VSD_VTableStub"); +#endif // FEATURE_VIRTUAL_STUB_DISPATCH default: break; @@ -2206,7 +2212,9 @@ VirtualCallStubManager::DoEnumMemoryRegions(CLRDataEnumMemoryFlags flags) WRAPPER_NO_CONTRACT; DAC_ENUM_VTHIS(); EMEM_OUT(("MEM: %p VirtualCallStubManager\n", dac_cast(this))); +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH GetCacheEntryRangeList()->EnumMemoryRegions(flags); +#endif // FEATURE_VIRTUAL_STUB_DISPATCH } #if defined(TARGET_X86) && !defined(UNIX_X86_ABI) diff --git a/src/coreclr/vm/stubmgr.h b/src/coreclr/vm/stubmgr.h index a79ad2eb88110a..4a9b449ff2e97c 100644 --- a/src/coreclr/vm/stubmgr.h +++ b/src/coreclr/vm/stubmgr.h @@ -814,6 +814,21 @@ class StubManagerHelpers #endif } +#if !defined(TARGET_X86) + static TADDR GetIndirectionCellArg(T_CONTEXT *pContext) + { +#if defined(TARGET_AMD64) + return pContext->R11; +#elif 
defined(TARGET_ARM) + return pContext->R4; +#elif defined(TARGET_ARM64) + return pContext->X11; +#else + PORTABILITY_ASSERT("StubManagerHelpers::GetIndirectionCellArg"); + return (TADDR)NULL; +#endif + } +#endif // !defined(TARGET_X86) }; #endif // !__stubmgr_h__ diff --git a/src/coreclr/vm/virtualcallstub.cpp b/src/coreclr/vm/virtualcallstub.cpp index a2bfb6f2c49604..4b84dd4b5d6576 100644 --- a/src/coreclr/vm/virtualcallstub.cpp +++ b/src/coreclr/vm/virtualcallstub.cpp @@ -15,6 +15,9 @@ #include "common.h" #include "array.h" +#include "CachedInterfaceDispatchPal.h" +#include "CachedInterfaceDispatch.h" +#include "comdelegate.h" #ifdef FEATURE_PERFMAP #include "perfmap.h" @@ -95,14 +98,51 @@ extern size_t g_dispatch_cache_chain_success_counter; SPTR_IMPL_INIT(VirtualCallStubManagerManager, VirtualCallStubManagerManager, g_pManager, NULL); +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH +struct CachedIndirectionCellBlockListNode +{ + CachedIndirectionCellBlockListNode *m_pNext; + TADDR m_pFiller; // Used to ensure that the Indirection Cells are double pointer aligned + InterfaceDispatchCell m_rgIndCells[0]; +}; +#endif // FEATURE_CACHED_INTERFACE_DISPATCH + #ifndef DACCESS_COMPILE +BYTE* GenerateDispatchStubCellEntryMethodDesc(LoaderAllocator *pLoaderAllocator, TypeHandle ownerType, MethodDesc *pMD, LCGMethodResolver *pResolver) +{ + return GenerateDispatchStubCellEntrySlot(pLoaderAllocator, ownerType, pMD->GetSlot(), pResolver); +} + +BYTE* GenerateDispatchStubCellEntrySlot(LoaderAllocator *pLoaderAllocator, TypeHandle ownerType, int methodSlot, LCGMethodResolver *pResolver) +{ + VirtualCallStubManager * pMgr = pLoaderAllocator->GetVirtualCallStubManager(); + + DispatchToken token = VirtualCallStubManager::GetTokenFromOwnerAndSlot(ownerType, methodSlot); + + PCODE addr; + INTERFACE_DISPATCH_CACHED_OR_VSD( + addr = (PCODE)RhpInitialInterfaceDispatch // Always use the initial dispatch stub for cached interface dispatch + , + addr = pMgr->GetCallStub(token)) // Acquire a stub which is token specific in the VSD case + + BYTE* indcell = pMgr->GenerateStubIndirection(addr, token, pResolver != NULL); + + if (pResolver != NULL) + { + pResolver->AddToUsedIndCellList(indcell); + } + + return indcell; +} + #ifdef STUB_LOGGING UINT32 STUB_MISS_COUNT_VALUE = 100; UINT32 STUB_COLLIDE_WRITE_PCT = 100; UINT32 STUB_COLLIDE_MONO_PCT = 0; #endif // STUB_LOGGING +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH FastTable::NumCallStubs_t FastTable::NumCallStubs; FastTable* BucketTable::dead = NULL; //linked list of the abandoned buckets @@ -110,6 +150,7 @@ FastTable* BucketTable::dead = NULL; //linked list of the abandoned buckets DispatchCache *g_resolveCache = NULL; //cache of dispatch stubs for in line lookup by resolve stubs. size_t g_dispatch_cache_chain_success_counter = CALL_STUB_CACHE_INITIAL_SUCCESS_COUNT; +#endif // FEATURE_VIRTUAL_STUB_DISPATCH #ifdef STUB_LOGGING UINT32 g_resetCacheCounter; @@ -179,7 +220,9 @@ void VirtualCallStubManager::LoggingDump() it.Current()->LogStats(); } +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH g_resolveCache->LogStats(); +#endif // FEATURE_VIRTUAL_STUB_DISPATCH // Temp space to use for formatting the output. 
static const int FMT_STR_SIZE = 160; @@ -363,9 +406,9 @@ void VirtualCallStubManager::LoggingDump() sprintf_s(szPrintStr, ARRAY_SIZE(szPrintStr), "\r\ncache data\r\n"); WriteFile (g_hStubLogFile, szPrintStr, (DWORD) strlen(szPrintStr), &dwWriteByte, NULL); +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH size_t total, used; g_resolveCache->GetLoadFactor(&total, &used); - sprintf_s(szPrintStr, ARRAY_SIZE(szPrintStr), OUTPUT_FORMAT_SIZE, "cache_entry_used", used); WriteFile (g_hStubLogFile, szPrintStr, (DWORD) strlen(szPrintStr), &dwWriteByte, NULL); sprintf_s(szPrintStr, ARRAY_SIZE(szPrintStr), OUTPUT_FORMAT_INT, "cache_entry_counter", g_cache_entry_counter); @@ -402,6 +445,7 @@ void VirtualCallStubManager::LoggingDump() sprintf_s(szPrintStr, ARRAY_SIZE(szPrintStr), "\r\n"); WriteFile (g_hStubLogFile, szPrintStr, (DWORD) strlen(szPrintStr), &dwWriteByte, NULL); #endif // STUB_LOGGING +#endif // FEATURE_VIRTUAL_STUB_DISPATCH #if 0 for (unsigned i = 0; i < ContractImplMap::max_delta_count; i++) @@ -456,6 +500,7 @@ void VirtualCallStubManager::ResetCache() } CONTRACTL_END +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH g_resolveCache->LogStats(); g_insert_cache_external = 0; @@ -475,7 +520,7 @@ void VirtualCallStubManager::ResetCache() { it.UnlinkEntry(); } - +#endif // FEATURE_VIRTUAL_STUB_DISPATCH } void VirtualCallStubManager::Init(LoaderAllocator *pLoaderAllocator) @@ -494,6 +539,7 @@ void VirtualCallStubManager::Init(LoaderAllocator *pLoaderAllocator) m_indCellLock.Init(CrstVSDIndirectionCellLock, CRST_UNSAFE_ANYMODE); +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH // // Now allocate all BucketTables // @@ -503,6 +549,7 @@ void VirtualCallStubManager::Init(LoaderAllocator *pLoaderAllocator) NewHolder lookups_holder(new BucketTable(CALL_STUB_MIN_BUCKETS)); NewHolder vtableCallers_holder(new BucketTable(CALL_STUB_MIN_BUCKETS)); NewHolder cache_entries_holder(new BucketTable(CALL_STUB_MIN_BUCKETS)); +#endif // FEATURE_VIRTUAL_STUB_DISPATCH // // Now allocate our LoaderHeaps @@ -514,8 +561,10 @@ void VirtualCallStubManager::Init(LoaderAllocator *pLoaderAllocator) // DWORD indcell_heap_reserve_size; DWORD indcell_heap_commit_size; +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH DWORD cache_entry_heap_reserve_size; DWORD cache_entry_heap_commit_size; +#endif // FEATURE_VIRTUAL_STUB_DISPATCH // // Setup an expected number of items to commit and reserve @@ -526,7 +575,9 @@ void VirtualCallStubManager::Init(LoaderAllocator *pLoaderAllocator) // indcell_heap_commit_size = 16; indcell_heap_reserve_size = 2000; +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH cache_entry_heap_commit_size = 16; cache_entry_heap_reserve_size = 800; +#endif // FEATURE_VIRTUAL_STUB_DISPATCH // // Convert the number of items into a size in bytes to commit and reserve @@ -534,8 +585,10 @@ void VirtualCallStubManager::Init(LoaderAllocator *pLoaderAllocator) indcell_heap_reserve_size *= sizeof(void *); indcell_heap_commit_size *= sizeof(void *); +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH cache_entry_heap_reserve_size *= sizeof(ResolveCacheElem); cache_entry_heap_commit_size *= sizeof(ResolveCacheElem); +#endif // FEATURE_VIRTUAL_STUB_DISPATCH // // Align up all of the commit and reserve sizes @@ -543,15 +596,20 @@ void VirtualCallStubManager::Init(LoaderAllocator *pLoaderAllocator) indcell_heap_reserve_size = (DWORD) ALIGN_UP(indcell_heap_reserve_size, GetOsPageSize()); indcell_heap_commit_size = (DWORD) ALIGN_UP(indcell_heap_commit_size, GetOsPageSize()); +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH cache_entry_heap_reserve_size = (DWORD) ALIGN_UP(cache_entry_heap_reserve_size, 
GetOsPageSize()); cache_entry_heap_commit_size = (DWORD) ALIGN_UP(cache_entry_heap_commit_size, GetOsPageSize()); +#endif // FEATURE_VIRTUAL_STUB_DISPATCH BYTE * initReservedMem = NULL; if (!m_loaderAllocator->IsCollectible()) { - DWORD dwTotalReserveMemSizeCalc = indcell_heap_reserve_size + - cache_entry_heap_reserve_size; + DWORD dwTotalReserveMemSizeCalc = indcell_heap_reserve_size +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH + + cache_entry_heap_reserve_size +#endif // FEATURE_VIRTUAL_STUB_DISPATCH + ; DWORD dwTotalReserveMemSize = (DWORD) ALIGN_UP(dwTotalReserveMemSizeCalc, VIRTUAL_ALLOC_RESERVE_GRANULARITY); @@ -567,12 +625,20 @@ void VirtualCallStubManager::Init(LoaderAllocator *pLoaderAllocator) DWORD cPagesRemainder = cWastedPages % 2; // We'll throw this at the cache entry heap indcell_heap_reserve_size += cPagesPerHeap * GetOsPageSize(); +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH cache_entry_heap_reserve_size += (cPagesPerHeap + cPagesRemainder) * GetOsPageSize(); +#else + indcell_heap_reserve_size += (cPagesPerHeap + cPagesRemainder) * GetOsPageSize(); +#endif // FEATURE_VIRTUAL_STUB_DISPATCH } +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH CONSISTENCY_CHECK((indcell_heap_reserve_size + cache_entry_heap_reserve_size)== dwTotalReserveMemSize); +#else + CONSISTENCY_CHECK(indcell_heap_reserve_size == dwTotalReserveMemSize); +#endif // FEATURE_VIRTUAL_STUB_DISPATCH } initReservedMem = (BYTE*)ExecutableAllocator::Instance()->Reserve(dwTotalReserveMemSize); @@ -587,12 +653,20 @@ void VirtualCallStubManager::Init(LoaderAllocator *pLoaderAllocator) indcell_heap_reserve_size = GetOsPageSize(); indcell_heap_commit_size = GetOsPageSize(); +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH cache_entry_heap_reserve_size = GetOsPageSize(); cache_entry_heap_commit_size = GetOsPageSize(); +#else + // If we don't support VSD, use a slightly bigger heap size to avoid wasting memory + indcell_heap_reserve_size = 2 * GetOsPageSize(); +#endif // FEATURE_VIRTUAL_STUB_DISPATCH #ifdef _DEBUG - DWORD dwTotalReserveMemSizeCalc = indcell_heap_reserve_size + - cache_entry_heap_reserve_size; + DWORD dwTotalReserveMemSizeCalc = indcell_heap_reserve_size +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH + + cache_entry_heap_reserve_size +#endif // FEATURE_VIRTUAL_STUB_DISPATCH + ; #endif DWORD dwActualVSDSize = 0; @@ -607,13 +681,21 @@ void VirtualCallStubManager::Init(LoaderAllocator *pLoaderAllocator) } // Hot memory, Writable, No-Execute, infrequent writes + RangeList* pIndCellRangeList = NULL; +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH + if (UseCachedInterfaceDispatch()) + { + pIndCellRangeList = &indcell_rangeList; + } +#endif NewHolder indcell_heap_holder( new LoaderHeap(indcell_heap_reserve_size, indcell_heap_commit_size, initReservedMem, indcell_heap_reserve_size, - NULL, UnlockedLoaderHeap::HeapKind::Data)); + pIndCellRangeList, UnlockedLoaderHeap::HeapKind::Data)); initReservedMem += indcell_heap_reserve_size; +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH // Hot memory, Writable, No-Execute, infrequent writes NewHolder cache_entry_heap_holder( new LoaderHeap(cache_entry_heap_reserve_size, cache_entry_heap_commit_size, @@ -637,6 +719,7 @@ void VirtualCallStubManager::Init(LoaderAllocator *pLoaderAllocator) // Hot memory, Writable, Execute, write exactly once NewHolder vtable_heap_holder( new CodeFragmentHeap(pLoaderAllocator, STUB_CODE_BLOCK_VSD_VTABLE_STUB)); +#endif // FEATURE_VIRTUAL_STUB_DISPATCH // Allocate the initial counter block NewHolder m_counters_holder(new counter_block); @@ -646,6 +729,7 @@ void VirtualCallStubManager::Init(LoaderAllocator 
*pLoaderAllocator) // indcell_heap = indcell_heap_holder; indcell_heap_holder.SuppressRelease(); +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH lookup_heap = lookup_heap_holder; lookup_heap_holder.SuppressRelease(); dispatch_heap = dispatch_heap_holder; dispatch_heap_holder.SuppressRelease(); resolve_heap = resolve_heap_holder; resolve_heap_holder.SuppressRelease(); @@ -657,6 +741,7 @@ void VirtualCallStubManager::Init(LoaderAllocator *pLoaderAllocator) lookups = lookups_holder; lookups_holder.SuppressRelease(); vtableCallers = vtableCallers_holder; vtableCallers_holder.SuppressRelease(); cache_entries = cache_entries_holder; cache_entries_holder.SuppressRelease(); +#endif // FEATURE_VIRTUAL_STUB_DISPATCH m_counters = m_counters_holder; m_counters_holder.SuppressRelease(); @@ -690,6 +775,7 @@ VirtualCallStubManager::~VirtualCallStubManager() LogStats(); +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH // Go through each cache entry and if the cache element there is in // the cache entry heap of the manager being deleted, then we just // set the cache entry to empty. @@ -705,8 +791,30 @@ VirtualCallStubManager::~VirtualCallStubManager() } it.Next(); } +#endif // FEATURE_VIRTUAL_STUB_DISPATCH + +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH + if (m_loaderAllocator->IsCollectible() && UseCachedInterfaceDispatch()) + { + CachedIndirectionCellBlockListNode * pBlockNode = m_indirectionBlocks; + while (pBlockNode != NULL) + { + for (UINT32 i = 0; i < INDCELLS_PER_BLOCK; i++) + { + InterfaceDispatchCacheHeader* cache = pBlockNode->m_rgIndCells[i].GetCache(); + if (cache != NULL) + { + InterfaceDispatch_DiscardCacheHeader(cache); + } + } + + pBlockNode = pBlockNode->m_pNext; + } + } +#endif if (indcell_heap) { delete indcell_heap; indcell_heap = NULL;} +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH if (lookup_heap) { delete lookup_heap; lookup_heap = NULL;} if (dispatch_heap) { delete dispatch_heap; dispatch_heap = NULL;} if (resolve_heap) { delete resolve_heap; resolve_heap = NULL;} @@ -718,6 +826,7 @@ VirtualCallStubManager::~VirtualCallStubManager() if (lookups) { delete lookups; lookups = NULL;} if (vtableCallers) { delete vtableCallers; vtableCallers = NULL;} if (cache_entries) { delete cache_entries; cache_entries = NULL;} +#endif // FEATURE_VIRTUAL_STUB_DISPATCH // Now get rid of the memory taken by the counter_blocks while (m_counters != NULL) @@ -741,6 +850,10 @@ void VirtualCallStubManager::InitStatic() { STANDARD_VM_CONTRACT; +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH + InterfaceDispatch_Initialize(); +#endif + #ifdef STUB_LOGGING // Note if you change these values using environment variables then you must use hex values :-( STUB_MISS_COUNT_VALUE = (INT32) CLRConfig::GetConfigValue(CLRConfig::INTERNAL_VirtualCallStubMissCount); @@ -752,6 +865,7 @@ void VirtualCallStubManager::InitStatic() g_resetCacheIncr = (INT32) CLRConfig::GetConfigValue(CLRConfig::INTERNAL_VirtualCallStubResetCacheIncr); #endif // STUB_LOGGING +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH #ifndef STUB_DISPATCH_PORTABLE DispatchHolder::InitializeStatic(); ResolveHolder::InitializeStatic(); @@ -759,6 +873,7 @@ void VirtualCallStubManager::InitStatic() LookupHolder::InitializeStatic(); g_resolveCache = new DispatchCache(); +#endif // FEATURE_VIRTUAL_STUB_DISPATCH if(CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_VirtualCallStubLogging)) StartupLogging(); @@ -785,7 +900,9 @@ void VirtualCallStubManager::LogFinalStats() it.Current()->LogStats(); } +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH g_resolveCache->LogStats(); +#endif // FEATURE_VIRTUAL_STUB_DISPATCH 
FinishLogging(); } @@ -799,6 +916,7 @@ void VirtualCallStubManager::ReclaimAll() STATIC_CONTRACT_GC_NOTRIGGER; STATIC_CONTRACT_FORBID_FAULT; +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH /* @todo: if/when app domain unloading is supported, and when we have app domain specific stub heaps, we can complete the unloading of an app domain stub heap at this point, and make any patches to existing stubs that are @@ -814,6 +932,10 @@ void VirtualCallStubManager::ReclaimAll() { it.Current()->Reclaim(); } +#endif // FEATURE_VIRTUAL_STUB_DISPATCH +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH + InterfaceDispatch_ReclaimUnusedInterfaceDispatchCaches(); +#endif // FEATURE_CACHED_INTERFACE_DISPATCH g_reclaim_counter++; } @@ -854,6 +976,7 @@ void VirtualCallStubManager::Reclaim() //---------------------------------------------------------------------------- /* static */ +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH VirtualCallStubManager *VirtualCallStubManager::FindStubManager(PCODE stubAddress, StubCodeBlockKind* wbStubKind) { CONTRACTL { @@ -891,6 +1014,7 @@ VirtualCallStubManager *VirtualCallStubManager::FindStubManager(PCODE stubAddres return NULL; } } +#endif // FEATURE_VIRTUAL_STUB_DISPATCH /* for use by debugger. */ @@ -943,7 +1067,7 @@ BOOL VirtualCallStubManager::TraceManager(Thread *thread, *pRetAddr = (BYTE *)StubManagerHelpers::GetReturnAddress(pContext); // Get the token from the stub - DispatchToken token(GetTokenFromStub(pStub)); + DispatchToken token(GetTokenFromStub(pStub, pContext)); // Get the this object from ECX Object *pObj = StubManagerHelpers::GetThisPtr(pContext); @@ -954,6 +1078,30 @@ BOOL VirtualCallStubManager::TraceManager(Thread *thread, #ifndef DACCESS_COMPILE +DispatchToken VirtualCallStubManager::GetTokenFromOwnerAndSlot(TypeHandle ownerType, uint32_t slot) +{ + CONTRACTL + { + THROWS; + GC_TRIGGERS; + MODE_ANY; + INJECT_FAULT(COMPlusThrowOM();); + } + CONTRACTL_END + + MethodTable * pMT = ownerType.GetMethodTable(); + pMT->GetRestoredSlot(slot); + + DispatchToken token; + if (pMT->IsInterface()) + token = pMT->GetLoaderAllocator()->GetDispatchToken(pMT->GetTypeID(), slot); + else + token = DispatchToken::CreateDispatchToken(slot); + + return token; +} + +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH PCODE VirtualCallStubManager::GetCallStub(TypeHandle ownerType, MethodDesc *pMD) { CONTRACTL { @@ -965,11 +1113,13 @@ PCODE VirtualCallStubManager::GetCallStub(TypeHandle ownerType, MethodDesc *pMD) INJECT_FAULT(COMPlusThrowOM();); } CONTRACTL_END; - return GetCallStub(ownerType, pMD->GetSlot()); + DispatchToken token = GetTokenFromOwnerAndSlot(ownerType, pMD->GetSlot()); + + return GetCallStub(token); } //find or create a stub -PCODE VirtualCallStubManager::GetCallStub(TypeHandle ownerType, DWORD slot) +PCODE VirtualCallStubManager::GetCallStub(DispatchToken token) { CONTRACT (PCODE) { THROWS; @@ -981,15 +1131,6 @@ PCODE VirtualCallStubManager::GetCallStub(TypeHandle ownerType, DWORD slot) GCX_COOP(); // This is necessary for BucketTable synchronization - MethodTable * pMT = ownerType.GetMethodTable(); - pMT->GetRestoredSlot(slot); - - DispatchToken token; - if (pMT->IsInterface()) - token = pMT->GetLoaderAllocator()->GetDispatchToken(pMT->GetTypeID(), slot); - else - token = DispatchToken::CreateDispatchToken(slot); - //get a stub from lookups, make if necessary PCODE stub = CALL_STUB_EMPTY_ENTRY; PCODE addrOfResolver = GetEEFuncEntryPoint(ResolveWorkerAsmStub); @@ -1070,6 +1211,7 @@ VTableCallHolder* VirtualCallStubManager::GenerateVTableCallStub(DWORD slot) RETURN(pHolder); } +#endif // 
FEATURE_VIRTUAL_STUB_DISPATCH //+---------------------------------------------------------------------------- // @@ -1085,7 +1227,14 @@ VTableCallHolder* VirtualCallStubManager::GenerateVTableCallStub(DWORD slot) // m_RecycledIndCellList when it is finalized. // //+---------------------------------------------------------------------------- -BYTE *VirtualCallStubManager::GenerateStubIndirection(PCODE target, BOOL fUseRecycledCell /* = FALSE*/ ) +BYTE* GetStubIndirectionCell(BYTE** pBlocksStart, UINT32 index, UINT32 sizeOfIndCell) +{ + LIMITED_METHOD_CONTRACT; + + return ((BYTE*)pBlocksStart) + (index * sizeOfIndCell); +} + +BYTE *VirtualCallStubManager::GenerateStubIndirection(PCODE target, DispatchToken token, BOOL fUseRecycledCell /* = FALSE*/ ) { CONTRACT (BYTE*) { THROWS; @@ -1095,7 +1244,7 @@ BYTE *VirtualCallStubManager::GenerateStubIndirection(PCODE target, BOOL fUseRec POSTCONDITION(CheckPointer(RETVAL)); } CONTRACT_END; - _ASSERTE(isStubStatic(target)); + _ASSERTE(UseCachedInterfaceDispatch() || isStubStatic(target)); CrstHolder lh(&m_indCellLock); @@ -1103,6 +1252,9 @@ BYTE *VirtualCallStubManager::GenerateStubIndirection(PCODE target, BOOL fUseRec BYTE * ret = NULL; UINT32 cellsPerBlock = INDCELLS_PER_BLOCK; + UINT32 sizeOfIndCell; + INTERFACE_DISPATCH_CACHED_OR_VSD(sizeOfIndCell = sizeof(InterfaceDispatchCell), sizeOfIndCell = sizeof(BYTE *)); + // First try the recycled indirection cell list for Dynamic methods if (fUseRecycledCell) ret = GetOneRecycledIndCell(); @@ -1114,27 +1266,57 @@ BYTE *VirtualCallStubManager::GenerateStubIndirection(PCODE target, BOOL fUseRec // Allocate from loader heap if (!ret) { + size_t alignment; + INTERFACE_DISPATCH_CACHED_OR_VSD(alignment = sizeof(TADDR) * 2, alignment = sizeof(TADDR)); + // Free list is empty, allocate a block of indcells from indcell_heap and insert it into the free list. - BYTE ** pBlock = (BYTE **) (void *) indcell_heap->AllocMem(S_SIZE_T(cellsPerBlock) * S_SIZE_T(sizeof(BYTE *))); + size_t cellsAllocationSize = cellsPerBlock * sizeOfIndCell; + size_t allocationSize = cellsAllocationSize; + +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH + if (m_loaderAllocator->IsCollectible() && UseCachedInterfaceDispatch()) + { + allocationSize += sizeof(CachedIndirectionCellBlockListNode); + } +#endif // FEATURE_CACHED_INTERFACE_DISPATCH + BYTE ** pBlock = (BYTE **) (void *) indcell_heap->AllocAlignedMem(allocationSize, alignment); + +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH + if (m_loaderAllocator->IsCollectible() && UseCachedInterfaceDispatch()) + { + CachedIndirectionCellBlockListNode * pBlockNode = (CachedIndirectionCellBlockListNode *)pBlock; + pBlockNode->m_pNext = m_indirectionBlocks; + m_indirectionBlocks = pBlockNode; + pBlock = (BYTE **)(&pBlockNode->m_rgIndCells[0]); + } +#endif // FEATURE_CACHED_INTERFACE_DISPATCH // return the first cell in the block and add the rest to the free list ret = (BYTE *)pBlock; // link all the cells together // we don't need to null terminate the linked list, InsertIntoFreeIndCellList will do it. - for (UINT32 i = 1; i < cellsPerBlock - 1; ++i) + for (UINT32 i = 1; i < cellsPerBlock - 1; ++i) // Setup linked list between entries 1 to n { - pBlock[i] = (BYTE *)&(pBlock[i+1]); + *(BYTE**)GetStubIndirectionCell(pBlock, i, sizeOfIndCell) = GetStubIndirectionCell(pBlock, i + 1, sizeOfIndCell); } // insert the list into the free indcell list. 
- InsertIntoFreeIndCellList((BYTE *)&pBlock[1], (BYTE*)&pBlock[cellsPerBlock - 1]); + InsertIntoFreeIndCellList(GetStubIndirectionCell(pBlock, 1, sizeOfIndCell), GetStubIndirectionCell(pBlock, cellsPerBlock - 1, sizeOfIndCell)); } - *((PCODE *)ret) = target; + INTERFACE_DISPATCH_CACHED_OR_VSD( + InterfaceDispatchCell * pCell = (InterfaceDispatchCell *)ret; + pCell->m_pStub = target; + pCell->m_pCache = DispatchToken::ToCachedInterfaceDispatchToken(token); + , + *((PCODE *)ret) = target; + ) + RETURN ret; } +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH ResolveCacheElem *VirtualCallStubManager::GetResolveCacheElem(void *pMT, size_t token, void *target) @@ -1171,10 +1353,11 @@ ResolveCacheElem *VirtualCallStubManager::GetResolveCacheElem(void *pMT, _ASSERTE(elem && (elem != CALL_STUB_EMPTY_ENTRY)); return elem; } +#endif // FEATURE_VIRTUAL_STUB_DISPATCH #endif // !DACCESS_COMPILE -size_t VirtualCallStubManager::GetTokenFromStub(PCODE stub) +size_t VirtualCallStubManager::GetTokenFromStub(PCODE stub, T_CONTEXT *pContext) { CONTRACTL { @@ -1184,13 +1367,35 @@ size_t VirtualCallStubManager::GetTokenFromStub(PCODE stub) } CONTRACTL_END +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH + if (isCachedInterfaceDispatchStub(stub)) + { + TADDR indcell = StubManagerHelpers::GetIndirectionCellArg(pContext); + VirtualCallStubManagerIterator it = + VirtualCallStubManagerManager::GlobalManager()->IterateVirtualCallStubManagers(); + while (it.Next()) + { + if (it.Current()->indcell_rangeList.IsInRange(indcell)) + { + InterfaceDispatchCell * pCell = (InterfaceDispatchCell *)indcell; + return pCell->GetDispatchCellInfo().Token.To_SIZE_T(); + } + } + } +#endif // FEATURE_CACHED_INTERFACE_DISPATCH + +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH _ASSERTE(stub != (PCODE)NULL); StubCodeBlockKind stubKind = STUB_CODE_BLOCK_UNKNOWN; VirtualCallStubManager * pMgr = FindStubManager(stub, &stubKind); return GetTokenFromStubQuick(pMgr, stub, stubKind); +#else + return 0; +#endif } +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH size_t VirtualCallStubManager::GetTokenFromStubQuick(VirtualCallStubManager * pMgr, PCODE stub, StubCodeBlockKind kind) { CONTRACTL @@ -1244,6 +1449,7 @@ size_t VirtualCallStubManager::GetTokenFromStubQuick(VirtualCallStubManager * pM return 0; } +#endif // FEATURE_VIRTUAL_STUB_DISPATCH #ifndef DACCESS_COMPILE @@ -1262,6 +1468,195 @@ ResolveCacheElem* __fastcall VirtualCallStubManager::PromoteChainEntry(ResolveCa } #endif // CHAIN_LOOKUP +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH +PCODE CachedInterfaceDispatchResolveWorker(StubCallSite* pCallSite, OBJECTREF *protectedObj, DispatchToken token) +{ + CONTRACTL { + THROWS; + GC_TRIGGERS; + MODE_COOPERATIVE; + INJECT_FAULT(COMPlusThrowOM();); + PRECONDITION(protectedObj != NULL); + PRECONDITION(*protectedObj != NULL); + PRECONDITION(IsProtectedByGCFrame(protectedObj)); + } CONTRACTL_END; + + MethodTable* objectType = (*protectedObj)->GetMethodTable(); + CONSISTENCY_CHECK(CheckPointer(objectType)); + + PCODE target = (PCODE)NULL; + BOOL patch = VirtualCallStubManager::Resolver(objectType, token, protectedObj, &target, TRUE /* throwOnConflict */); + +#if defined(_DEBUG) + if (!objectType->IsComObjectType() + && !objectType->IsIDynamicInterfaceCastable()) + { + CONSISTENCY_CHECK(!MethodTable::GetMethodDescForSlotAddress(target)->IsGenericMethodDefinition()); + } +#endif // _DEBUG + + if (patch && pCallSite != NULL) + { + DispatchCellInfo cellInfo = ((InterfaceDispatchCell*)pCallSite->GetIndirectCell())->GetDispatchCellInfo(); + 
InterfaceDispatch_UpdateDispatchCellCache((InterfaceDispatchCell*)pCallSite->GetIndirectCell(), target, objectType, &cellInfo); + } + + return target; +} + +// Resolve a dispatch on a virtual open delegate without updating any pointers +extern "C" PCODE CID_VirtualOpenDelegateDispatchWorker(TransitionBlock * pTransitionBlock, PCODE* ppMethodPtrAuxField) +{ + CONTRACTL { + THROWS; + GC_TRIGGERS; + INJECT_FAULT(COMPlusThrowOM();); + PRECONDITION(CheckPointer(pTransitionBlock)); + MODE_COOPERATIVE; + } CONTRACTL_END; + + OBJECTREF delegateObj = ObjectToOBJECTREF((Object*)(((BYTE*)ppMethodPtrAuxField) - DelegateObject::GetOffsetOfMethodPtrAux())); + MAKE_CURRENT_THREAD_AVAILABLE(); + +#ifdef _DEBUG + Thread::ObjectRefFlush(CURRENT_THREAD); +#endif + + StubDispatchFrame frame(pTransitionBlock); + StubDispatchFrame * pSDFrame = &frame; + + OBJECTREF *protectedObj = pSDFrame->GetThisPtr(); + _ASSERTE(protectedObj != NULL); + OBJECTREF pObj = *protectedObj; + + PCODE target = (PCODE)NULL; + + if (pObj == NULL) { + pSDFrame->SetForNullReferenceException(); + pSDFrame->Push(CURRENT_THREAD); + INSTALL_MANAGED_EXCEPTION_DISPATCHER; + INSTALL_UNWIND_AND_CONTINUE_HANDLER; + COMPlusThrow(kNullReferenceException); + UNINSTALL_UNWIND_AND_CONTINUE_HANDLER; + UNINSTALL_MANAGED_EXCEPTION_DISPATCHER; + _ASSERTE(!"Throw returned"); + } + + MethodDesc *pTargetMD = COMDelegate::GetMethodDescForOpenVirtualDelegate(delegateObj); + pSDFrame->SetFunction(pTargetMD); + + pSDFrame->Push(CURRENT_THREAD); + INSTALL_MANAGED_EXCEPTION_DISPATCHER; + INSTALL_UNWIND_AND_CONTINUE_HANDLER; + + GCStress::MaybeTriggerAndProtect(pObj); + + DispatchToken token = VirtualCallStubManager::GetTokenFromOwnerAndSlot(TypeHandle(pTargetMD->GetMethodTable()), pTargetMD->GetSlot()); + target = CachedInterfaceDispatchResolveWorker(NULL, protectedObj, token); + +#if _DEBUG + if (pSDFrame->GetGCRefMap() != NULL) + { + GCX_PREEMP(); + _ASSERTE(CheckGCRefMapEqual(pSDFrame->GetGCRefMap(), pSDFrame->GetFunction(), true)); + } +#endif // _DEBUG + + UNINSTALL_UNWIND_AND_CONTINUE_HANDLER; + UNINSTALL_MANAGED_EXCEPTION_DISPATCHER; + pSDFrame->Pop(CURRENT_THREAD); + + return target; +} + +/* Resolve to a method and return its address or NULL if there is none + Our return value is the target address that control should continue to. Our caller will + enter the target address as if a direct call with the original stack frame had been made from + the actual call site. Hence our strategy is to either return a target address + of the actual method implementation, or the prestub if we cannot find the actual implementation. + If we are returning a real method address, we may patch the original cell site to point to different + stub. Note, if we encounter a method that hasn't been jitted + yet, we will return the prestub, which should cause it to be jitted and we will + be able to build the dispatching stub on a later call thru the call site. If we encounter + any other kind of problem, rather than throwing an exception, we will also return the + prestub, unless we are unable to find the method at all, in which case we return NULL. 
+ */ +extern "C" PCODE CID_ResolveWorker(TransitionBlock * pTransitionBlock, + InterfaceDispatchCell* indirectionCell) +{ + CONTRACTL { + THROWS; + GC_TRIGGERS; + INJECT_FAULT(COMPlusThrowOM();); + PRECONDITION(CheckPointer(pTransitionBlock)); + MODE_COOPERATIVE; + } CONTRACTL_END; + + MAKE_CURRENT_THREAD_AVAILABLE(); + +#ifdef _DEBUG + Thread::ObjectRefFlush(CURRENT_THREAD); +#endif + + StubDispatchFrame frame(pTransitionBlock); + StubDispatchFrame * pSDFrame = &frame; + + PCODE returnAddress = pSDFrame->GetUnadjustedReturnAddress(); + + StubCallSite callSite((TADDR)indirectionCell, returnAddress); + + OBJECTREF *protectedObj = pSDFrame->GetThisPtr(); + _ASSERTE(protectedObj != NULL); + OBJECTREF pObj = *protectedObj; + + PCODE target = (PCODE)NULL; + + if (pObj == NULL) { + pSDFrame->SetForNullReferenceException(); + pSDFrame->Push(CURRENT_THREAD); + INSTALL_MANAGED_EXCEPTION_DISPATCHER; + INSTALL_UNWIND_AND_CONTINUE_HANDLER; + COMPlusThrow(kNullReferenceException); + UNINSTALL_UNWIND_AND_CONTINUE_HANDLER; + UNINSTALL_MANAGED_EXCEPTION_DISPATCHER; + _ASSERTE(!"Throw returned"); + } + + pSDFrame->SetCallSite(NULL, (TADDR)callSite.GetIndirectCell()); + + DispatchToken representativeToken(indirectionCell->GetDispatchCellInfo().Token); + MethodTable * pRepresentativeMT = pObj->GetMethodTable(); + if (representativeToken.IsTypedToken()) + { + pRepresentativeMT = AppDomain::GetCurrentDomain()->LookupType(representativeToken.GetTypeID()); + CONSISTENCY_CHECK(CheckPointer(pRepresentativeMT)); + } + + pSDFrame->SetRepresentativeSlot(pRepresentativeMT, representativeToken.GetSlotNumber()); + + pSDFrame->Push(CURRENT_THREAD); + INSTALL_MANAGED_EXCEPTION_DISPATCHER; + INSTALL_UNWIND_AND_CONTINUE_HANDLER; + + GCStress::MaybeTriggerAndProtect(pObj); + + target = CachedInterfaceDispatchResolveWorker(&callSite, protectedObj, representativeToken); + +#if _DEBUG + if (pSDFrame->GetGCRefMap() != NULL) + { + GCX_PREEMP(); + _ASSERTE(CheckGCRefMapEqual(pSDFrame->GetGCRefMap(), pSDFrame->GetFunction(), true)); + } +#endif // _DEBUG + + UNINSTALL_UNWIND_AND_CONTINUE_HANDLER; + UNINSTALL_MANAGED_EXCEPTION_DISPATCHER; + pSDFrame->Pop(CURRENT_THREAD); + + return target; +} +#endif // FEATURE_CACHED_INTERFACE_DISPATCH bool IsCallDescrWorkerInternalReturnAddress(PCODE pCode); /* Resolve to a method and return its address or NULL if there is none. @@ -1276,6 +1671,7 @@ bool IsCallDescrWorkerInternalReturnAddress(PCODE pCode); any other kind of problem, rather than throwing an exception, we will also return the prestub, unless we are unable to find the method at all, in which case we return NULL. */ +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH PCODE VSD_ResolveWorker(TransitionBlock * pTransitionBlock, TADDR siteAddrForRegisterIndirect, size_t token @@ -1858,6 +2254,7 @@ PCODE VirtualCallStubManager::ResolveWorker(StubCallSite* pCallSite, return target; } +#endif // FEATURE_VIRTUAL_STUB_DISPATCH /* Resolve the token in the context of the method table, and set the target to point to @@ -2146,6 +2543,7 @@ MethodDesc *VirtualCallStubManager::GetInterfaceMethodDescFromToken(DispatchToke #ifndef DACCESS_COMPILE +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH //---------------------------------------------------------------------------- // This will check to see if a match is in the cache. // Returns the target on success, otherwise NULL. @@ -2163,7 +2561,7 @@ PCODE VirtualCallStubManager::CacheLookup(size_t token, UINT16 tokenHash, Method // If the element matches, return the target - we're done! return (PCODE)(pElem != NULL ? 
pElem->target : NULL); } - +#endif // FEATURE_VIRTUAL_STUB_DISPATCH //---------------------------------------------------------------------------- /* static */ @@ -2191,11 +2589,11 @@ VirtualCallStubManager::GetTarget( PCODE target = (PCODE)NULL; -#ifndef STUB_DISPATCH_PORTABLE +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH target = CacheLookup(token.To_SIZE_T(), DispatchCache::INVALID_HASH, pMT); if (target != (PCODE)NULL) return target; -#endif // !STUB_DISPATCH_PORTABLE +#endif // FEATURE_VIRTUAL_STUB_DISPATCH // No match, now do full resolve BOOL fPatch; @@ -2204,7 +2602,7 @@ VirtualCallStubManager::GetTarget( fPatch = Resolver(pMT, token, NULL, &target, throwOnConflict); _ASSERTE(!throwOnConflict || target != (PCODE)NULL); -#ifndef STUB_DISPATCH_PORTABLE +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH if (fPatch) { ResolveCacheElem *pCacheElem = pMT->GetLoaderAllocator()->GetVirtualCallStubManager()-> @@ -2222,7 +2620,7 @@ VirtualCallStubManager::GetTarget( { g_external_call_no_patch++; } -#endif // !STUB_DISPATCH_PORTABLE +#endif // FEATURE_VIRTUAL_STUB_DISPATCH return target; } @@ -2284,6 +2682,7 @@ VirtualCallStubManager::TraceResolver( #ifndef DACCESS_COMPILE +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH //---------------------------------------------------------------------------- /* Change the call site. It is failing the expected MT test in the dispatcher stub too often. @@ -2663,10 +3062,12 @@ LookupHolder *VirtualCallStubManager::GenerateLookupStub(PCODE addrOfResolver, s RETURN (holder); } +#endif // FEATURE_VIRTUAL_STUB_DISPATCH //---------------------------------------------------------------------------- /* Generate a cache entry */ +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH ResolveCacheElem *VirtualCallStubManager::GenerateResolveCacheElem(void *addrOfCode, void *pMTExpected, size_t token, @@ -2714,6 +3115,7 @@ ResolveCacheElem *VirtualCallStubManager::GenerateResolveCacheElem(void *addrOfC return e; } +#endif // FEATURE_VIRTUAL_STUB_DISPATCH //------------------------------------------------------------------ // Adds the stub manager to our linked list of virtual stub managers @@ -2776,7 +3178,7 @@ void VirtualCallStubManager::LogStats() // Our Init routine assignes all fields atomically so testing one field should suffice to // test whehter the Init succeeded. 
- if (!resolvers) + if (!m_counters) { return; } @@ -2810,6 +3212,7 @@ void VirtualCallStubManager::LogStats() sprintf_s(szPrintStr, ARRAY_SIZE(szPrintStr), OUTPUT_FORMAT_INT, "stub_space", stats.stub_space); WriteFile (g_hStubLogFile, szPrintStr, (DWORD) strlen(szPrintStr), &dwWriteByte, NULL); +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH size_t total, used; g_resolveCache->GetLoadFactor(&total, &used); @@ -2822,14 +3225,17 @@ void VirtualCallStubManager::LogStats() sprintf_s(szPrintStr, ARRAY_SIZE(szPrintStr), "\r\ncache_load:\t%zu used, %zu total, utilization %#5.2f%%\r\n", used, total, 100.0 * double(used) / double(total)); +#endif // FEATURE_VIRTUAL_STUB_DISPATCH WriteFile (g_hStubLogFile, szPrintStr, (DWORD) strlen(szPrintStr), &dwWriteByte, NULL); } +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH resolvers->LogStats(); dispatchers->LogStats(); lookups->LogStats(); vtableCallers->LogStats(); cache_entries->LogStats(); +#endif // FEATURE_VIRTUAL_STUB_DISPATCH g_site_counter += stats.site_counter; g_stub_lookup_counter += stats.stub_lookup_counter; @@ -2862,6 +3268,7 @@ void VirtualCallStubManager::LogStats() stats.cache_entry_space = 0; } +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH void Prober::InitProber(size_t key1, size_t key2, size_t* table) { CONTRACTL { @@ -3520,6 +3927,7 @@ void DispatchCache::LogStats() stats.insert_cache_collide = 0; stats.insert_cache_write = 0; } +#endif // FEATURE_VIRTUAL_STUB_DISPATCH /* The following tablse have bits that have the following properties: 1. Each entry has 12-bits with 5,6 or 7 one bits and 5,6 or 7 zero bits. @@ -3553,6 +3961,7 @@ static const UINT16 tokenHashBits[32] = #endif // HOST_64BIT }; +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH /*static*/ UINT16 DispatchCache::HashToken(size_t token) { LIMITED_METHOD_CONTRACT; @@ -3645,10 +4054,12 @@ void DispatchCache::Iterator::NextValidBucket() NextBucket(); } while (IsValid() && *m_ppCurElem == m_pCache->empty); } +#endif // FEATURE_VIRTUAL_STUB_DISPATCH #endif // !DACCESS_COMPILE ///////////////////////////////////////////////////////////////////////////////////////////// +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH VirtualCallStubManager *VirtualCallStubManagerManager::FindVirtualCallStubManager(PCODE stubAddress) { CONTRACTL { @@ -3660,6 +4071,7 @@ VirtualCallStubManager *VirtualCallStubManagerManager::FindVirtualCallStubManage return VirtualCallStubManager::FindStubManager(stubAddress); } +#endif // FEATURE_VIRTUAL_STUB_DISPATCH static VirtualCallStubManager * const IT_START = (VirtualCallStubManager *)(-1); @@ -3694,6 +4106,29 @@ VirtualCallStubManager *VirtualCallStubManagerIterator::Current() } #ifndef DACCESS_COMPILE + +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH +extern "C" void RhpInterfaceDispatch1(); +extern "C" void RhpInterfaceDispatch2(); +extern "C" void RhpInterfaceDispatch4(); +extern "C" void RhpInterfaceDispatch8(); +extern "C" void RhpInterfaceDispatch16(); +extern "C" void RhpInterfaceDispatch32(); +extern "C" void RhpInterfaceDispatch64(); + +extern "C" void RhpVTableOffsetDispatch(); + +extern "C" void RhpInterfaceDispatchAVLocation1(); +extern "C" void RhpInterfaceDispatchAVLocation2(); +extern "C" void RhpInterfaceDispatchAVLocation4(); +extern "C" void RhpInterfaceDispatchAVLocation8(); +extern "C" void RhpInterfaceDispatchAVLocation16(); +extern "C" void RhpInterfaceDispatchAVLocation32(); +extern "C" void RhpInterfaceDispatchAVLocation64(); +extern "C" void RhpVTableOffsetDispatchAVLocation(); + +#endif // FEATURE_CACHED_INTERFACE_DISPATCH + 
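The helpers declared above implement the inline cache probe; as a rough, hedged illustration (hypothetical names, not the runtime's actual cache layout), each RhpInterfaceDispatchN stub behaves like the following loop unrolled for N entries:

    #include <cstddef>

    // One cache entry: the MethodTable seen at this call site and the resolved target code.
    struct SketchCacheEntry { const void* pMT; void* target; };

    // Conceptual body of RhpInterfaceDispatch1/2/4/.../64: probe the fixed-size cache attached
    // to the dispatch cell; the assembly stubs unroll this loop for their particular size.
    void* SketchProbeDispatchCache(const SketchCacheEntry* entries, size_t count, const void* objMT)
    {
        for (size_t i = 0; i < count; ++i)
        {
            if (entries[i].pMT == objMT)
                return entries[i].target;   // hit: tail-call straight to the implementation
        }
        return nullptr;                     // miss: fall back to the resolve worker, which may
                                            // grow or update the cache for subsequent calls
    }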
///////////////////////////////////////////////////////////////////////////////////////////// VirtualCallStubManagerManager::VirtualCallStubManagerManager() : m_pManagers(NULL), @@ -3701,6 +4136,41 @@ VirtualCallStubManagerManager::VirtualCallStubManagerManager() m_RWLock(COOPERATIVE_OR_PREEMPTIVE, LOCK_TYPE_DEFAULT) { LIMITED_METHOD_CONTRACT; + +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH +#define CACHED_INTERFACE_DISPATCH_HELPER_COUNT 9 + size_t helperCount = 0; + +#define RECORD_CACHED_INTERFACE_DISPATCH_HELPER(helper) _ASSERTE(helperCount < CACHED_INTERFACE_DISPATCH_HELPER_COUNT); pCachedInterfaceDispatchHelpers[helperCount++] = (PCODE)helper; +#define RECORD_CACHED_INTERFACE_DISPATCH_HELPER_AVLOCATION(helper) _ASSERTE(helperCount < CACHED_INTERFACE_DISPATCH_HELPER_COUNT); pCachedInterfaceDispatchHelpersAVLocation[helperCount++] = (PCODE)helper; + + pCachedInterfaceDispatchHelpers = new PCODE[CACHED_INTERFACE_DISPATCH_HELPER_COUNT]; + RECORD_CACHED_INTERFACE_DISPATCH_HELPER(RhpInterfaceDispatch1); + RECORD_CACHED_INTERFACE_DISPATCH_HELPER(RhpInterfaceDispatch2); + RECORD_CACHED_INTERFACE_DISPATCH_HELPER(RhpInterfaceDispatch4); + RECORD_CACHED_INTERFACE_DISPATCH_HELPER(RhpInterfaceDispatch8); + RECORD_CACHED_INTERFACE_DISPATCH_HELPER(RhpInterfaceDispatch16); + RECORD_CACHED_INTERFACE_DISPATCH_HELPER(RhpInterfaceDispatch32); + RECORD_CACHED_INTERFACE_DISPATCH_HELPER(RhpInterfaceDispatch64); + RECORD_CACHED_INTERFACE_DISPATCH_HELPER(RhpVTableOffsetDispatch); + RECORD_CACHED_INTERFACE_DISPATCH_HELPER(RhpInitialInterfaceDispatch); + _ASSERTE(helperCount == CACHED_INTERFACE_DISPATCH_HELPER_COUNT); + + helperCount = 0; + pCachedInterfaceDispatchHelpersAVLocation = new PCODE[CACHED_INTERFACE_DISPATCH_HELPER_COUNT]; + RECORD_CACHED_INTERFACE_DISPATCH_HELPER_AVLOCATION(RhpInterfaceDispatchAVLocation1); + RECORD_CACHED_INTERFACE_DISPATCH_HELPER_AVLOCATION(RhpInterfaceDispatchAVLocation2); + RECORD_CACHED_INTERFACE_DISPATCH_HELPER_AVLOCATION(RhpInterfaceDispatchAVLocation4); + RECORD_CACHED_INTERFACE_DISPATCH_HELPER_AVLOCATION(RhpInterfaceDispatchAVLocation8); + RECORD_CACHED_INTERFACE_DISPATCH_HELPER_AVLOCATION(RhpInterfaceDispatchAVLocation16); + RECORD_CACHED_INTERFACE_DISPATCH_HELPER_AVLOCATION(RhpInterfaceDispatchAVLocation32); + RECORD_CACHED_INTERFACE_DISPATCH_HELPER_AVLOCATION(RhpInterfaceDispatchAVLocation64); + RECORD_CACHED_INTERFACE_DISPATCH_HELPER_AVLOCATION(RhpVTableOffsetDispatchAVLocation); + RECORD_CACHED_INTERFACE_DISPATCH_HELPER_AVLOCATION(RhpInitialInterfaceDispatch); + _ASSERTE(helperCount == CACHED_INTERFACE_DISPATCH_HELPER_COUNT); + + countCachedInterfaceDispatchHelpers = helperCount; +#endif // FEATURE_CACHED_INTERFACE_DISPATCH } ///////////////////////////////////////////////////////////////////////////////////////////// @@ -3731,7 +4201,13 @@ BOOL VirtualCallStubManagerManager::CheckIsStub_Internal( WRAPPER_NO_CONTRACT; SUPPORTS_DAC; - // Forwarded to from RangeSectionStubManager +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH + if (UseCachedInterfaceDispatch()) + { + return isCachedInterfaceDispatchStub(stubStartAddress); + } +#endif + // Forwarded to from RangeSectionStubManager for other cases return FALSE; } @@ -3742,16 +4218,32 @@ BOOL VirtualCallStubManagerManager::DoTraceStub( { WRAPPER_NO_CONTRACT; - // Find the owning manager. We should succeed, since presumably someone already - // called CheckIsStub on us to find out that we own the address, and already - // called TraceManager to initiate a trace. 
- VirtualCallStubManager *pMgr = FindVirtualCallStubManager(stubStartAddress); + VirtualCallStubManager *pMgr = NULL; +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH + if (UseCachedInterfaceDispatch()) + { + // Always use the global loader allocator, and find the correct one during the trace itself + pMgr = SystemDomain::GetGlobalLoaderAllocator()->GetVirtualCallStubManager(); + } +#endif // FEATURE_CACHED_INTERFACE_DISPATCH + +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH + if (!UseCachedInterfaceDispatch()) + { + // Find the owning manager. We should succeed, since presumably someone already + // called CheckIsStub on us to find out that we own the address, and already + // called TraceManager to initiate a trace. + pMgr = FindVirtualCallStubManager(stubStartAddress); + } +#endif // FEATURE_VIRTUAL_STUB_DISPATCH + CONSISTENCY_CHECK(CheckPointer(pMgr)); return pMgr->DoTraceStub(stubStartAddress, trace); } #ifndef DACCESS_COMPILE +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH ///////////////////////////////////////////////////////////////////////////////////////////// MethodDesc *VirtualCallStubManagerManager::Entry2MethodDesc( PCODE stubStartAddress, @@ -3784,6 +4276,7 @@ MethodDesc *VirtualCallStubManagerManager::Entry2MethodDesc( return pMT->GetMethodDescForSlotAddress(target); } +#endif // FEATURE_VIRTUAL_STUB_DISPATCH #endif #ifdef DACCESS_COMPILE @@ -3806,11 +4299,50 @@ BOOL VirtualCallStubManagerManager::TraceManager( { WRAPPER_NO_CONTRACT; - // Find the owning manager. We should succeed, since presumably someone already - // called CheckIsStub on us to find out that we own the address. - VirtualCallStubManager *pMgr = FindVirtualCallStubManager(GetIP(pContext)); + VirtualCallStubManager *pMgr = NULL; + +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH + if (UseCachedInterfaceDispatch()) + { + // Always use the global loader allocator, and find the correct one during the trace itself + pMgr = SystemDomain::GetGlobalLoaderAllocator()->GetVirtualCallStubManager(); + } +#endif + +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH + if (!UseCachedInterfaceDispatch()) + { + // Find the owning manager. We should succeed, since presumably someone already + // called CheckIsStub on us to find out that we own the address. + pMgr = FindVirtualCallStubManager(GetIP(pContext)); + } +#endif // FEATURE_CACHED_INTERFACE_DISPATCH CONSISTENCY_CHECK(CheckPointer(pMgr)); // Forward the call to the appropriate manager. 
return pMgr->TraceManager(thread, trace, pContext, pRetAddr); } + +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH +bool VirtualCallStubManager::isCachedInterfaceDispatchStub(PCODE addr) +{ + LIMITED_METHOD_DAC_CONTRACT; + + VirtualCallStubManagerManager *pGlobalManager = VirtualCallStubManagerManager::GlobalManager(); + + if (pGlobalManager == NULL) + return false; + return pGlobalManager->isCachedInterfaceDispatchStub(addr); +} + +bool VirtualCallStubManager::isCachedInterfaceDispatchStubAVLocation(PCODE addr) +{ + LIMITED_METHOD_DAC_CONTRACT; + + VirtualCallStubManagerManager *pGlobalManager = VirtualCallStubManagerManager::GlobalManager(); + + if (pGlobalManager == NULL) + return false; + return pGlobalManager->isCachedInterfaceDispatchStubAVLocation(addr); +} +#endif \ No newline at end of file diff --git a/src/coreclr/vm/virtualcallstub.h b/src/coreclr/vm/virtualcallstub.h index 7638f2aec1eb29..b714683302c9ca 100644 --- a/src/coreclr/vm/virtualcallstub.h +++ b/src/coreclr/vm/virtualcallstub.h @@ -11,7 +11,9 @@ #ifndef _VIRTUAL_CALL_STUB_H #define _VIRTUAL_CALL_STUB_H +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH #define CHAIN_LOOKUP +#endif // FEATURE_VIRTUAL_STUB_DISPATCH #if defined(TARGET_X86) // If this is uncommented, leaves a file "StubLog_.log" with statistics on the behavior @@ -19,16 +21,20 @@ //#define STUB_LOGGING #endif +bool UseCachedInterfaceDispatch(); + #include "stubmgr.h" ///////////////////////////////////////////////////////////////////////////////////// // Forward class declarations +class VirtualCallStubManager; +class VirtualCallStubManagerManager; + +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH class FastTable; class BucketTable; class Entry; class Prober; -class VirtualCallStubManager; -class VirtualCallStubManagerManager; struct LookupHolder; struct DispatchHolder; struct ResolveHolder; @@ -93,6 +99,7 @@ enum e_resolveCacheElem_offset_target = e_resolveCacheElem_offset_token + e_resolveCacheElem_sizeof_token, e_resolveCacheElem_offset_next = e_resolveCacheElem_offset_target + e_resolveCacheElem_sizeof_target, }; +#endif // FEATURE_VIRTUAL_STUB_DISPATCH ///////////////////////////////////////////////////////////////////////////////////// // A utility class to help manipulate a call site @@ -143,6 +150,8 @@ struct StubCallSite PCODE GetReturnAddress() { LIMITED_METHOD_CONTRACT; return m_returnAddr; } }; +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH + // These are the assembly language entry points that the stubs use when they want to go into the EE extern "C" void ResolveWorkerAsmStub(); // resolve a token and transfer control to that method @@ -155,9 +164,15 @@ extern "C" void BackPatchWorkerStaticStub(PCODE returnAddr, TADDR siteAddrForReg #endif // TARGET_UNIX #endif // TARGET_X86 +#endif // FEATURE_VIRTUAL_STUB_DISPATCH + typedef VPTR(class VirtualCallStubManager) PTR_VirtualCallStubManager; +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH +struct CachedIndirectionCellBlockListNode; +#endif // FEATURE_CACHED_INTERFACE_DISPATCH + // VirtualCallStubManager is the heart of the stub dispatch logic. See the book of the runtime entry // // file:../../doc/BookOfTheRuntime/ClassLoader/VirtualStubDispatchDesign.doc @@ -167,6 +182,7 @@ typedef VPTR(class VirtualCallStubManager) PTR_VirtualCallStubManager; // // call [DispatchCell] // +#ifdef FEATURE_VIRTUAL_STUB_DISPATCH // Where we make sure 'DispatchCell' points at stubs that will do the right thing. DispatchCell is writable // so we can update the code over time. There are three basic types of stubs that the dispatch cell can point // to. 
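As a hedged illustration of the monomorphic fast path this comment describes (hypothetical names; the real stubs are emitted as machine code by the stub holders in virtualcallstub.cpp), a dispatch stub conceptually reduces to:

    // Compare the incoming object's MethodTable against the single MethodTable the stub was
    // generated for, and either jump to the cached target or fail over to the resolve stub.
    using SketchResolveFallback = void* (*)(const void* objMT);

    void* SketchDispatchStub(const void* objMT, const void* expectedMT,
                             void* implTarget, SketchResolveFallback resolveStub)
    {
        if (objMT == expectedMT)
            return implTarget;        // expected-MT test passes: monomorphic fast path
        return resolveStub(objMT);    // miss: resolve stub consults its cache and may back-patch
    }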
@@ -202,6 +218,8 @@ typedef VPTR(class VirtualCallStubManager) PTR_VirtualCallStubManager;
 // (in)efficiency forever.
 //
 // see code:#StubDispatchNotes for more
+#endif // FEATURE_VIRTUAL_STUB_DISPATCH
+
 class VirtualCallStubManager : public StubManager
 {
     friend class VirtualCallStubManagerManager;
@@ -219,6 +237,7 @@ class VirtualCallStubManager : public StubManager
     virtual const char * DbgGetName() { LIMITED_METHOD_CONTRACT; return "VirtualCallStubManager"; }
 #endif
 
+#ifdef FEATURE_VIRTUAL_STUB_DISPATCH
     // The reason for our existence, return a callstub for type id and slot number
     // where type id = 0 for the class contract (i.e. a virtual call), and type id > 0 for an
     // interface invoke where the id indicates which interface it is.
@@ -226,13 +245,15 @@ class VirtualCallStubManager : public StubManager
     // The function is idempotent, i.e.
    // you'll get the same callstub twice if you call it with identical inputs.
     PCODE GetCallStub(TypeHandle ownerType, MethodDesc *pMD);
-    PCODE GetCallStub(TypeHandle ownerType, DWORD slot);
-
+    PCODE GetCallStub(DispatchToken token);
     // Stubs for vtable-based virtual calls with no lookups
     PCODE GetVTableCallStub(DWORD slot);
+#endif // FEATURE_VIRTUAL_STUB_DISPATCH
+
+    static DispatchToken GetTokenFromOwnerAndSlot(TypeHandle ownerType, uint32_t slot);
 
     // Generate an fresh indirection cell.
-    BYTE* GenerateStubIndirection(PCODE stub, BOOL fUseRecycledCell = FALSE);
+    BYTE* GenerateStubIndirection(PCODE stub, DispatchToken token, BOOL fUseRecycledCell = FALSE);
 
     // Set up static data structures - called during EEStartup
     static void InitStatic();
@@ -261,12 +282,18 @@ class VirtualCallStubManager : public StubManager
 #ifndef DACCESS_COMPILE
     VirtualCallStubManager()
         : StubManager(),
+#ifdef FEATURE_VIRTUAL_STUB_DISPATCH
           cache_entry_rangeList(),
+#endif
+#ifdef FEATURE_CACHED_INTERFACE_DISPATCH
+          indcell_rangeList(),
+#endif
           m_loaderAllocator(NULL),
           m_initialReservedMemForHeaps(NULL),
           m_FreeIndCellList(NULL),
           m_RecycledIndCellList(NULL),
           indcell_heap(NULL),
+#ifdef FEATURE_VIRTUAL_STUB_DISPATCH
           cache_entry_heap(NULL),
           lookup_heap(NULL),
           dispatch_heap(NULL),
@@ -278,11 +305,15 @@ class VirtualCallStubManager : public StubManager
           cache_entries(NULL),
           dispatchers(NULL),
           resolvers(NULL),
+#endif // FEATURE_VIRTUAL_STUB_DISPATCH
          m_counters(NULL),
          m_cur_counter_block(NULL),
          m_cur_counter_block_for_reclaim(NULL),
          m_cur_counter_block_for_reclaim_index(0),
          m_pNext(NULL)
+#if defined FEATURE_CACHED_INTERFACE_DISPATCH
+        , m_indirectionBlocks (0)
+#endif
     {
         LIMITED_METHOD_CONTRACT;
         ZeroMemory(&stats, sizeof(stats));
@@ -291,17 +322,31 @@ class VirtualCallStubManager : public StubManager
     ~VirtualCallStubManager();
 #endif // !DACCESS_COMPILE
 
+    static bool isCachedInterfaceDispatchStub(PCODE addr);
+    static bool isCachedInterfaceDispatchStubAVLocation(PCODE addr);
+
     static BOOL isStubStatic(PCODE addr)
     {
         WRAPPER_NO_CONTRACT;
+
+#ifdef FEATURE_CACHED_INTERFACE_DISPATCH
+        if (isCachedInterfaceDispatchStub(addr))
+            return TRUE;
+#endif // FEATURE_CACHED_INTERFACE_DISPATCH
+
+#ifdef FEATURE_VIRTUAL_STUB_DISPATCH
         StubCodeBlockKind sk = RangeSectionStubManager::GetStubKind(addr);
 
         return sk == STUB_CODE_BLOCK_VSD_DISPATCH_STUB ||
                sk == STUB_CODE_BLOCK_VSD_LOOKUP_STUB ||
               sk == STUB_CODE_BLOCK_VSD_RESOLVE_STUB ||
               sk == STUB_CODE_BLOCK_VSD_VTABLE_STUB;
+#else
+        return FALSE;
+#endif // FEATURE_VIRTUAL_STUB_DISPATCH
     }
 
+#ifdef FEATURE_VIRTUAL_STUB_DISPATCH
     static BOOL isDispatchingStubStatic(PCODE addr)
     {
         WRAPPER_NO_CONTRACT;
@@ -344,9 +389,16 @@ class VirtualCallStubManager : public StubManager
         TADDR addr = PTR_HOST_MEMBER_TADDR(VirtualCallStubManager, this, cache_entry_rangeList);
         return PTR_RangeList(addr);
     }
+#endif // FEATURE_VIRTUAL_STUB_DISPATCH
+
+#ifdef FEATURE_CACHED_INTERFACE_DISPATCH
+    //use range lists to track the chunks of memory that are part of each heap
+    LockedRangeList indcell_rangeList;
+#endif
 
 private:
 
+#ifdef FEATURE_VIRTUAL_STUB_DISPATCH
     //allocate and initialize a stub of the desired kind
     DispatchHolder *GenerateDispatchStub(PCODE addrOfCode,
                                          PCODE addrOfFail,
@@ -386,6 +438,12 @@ class VirtualCallStubManager : public StubManager
     ResolveCacheElem *GetResolveCacheElem(void *pMT,
                                           size_t token,
                                           void *target);
+#endif // FEATURE_VIRTUAL_STUB_DISPATCH
+
+    // This can be used to find a target without needing the ability to throw
+    static BOOL TraceResolver(Object *pObj, DispatchToken token, TraceDestination *trace);
+
+public:
 
     //Given a dispatch token, an object and a method table, determine the
     //target address to go to. The return value (BOOL) states whether this address
@@ -396,24 +454,22 @@ class VirtualCallStubManager : public StubManager
                               PCODE *         ppTarget,
                               BOOL            throwOnConflict);
 
-    // This can be used to find a target without needing the ability to throw
-    static BOOL TraceResolver(Object *pObj, DispatchToken token, TraceDestination *trace);
-
-public:
     // Return the MethodDesc corresponding to this token.
     static MethodDesc *GetRepresentativeMethodDescFromToken(DispatchToken token, MethodTable *pMT);
     static MethodDesc *GetInterfaceMethodDescFromToken(DispatchToken token);
     static MethodTable *GetTypeFromToken(DispatchToken token);
 
     //This is used to get the token out of a stub
-    static size_t GetTokenFromStub(PCODE stub);
+    static size_t GetTokenFromStub(PCODE stub, T_CONTEXT *pContext);
 
+#ifdef FEATURE_VIRTUAL_STUB_DISPATCH
     //This is used to get the token out of a stub and we know the stub manager and stub kind
     static size_t GetTokenFromStubQuick(VirtualCallStubManager * pMgr, PCODE stub, StubCodeBlockKind kind);
 
     // General utility functions
     // Quick lookup in the cache. NOTHROW, GC_NOTRIGGER
     static PCODE CacheLookup(size_t token, UINT16 tokenHash, MethodTable *pMT);
+#endif // FEATURE_VIRTUAL_STUB_DISPATCH
 
     // Full exhaustive lookup. THROWS, GC_TRIGGERS
     static PCODE GetTarget(DispatchToken token, MethodTable *pMT, BOOL throwOnConflict);
@@ -425,6 +481,7 @@ class VirtualCallStubManager : public StubManager
     // Given a dispatch token, return true if the token represents a slot on the target.
     static BOOL IsClassToken(DispatchToken token);
 
+#ifdef FEATURE_VIRTUAL_STUB_DISPATCH
 #ifdef CHAIN_LOOKUP
     static ResolveCacheElem* __fastcall PromoteChainEntry(ResolveCacheElem *pElem);
 #endif
@@ -458,7 +515,7 @@ class VirtualCallStubManager : public StubManager
     //Change the callsite to point to stub
     void BackPatchSite(StubCallSite* pCallSite, PCODE stub);
-
+#endif // FEATURE_VIRTUAL_STUB_DISPATCH
 public:
     /* the following two public functions are to support tracing or
        stepping thru stubs via the debugger. */
@@ -473,8 +530,10 @@ class VirtualCallStubManager : public StubManager
         size_t retval=0;
         if(indcell_heap)
             retval+=indcell_heap->GetSize();
+#ifdef FEATURE_VIRTUAL_STUB_DISPATCH
         if(cache_entry_heap)
             retval+=cache_entry_heap->GetSize();
+#endif // FEATURE_VIRTUAL_STUB_DISPATCH
         return retval;
     };
@@ -561,6 +620,16 @@ class VirtualCallStubManager : public StubManager
             PRECONDITION(m_indCellLock.OwnedByCurrentThread());
         } CONTRACTL_END;
 
+#ifdef DEBUG
+        // Assert that head and tail are actually linked together
+        BYTE **p = (BYTE**)head;
+        while (p != (BYTE**)tail)
+        {
+            p = (BYTE **)*p;
+            _ASSERTE(p != NULL);
+        }
+#endif // DEBUG
+
         BYTE * temphead = *ppList;
         *((BYTE**)tail) = temphead;
         *ppList = head;
@@ -568,6 +637,8 @@ class VirtualCallStubManager : public StubManager
 #endif // !DACCESS_COMPILE
 
     PTR_LoaderHeap  indcell_heap;       // indirection cells go here
+
+#ifdef FEATURE_VIRTUAL_STUB_DISPATCH
     PTR_LoaderHeap  cache_entry_heap;   // resolve cache elem entries go here
     PTR_CodeFragmentHeap  lookup_heap;  // lookup stubs go here
     PTR_CodeFragmentHeap  dispatch_heap;    // dispatch stubs go here
@@ -594,6 +665,7 @@ class VirtualCallStubManager : public StubManager
     BucketTable *   dispatchers;        // hash table of dispatching stubs keyed by tokens/actualtype
     BucketTable *   resolvers;          // hash table of resolvers keyed by tokens/resolverstub
     BucketTable *   vtableCallers;      // hash table of vtable call stubs keyed by slot values
+#endif // FEATURE_VIRTUAL_STUB_DISPATCH
 
     // This structure is used to keep track of the fail counters.
     // We only need one fail counter per ResolveStub,
@@ -616,10 +688,16 @@ class VirtualCallStubManager : public StubManager
     // Used to keep track of all the VCSManager objects in the system.
     PTR_VirtualCallStubManager m_pNext;            // Linked list pointer
 
+#if defined FEATURE_CACHED_INTERFACE_DISPATCH
+    CachedIndirectionCellBlockListNode *m_indirectionBlocks;
+#endif
+
 public:
+#ifdef FEATURE_VIRTUAL_STUB_DISPATCH
     // Given a stub address, find the VCSManager that owns it.
     static VirtualCallStubManager *FindStubManager(PCODE addr, StubCodeBlockKind* wbStubKind = NULL);
+#endif // FEATURE_VIRTUAL_STUB_DISPATCH
 
 #ifndef DACCESS_COMPILE
     // insert a linked list of indirection cells at the beginning of m_RecycledIndCellList
@@ -698,7 +776,19 @@ class VirtualCallStubManagerManager : public StubManager
 #ifdef DACCESS_COMPILE
     virtual void DoEnumMemoryRegions(CLRDataEnumMemoryFlags flags);
     virtual LPCWSTR GetStubManagerName(PCODE addr)
-        { WRAPPER_NO_CONTRACT; return FindVirtualCallStubManager(addr)->GetStubManagerName(addr); }
+    {
+        WRAPPER_NO_CONTRACT;
+#ifndef FEATURE_VIRTUAL_STUB_DISPATCH
+        return W("CachedInterfaceDispatchStubManagerManager");
+#else
+#ifdef FEATURE_CACHED_INTERFACE_DISPATCH
+        if (UseCachedInterfaceDispatch() && VirtualCallStubManager::isCachedInterfaceDispatchStub(addr))
+            return W("CachedInterfaceDispatchStubManagerManager");
+        else
+#endif // FEATURE_CACHED_INTERFACE_DISPATCH
+            return FindVirtualCallStubManager(addr)->GetStubManagerName(addr);
+#endif
+    }
 #endif
 
 private:
@@ -716,9 +806,17 @@ class VirtualCallStubManagerManager : public StubManager
     // RW lock for reading entries and removing them.
     SimpleRWLock m_RWLock;
 
+#ifdef FEATURE_CACHED_INTERFACE_DISPATCH
+    DPTR(PCODE) pCachedInterfaceDispatchHelpers;
+    DPTR(PCODE) pCachedInterfaceDispatchHelpersAVLocation;
+    size_t countCachedInterfaceDispatchHelpers = 0;
+#endif // FEATURE_CACHED_INTERFACE_DISPATCH
+
     // This will look through all the managers in an intelligent fashion to
     // find the manager that owns the address.
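+    // FindVirtualCallStubManager is only available when virtual stub dispatch is compiled in;
+    // cached interface dispatch stubs are recognized separately via isCachedInterfaceDispatchStub.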
+#ifdef FEATURE_VIRTUAL_STUB_DISPATCH
     VirtualCallStubManager *FindVirtualCallStubManager(PCODE stubAddress);
+#endif
 
 protected:
     // Add a VCSManager to the linked list.
@@ -742,6 +840,30 @@ class VirtualCallStubManagerManager : public StubManager
 
     VirtualCallStubManagerIterator IterateVirtualCallStubManagers();
 
+#ifdef FEATURE_CACHED_INTERFACE_DISPATCH
+    bool isCachedInterfaceDispatchStub(PCODE addr)
+    {
+        LIMITED_METHOD_DAC_CONTRACT;
+        for (size_t i = 0; i < countCachedInterfaceDispatchHelpers; i++)
+        {
+            if (pCachedInterfaceDispatchHelpers[i] == addr)
+                return true;
+        }
+        return false;
+    }
+
+    bool isCachedInterfaceDispatchStubAVLocation(PCODE addr)
+    {
+        LIMITED_METHOD_DAC_CONTRACT;
+        for (size_t i = 0; i < countCachedInterfaceDispatchHelpers; i++)
+        {
+            if (pCachedInterfaceDispatchHelpersAVLocation[i] == addr)
+                return true;
+        }
+        return false;
+    }
+#endif // FEATURE_CACHED_INTERFACE_DISPATCH
+
 #ifdef _DEBUG
     // Debug helper to help identify stub-managers.
     virtual const char * DbgGetName() { LIMITED_METHOD_CONTRACT; return "VirtualCallStubManagerManager"; }
@@ -888,7 +1010,7 @@ class Entry
 };
 
 /* define the platform specific Stubs and stub holders */
-
+#ifdef FEATURE_VIRTUAL_STUB_DISPATCH
 #include <virtualcallstubcpu.hpp>
 
 #if USES_LOOKUP_STUBS
@@ -1098,7 +1220,7 @@ class DispatchEntry : public Entry
     {
         ResolveHolder * resolveHolder = ResolveHolder::FromFailEntry(stub->failTarget());
         size_t token = resolveHolder->stub()->token();
-        _ASSERTE(token == VirtualCallStubManager::GetTokenFromStub((PCODE)stub));
+        _ASSERTE(token == VirtualCallStubManager::GetTokenFromStub((PCODE)stub, NULL));
         return token;
     }
     else
@@ -1535,5 +1657,25 @@ class BucketTable
     static FastTable* dead;             //linked list head of to be deleted (abandoned) buckets
 };
 
+#endif // FEATURE_VIRTUAL_STUB_DISPATCH
+
+BYTE* GenerateDispatchStubCellEntryMethodDesc(LoaderAllocator *pLoaderAllocator, TypeHandle ownerType, MethodDesc *pMD, LCGMethodResolver *pResolver);
+BYTE* GenerateDispatchStubCellEntrySlot(LoaderAllocator *pLoaderAllocator, TypeHandle ownerType, int methodSlot, LCGMethodResolver *pResolver);
+
+
+#if defined(FEATURE_CACHED_INTERFACE_DISPATCH) && defined(FEATURE_VIRTUAL_STUB_DISPATCH)
+inline bool UseCachedInterfaceDispatch() { return g_pConfig->UseCachedInterfaceDispatch(); }
+
+// INTERFACE_DISPATCH_CACHED_OR_VSD is a macro used to swap between cached interface dispatch and virtual stub dispatch.
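+// Illustrative use (editorial sketch, not a call site added by this change); each argument must be a
+// single statement without unparenthesized commas, since the macro takes exactly two parameters:
+//     INTERFACE_DISPATCH_CACHED_OR_VSD(
+//         cell = AllocateCachedCell(token),        // taken when cached interface dispatch is in use
+//         cell = AllocateVSDIndirection(token));   // taken when virtual stub dispatch is in use
+// AllocateCachedCell and AllocateVSDIndirection are hypothetical helpers named only for this example.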
+#define INTERFACE_DISPATCH_CACHED_OR_VSD(cachedDispatch, vsdDispatch) if (UseCachedInterfaceDispatch()) { cachedDispatch; } else { vsdDispatch; }
+#elif defined(FEATURE_CACHED_INTERFACE_DISPATCH)
+inline bool UseCachedInterfaceDispatch() { return true; }
+#define INTERFACE_DISPATCH_CACHED_OR_VSD(cachedDispatch, vsdDispatch) { cachedDispatch; }
+#elif defined(FEATURE_VIRTUAL_STUB_DISPATCH)
+inline bool UseCachedInterfaceDispatch() { return false; }
+#define INTERFACE_DISPATCH_CACHED_OR_VSD(cachedDispatch, vsdDispatch) { vsdDispatch; }
+#else
+#error "No dispatch mechanism defined"
+#endif
 
 #endif // !_VIRTUAL_CALL_STUB_H
diff --git a/src/tests/Loader/CollectibleAssemblies/Statics/CollectibleTLSStaticCollection.cs b/src/tests/Loader/CollectibleAssemblies/Statics/CollectibleTLSStaticCollection.cs
index d84d3d05769c80..bac360c836e626 100644
--- a/src/tests/Loader/CollectibleAssemblies/Statics/CollectibleTLSStaticCollection.cs
+++ b/src/tests/Loader/CollectibleAssemblies/Statics/CollectibleTLSStaticCollection.cs
@@ -11,6 +11,19 @@
 
 namespace CollectibleThreadStaticShutdownRace
 {
+    public interface IGetAnInt
+    {
+        int GetInt();
+    }
+
+    public class GetAnInt : IGetAnInt
+    {
+        public int GetInt()
+        {
+            return 1;
+        }
+    }
+
     public class CollectibleThreadStaticShutdownRace
     {
         Action? UseTLSStaticFromLoaderAllocator = null;
@@ -40,6 +53,10 @@ void ThreadThatWaitsForLoaderAllocatorToDisappear()
             }
         }
 
+        public static IGetAnInt s_getAnInt = new GetAnInt();
+        static FieldInfo s_getAnIntField;
+        static MethodInfo s_getAnIntMethod;
+
         void CreateLoaderAllocatorWithTLS()
         {
             ulong collectibleIndex = s_collectibleIndex++;
@@ -66,7 +83,8 @@ void CreateLoaderAllocatorWithTLS()
                 "Method", MethodAttributes.Public | MethodAttributes.Static);
             var ilg = mb.GetILGenerator();
 
-            ilg.Emit(OpCodes.Ldc_I4_1);
+            ilg.Emit(OpCodes.Ldsfld, s_getAnIntField);
+            ilg.Emit(OpCodes.Callvirt, s_getAnIntMethod);
             ilg.Emit(OpCodes.Stsfld, fb);
             ilg.Emit(OpCodes.Ret);
         }
@@ -96,6 +114,9 @@ void ForceCollectibleTLSStaticToGoThroughThreadTermination()
         [Fact]
         public static void TestEntryPoint()
         {
+            s_getAnIntField = typeof(CollectibleThreadStaticShutdownRace).GetField("s_getAnInt");
+            s_getAnIntMethod = typeof(IGetAnInt).GetMethod("GetInt");
+
             new CollectibleThreadStaticShutdownRace().ForceCollectibleTLSStaticToGoThroughThreadTermination();
         }
     }