More efficient executable allocator #83632

Merged

Changes shown are from 22 of 25 commits.

Commits:
507e19d
Add multi-element caching scheme to ExecutableAllocator
davidwrighton Mar 15, 2023
819930f
CodePageGenerator generated code for LoaderHeaps should not cache the…
davidwrighton Mar 15, 2023
3b69965
Fix build breaks introduced by executable mapping cache changes
davidwrighton Mar 16, 2023
b3d6fc1
Fix build breaks caused by changes to introduce the concept of execut…
davidwrighton Mar 16, 2023
61c7033
Move VSD executable heaps from being LoaderHeaps to being CodeFragmen…
davidwrighton Mar 17, 2023
b5973d5
Add statistics gathering features to ExecutableAllocator
davidwrighton Mar 18, 2023
8ea3b48
In progress
davidwrighton Mar 21, 2023
762bcf0
Fix Dac api failing when called early in process startup
davidwrighton Mar 21, 2023
64da1cc
Implement interleaved stubs as 16KB pages instead of 4KB pages
davidwrighton Mar 21, 2023
306fa57
Remove api incorrectly added
davidwrighton Mar 21, 2023
b361aae
Adjust cache size down to 3, and leave a breadcrumb for enabling more…
davidwrighton Mar 22, 2023
b5901a2
Fix x86 build
davidwrighton Mar 22, 2023
6640678
Tweaks to make it all build and fix some bugs
davidwrighton Mar 22, 2023
3567881
Add statistics for linked list walk lengths
davidwrighton Mar 22, 2023
6cdd4f7
Reorder linked list on access
davidwrighton Mar 22, 2023
b79081d
Fix some more asserts and build breaks
davidwrighton Mar 22, 2023
c7bef3a
Fix Arm build for real this time, and fix unix arm64 miscalculation o…
davidwrighton Mar 23, 2023
215b571
Update based on code review comments
davidwrighton Mar 24, 2023
7527070
More code review feedback
davidwrighton Mar 24, 2023
a9e173d
Fix oops
davidwrighton Mar 24, 2023
89bef94
Attempt to fix Unix Arm64 build
davidwrighton Mar 27, 2023
9838460
Try tweaking the number of cached mappings to see if the illegal inst…
davidwrighton Mar 30, 2023
f6f08c9
Revert "Try tweaking the number of cached mappings to see if the ille…
davidwrighton Mar 31, 2023
df651dc
Fix last code review comment
davidwrighton Apr 12, 2023
630dc3f
Merge branch 'main' of github.com:dotnet/runtime into more_efficient_…
davidwrighton Apr 12, 2023
8 changes: 3 additions & 5 deletions src/coreclr/debug/daccess/dacdbiimpl.cpp
@@ -3617,11 +3617,7 @@ void DacDbiInterfaceImpl::EnumerateMemRangesForLoaderAllocator(PTR_LoaderAllocat
if (pVcsMgr)
{
if (pVcsMgr->indcell_heap != NULL) heapsToEnumerate.Push(pVcsMgr->indcell_heap);
if (pVcsMgr->lookup_heap != NULL) heapsToEnumerate.Push(pVcsMgr->lookup_heap);
if (pVcsMgr->resolve_heap != NULL) heapsToEnumerate.Push(pVcsMgr->resolve_heap);
if (pVcsMgr->dispatch_heap != NULL) heapsToEnumerate.Push(pVcsMgr->dispatch_heap);
if (pVcsMgr->cache_entry_heap != NULL) heapsToEnumerate.Push(pVcsMgr->cache_entry_heap);
if (pVcsMgr->vtable_heap != NULL) heapsToEnumerate.Push(pVcsMgr->vtable_heap);
}

TADDR rangeAccumAsTaddr = TO_TADDR(rangeAcummulator);
@@ -4519,7 +4515,9 @@ void DacDbiInterfaceImpl::EnumerateModulesInAssembly(
// Debugger isn't notified of Resource / Inspection-only modules.
if (pDomainAssembly->GetModule()->IsVisibleToDebugger())
{
_ASSERTE(pDomainAssembly->IsLoaded());
// If domain assembly isn't yet loaded, just return
if (!pDomainAssembly->IsLoaded())
return;

VMPTR_DomainAssembly vmDomainAssembly = VMPTR_DomainAssembly::NullPtr();
vmDomainAssembly.SetHostPtr(pDomainAssembly);
1 change: 1 addition & 0 deletions src/coreclr/debug/daccess/fntableaccess.h
@@ -41,6 +41,7 @@ struct FakeHeapList
DWORD_PTR pHdrMap; // changed from DWORD*
size_t maxCodeHeapSize;
size_t reserveForJumpStubs;
DWORD_PTR pLoaderAllocator;
#if defined(TARGET_AMD64) || defined(TARGET_ARM64)
DWORD_PTR CLRPersonalityRoutine;
#endif
24 changes: 0 additions & 24 deletions src/coreclr/debug/daccess/request.cpp
@@ -3519,26 +3519,10 @@ ClrDataAccess::TraverseVirtCallStubHeap(CLRDATA_ADDRESS pAppDomain, VCSHeapType
pLoaderHeap = pVcsMgr->indcell_heap;
break;

case LookupHeap:
pLoaderHeap = pVcsMgr->lookup_heap;
break;

case ResolveHeap:
pLoaderHeap = pVcsMgr->resolve_heap;
break;

case DispatchHeap:
pLoaderHeap = pVcsMgr->dispatch_heap;
break;

case CacheEntryHeap:
pLoaderHeap = pVcsMgr->cache_entry_heap;
break;

case VtableHeap:
pLoaderHeap = pVcsMgr->vtable_heap;
break;

default:
hr = E_INVALIDARG;
}
@@ -3585,11 +3569,7 @@ static const char *LoaderAllocatorLoaderHeapNames[] =
"FixupPrecodeHeap",
"NewStubPrecodeHeap",
"IndcellHeap",
"LookupHeap",
"ResolveHeap",
"DispatchHeap",
"CacheEntryHeap",
"VtableHeap",
};


@@ -3632,11 +3612,7 @@ HRESULT ClrDataAccess::GetLoaderAllocatorHeaps(CLRDATA_ADDRESS loaderAllocatorAd
else
{
pLoaderHeaps[i++] = HOST_CDADDR(pVcsMgr->indcell_heap);
pLoaderHeaps[i++] = HOST_CDADDR(pVcsMgr->lookup_heap);
pLoaderHeaps[i++] = HOST_CDADDR(pVcsMgr->resolve_heap);
pLoaderHeaps[i++] = HOST_CDADDR(pVcsMgr->dispatch_heap);
pLoaderHeaps[i++] = HOST_CDADDR(pVcsMgr->cache_entry_heap);
pLoaderHeaps[i++] = HOST_CDADDR(pVcsMgr->vtable_heap);
}

// All of the above are "LoaderHeap" and not the ExplicitControl version.
52 changes: 42 additions & 10 deletions src/coreclr/inc/executableallocator.h
@@ -20,6 +20,16 @@
// This class is responsible for allocation of all the executable memory in the runtime.
class ExecutableAllocator
{
public:

enum CacheableMapping
{
AddToCache,
DoNotAddToCache,
};

private:

// RX address range block descriptor
struct BlockRX
{
@@ -61,6 +71,11 @@ class ExecutableAllocator

static int64_t g_releaseCount;
static int64_t g_reserveCount;

static int64_t g_MapRW_Calls;
static int64_t g_MapRW_CallsWithCacheMiss;
static int64_t g_MapRW_LinkedListWalkDepth;
static int64_t g_LinkedListTotalDepth;
#endif
// Instance of the allocator
static ExecutableAllocator* g_instance;
@@ -102,9 +117,23 @@ class ExecutableAllocator
// for platforms that don't use shared memory.
size_t m_freeOffset = 0;

// Last RW mapping cached so that it can be reused for the next mapping
// Uncomment these to gather information to better choose caching parameters
//#define VARIABLE_SIZED_CACHEDMAPPING_SIZE

// Last RW mappings cached so that it can be reused for the next mapping
// request if it goes into the same range.
BlockRW* m_cachedMapping = NULL;
// This is handled as a 3 element cache with an LRU replacement policy
#ifdef VARIABLE_SIZED_CACHEDMAPPING_SIZE
// If variable sized mappings enabled, make the cache physically big enough to cover all interesting sizes
static int g_cachedMappingSize;
BlockRW* m_cachedMapping[16] = { 0 };
#else
#if defined(HOST_OSX) && defined(HOST_AMD64)
BlockRW* m_cachedMapping[1] = { 0 }; // OSX Amd64 doesn't behave correctly with more than one cached mapping.

Member commented:

David, I can see some coreclr tests in the CI failing on x64 macOS with illegal instruction, so maybe the failure you are seeing is unrelated to your changes. This is from my recent PR:

/private/tmp/helix/working/A18108BE/w/B44209DD/e /private/tmp/helix/working/A18108BE/w/B44209DD/e
  Discovering: System.Runtime.Tests (method display = ClassAndMethod, method display options = None)
  Discovered:  System.Runtime.Tests (found 9140 of 9186 test cases)
  Starting:    System.Runtime.Tests (parallel test collections = on, max threads = 4)
./RunTests.sh: line 168: 76212 Illegal instruction: 4  "$RUNTIME_PATH/dotnet" exec --runtimeconfig System.Runtime.Tests.runtimeconfig.json --depsfile System.Runtime.Tests.deps.json xunit.console.dll System.Runtime.Tests.dll -xml testResults.xml -nologo -nocolor -notrait category=AdditionalTimezoneChecks -notrait category=IgnoreForCI -notrait category=OuterLoop -notrait category=failing $RSP_FILE
/private/tmp/helix/working/A18108BE/w/B44209DD/e
----- end Wed Mar 29 16:33:56 EDT 2023 ----- exit code 132 ----------------------------------------------------------
exit code 132 means SIGILL Illegal Instruction. Core dumped. Likely codegen issue.


Member commented:

@janvorli This looks like #84010 that was fixed 2 days ago.


Member Author commented:

Seems likely. I've reverted my cache size tweak. @janvorli could you review this change and sign off? I'm looking to check this in today/Monday. If you think I need someone else to sign off on the type system stuff, please let me know.

#else
BlockRW* m_cachedMapping[3] = { 0 };
#endif
#endif

// Synchronization of the public allocator methods
CRITSEC_COOKIE m_CriticalSection;
@@ -114,15 +143,18 @@
// and replaces it by the passed in one.
void UpdateCachedMapping(BlockRW *pBlock);

// Remove the cached mapping
void RemoveCachedMapping();
// Remove the cached mapping (1 based indexing)
void RemoveCachedMapping(size_t indexToRemove);

// Find an overlapped cached mapping with pBlock, or return 0
size_t FindOverlappingCachedMapping(BlockRX* pBlock);

// Find existing RW block that maps the whole specified range of RX memory.
// Return NULL if no such block exists.
void* FindRWBlock(void* baseRX, size_t size);
void* FindRWBlock(void* baseRX, size_t size, CacheableMapping cacheMapping);

// Add RW block to the list of existing RW blocks
bool AddRWBlock(void* baseRW, void* baseRX, size_t size);
bool AddRWBlock(void* baseRW, void* baseRX, size_t size, CacheableMapping cacheMapping);

// Remove RW block from the list of existing RW blocks and return the base
// address and size the underlying memory was mapped at.
@@ -230,7 +262,7 @@ class ExecutableAllocator
void Release(void* pRX);

// Map the specified block of executable memory as RW
void* MapRW(void* pRX, size_t size);
void* MapRW(void* pRX, size_t size, CacheableMapping cacheMapping);

// Unmap the RW mapping at the specified address
void UnmapRW(void* pRW);
@@ -290,14 +322,14 @@ class ExecutableWriterHolder
{
}

ExecutableWriterHolder(T* addressRX, size_t size)
ExecutableWriterHolder(T* addressRX, size_t size, ExecutableAllocator::CacheableMapping cacheMapping = ExecutableAllocator::AddToCache)
{
m_addressRX = addressRX;
#if defined(HOST_OSX) && defined(HOST_ARM64)
m_addressRW = addressRX;
PAL_JitWriteProtect(true);
#else
m_addressRW = (T *)ExecutableAllocator::Instance()->MapRW((void*)addressRX, size);
m_addressRW = (T *)ExecutableAllocator::Instance()->MapRW((void*)addressRX, size, cacheMapping);
#endif
}

@@ -320,7 +352,7 @@

#ifdef LOG_EXECUTABLE_ALLOCATOR_STATISTICS
#undef ExecutableWriterHolder
#ifdef TARGET_UNIX
#ifdef HOST_UNIX
#define ExecutableWriterHolder ExecutableAllocator::LogUsage(__FILE__, __LINE__, __PRETTY_FUNCTION__); ExecutableWriterHolderNoLog
#define AssignExecutableWriterHolder(addressRX, size) AssignExecutableWriterHolder(addressRX, size); ExecutableAllocator::LogUsage(__FILE__, __LINE__, __PRETTY_FUNCTION__);
#else
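
Aside (not part of the PR diff): the change above replaces the single cached RW mapping with a small array — three entries on most platforms, one on macOS x64 — kept in most-recently-used order, and FindRWBlock/AddRWBlock/MapRW now take a CacheableMapping argument so callers such as the code page generation path can opt out of caching. The following is a minimal sketch, assuming a simplified BlockRW that carries only a reference count, of how such a small MRU cache can be maintained; UpdateCachedMappingSketch, s_cachedMapping, and CachedMappingCount are illustrative names, not the runtime's actual implementation.

#include <cstddef>

// Simplified stand-in for the allocator's BlockRW descriptor (an assumption for this sketch).
struct BlockRW
{
    void*  baseRW;     // writable mapping
    void*  baseRX;     // executable range it maps
    size_t size;
    size_t refCount;   // cache and callers each hold a reference
};

static const size_t CachedMappingCount = 3;
static BlockRW* s_cachedMapping[CachedMappingCount] = {};

static void UpdateCachedMappingSketch(BlockRW* pBlock)
{
    // If the block is already cached, just move it to the front (MRU slot).
    for (size_t i = 0; i < CachedMappingCount; i++)
    {
        if (s_cachedMapping[i] == pBlock)
        {
            for (size_t j = i; j > 0; j--)
                s_cachedMapping[j] = s_cachedMapping[j - 1];
            s_cachedMapping[0] = pBlock;
            return;
        }
    }

    // Otherwise evict the least recently used entry and insert the new block at the front.
    BlockRW* pEvicted = s_cachedMapping[CachedMappingCount - 1];
    for (size_t j = CachedMappingCount - 1; j > 0; j--)
        s_cachedMapping[j] = s_cachedMapping[j - 1];

    pBlock->refCount++;            // the cache keeps the mapping alive
    s_cachedMapping[0] = pBlock;

    if (pEvicted != nullptr)
        pEvicted->refCount--;      // presumably releasing the last reference is what unmaps the RW view
}

A caller that will not revisit a mapping — for example the LoaderHeap code page generation path, which this PR switches to ExecutableAllocator::DoNotAddToCache — skips this update entirely, so one-shot mappings do not evict the frequently reused ones.
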
2 changes: 1 addition & 1 deletion src/coreclr/inc/holder.h
@@ -946,7 +946,7 @@ FORCEINLINE void StubRelease(TYPE* value)
if (value)
{
#ifdef LOG_EXECUTABLE_ALLOCATOR_STATISTICS
#ifdef TARGET_UNIX
#ifdef HOST_UNIX
LOGGER::LogUsage(__FILE__, __LINE__, __PRETTY_FUNCTION__);
#else
LOGGER::LogUsage(__FILE__, __LINE__, __FUNCTION__);
20 changes: 16 additions & 4 deletions src/coreclr/inc/loaderheap.h
@@ -154,6 +154,17 @@ struct LoaderHeapEvent;



// When an interleaved LoaderHeap is constructed, this is the interleaving size
inline UINT32 GetStubCodePageSize()
{
#if defined(TARGET_ARM64) && defined(TARGET_UNIX)
return max(16*1024, GetOsPageSize());
#elif defined(TARGET_ARM)
return 4096; // ARM is special as the 32bit instruction set does not easily permit a 16KB offset
#else
return 16*1024;
#endif
}



@@ -185,6 +196,7 @@ class UnlockedLoaderHeap
{
#ifdef _DEBUG
friend class LoaderHeapSniffer;
friend struct LoaderHeapFreeBlock;
#endif

#ifdef DACCESS_COMPILE
@@ -276,7 +288,7 @@ class UnlockedLoaderHeap

public:
BOOL m_fExplicitControl; // Am I a LoaderHeap or an ExplicitControlLoaderHeap?
void (*m_codePageGenerator)(BYTE* pageBase, BYTE* pageBaseRX);
void (*m_codePageGenerator)(BYTE* pageBase, BYTE* pageBaseRX, SIZE_T size);

#ifdef DACCESS_COMPILE
public:
@@ -298,7 +310,7 @@
SIZE_T dwReservedRegionSize,
RangeList *pRangeList = NULL,
HeapKind kind = HeapKind::Data,
void (*codePageGenerator)(BYTE* pageBase, BYTE* pageBaseRX) = NULL,
void (*codePageGenerator)(BYTE* pageBase, BYTE* pageBaseRX, SIZE_T size) = NULL,
DWORD dwGranularity = 1);

~UnlockedLoaderHeap();
@@ -467,7 +479,7 @@ class LoaderHeap : public UnlockedLoaderHeap, public ILoaderHeapBackout
RangeList *pRangeList = NULL,
UnlockedLoaderHeap::HeapKind kind = UnlockedLoaderHeap::HeapKind::Data,
BOOL fUnlocked = FALSE,
void (*codePageGenerator)(BYTE* pageBase, BYTE* pageBaseRX) = NULL,
void (*codePageGenerator)(BYTE* pageBase, BYTE* pageBaseRX, SIZE_T size) = NULL,
DWORD dwGranularity = 1
)
: UnlockedLoaderHeap(dwReserveBlockSize,
@@ -491,7 +503,7 @@
RangeList *pRangeList = NULL,
UnlockedLoaderHeap::HeapKind kind = UnlockedLoaderHeap::HeapKind::Data,
BOOL fUnlocked = FALSE,
void (*codePageGenerator)(BYTE* pageBase, BYTE* pageBaseRX) = NULL,
void (*codePageGenerator)(BYTE* pageBase, BYTE* pageBaseRX, SIZE_T size) = NULL,
DWORD dwGranularity = 1
)
: UnlockedLoaderHeap(dwReserveBlockSize,
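
Aside (not part of the PR diff): the codePageGenerator callback now receives the page size because interleaved stub pages are 16KB on most targets (see GetStubCodePageSize above) rather than a fixed 4KB, so the generator can no longer assume the OS page size. Below is a minimal sketch of what such a generator does, assuming a hypothetical EmitStubCode helper and a fixed per-stub code size; it illustrates the interleaved code/data layout only and is not the PR's actual generator.

#include <cstdint>
#include <cstring>
#include <cstddef>

typedef unsigned char BYTE;
typedef size_t SIZE_T;

static const SIZE_T STUB_CODE_SIZE = 32;   // assumed stub granularity for this sketch

// Assumed helper: a real generator emits architecture-specific instructions here.
// This placeholder just records how far the stub's data slot is from its code.
static void EmitStubCode(BYTE* pCode, BYTE* pCodeRX, BYTE* pDataSlot)
{
    memset(pCode, 0, STUB_CODE_SIZE);
    *(uintptr_t*)pCode = (uintptr_t)(pDataSlot - pCodeRX);
}

static void StubCodePageGeneratorSketch(BYTE* pageBase, BYTE* pageBaseRX, SIZE_T size)
{
    // The data page is mapped directly after the code page, so the data slot
    // for the stub at offset N lives at pageBaseRX + size + N. The size
    // parameter tells the generator how large that stride is and how much
    // of the page to fill.
    for (SIZE_T offset = 0; offset < size; offset += STUB_CODE_SIZE)
    {
        EmitStubCode(pageBase + offset,           // RW view being written
                     pageBaseRX + offset,         // RX address the stub will execute at
                     pageBaseRX + size + offset); // matching slot on the interleaved data page
    }
}
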