From c5c99642cdd11687c7cb4fafc39009694371ae82 Mon Sep 17 00:00:00 2001 From: Sam Schweigel Date: Fri, 7 Nov 2025 12:07:23 -0800 Subject: [PATCH] Add JLJITLinkMemoryManager (ports memory manager to JITLink) Ports our RTDyld memory manager to JITLink in order to avoid memory use regressions after switching to JITLink everywhere (#60031). This is a direct port: finalization must happen all at once, because it invalidates all allocation `wr_ptr`s. I decided it wasn't worth it to associate `OnFinalizedFunction` callbacks with each block, since the blocks are large enough to make it extremely likely that all in-flight allocations land in the same block; everything must be relocated before finalization can happen. I plan to add support for DualMapAllocator on ARM64 macOS, as well as an alternative for executable memory later. For now, we fall back to the old MapperJITLinkMemoryManager. Release JLJITLinkMemoryManager lock when calling FinalizedCallbacks --- src/cgmemmgr.cpp | 233 +++++++++++++++++++++++++++++++++++++--------- src/jitlayers.cpp | 7 +- 2 files changed, 188 insertions(+), 52 deletions(-) diff --git a/src/cgmemmgr.cpp b/src/cgmemmgr.cpp index 99f78b81bf0b2..e36f9f80cfccf 100644 --- a/src/cgmemmgr.cpp +++ b/src/cgmemmgr.cpp @@ -3,7 +3,11 @@ #include "llvm-version.h" #include "platform.h" +#include +#include +#include #include + #include "julia.h" #include "julia_internal.h" @@ -460,18 +464,27 @@ struct Block { } }; +struct Allocation { + // Address to write to (the one returned by the allocation function) + void *wr_addr; + // Runtime address + void *rt_addr; + size_t sz; + bool relocated; +}; + class RWAllocator { static constexpr int nblocks = 8; Block blocks[nblocks]{}; public: RWAllocator() JL_NOTSAFEPOINT = default; - void *alloc(size_t size, size_t align) JL_NOTSAFEPOINT + Allocation alloc(size_t size, size_t align) JL_NOTSAFEPOINT { size_t min_size = (size_t)-1; int min_id = 0; for (int i = 0;i < nblocks && blocks[i].ptr;i++) { if (void *ptr = 
blocks[i].alloc(size, align)) - return ptr; + return {ptr, ptr, size, false}; if (blocks[i].avail < min_size) { min_size = blocks[i].avail; min_id = i; @@ -479,7 +492,8 @@ class RWAllocator { } size_t block_size = get_block_size(size); blocks[min_id].reset(map_anon_page(block_size), block_size); - return blocks[min_id].alloc(size, align); + void *ptr = blocks[min_id].alloc(size, align); + return {ptr, ptr, size, false}; } }; @@ -519,16 +533,6 @@ struct SplitPtrBlock : public Block { } }; -struct Allocation { - // Address to write to (the one returned by the allocation function) - void *wr_addr; - // Runtime address - void *rt_addr; - size_t sz; - bool relocated; -}; - -template class ROAllocator { protected: static constexpr int nblocks = 8; @@ -556,7 +560,7 @@ class ROAllocator { } // Allocations that have not been finalized yet. SmallVector allocations; - void *alloc(size_t size, size_t align) JL_NOTSAFEPOINT + Allocation alloc(size_t size, size_t align) JL_NOTSAFEPOINT { size_t min_size = (size_t)-1; int min_id = 0; @@ -572,8 +576,9 @@ class ROAllocator { wr_ptr = get_wr_ptr(block, ptr, size, align); } block.state |= SplitPtrBlock::Alloc; - allocations.push_back(Allocation{wr_ptr, ptr, size, false}); - return wr_ptr; + Allocation a{wr_ptr, ptr, size, false}; + allocations.push_back(a); + return a; } if (block.avail < min_size) { min_size = block.avail; @@ -594,18 +599,21 @@ class ROAllocator { #ifdef _OS_WINDOWS_ block.state = SplitPtrBlock::Alloc; void *wr_ptr = get_wr_ptr(block, ptr, size, align); - allocations.push_back(Allocation{wr_ptr, ptr, size, false}); + Allocation a{wr_ptr, ptr, size, false}; + allocations.push_back(a); ptr = wr_ptr; #else block.state = SplitPtrBlock::Alloc | SplitPtrBlock::InitAlloc; - allocations.push_back(Allocation{ptr, ptr, size, false}); + Allocation a{ptr, ptr, size, false}; + allocations.push_back(a); #endif - return ptr; + return a; } }; -template -class DualMapAllocator : public ROAllocator { +class DualMapAllocator : public 
ROAllocator { + bool exec; + protected: void *get_wr_ptr(SplitPtrBlock &block, void *rt_ptr, size_t, size_t) override JL_NOTSAFEPOINT { @@ -666,7 +674,7 @@ class DualMapAllocator : public ROAllocator { } } public: - DualMapAllocator() JL_NOTSAFEPOINT + DualMapAllocator(bool exec) JL_NOTSAFEPOINT : exec(exec) { assert(anon_hdl != -1); } @@ -679,13 +687,13 @@ class DualMapAllocator : public ROAllocator { finalize_block(block, true); block.reset(nullptr, 0); } - ROAllocator::finalize(); + ROAllocator::finalize(); } }; #ifdef _OS_LINUX_ -template -class SelfMemAllocator : public ROAllocator { +class SelfMemAllocator : public ROAllocator { + bool exec; SmallVector temp_buff; protected: void *get_wr_ptr(SplitPtrBlock &block, void *rt_ptr, @@ -722,9 +730,7 @@ class SelfMemAllocator : public ROAllocator { } } public: - SelfMemAllocator() JL_NOTSAFEPOINT - : ROAllocator(), - temp_buff() + SelfMemAllocator(bool exec) JL_NOTSAFEPOINT : exec(exec), temp_buff() { assert(get_self_mem_fd() != -1); } @@ -758,11 +764,25 @@ class SelfMemAllocator : public ROAllocator { } if (cached) temp_buff.resize(1); - ROAllocator::finalize(); + ROAllocator::finalize(); } }; #endif // _OS_LINUX_ +std::pair, std::unique_ptr> +get_preferred_allocators() JL_NOTSAFEPOINT +{ +#ifdef _OS_LINUX_ + if (get_self_mem_fd() != -1) + return {std::make_unique(false), + std::make_unique(true)}; +#endif + if (init_shared_map() != -1) + return {std::make_unique(false), + std::make_unique(true)}; + return {}; +} + class RTDyldMemoryManagerJL : public SectionMemoryManager { struct EHFrame { uint8_t *addr; @@ -772,8 +792,8 @@ class RTDyldMemoryManagerJL : public SectionMemoryManager { void operator=(const RTDyldMemoryManagerJL&) = delete; SmallVector pending_eh; RWAllocator rw_alloc; - std::unique_ptr> ro_alloc; - std::unique_ptr> exe_alloc; + std::unique_ptr ro_alloc; + std::unique_ptr exe_alloc; size_t total_allocated; public: @@ -781,20 +801,9 @@ class RTDyldMemoryManagerJL : public SectionMemoryManager { : 
SectionMemoryManager(), pending_eh(), rw_alloc(), - ro_alloc(), - exe_alloc(), total_allocated(0) { -#ifdef _OS_LINUX_ - if (!ro_alloc && get_self_mem_fd() != -1) { - ro_alloc.reset(new SelfMemAllocator()); - exe_alloc.reset(new SelfMemAllocator()); - } -#endif - if (!ro_alloc && init_shared_map() != -1) { - ro_alloc.reset(new DualMapAllocator()); - exe_alloc.reset(new DualMapAllocator()); - } + std::tie(ro_alloc, exe_alloc) = get_preferred_allocators(); } ~RTDyldMemoryManagerJL() override JL_NOTSAFEPOINT { @@ -847,7 +856,7 @@ uint8_t *RTDyldMemoryManagerJL::allocateCodeSection(uintptr_t Size, jl_timing_counter_inc(JL_TIMING_COUNTER_JITSize, Size); jl_timing_counter_inc(JL_TIMING_COUNTER_JITCodeSize, Size); if (exe_alloc) - return (uint8_t*)exe_alloc->alloc(Size, Alignment); + return (uint8_t*)exe_alloc->alloc(Size, Alignment).wr_addr; return SectionMemoryManager::allocateCodeSection(Size, Alignment, SectionID, SectionName); } @@ -862,9 +871,9 @@ uint8_t *RTDyldMemoryManagerJL::allocateDataSection(uintptr_t Size, jl_timing_counter_inc(JL_TIMING_COUNTER_JITSize, Size); jl_timing_counter_inc(JL_TIMING_COUNTER_JITDataSize, Size); if (!isReadOnly) - return (uint8_t*)rw_alloc.alloc(Size, Alignment); + return (uint8_t*)rw_alloc.alloc(Size, Alignment).wr_addr; if (ro_alloc) - return (uint8_t*)ro_alloc->alloc(Size, Alignment); + return (uint8_t*)ro_alloc->alloc(Size, Alignment).wr_addr; return SectionMemoryManager::allocateDataSection(Size, Alignment, SectionID, SectionName, isReadOnly); } @@ -919,6 +928,133 @@ void RTDyldMemoryManagerJL::deregisterEHFrames(uint8_t *Addr, } #endif +class JLJITLinkMemoryManager : public jitlink::JITLinkMemoryManager { + using OnFinalizedFunction = + jitlink::JITLinkMemoryManager::InFlightAlloc::OnFinalizedFunction; + + std::mutex Mutex; + RWAllocator RWAlloc; + std::unique_ptr ROAlloc; + std::unique_ptr ExeAlloc; + SmallVector FinalizedCallbacks; + uint32_t InFlight{0}; + +public: + class InFlightAlloc; + + static std::unique_ptr Create() + 
{ + auto [ROAlloc, ExeAlloc] = get_preferred_allocators(); + if (ROAlloc && ExeAlloc) + return std::unique_ptr( + new JLJITLinkMemoryManager(std::move(ROAlloc), std::move(ExeAlloc))); + + return cantFail( + orc::MapperJITLinkMemoryManager::CreateWithMapper( + /*Reservation Granularity*/ 16 * 1024 * 1024)); + } + + void allocate(const jitlink::JITLinkDylib *JD, jitlink::LinkGraph &G, + OnAllocatedFunction OnAllocated) override; + + void deallocate(std::vector Allocs, + OnDeallocatedFunction OnDeallocated) override + { + jl_unreachable(); + } + +protected: + JLJITLinkMemoryManager(std::unique_ptr ROAlloc, + std::unique_ptr ExeAlloc) + : ROAlloc(std::move(ROAlloc)), ExeAlloc(std::move(ExeAlloc)) + { + } + + void finalize(OnFinalizedFunction OnFinalized) + { + SmallVector Callbacks; + { + std::unique_lock Lock{Mutex}; + FinalizedCallbacks.push_back(std::move(OnFinalized)); + + if (--InFlight > 0) + return; + + ROAlloc->finalize(); + ExeAlloc->finalize(); + Callbacks = std::move(FinalizedCallbacks); + } + + for (auto &CB : Callbacks) + std::move(CB)(FinalizedAlloc{}); + } +}; + +class JLJITLinkMemoryManager::InFlightAlloc + : public jitlink::JITLinkMemoryManager::InFlightAlloc { + JLJITLinkMemoryManager &MM; + jitlink::LinkGraph &G; + +public: + InFlightAlloc(JLJITLinkMemoryManager &MM, jitlink::LinkGraph &G) : MM(MM), G(G) {} + + void abandon(OnAbandonedFunction OnAbandoned) override { jl_unreachable(); } + + void finalize(OnFinalizedFunction OnFinalized) override + { + auto *GP = &G; + MM.finalize([GP, OnFinalized = + std::move(OnFinalized)](Expected FA) mutable { + if (!FA) + return OnFinalized(FA.takeError()); + // Need to handle dealloc actions when we GC code + auto E = orc::shared::runFinalizeActions(GP->allocActions()); + if (!E) + return OnFinalized(E.takeError()); + OnFinalized(std::move(FA)); + }); + } +}; + +using orc::MemProt; + +void JLJITLinkMemoryManager::allocate(const jitlink::JITLinkDylib *JD, + jitlink::LinkGraph &G, + OnAllocatedFunction 
OnAllocated) +{ + jitlink::BasicLayout BL{G}; + + { + std::unique_lock Lock{Mutex}; + for (auto &[AG, Seg] : BL.segments()) { + if (AG.getMemLifetime() == orc::MemLifetime::NoAlloc) + continue; + assert(AG.getMemLifetime() == orc::MemLifetime::Standard); + + auto Prot = AG.getMemProt(); + uint64_t Alignment = Seg.Alignment.value(); + uint64_t Size = Seg.ContentSize + Seg.ZeroFillSize; + Allocation Alloc; + if (Prot == (MemProt::Read | MemProt::Write)) + Alloc = RWAlloc.alloc(Size, Alignment); + else if (Prot == MemProt::Read) + Alloc = ROAlloc->alloc(Size, Alignment); + else if (Prot == (MemProt::Read | MemProt::Exec)) + Alloc = ExeAlloc->alloc(Size, Alignment); + else + abort(); + + Seg.Addr = orc::ExecutorAddr::fromPtr(Alloc.rt_addr); + Seg.WorkingMem = (char *)Alloc.wr_addr; + } + } + + if (auto Err = BL.apply()) + return OnAllocated(std::move(Err)); + + ++InFlight; + OnAllocated(std::make_unique(*this, G)); +} } RTDyldMemoryManager* createRTDyldMemoryManager() JL_NOTSAFEPOINT @@ -930,3 +1066,8 @@ size_t getRTDyldMemoryManagerTotalBytes(RTDyldMemoryManager *mm) JL_NOTSAFEPOINT { return ((RTDyldMemoryManagerJL*)mm)->getTotalBytes(); } + +std::unique_ptr createJITLinkMemoryManager() +{ + return JLJITLinkMemoryManager::Create(); +} diff --git a/src/jitlayers.cpp b/src/jitlayers.cpp index 0773d1a6c16a1..90091cc1f38db 100644 --- a/src/jitlayers.cpp +++ b/src/jitlayers.cpp @@ -1208,12 +1208,6 @@ class JLMemoryUsagePlugin : public ObjectLinkingLayer::Plugin { #pragma clang diagnostic ignored "-Wunused-function" #endif -// TODO: Port our memory management optimisations to JITLink instead of using the -// default InProcessMemoryManager. 
-std::unique_ptr createJITLinkMemoryManager() JL_NOTSAFEPOINT { - return cantFail(orc::MapperJITLinkMemoryManager::CreateWithMapper(/*Reservation Granularity*/ 16 * 1024 * 1024)); -} - #ifdef _COMPILER_CLANG_ #pragma clang diagnostic pop #endif @@ -1237,6 +1231,7 @@ class JLEHFrameRegistrar final : public jitlink::EHFrameRegistrar { }; RTDyldMemoryManager *createRTDyldMemoryManager(void) JL_NOTSAFEPOINT; +std::unique_ptr createJITLinkMemoryManager() JL_NOTSAFEPOINT; // A simple forwarding class, since OrcJIT v2 needs a unique_ptr, while we have a shared_ptr class ForwardingMemoryManager : public RuntimeDyld::MemoryManager {