diff --git a/src/cgmemmgr.cpp b/src/cgmemmgr.cpp
index 99f78b81bf0b2..e36f9f80cfccf 100644
--- a/src/cgmemmgr.cpp
+++ b/src/cgmemmgr.cpp
@@ -3,7 +3,11 @@
 #include "llvm-version.h"
 #include "platform.h"
+#include <memory>
+#include <mutex>
+#include <utility>
 
 #include <llvm/ExecutionEngine/SectionMemoryManager.h>
+
 #include "julia.h"
 #include "julia_internal.h"
 
@@ -460,18 +464,27 @@ struct Block {
     }
 };
 
+struct Allocation {
+    // Address to write to (the one returned by the allocation function)
+    void *wr_addr;
+    // Runtime address
+    void *rt_addr;
+    size_t sz;
+    bool relocated;
+};
+
 class RWAllocator {
     static constexpr int nblocks = 8;
     Block blocks[nblocks]{};
 public:
     RWAllocator() JL_NOTSAFEPOINT = default;
-    void *alloc(size_t size, size_t align) JL_NOTSAFEPOINT
+    Allocation alloc(size_t size, size_t align) JL_NOTSAFEPOINT
     {
         size_t min_size = (size_t)-1;
         int min_id = 0;
         for (int i = 0;i < nblocks && blocks[i].ptr;i++) {
             if (void *ptr = blocks[i].alloc(size, align))
-                return ptr;
+                return {ptr, ptr, size, false};
             if (blocks[i].avail < min_size) {
                 min_size = blocks[i].avail;
                 min_id = i;
@@ -479,7 +492,8 @@ class RWAllocator {
         }
         size_t block_size = get_block_size(size);
         blocks[min_id].reset(map_anon_page(block_size), block_size);
-        return blocks[min_id].alloc(size, align);
+        void *ptr = blocks[min_id].alloc(size, align);
+        return {ptr, ptr, size, false};
     }
 };
 
@@ -519,16 +533,6 @@ struct SplitPtrBlock : public Block {
     }
 };
 
-struct Allocation {
-    // Address to write to (the one returned by the allocation function)
-    void *wr_addr;
-    // Runtime address
-    void *rt_addr;
-    size_t sz;
-    bool relocated;
-};
-
-template <bool exec>
 class ROAllocator {
 protected:
     static constexpr int nblocks = 8;
@@ -556,7 +560,7 @@ class ROAllocator {
     }
     // Allocations that have not been finalized yet.
     SmallVector<Allocation, 16> allocations;
-    void *alloc(size_t size, size_t align) JL_NOTSAFEPOINT
+    Allocation alloc(size_t size, size_t align) JL_NOTSAFEPOINT
     {
         size_t min_size = (size_t)-1;
         int min_id = 0;
@@ -572,8 +576,9 @@ class ROAllocator {
                     wr_ptr = get_wr_ptr(block, ptr, size, align);
                 }
                 block.state |= SplitPtrBlock::Alloc;
-                allocations.push_back(Allocation{wr_ptr, ptr, size, false});
-                return wr_ptr;
+                Allocation a{wr_ptr, ptr, size, false};
+                allocations.push_back(a);
+                return a;
             }
             if (block.avail < min_size) {
                 min_size = block.avail;
@@ -594,18 +599,21 @@ class ROAllocator {
 #ifdef _OS_WINDOWS_
         block.state = SplitPtrBlock::Alloc;
         void *wr_ptr = get_wr_ptr(block, ptr, size, align);
-        allocations.push_back(Allocation{wr_ptr, ptr, size, false});
+        Allocation a{wr_ptr, ptr, size, false};
+        allocations.push_back(a);
         ptr = wr_ptr;
 #else
         block.state = SplitPtrBlock::Alloc | SplitPtrBlock::InitAlloc;
-        allocations.push_back(Allocation{ptr, ptr, size, false});
+        Allocation a{ptr, ptr, size, false};
+        allocations.push_back(a);
 #endif
-        return ptr;
+        return a;
     }
 };
 
-template <bool exec>
-class DualMapAllocator : public ROAllocator<exec> {
+class DualMapAllocator : public ROAllocator {
+    bool exec;
+
 protected:
     void *get_wr_ptr(SplitPtrBlock &block, void *rt_ptr, size_t, size_t) override JL_NOTSAFEPOINT
     {
@@ -666,7 +674,7 @@ class DualMapAllocator : public ROAllocator<exec> {
         }
     }
 public:
-    DualMapAllocator() JL_NOTSAFEPOINT
+    DualMapAllocator(bool exec) JL_NOTSAFEPOINT : exec(exec)
     {
         assert(anon_hdl != -1);
     }
@@ -679,13 +687,13 @@ class DualMapAllocator : public ROAllocator<exec> {
             finalize_block(block, true);
             block.reset(nullptr, 0);
         }
-        ROAllocator<exec>::finalize();
+        ROAllocator::finalize();
     }
 };
 
 #ifdef _OS_LINUX_
-template <bool exec>
-class SelfMemAllocator : public ROAllocator<exec> {
+class SelfMemAllocator : public ROAllocator {
+    bool exec;
     SmallVector<SmallVector<uint8_t, 0>, 16> temp_buff;
 protected:
     void *get_wr_ptr(SplitPtrBlock &block, void *rt_ptr,
@@ -722,9 +730,7 @@ class SelfMemAllocator : public ROAllocator<exec> {
         }
     }
 public:
-    SelfMemAllocator() JL_NOTSAFEPOINT
-        : ROAllocator<exec>(),
-          temp_buff()
+    SelfMemAllocator(bool exec) JL_NOTSAFEPOINT : exec(exec), temp_buff()
     {
         assert(get_self_mem_fd() != -1);
     }
@@ -758,11 +764,25 @@ class SelfMemAllocator : public ROAllocator<exec> {
         }
         if (cached)
             temp_buff.resize(1);
-        ROAllocator<exec>::finalize();
+        ROAllocator::finalize();
     }
 };
 #endif // _OS_LINUX_
 
+std::pair<std::unique_ptr<ROAllocator>, std::unique_ptr<ROAllocator>>
+get_preferred_allocators() JL_NOTSAFEPOINT
+{
+#ifdef _OS_LINUX_
+    if (get_self_mem_fd() != -1)
+        return {std::make_unique<SelfMemAllocator>(false),
+                std::make_unique<SelfMemAllocator>(true)};
+#endif
+    if (init_shared_map() != -1)
+        return {std::make_unique<DualMapAllocator>(false),
+                std::make_unique<DualMapAllocator>(true)};
+    return {};
+}
+
 class RTDyldMemoryManagerJL : public SectionMemoryManager {
     struct EHFrame {
         uint8_t *addr;
@@ -772,8 +792,8 @@ class RTDyldMemoryManagerJL : public SectionMemoryManager {
     void operator=(const RTDyldMemoryManagerJL&) = delete;
     SmallVector<EHFrame, 16> pending_eh;
     RWAllocator rw_alloc;
-    std::unique_ptr<ROAllocator<false>> ro_alloc;
-    std::unique_ptr<ROAllocator<true>> exe_alloc;
+    std::unique_ptr<ROAllocator> ro_alloc;
+    std::unique_ptr<ROAllocator> exe_alloc;
     size_t total_allocated;
 
 public:
@@ -781,20 +801,9 @@ class RTDyldMemoryManagerJL : public SectionMemoryManager {
         : SectionMemoryManager(),
           pending_eh(),
           rw_alloc(),
-          ro_alloc(),
-          exe_alloc(),
           total_allocated(0)
     {
-#ifdef _OS_LINUX_
-        if (!ro_alloc && get_self_mem_fd() != -1) {
-            ro_alloc.reset(new SelfMemAllocator<false>());
-            exe_alloc.reset(new SelfMemAllocator<true>());
-        }
-#endif
-        if (!ro_alloc && init_shared_map() != -1) {
-            ro_alloc.reset(new DualMapAllocator<false>());
-            exe_alloc.reset(new DualMapAllocator<true>());
-        }
+        std::tie(ro_alloc, exe_alloc) = get_preferred_allocators();
     }
     ~RTDyldMemoryManagerJL() override JL_NOTSAFEPOINT {
@@ -847,7 +856,7 @@ uint8_t *RTDyldMemoryManagerJL::allocateCodeSection(uintptr_t Size,
     jl_timing_counter_inc(JL_TIMING_COUNTER_JITSize, Size);
     jl_timing_counter_inc(JL_TIMING_COUNTER_JITCodeSize, Size);
     if (exe_alloc)
-        return (uint8_t*)exe_alloc->alloc(Size, Alignment);
+        return (uint8_t*)exe_alloc->alloc(Size, Alignment).wr_addr;
     return SectionMemoryManager::allocateCodeSection(Size, Alignment, SectionID,
                                                      SectionName);
 }
@@ -862,9 +871,9 @@ uint8_t *RTDyldMemoryManagerJL::allocateDataSection(uintptr_t Size,
     jl_timing_counter_inc(JL_TIMING_COUNTER_JITSize, Size);
     jl_timing_counter_inc(JL_TIMING_COUNTER_JITDataSize, Size);
     if (!isReadOnly)
-        return (uint8_t*)rw_alloc.alloc(Size, Alignment);
+        return (uint8_t*)rw_alloc.alloc(Size, Alignment).wr_addr;
     if (ro_alloc)
-        return (uint8_t*)ro_alloc->alloc(Size, Alignment);
+        return (uint8_t*)ro_alloc->alloc(Size, Alignment).wr_addr;
     return SectionMemoryManager::allocateDataSection(Size, Alignment, SectionID,
                                                      SectionName, isReadOnly);
 }
@@ -919,6 +928,133 @@ void RTDyldMemoryManagerJL::deregisterEHFrames(uint8_t *Addr,
 }
 #endif
 
+class JLJITLinkMemoryManager : public jitlink::JITLinkMemoryManager {
+    using OnFinalizedFunction =
+        jitlink::JITLinkMemoryManager::InFlightAlloc::OnFinalizedFunction;
+
+    std::mutex Mutex;
+    RWAllocator RWAlloc;
+    std::unique_ptr<ROAllocator> ROAlloc;
+    std::unique_ptr<ROAllocator> ExeAlloc;
+    SmallVector<OnFinalizedFunction> FinalizedCallbacks;
+    uint32_t InFlight{0};
+
+public:
+    class InFlightAlloc;
+
+    static std::unique_ptr<jitlink::JITLinkMemoryManager> Create()
+    {
+        auto [ROAlloc, ExeAlloc] = get_preferred_allocators();
+        if (ROAlloc && ExeAlloc)
+            return std::unique_ptr<jitlink::JITLinkMemoryManager>(
+                new JLJITLinkMemoryManager(std::move(ROAlloc), std::move(ExeAlloc)));
+
+        return cantFail(
+            orc::MapperJITLinkMemoryManager::CreateWithMapper<orc::InProcessMemoryMapper>(
+                /*Reservation Granularity*/ 16 * 1024 * 1024));
+    }
+
+    void allocate(const jitlink::JITLinkDylib *JD, jitlink::LinkGraph &G,
+                  OnAllocatedFunction OnAllocated) override;
+
+    void deallocate(std::vector<FinalizedAlloc> Allocs,
+                    OnDeallocatedFunction OnDeallocated) override
+    {
+        jl_unreachable();
+    }
+
+protected:
+    JLJITLinkMemoryManager(std::unique_ptr<ROAllocator> ROAlloc,
+                           std::unique_ptr<ROAllocator> ExeAlloc)
+        : ROAlloc(std::move(ROAlloc)), ExeAlloc(std::move(ExeAlloc))
+    {
+    }
+
+    void finalize(OnFinalizedFunction OnFinalized)
+    {
+        SmallVector<OnFinalizedFunction> Callbacks;
+        {
+            std::unique_lock Lock{Mutex};
+
+            FinalizedCallbacks.push_back(std::move(OnFinalized));
+
+            if (--InFlight > 0)
+                return;
+
+            ROAlloc->finalize();
+            ExeAlloc->finalize();
+            Callbacks = std::move(FinalizedCallbacks);
+        }
+
+        for (auto &CB : Callbacks)
+            std::move(CB)(FinalizedAlloc{});
+    }
+};
+
+class JLJITLinkMemoryManager::InFlightAlloc
+    : public jitlink::JITLinkMemoryManager::InFlightAlloc {
+    JLJITLinkMemoryManager &MM;
+    jitlink::LinkGraph &G;
+
+public:
+    InFlightAlloc(JLJITLinkMemoryManager &MM, jitlink::LinkGraph &G) : MM(MM), G(G) {}
+
+    void abandon(OnAbandonedFunction OnAbandoned) override { jl_unreachable(); }
+
+    void finalize(OnFinalizedFunction OnFinalized) override
+    {
+        auto *GP = &G;
+        MM.finalize([GP, OnFinalized =
+                             std::move(OnFinalized)](Expected<FinalizedAlloc> FA) mutable {
+            if (!FA)
+                return OnFinalized(FA.takeError());
+            // Need to handle dealloc actions when we GC code
+            auto E = orc::shared::runFinalizeActions(GP->allocActions());
+            if (!E)
+                return OnFinalized(E.takeError());
+            OnFinalized(std::move(FA));
+        });
+    }
+};
+
+using orc::MemProt;
+
+void JLJITLinkMemoryManager::allocate(const jitlink::JITLinkDylib *JD,
+                                      jitlink::LinkGraph &G,
+                                      OnAllocatedFunction OnAllocated)
+{
+    jitlink::BasicLayout BL{G};
+
+    {
+        std::unique_lock Lock{Mutex};
+        for (auto &[AG, Seg] : BL.segments()) {
+            if (AG.getMemLifetime() == orc::MemLifetime::NoAlloc)
+                continue;
+            assert(AG.getMemLifetime() == orc::MemLifetime::Standard);
+
+            auto Prot = AG.getMemProt();
+            uint64_t Alignment = Seg.Alignment.value();
+            uint64_t Size = Seg.ContentSize + Seg.ZeroFillSize;
+            Allocation Alloc;
+            if (Prot == (MemProt::Read | MemProt::Write))
+                Alloc = RWAlloc.alloc(Size, Alignment);
+            else if (Prot == MemProt::Read)
+                Alloc = ROAlloc->alloc(Size, Alignment);
+            else if (Prot == (MemProt::Read | MemProt::Exec))
+                Alloc = ExeAlloc->alloc(Size, Alignment);
+            else
+                abort();
+
+            Seg.Addr = orc::ExecutorAddr::fromPtr(Alloc.rt_addr);
+            Seg.WorkingMem = (char *)Alloc.wr_addr;
+        }
+    }
+
+    if (auto Err = BL.apply())
+        return OnAllocated(std::move(Err));
+
+    ++InFlight;
+    OnAllocated(std::make_unique<InFlightAlloc>(*this, G));
+}
 }
 
 RTDyldMemoryManager* createRTDyldMemoryManager() JL_NOTSAFEPOINT
@@ -930,3 +1066,8 @@ size_t getRTDyldMemoryManagerTotalBytes(RTDyldMemoryManager *mm) JL_NOTSAFEPOINT
 {
     return ((RTDyldMemoryManagerJL*)mm)->getTotalBytes();
 }
+
+std::unique_ptr<jitlink::JITLinkMemoryManager> createJITLinkMemoryManager()
+{
+    return JLJITLinkMemoryManager::Create();
+}
diff --git a/src/jitlayers.cpp b/src/jitlayers.cpp
index 0773d1a6c16a1..90091cc1f38db 100644
--- a/src/jitlayers.cpp
+++ b/src/jitlayers.cpp
@@ -1208,12 +1208,6 @@ class JLMemoryUsagePlugin : public ObjectLinkingLayer::Plugin {
 #pragma clang diagnostic ignored "-Wunused-function"
 #endif
 
-// TODO: Port our memory management optimisations to JITLink instead of using the
-// default InProcessMemoryManager.
-std::unique_ptr<jitlink::JITLinkMemoryManager> createJITLinkMemoryManager() JL_NOTSAFEPOINT {
-    return cantFail(orc::MapperJITLinkMemoryManager::CreateWithMapper<orc::InProcessMemoryMapper>(/*Reservation Granularity*/ 16 * 1024 * 1024));
-}
-
 #ifdef _COMPILER_CLANG_
 #pragma clang diagnostic pop
 #endif
@@ -1237,6 +1231,7 @@ class JLEHFrameRegistrar final : public jitlink::EHFrameRegistrar {
 };
 
 RTDyldMemoryManager *createRTDyldMemoryManager(void) JL_NOTSAFEPOINT;
+std::unique_ptr<jitlink::JITLinkMemoryManager> createJITLinkMemoryManager() JL_NOTSAFEPOINT;
 
 // A simple forwarding class, since OrcJIT v2 needs a unique_ptr, while we have a shared_ptr
 class ForwardingMemoryManager : public RuntimeDyld::MemoryManager {