From 8533a1c40a62e2ae874ae4e3eb6719af24aa34e8 Mon Sep 17 00:00:00 2001
From: Yichao Yu
Date: Sun, 5 Jun 2016 02:09:22 -0400
Subject: [PATCH] Implement custom memory manager for LLVM

Use various ways to reuse the page that has the page protection set in
order to avoid wasting a page of memory for each JIT event.

Fixes #14626
---
 src/Makefile           |   2 +-
 src/cgmemmgr.cpp       | 860 +++++++++++++++++++++++++++++++++++++++++
 src/codegen.cpp        |   3 -
 src/codegen_internal.h |  22 +-
 src/debuginfo.cpp      |  55 ++-
 src/jitlayers.cpp      |  47 +--
 6 files changed, 923 insertions(+), 66 deletions(-)
 create mode 100644 src/cgmemmgr.cpp

diff --git a/src/Makefile b/src/Makefile
index c80c3c4497b44..1617281c73eeb 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -44,7 +44,7 @@ SRCS := \
 LLVMLINK :=
 
 ifeq ($(JULIACODEGEN),LLVM)
-SRCS += codegen disasm debuginfo llvm-simdloop llvm-gcroot
+SRCS += codegen disasm debuginfo llvm-simdloop llvm-gcroot cgmemmgr
 FLAGS += -I$(shell $(LLVM_CONFIG_HOST) --includedir)
 LLVM_LIBS := all
 ifeq ($(USE_POLLY),1)
diff --git a/src/cgmemmgr.cpp b/src/cgmemmgr.cpp
new file mode 100644
index 0000000000000..c1332f87e52a4
--- /dev/null
+++ b/src/cgmemmgr.cpp
@@ -0,0 +1,860 @@
+// This file is a part of Julia. License is MIT: http://julialang.org/license
+
+#include "llvm-version.h"
+#include "platform.h"
+#include "options.h"
+
+#ifdef USE_MCJIT
+#include <llvm/ExecutionEngine/SectionMemoryManager.h>
+#include "julia.h"
+#include "julia_internal.h"
+
+#ifdef LLVM37
+#ifndef LLVM38
+# include <llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h>
+#endif
+#ifdef _OS_LINUX_
+# include <sys/syscall.h>
+# ifdef __NR_memfd_create
+# include <linux/memfd.h>
+# endif
+#endif
+#ifndef _OS_WINDOWS_
+# include <sys/mman.h>
+# include <sys/stat.h>
+# include <fcntl.h>
+# include <unistd.h>
+# if defined(_OS_DARWIN_) && !defined(MAP_ANONYMOUS)
+# define MAP_ANONYMOUS MAP_ANON
+# endif
+#endif
+#ifdef _OS_FREEBSD_
+# include <sys/types.h>
+#endif
+
+namespace {
+
+static size_t get_block_size(size_t size)
+{
+    return (size > jl_page_size * 256 ? LLT_ALIGN(size, jl_page_size) :
+            jl_page_size * 256);
+}
+
+// Wrapper functions to mmap/munmap/mprotect pages...
+static void *map_anon_page(size_t size)
+{
+#ifdef _OS_WINDOWS_
+    char *mem = (char*)VirtualAlloc(NULL, size + jl_page_size,
+                                    MEM_COMMIT, PAGE_READWRITE);
+    assert(mem && "Cannot allocate RW memory");
+    mem = (char*)LLT_ALIGN(uintptr_t(mem), jl_page_size);
+#else // _OS_WINDOWS_
+    void *mem = mmap(nullptr, size, PROT_READ | PROT_WRITE,
+                     MAP_NORESERVE | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+    assert(mem != MAP_FAILED && "Cannot allocate RW memory");
+#endif // _OS_WINDOWS_
+    return mem;
+}
+
+static void unmap_page(void *ptr, size_t size)
+{
+#ifdef _OS_WINDOWS_
+    VirtualFree(ptr, size, MEM_DECOMMIT);
+#else // _OS_WINDOWS_
+    munmap(ptr, size);
+#endif // _OS_WINDOWS_
+}
+
+#ifdef _OS_WINDOWS_
+enum class Prot : int {
+    RW = PAGE_READWRITE,
+    RX = PAGE_EXECUTE,
+    RO = PAGE_READONLY
+};
+
+static void protect_page(void *ptr, size_t size, Prot flags)
+{
+    DWORD old_prot;
+    if (!VirtualProtect(ptr, size, (DWORD)flags, &old_prot)) {
+        jl_safe_printf("Cannot protect page @%p of size %u to 0x%x (err 0x%x)\n",
+                       ptr, (unsigned)size, (unsigned)flags,
+                       (unsigned)GetLastError());
+        abort();
+    }
+}
+#else // _OS_WINDOWS_
+enum class Prot : int {
+    RW = PROT_READ | PROT_WRITE,
+    RX = PROT_READ | PROT_EXEC,
+    RO = PROT_READ
+};
+
+static void protect_page(void *ptr, size_t size, Prot flags)
+{
+    int ret = mprotect(ptr, size, (int)flags);
+    if (ret != 0) {
+        perror(__func__);
+        abort();
+    }
+}
+
+static bool check_fd_or_close(int fd)
+{
+    if (fd == -1)
+        return false;
+    // This can fail due to the `noexec` mount option....
+    fcntl(fd, F_SETFD, FD_CLOEXEC);
+    fchmod(fd, S_IRWXU);
+    ftruncate(fd, jl_page_size);
+    void *ptr = mmap(nullptr, jl_page_size, PROT_READ | PROT_EXEC,
+                     MAP_SHARED, fd, 0);
+    if (ptr == MAP_FAILED) {
+        close(fd);
+        return false;
+    }
+    munmap(ptr, jl_page_size);
+    return true;
+}
+#endif // _OS_WINDOWS_
+
+static intptr_t anon_hdl = -1;
+
+#ifdef _OS_WINDOWS_
+// As far as I can tell, a `CreateFileMapping` mapping cannot be resized
+// on Windows. Also, creating a big file mapping and then mapping pieces
+// of it seems to consume too many global resources. Therefore, we use one
+// file mapping per block on Windows.
+static void *create_shared_map(size_t size, size_t id)
+{
+    void *addr = MapViewOfFile((HANDLE)id, FILE_MAP_ALL_ACCESS,
+                               0, 0, size);
+    assert(addr && "Cannot map RW view");
+    return addr;
+}
+
+static intptr_t init_shared_map()
+{
+    anon_hdl = 0;
+    return 0;
+}
+
+static void *alloc_shared_page(size_t size, size_t *id, bool exec)
+{
+    assert(size % jl_page_size == 0);
+    auto file_mode = exec ? PAGE_EXECUTE_READWRITE : PAGE_READWRITE;
+    HANDLE hdl = CreateFileMapping(INVALID_HANDLE_VALUE, NULL,
+                                   file_mode, 0, size, NULL);
+    *id = (size_t)hdl;
+    auto map_mode = FILE_MAP_READ | (exec ? FILE_MAP_EXECUTE : 0);
+    void *addr = MapViewOfFile(hdl, map_mode, 0, 0, size);
+    assert(addr && "Cannot map RO view");
+    return addr;
+}
+#else // _OS_WINDOWS_
+// For the shared mapped region
+static intptr_t get_anon_hdl(void)
+{
+    int fd = -1;
+
+    // Linux and FreeBSD can create an anonymous fd without touching the
+    // file system.
+# ifdef __NR_memfd_create
+    fd = syscall(__NR_memfd_create, "julia-codegen", MFD_CLOEXEC);
+    if (check_fd_or_close(fd))
+        return fd;
+# endif
+# ifdef _OS_FREEBSD_
+    fd = shm_open(SHM_ANON, O_RDWR, S_IRWXU);
+    if (check_fd_or_close(fd))
+        return fd;
+# endif
+    char shm_name[] = "julia-codegen-0123456789-0123456789/tmp///";
+    pid_t pid = getpid();
+    // `shm_open` memory can't be mapped exec on macOS
+# ifndef _OS_DARWIN_
+    do {
+        snprintf(shm_name, sizeof(shm_name),
+                 "julia-codegen-%d-%d", (int)pid, rand());
+        fd = shm_open(shm_name, O_RDWR | O_CREAT | O_EXCL, S_IRWXU);
+        if (check_fd_or_close(fd)) {
+            shm_unlink(shm_name);
+            return fd;
+        }
+    } while (errno == EEXIST);
+# endif
+    FILE *tmpf = tmpfile();
+    if (tmpf) {
+        fd = dup(fileno(tmpf));
+        fclose(tmpf);
+        if (check_fd_or_close(fd)) {
+            return fd;
+        }
+    }
+    snprintf(shm_name, sizeof(shm_name),
+             "/tmp/julia-codegen-%d-XXXXXX", (int)pid);
+    fd = mkstemp(shm_name);
+    if (check_fd_or_close(fd)) {
+        unlink(shm_name);
+        return fd;
+    }
+    return -1;
+}
+
+static size_t map_offset = 0;
+// Multiple of 128MB.
+// Hopefully no one will set a ulimit for this to be a problem...
+static constexpr size_t map_size_inc = 128 * 1024 * 1024;
+static size_t map_size = 0;
+static jl_mutex_t shared_map_lock;
+
+static void *create_shared_map(size_t size, size_t id)
+{
+    void *addr = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED,
+                      anon_hdl, id);
+    assert(addr != MAP_FAILED && "Cannot map RW view");
+    return addr;
+}
+
+static intptr_t init_shared_map()
+{
+    anon_hdl = get_anon_hdl();
+    if (anon_hdl == -1)
+        return -1;
+    map_offset = 0;
+    map_size = map_size_inc;
+    int ret = ftruncate(anon_hdl, map_size);
+    if (ret != 0) {
+        perror(__func__);
+        abort();
+    }
+    return anon_hdl;
+}
+
+static void *alloc_shared_page(size_t size, size_t *id, bool exec)
+{
+    assert(size % jl_page_size == 0);
+    size_t off = jl_atomic_fetch_add(&map_offset, size);
+    *id = off;
+    if (__unlikely(off + size > map_size)) {
+        JL_LOCK_NOGC(&shared_map_lock);
+        size_t old_size = map_size;
+        while (off + size > map_size)
+            map_size += map_size_inc;
+        if (old_size != map_size) {
+            int ret = ftruncate(anon_hdl, map_size);
+            if (ret != 0) {
+                perror(__func__);
+                abort();
+            }
+        }
+        JL_UNLOCK_NOGC(&shared_map_lock);
+    }
+    return create_shared_map(size, off);
+}
+#endif // _OS_WINDOWS_
+
+#ifdef _OS_LINUX_
+// Using `/proc/self/mem`, A.K.A. Keno's remote memory manager.
+
+static int self_mem_fd = -1;
+
+static int init_self_mem()
+{
+    int fd = open("/proc/self/mem", O_RDWR | O_SYNC | O_CLOEXEC);
+    if (fd == -1)
+        return -1;
+    // Buffer to check whether the write works.
+    volatile uint64_t buff = 0;
+    uint64_t v = 0x12345678;
+    int ret = pwrite(fd, (void*)&v, sizeof(uint64_t), (uintptr_t)&buff);
+    if (ret != sizeof(uint64_t) || buff != 0x12345678) {
+        close(fd);
+        return -1;
+    }
+    self_mem_fd = fd;
+    return fd;
+}
+
+static void write_self_mem(void *dest, void *ptr, size_t size)
+{
+    while (size > 0) {
+        ssize_t ret = pwrite(self_mem_fd, ptr, size, (uintptr_t)dest);
+        if (ret == size)
+            return;
+        if (ret == -1 && (errno == EAGAIN || errno == EINTR))
+            continue;
+        assert(ret < size);
+        size -= ret;
+        ptr = (char*)ptr + ret;
+        dest = (char*)dest + ret;
+    }
+}
+#endif // _OS_LINUX_
+
+using namespace llvm;
+
+// Allocation strategies
+// * For RW data, no memory protection is needed; use a plain memory pool.
+// * For RO data or code:
+//
+//   The first allocation in a page always has a write address equal to
+//   the runtime address.
+//
+//   1. shared dual map
+//
+//      Map an (unlinked) anonymous file as the memory pool.
+//      After the first allocation, the write address points to the
+//      second map. The second map is made unreadable and unwritable
+//      during finalization.
+//
+//   2. private dual map
+//
+//      Same as above, but use an anonymous memory map as the memory
+//      pool, and use low-level OS APIs to set up the second map.
+//
+//   3. copying data into the RO page, bypassing page protection
+//
+//      After the first allocation, the write address points to a
+//      temporary buffer. Requires copying the data out of the temporary
+//      buffer during finalization.
+
+// Allocate at least 256 pages per block and keep up to 8 blocks in the
+// free list. The block with the least free space is discarded when we
+// need to allocate a new page.
+// Unused full pages are freed from the block before discarding, so at
+// most one page is wasted on each discarded block. There should be at
+// most one block with more than 128 pages available, so the discarded
+// one must have fewer than 128 pages available and therefore at least
+// 128 pages used.
+// (Apart from fragmentation) this guarantees that less than 1% of memory
+// is wasted.
+
+// The `shared` type parameter is for Windows only....
+struct Block {
+    // runtime address
+    char *ptr{nullptr};
+    size_t total{0};
+    size_t avail{0};
+
+    Block(const Block&) = delete;
+    Block &operator=(const Block&) = delete;
+
+    Block() = default;
+
+    void *alloc(size_t size, size_t align)
+    {
+        size_t aligned_avail = avail & (-align);
+        if (aligned_avail < size)
+            return nullptr;
+        char *p = ptr + total - aligned_avail;
+        avail = aligned_avail - size;
+        return p;
+    }
+    void reset(void *addr, size_t size)
+    {
+        if (avail >= jl_page_size) {
+            uintptr_t end = uintptr_t(ptr) + total;
+            uintptr_t first_free = end - avail;
+            first_free = LLT_ALIGN(first_free, jl_page_size);
+            assert(first_free < end);
+            unmap_page((void*)first_free, end - first_free);
+        }
+        ptr = (char*)addr;
+        total = avail = size;
+    }
+};
+
+class RWAllocator {
+    static constexpr int nblocks = 8;
+    Block blocks[nblocks]{};
+public:
+    void *alloc(size_t size, size_t align)
+    {
+        size_t min_size = (size_t)-1;
+        int min_id = 0;
+        for (int i = 0;i < nblocks && blocks[i].ptr;i++) {
+            if (void *ptr = blocks[i].alloc(size, align))
+                return ptr;
+            if (blocks[i].avail < min_size) {
+                min_size = blocks[i].avail;
+                min_id = i;
+            }
+        }
+        size_t block_size = get_block_size(size);
+        blocks[min_id].reset(map_anon_page(block_size), block_size);
+        return blocks[min_id].alloc(size, align);
+    }
+};
+
+struct SplitPtrBlock : public Block {
+    // Possible states
+    // Allocation:
+    // * Initial allocation: `state & InitAlloc`
+    // * Followup allocation: `(state & Alloc) && !(state & InitAlloc)`
+    enum State {
+        // This block has no page protection set yet
+        InitAlloc = (1 << 0),
+        // There is at least one allocation in this page since the last
+        // finalization
+        Alloc = (1 << 1),
+        // `wr_ptr` can be directly used as the write address.
+        WRInit = (1 << 2),
+        // With `WRInit` set, whether `wr_ptr` has write permission enabled.
+        WRReady = (1 << 3),
+    };
+
+    uintptr_t wr_ptr{0};
+    uint32_t state{0};
+    SplitPtrBlock() = default;
+
+    void swap(SplitPtrBlock &other)
+    {
+        std::swap(ptr, other.ptr);
+        std::swap(total, other.total);
+        std::swap(avail, other.avail);
+        std::swap(wr_ptr, other.wr_ptr);
+        std::swap(state, other.state);
+    }
+
+    SplitPtrBlock(SplitPtrBlock &&other)
+        : SplitPtrBlock()
+    {
+        swap(other);
+    }
+};
+
+struct Allocation {
+    // Address to write to (the one returned by the allocation function)
+    void *wr_addr;
+    // Runtime address
+    void *rt_addr;
+    size_t sz;
+    bool relocated;
+};
+
+template<bool exec>
+class ROAllocator {
+protected:
+    static constexpr int nblocks = 8;
+    SplitPtrBlock blocks[nblocks];
+    // Blocks that are done allocating (removed from `blocks`)
+    // but might not have all the permissions set or data copied yet.
+    SmallVector<SplitPtrBlock, 16> completed;
+    virtual void *get_wr_ptr(SplitPtrBlock &block, void *rt_ptr,
+                             size_t size, size_t align) = 0;
+    virtual SplitPtrBlock alloc_block(size_t size) = 0;
+public:
+    virtual ~ROAllocator() {}
+    virtual void finalize()
+    {
+        if (exec) {
+            for (auto &alloc: allocations) {
+                sys::Memory::InvalidateInstructionCache(alloc.rt_addr,
+                                                        alloc.sz);
+            }
+        }
+        completed.clear();
+        allocations.clear();
+    }
+    // Allocations that have not been finalized yet.
+    SmallVector<Allocation, 16> allocations;
+    void *alloc(size_t size, size_t align)
+    {
+        size_t min_size = (size_t)-1;
+        int min_id = 0;
+        for (int i = 0;i < nblocks && blocks[i].ptr;i++) {
+            auto &block = blocks[i];
+            void *ptr = block.alloc(size, align);
+            if (ptr) {
+                void *wr_ptr;
+                if (block.state & SplitPtrBlock::InitAlloc) {
+                    wr_ptr = ptr;
+                }
+                else {
+                    wr_ptr = get_wr_ptr(block, ptr, size, align);
+                }
+                block.state |= SplitPtrBlock::Alloc;
+                allocations.push_back(Allocation{wr_ptr, ptr, size, false});
+                return wr_ptr;
+            }
+            if (block.avail < min_size) {
+                min_size = block.avail;
+                min_id = i;
+            }
+        }
+        size_t block_size = get_block_size(size);
+        auto &block = blocks[min_id];
+        auto new_block = alloc_block(block_size);
+        block.swap(new_block);
+        if (new_block.state) {
+            completed.push_back(std::move(new_block));
+        }
+        else {
+            new_block.reset(nullptr, 0);
+        }
+        void *ptr = block.alloc(size, align);
+#ifdef _OS_WINDOWS_
+        block.state = SplitPtrBlock::Alloc;
+        void *wr_ptr = get_wr_ptr(block, ptr, size, align);
+        allocations.push_back(Allocation{wr_ptr, ptr, size, false});
+        ptr = wr_ptr;
+#else
+        block.state = SplitPtrBlock::Alloc | SplitPtrBlock::InitAlloc;
+        allocations.push_back(Allocation{ptr, ptr, size, false});
+#endif
+        return ptr;
+    }
+};
+
+template<bool exec>
+class DualMapAllocator : public ROAllocator<exec> {
+protected:
+    void *get_wr_ptr(SplitPtrBlock &block, void *rt_ptr, size_t, size_t) override
+    {
+        assert((char*)rt_ptr >= block.ptr &&
+               (char*)rt_ptr < (block.ptr + block.total));
+        if (!(block.state & SplitPtrBlock::WRInit)) {
+            block.wr_ptr = (uintptr_t)create_shared_map(block.total,
+                                                        block.wr_ptr);
+            block.state |= SplitPtrBlock::WRInit;
+        }
+        if (!(block.state & SplitPtrBlock::WRReady)) {
+            protect_page((void*)block.wr_ptr, block.total, Prot::RW);
+            block.state |= SplitPtrBlock::WRReady;
+        }
+        return (char*)rt_ptr + (block.wr_ptr - uintptr_t(block.ptr));
+    }
+    SplitPtrBlock alloc_block(size_t size) override
+    {
+        SplitPtrBlock new_block;
+        // use `wr_ptr` to record the id initially
+        auto ptr = alloc_shared_page(size, (size_t*)&new_block.wr_ptr, exec);
+        new_block.reset(ptr, size);
+        return new_block;
+    }
+    void finalize_block(SplitPtrBlock &block, bool reset)
+    {
+        // This function handles setting the block to the right mode
+        // and freeing maps that are not needed anymore.
+        // If `reset` is `true`, we won't allocate in this block anymore
+        // and we should free up resources that are not needed at runtime.
+        if (!(block.state & SplitPtrBlock::Alloc)) {
+            // For a block that was not used this time, check whether we
+            // need to free it.
+            if ((block.state & SplitPtrBlock::WRInit) && reset)
+                unmap_page((void*)block.wr_ptr, block.total);
+            return;
+        }
+        // For a block we used this time
+        if (block.state & SplitPtrBlock::InitAlloc) {
+            // For an initial block, we have a single RW map.
+            // Need to map it to RO or RX.
+            assert(!(block.state & (SplitPtrBlock::WRReady |
+                                    SplitPtrBlock::WRInit)));
+            protect_page(block.ptr, block.total, exec ? Prot::RX : Prot::RO);
+            block.state = 0;
+        }
+        else {
+            // For other ones, the runtime address has the correct mode.
+            // Need to map the write address to RO.
+            assert(block.state & SplitPtrBlock::WRInit);
+            assert(block.state & SplitPtrBlock::WRReady);
+            if (reset) {
+                unmap_page((void*)block.wr_ptr, block.total);
+            }
+            else {
+                protect_page((void*)block.wr_ptr, block.total, Prot::RO);
+                block.state = SplitPtrBlock::WRInit;
+            }
+        }
+    }
+public:
+    DualMapAllocator()
+    {
+        assert(anon_hdl != -1);
+    }
+    void finalize() override
+    {
+        for (auto &block : this->blocks) {
+            finalize_block(block, false);
+        }
+        for (auto &block : this->completed) {
+            finalize_block(block, true);
+            block.reset(nullptr, 0);
+        }
+        ROAllocator<exec>::finalize();
+    }
+};
+
+#ifdef _OS_LINUX_
+template<bool exec>
+class SelfMemAllocator : public ROAllocator<exec> {
+    SmallVector<Block, 16> temp_buff;
+protected:
+    void *get_wr_ptr(SplitPtrBlock &block, void *rt_ptr,
+                     size_t size, size_t align) override
+    {
+        assert(!(block.state & SplitPtrBlock::InitAlloc));
+        for (auto &wr_block: temp_buff) {
+            if (void *ptr = wr_block.alloc(size, align)) {
+                return ptr;
+            }
+        }
+        temp_buff.emplace_back();
+        Block &new_block = temp_buff.back();
+        size_t block_size = get_block_size(size);
+        new_block.reset(map_anon_page(block_size), block_size);
+        return new_block.alloc(size, align);
+    }
+    SplitPtrBlock alloc_block(size_t size) override
+    {
+        SplitPtrBlock new_block;
+        new_block.reset(map_anon_page(size), size);
+        return new_block;
+    }
+    void finalize_block(SplitPtrBlock &block, bool reset)
+    {
+        if (!(block.state & SplitPtrBlock::Alloc))
+            return;
+        if (block.state & SplitPtrBlock::InitAlloc) {
+            // For an initial block, we need to map it to RO or RX.
+            assert(!(block.state & (SplitPtrBlock::WRReady |
+                                    SplitPtrBlock::WRInit)));
+            protect_page(block.ptr, block.total, exec ? Prot::RX : Prot::RO);
+            block.state = 0;
+        }
+    }
+public:
+    SelfMemAllocator()
+        : ROAllocator<exec>(),
+          temp_buff()
+    {
+        assert(self_mem_fd != -1);
+    }
+    void finalize() override
+    {
+        for (auto &block : this->blocks) {
+            finalize_block(block, false);
+        }
+        for (auto &block : this->completed) {
+            finalize_block(block, true);
+            block.reset(nullptr, 0);
+        }
+        for (auto &alloc : this->allocations) {
+            if (alloc.rt_addr == alloc.wr_addr)
+                continue;
+            write_self_mem(alloc.rt_addr, alloc.wr_addr, alloc.sz);
+        }
+        // Clear all the temp buffers except the first one
+        // (we expect only one).
+        bool cached = false;
+        for (auto &block : temp_buff) {
+            if (cached) {
+                munmap(block.ptr, block.total);
+                block.ptr = nullptr;
+                block.total = block.avail = 0;
+            }
+            else {
+                block.avail = block.total;
+                cached = true;
+            }
+        }
+        if (cached)
+            temp_buff.resize(1);
+        ROAllocator<exec>::finalize();
+    }
+};
+#endif // _OS_LINUX_
+
+class RTDyldMemoryManagerJL : public SectionMemoryManager {
+    struct EHFrame {
+        uint8_t *addr;
+        size_t size;
+    };
+    RTDyldMemoryManagerJL(const RTDyldMemoryManagerJL&) = delete;
+    void operator=(const RTDyldMemoryManagerJL&) = delete;
+    SmallVector<EHFrame, 16> pending_eh;
+    RWAllocator rw_alloc;
+    std::unique_ptr<ROAllocator<false>> ro_alloc;
+    std::unique_ptr<ROAllocator<true>> exe_alloc;
+    bool code_allocated;
+
+public:
+    RTDyldMemoryManagerJL()
+        : SectionMemoryManager(),
+          pending_eh(),
+          rw_alloc(),
+          ro_alloc(),
+          exe_alloc(),
+          code_allocated(false)
+    {
+#ifdef _OS_LINUX_
+        if (!ro_alloc && init_self_mem() != -1) {
+            ro_alloc.reset(new SelfMemAllocator<false>());
+            exe_alloc.reset(new SelfMemAllocator<true>());
+        }
+#endif
+        if (!ro_alloc && init_shared_map() != -1) {
+            ro_alloc.reset(new DualMapAllocator<false>());
+            exe_alloc.reset(new DualMapAllocator<true>());
+        }
+    }
+    ~RTDyldMemoryManagerJL() override
+    {
+    }
+    void registerEHFrames(uint8_t *Addr, uint64_t LoadAddr,
+                          size_t Size) override;
+    void deregisterEHFrames(uint8_t *Addr, uint64_t LoadAddr,
+                            size_t Size) override;
+    uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment,
+                                 unsigned SectionID,
+                                 StringRef SectionName) override;
+    uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment,
+                                 unsigned SectionID, StringRef SectionName,
+                                 bool isReadOnly) override;
+#ifdef LLVM38
+    void notifyObjectLoaded(RuntimeDyld &Dyld,
+                            const object::ObjectFile &Obj) override;
+#endif
+    bool finalizeMemory(std::string *ErrMsg = nullptr) override;
+    template <typename DL, typename Alloc>
+    void mapAddresses(DL &Dyld, Alloc &&allocator)
+    {
+        for (auto &alloc: allocator->allocations) {
+            if (alloc.rt_addr == alloc.wr_addr || alloc.relocated)
+                continue;
+            alloc.relocated = true;
+            Dyld.mapSectionAddress(alloc.wr_addr, (uintptr_t)alloc.rt_addr);
+        }
+    }
+    template <typename DL>
+    void mapAddresses(DL &Dyld)
+    {
+        if (!ro_alloc)
+            return;
+        mapAddresses(Dyld, ro_alloc);
+        mapAddresses(Dyld, exe_alloc);
+    }
+#ifdef _OS_WINDOWS_
+    template <typename Alloc>
+    void *lookupWriteAddressFor(void *rt_addr, Alloc &&allocator)
+    {
+        for (auto &alloc: allocator->allocations) {
+            if (alloc.rt_addr == rt_addr) {
+                return alloc.wr_addr;
+            }
+        }
+        return nullptr;
+    }
+    void *lookupWriteAddressFor(void *rt_addr)
+    {
+        if (!ro_alloc)
+            return rt_addr;
+        if (void *ptr = lookupWriteAddressFor(rt_addr, ro_alloc))
+            return ptr;
+        if (void *ptr = lookupWriteAddressFor(rt_addr, exe_alloc))
+            return ptr;
+        return rt_addr;
+    }
+#endif // _OS_WINDOWS_
+};
+
+uint8_t *RTDyldMemoryManagerJL::allocateCodeSection(uintptr_t Size,
+                                                    unsigned Alignment,
+                                                    unsigned SectionID,
+                                                    StringRef SectionName)
+{
+    // Allocating more than one code section can confuse libunwind.
+    assert(!code_allocated);
+    code_allocated = true;
+    if (exe_alloc)
+        return (uint8_t*)exe_alloc->alloc(Size, Alignment);
+    return SectionMemoryManager::allocateCodeSection(Size, Alignment, SectionID,
+                                                     SectionName);
+}
+
+uint8_t *RTDyldMemoryManagerJL::allocateDataSection(uintptr_t Size,
+                                                    unsigned Alignment,
+                                                    unsigned SectionID,
+                                                    StringRef SectionName,
+                                                    bool isReadOnly)
+{
+    if (!isReadOnly)
+        return (uint8_t*)rw_alloc.alloc(Size, Alignment);
+    if (ro_alloc)
+        return (uint8_t*)ro_alloc->alloc(Size, Alignment);
+    return SectionMemoryManager::allocateDataSection(Size, Alignment, SectionID,
+                                                     SectionName, isReadOnly);
+}
+
+#ifdef LLVM38
+void RTDyldMemoryManagerJL::notifyObjectLoaded(RuntimeDyld &Dyld,
+                                               const object::ObjectFile &Obj)
+{
+    if (!ro_alloc) {
+        assert(!exe_alloc);
+        SectionMemoryManager::notifyObjectLoaded(Dyld, Obj);
+        return;
+    }
+    assert(exe_alloc);
+    mapAddresses(Dyld);
+}
+#endif
+
+bool RTDyldMemoryManagerJL::finalizeMemory(std::string *ErrMsg)
+{
+    code_allocated = false;
+    if (ro_alloc) {
+        ro_alloc->finalize();
+        assert(exe_alloc);
+        exe_alloc->finalize();
+        for (auto &frame: pending_eh)
+            register_eh_frames(frame.addr, frame.size);
+        pending_eh.clear();
+        return false;
+    }
+    else {
+        assert(!exe_alloc);
+        return SectionMemoryManager::finalizeMemory(ErrMsg);
+    }
+}
+
+void RTDyldMemoryManagerJL::registerEHFrames(uint8_t *Addr,
+                                             uint64_t LoadAddr,
+                                             size_t Size)
+{
+    if (uintptr_t(Addr) == LoadAddr) {
+        register_eh_frames(Addr, Size);
+    }
+    else {
+        pending_eh.push_back(EHFrame{(uint8_t*)(uintptr_t)LoadAddr, Size});
+    }
+}
+
+void RTDyldMemoryManagerJL::deregisterEHFrames(uint8_t *Addr,
+                                               uint64_t LoadAddr,
+                                               size_t Size)
+{
+    deregister_eh_frames((uint8_t*)LoadAddr, Size);
+}
+
+}
+
+#ifndef LLVM38
+void notifyObjectLoaded(RTDyldMemoryManager *memmgr,
+                        llvm::orc::ObjectLinkingLayerBase::ObjSetHandleT H)
+{
+    ((RTDyldMemoryManagerJL*)memmgr)->mapAddresses(**H);
+}
+#endif
+
+#ifdef _OS_WINDOWS_
+void *lookupWriteAddressFor(RTDyldMemoryManager *memmgr, void *rt_addr)
+{
+    return ((RTDyldMemoryManagerJL*)memmgr)->lookupWriteAddressFor(rt_addr);
+}
+#endif
+
+#else // LLVM37
+typedef SectionMemoryManager RTDyldMemoryManagerJL;
+#endif // LLVM37
+
+RTDyldMemoryManager* createRTDyldMemoryManager()
+{
+    return new RTDyldMemoryManagerJL();
+}
+#endif // USE_MCJIT
diff --git a/src/codegen.cpp b/src/codegen.cpp
index 30aaa00312cc3..869fc5e21ee97 100644
--- a/src/codegen.cpp
+++ b/src/codegen.cpp
@@ -340,9 +340,6 @@ static GlobalVariable *jldll_var;
 JITMemoryManager *createJITMemoryManagerWin();
 #endif
 #endif //_OS_WINDOWS_
-#ifdef USE_MCJIT
-RTDyldMemoryManager *createRTDyldMemoryManager();
-#endif
 
 static Function *jltls_states_func;
 #ifndef JULIA_ENABLE_THREADING
diff --git a/src/codegen_internal.h b/src/codegen_internal.h
index f939edc118807..07388aa5dcb31 100644
--- a/src/codegen_internal.h
+++ b/src/codegen_internal.h
@@ -1,5 +1,11 @@
 // This file is a part of Julia. License is MIT: http://julialang.org/license
 
+#if defined(LLVM37) && !defined(LLVM38)
+# include <llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h>
+void notifyObjectLoaded(RTDyldMemoryManager *memmgr,
+                        llvm::orc::ObjectLinkingLayerBase::ObjSetHandleT H);
+#endif
+
 // Declarations for disasm.cpp
 extern "C"
 void jl_dump_asm_internal(uintptr_t Fptr, size_t Fsize, int64_t slide,
@@ -28,8 +34,16 @@ extern bool jl_dylib_DI_for_fptr(size_t pointer, const object::ObjectFile **obje
                                  bool onlySysImg, bool *isSysImg, void **saddr, char **name, char **filename);
 
 #ifdef USE_ORCJIT
-extern JL_DLLEXPORT void ORCNotifyObjectEmitted(JITEventListener *Listener,
-                                                const object::ObjectFile &obj,
-                                                const object::ObjectFile &debugObj,
-                                                const RuntimeDyld::LoadedObjectInfo &L);
+JL_DLLEXPORT void ORCNotifyObjectEmitted(JITEventListener *Listener,
+                                         const object::ObjectFile &obj,
+                                         const object::ObjectFile &debugObj,
+                                         const RuntimeDyld::LoadedObjectInfo &L,
+                                         RTDyldMemoryManager *memmgr);
+#ifdef _OS_WINDOWS_
+void *lookupWriteAddressFor(RTDyldMemoryManager *memmgr, void *rt_addr);
+#endif
+#endif
+
+#ifdef USE_MCJIT
+RTDyldMemoryManager* createRTDyldMemoryManager(void);
 #endif
diff --git a/src/debuginfo.cpp b/src/debuginfo.cpp
index a62e6a81a8721..10184e317c85e 100644
--- a/src/debuginfo.cpp
+++ b/src/debuginfo.cpp
@@ -298,12 +298,13 @@ class JuliaJITEventListener: public JITEventListener
     virtual void NotifyObjectEmitted(const object::ObjectFile &obj,
                                      const RuntimeDyld::LoadedObjectInfo &L)
     {
-        return _NotifyObjectEmitted(obj,obj,L);
+        return _NotifyObjectEmitted(obj,obj,L,nullptr);
     }
 
     virtual void _NotifyObjectEmitted(const object::ObjectFile &obj,
-                                      const object::ObjectFile &debugObj,
-                                      const RuntimeDyld::LoadedObjectInfo &L)
+                                      const object::ObjectFile &debugObj,
+                                      const RuntimeDyld::LoadedObjectInfo &L,
+                                      RTDyldMemoryManager *memmgr)
 #else
     virtual void NotifyObjectEmitted(const ObjectImage &obj)
 #endif
@@ -340,6 +341,7 @@ class JuliaJITEventListener: public JITEventListener
         uint64_t SectionAddrCheck = 0; // assert that all of the Sections are at the same location
         uint8_t *UnwindData = NULL;
 #if defined(_CPU_X86_64_)
+        uint64_t SectionLoadOffset = 1; // The real offset shouldn't be 1.
         uint8_t *catchjmp = NULL;
         for (const object::SymbolRef &sym_iter : debugObj.symbols()) {
             StringRef sName;
@@ -400,25 +402,37 @@ class JuliaJITEventListener: public JITEventListener
                     assert(SectionAddrCheck == SectionLoadAddr);
                 else
                     SectionAddrCheck = SectionLoadAddr;
+#ifdef USE_ORCJIT
+                if (memmgr)
+                    SectionAddr =
+                        (uintptr_t)lookupWriteAddressFor(memmgr,
+                                                         (void*)SectionLoadAddr);
+#endif
+                if (SectionLoadOffset != 1)
+                    assert(SectionLoadOffset == SectionAddr - SectionLoadAddr);
+                else
+                    SectionLoadOffset = SectionAddr - SectionLoadAddr;
             }
         }
         assert(catchjmp);
         assert(UnwindData);
         assert(SectionAddrCheck);
-        catchjmp[0] = 0x48;
-        catchjmp[1] = 0xb8; // mov RAX, QWORD PTR [&_seh_exception_handle]
-        *(uint64_t*)(&catchjmp[2]) = (uint64_t)&_seh_exception_handler;
-        catchjmp[10] = 0xff;
-        catchjmp[11] = 0xe0; // jmp RAX
-        UnwindData[0] = 0x09; // version info, UNW_FLAG_EHANDLER
-        UnwindData[1] = 4;    // size of prolog (bytes)
-        UnwindData[2] = 2;    // count of unwind codes (slots)
-        UnwindData[3] = 0x05; // frame register (rbp) = rsp
-        UnwindData[4] = 4;    // second instruction
-        UnwindData[5] = 0x03; // mov RBP, RSP
-        UnwindData[6] = 1;    // first instruction
-        UnwindData[7] = 0x50; // push RBP
-        *(DWORD*)&UnwindData[8] = (DWORD)(catchjmp - (uint8_t*)SectionAddrCheck); // relative location of catchjmp
+        assert(SectionLoadOffset != 1);
+        catchjmp[SectionLoadOffset] = 0x48;
+        catchjmp[SectionLoadOffset + 1] = 0xb8; // mov RAX, QWORD PTR [&_seh_exception_handle]
+        *(uint64_t*)(&catchjmp[SectionLoadOffset + 2]) =
+            (uint64_t)&_seh_exception_handler;
+        catchjmp[SectionLoadOffset + 10] = 0xff;
+        catchjmp[SectionLoadOffset + 11] = 0xe0; // jmp RAX
+        UnwindData[SectionLoadOffset] = 0x09; // version info, UNW_FLAG_EHANDLER
+        UnwindData[SectionLoadOffset + 1] = 4;    // size of prolog (bytes)
+        UnwindData[SectionLoadOffset + 2] = 2;    // count of unwind codes (slots)
+        UnwindData[SectionLoadOffset + 3] = 0x05; // frame register (rbp) = rsp
+        UnwindData[SectionLoadOffset + 4] = 4;    // second instruction
+        UnwindData[SectionLoadOffset + 5] = 0x03; // mov RBP, RSP
+        UnwindData[SectionLoadOffset + 6] = 1;    // first instruction
+        UnwindData[SectionLoadOffset + 7] = 0x50; // push RBP
+        *(DWORD*)&UnwindData[SectionLoadOffset + 8] = (DWORD)(catchjmp - (uint8_t*)SectionAddrCheck); // relative location of catchjmp
 #endif // defined(_OS_X86_64_)
 #endif // defined(_OS_WINDOWS_)
@@ -606,9 +620,10 @@ class JuliaJITEventListener: public JITEventListener
 JL_DLLEXPORT void ORCNotifyObjectEmitted(JITEventListener *Listener,
                                          const object::ObjectFile &obj,
                                          const object::ObjectFile &debugObj,
-                                         const RuntimeDyld::LoadedObjectInfo &L)
+                                         const RuntimeDyld::LoadedObjectInfo &L,
+                                         RTDyldMemoryManager *memmgr)
 {
-    ((JuliaJITEventListener*)Listener)->_NotifyObjectEmitted(obj,debugObj,L);
+    ((JuliaJITEventListener*)Listener)->_NotifyObjectEmitted(obj,debugObj,L,memmgr);
 }
 #endif
@@ -1665,7 +1680,7 @@ void register_eh_frames(uint8_t *Addr, size_t Size)
             if (start < start_ip)
                 start_ip = start;
             if (end_ip < (start + size))
-                end_ip = start+size;
+                end_ip = start + size;
             table[cur_entry].fde_offset = safe_trunc((intptr_t)Entry - (intptr_t)Addr);
             start_ips[cur_entry] = start;
diff --git a/src/jitlayers.cpp b/src/jitlayers.cpp
index 69bab17e9f9a2..cd1203b2586de 100644
--- a/src/jitlayers.cpp
+++ b/src/jitlayers.cpp
@@ -131,44 +131,13 @@ static void addOptimizationPasses(T *PM)
     //PM->add(createCFGSimplificationPass()); // Merge & remove BBs
 }
 
-#ifdef USE_MCJIT
-#ifdef LLVM37
-class RTDyldMemoryManagerJL : public SectionMemoryManager {
-    RTDyldMemoryManagerJL(const RTDyldMemoryManagerJL&) = delete;
-    void operator=(const RTDyldMemoryManagerJL&) = delete;
-
-public:
-    RTDyldMemoryManagerJL() {};
-    ~RTDyldMemoryManagerJL() override {};
-    void registerEHFrames(uint8_t *Addr, uint64_t LoadAddr, size_t Size) override;
-    void deregisterEHFrames(uint8_t *Addr, uint64_t LoadAddr, size_t Size) override;
-};
-
-void RTDyldMemoryManagerJL::registerEHFrames(uint8_t *Addr,
-                                             uint64_t LoadAddr,
-                                             size_t Size)
-{
-    register_eh_frames(Addr, Size);
-}
-
-void RTDyldMemoryManagerJL::deregisterEHFrames(uint8_t *Addr,
-                                               uint64_t LoadAddr,
-                                               size_t Size)
-{
-    deregister_eh_frames(Addr, Size);
-}
-#else
-typedef SectionMemoryManager RTDyldMemoryManagerJL;
-#endif
+#ifdef USE_ORCJIT
 
-RTDyldMemoryManager* createRTDyldMemoryManager()
-{
-    return new RTDyldMemoryManagerJL();
-}
+#ifndef LLVM38
+void notifyObjectLoaded(RTDyldMemoryManager *memmgr,
+                        llvm::orc::ObjectLinkingLayerBase::ObjSetHandleT H);
 #endif
 
-#ifdef USE_ORCJIT
-
 // ------------------------ TEMPORARILY COPIED FROM LLVM -----------------
 // This must be kept in sync with gdb/gdb/jit.h .
 extern "C" {
@@ -259,6 +228,9 @@ class JuliaOJIT {
         void operator()(ObjectLinkingLayerBase::ObjSetHandleT H,
                         const ObjSetT &Objects, const LoadResult &LOS)
         {
+#ifndef LLVM38
+            notifyObjectLoaded(JIT.MemMgr, H);
+#endif
             auto oit = Objects.begin();
             auto lit = LOS.begin();
             for (; oit != Objects.end(); ++oit, ++lit) {
@@ -290,7 +262,7 @@ class JuliaOJIT {
             ORCNotifyObjectEmitted(JuliaListener.get(),
                                    *Object,
                                    *SavedObjects.back().getBinary(),
-                                   *LO);
+                                   *LO, JIT.MemMgr);
 
             // record all of the exported symbols defined in this object
             // in the primary hash table for the enclosing JIT
@@ -416,7 +388,7 @@ class JuliaOJIT {
     }
 
-    ModuleHandleT addModule(std::unique_ptr<Module> M)
+    void addModule(std::unique_ptr<Module> M)
     {
 #ifndef NDEBUG
         // validate the relocations for M
@@ -461,7 +433,6 @@ class JuliaOJIT {
         // Force LLVM to emit the module so that we can register the symbols
        // in our lookup table.
         CompileLayer.emitAndFinalize(modset);
-        return modset;
     }
 
     void removeModule(ModuleHandleT H)
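
The sketches below are illustrative notes appended by the editor; they are not part of the patch. This first one shows the dual-mapping idea behind `DualMapAllocator` in isolation: one anonymous file is mapped twice, once writable and once executable, so JIT-ed code can be patched through the first view while its runtime address is never writable. It assumes Linux with `memfd_create` and x86-64; the names (`dualmap_demo`, `dualmap-demo`) are made up for the demo.

    // dualmap_demo.cpp -- illustrative sketch only, not part of the patch.
    // Build: c++ dualmap_demo.cpp -o dualmap_demo   (Linux, x86-64)
    #include <sys/mman.h>
    #include <sys/syscall.h>
    #include <unistd.h>
    #include <cstring>
    #include <cstdio>

    int main()
    {
        size_t pgsz = (size_t)sysconf(_SC_PAGESIZE);
        // One anonymous in-memory file backs both views (cf. get_anon_hdl).
        int fd = (int)syscall(SYS_memfd_create, "dualmap-demo", 0);
        if (fd == -1 || ftruncate(fd, pgsz) == -1)
            return 1;
        // Write view (RW) and run view (RX) of the same physical page.
        char *wr = (char*)mmap(nullptr, pgsz, PROT_READ | PROT_WRITE,
                               MAP_SHARED, fd, 0);
        char *rx = (char*)mmap(nullptr, pgsz, PROT_READ | PROT_EXEC,
                               MAP_SHARED, fd, 0);
        if (wr == MAP_FAILED || rx == MAP_FAILED)
            return 1;
        // x86-64 machine code for `int f() { return 42; }`:
        //     mov eax, 42 ; ret
        static const unsigned char code[] = {0xb8, 0x2a, 0x00, 0x00, 0x00, 0xc3};
        memcpy(wr, code, sizeof(code));     // patch through the RW view
        int (*f)() = (int (*)())(void*)rx;  // execute from the RX view
        printf("%d\n", f());                // prints 42
        return 0;
    }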
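The `SelfMemAllocator` path relies on the fact that `pwrite` on `/proc/self/mem` is performed by the kernel and can bypass the target mapping's protection, which is why `init_self_mem` probes a test write before trusting it. A minimal sketch of the same probe-then-write pattern (again illustrative, Linux only; hardened kernels may refuse the write, exactly the case the patch's probe catches):

    // procmem_demo.cpp -- illustrative sketch only, not part of the patch.
    #include <sys/mman.h>
    #include <fcntl.h>
    #include <unistd.h>
    #include <cstdint>
    #include <cstdio>

    int main()
    {
        size_t pgsz = (size_t)sysconf(_SC_PAGESIZE);
        // A page that is deliberately not writable through normal stores.
        uint8_t *page = (uint8_t*)mmap(nullptr, pgsz, PROT_READ,
                                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (page == MAP_FAILED)
            return 1;
        int fd = open("/proc/self/mem", O_RDWR);
        if (fd == -1)
            return 1;
        uint8_t byte = 0x2a;
        // The offset into /proc/self/mem is the target virtual address;
        // the kernel performs the store even though the page is PROT_READ.
        if (pwrite(fd, &byte, 1, (off_t)(uintptr_t)page) != 1)
            return 1; // e.g. a hardened kernel refused the write
        printf("0x%02x\n", page[0]); // prints 0x2a
        close(fd);
        return 0;
    }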
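Finally, the `avail & (-align)` expression in `Block::alloc` rounds the remaining free space down to a multiple of `align`; because allocations are carved from the end of the block (`ptr + total - aligned_avail`) and the block's end is page-aligned, the returned pointer stays aligned. A small self-checking sketch of that arithmetic (assumes `align` is a power of two, as the allocator does; the helper name `bump_from_end` is invented for the demo):

    // bump_align_demo.cpp -- illustrative sketch only, not part of the patch.
    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    // Mirrors Block::alloc's arithmetic: carve `size` bytes, aligned to
    // `align`, from the end of the remaining space of a block whose end
    // address is a multiple of `align`.
    static uintptr_t bump_from_end(uintptr_t base, size_t total,
                                   size_t *avail, size_t size, size_t align)
    {
        size_t aligned_avail = *avail & (-align); // round free space down
        if (aligned_avail < size)
            return 0; // not enough room
        uintptr_t p = base + total - aligned_avail;
        *avail = aligned_avail - size;
        return p;
    }

    int main()
    {
        uintptr_t base = 0x10000; // pretend page-aligned block
        size_t avail = 4096;
        uintptr_t a = bump_from_end(base, 4096, &avail, 100, 16);
        uintptr_t b = bump_from_end(base, 4096, &avail, 100, 16);
        assert(a && b && a % 16 == 0 && b % 16 == 0 && b > a);
        printf("a=%#lx b=%#lx avail=%zu\n",
               (unsigned long)a, (unsigned long)b, avail);
        return 0;
    }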