diff --git a/Make.inc b/Make.inc index bb1922c32bc44..7c1ca6a5db7a8 100644 --- a/Make.inc +++ b/Make.inc @@ -86,6 +86,9 @@ HAVE_SSP := 0 WITH_GC_VERIFY := 0 WITH_GC_DEBUG_ENV := 0 +# MMTk GC +WITH_MMTK ?= 0 + # Enable DTrace support WITH_DTRACE := 0 @@ -709,6 +712,29 @@ JCXXFLAGS += -DGC_DEBUG_ENV JCFLAGS += -DGC_DEBUG_ENV endif +ifeq ($(WITH_MMTK), 1) +ifeq (${MMTK_JULIA_DIR},) +$(error MMTK_JULIA_DIR must be set to use MMTk) +endif +JCXXFLAGS += -DMMTK_GC +JCFLAGS += -DMMTK_GC +ifeq (${MMTK_BUILD},) +ifeq (debug,$(findstring debug,$(MAKECMDGOALS))) +MMTK_BUILD = debug +else +MMTK_BUILD = release +endif +endif +MMTK_DIR = ${MMTK_JULIA_DIR}/mmtk +MMTK_API_INC = $(MMTK_DIR)/api +MMTK_JULIA_INC = ${MMTK_JULIA_DIR}/julia +MMTK_LIB := -L$(MMTK_DIR)/target/$(MMTK_BUILD) -lmmtk_julia +LDFLAGS += -Wl,-rpath=$(MMTK_DIR)/target/$(MMTK_BUILD)/ +else +MMTK_JULIA_INC := +MMTK_LIB := +endif + ifeq ($(WITH_DTRACE), 1) JCXXFLAGS += -DUSE_DTRACE JCFLAGS += -DUSE_DTRACE diff --git a/contrib/refresh_checksums.mk b/contrib/refresh_checksums.mk index fc632728e9a9e..664a1e4b038e0 100644 --- a/contrib/refresh_checksums.mk +++ b/contrib/refresh_checksums.mk @@ -24,7 +24,7 @@ CLANG_TRIPLETS=$(filter %-darwin %-freebsd,$(TRIPLETS)) NON_CLANG_TRIPLETS=$(filter-out %-darwin %-freebsd,$(TRIPLETS)) # These are the projects currently using BinaryBuilder; both GCC-expanded and non-GCC-expanded: -BB_PROJECTS=mbedtls libssh2 nghttp2 mpfr curl libgit2 pcre libuv unwind llvmunwind dsfmt objconv p7zip zlib libsuitesparse openlibm blastrampoline +BB_PROJECTS=mbedtls libssh2 nghttp2 mpfr curl libgit2 pcre libuv unwind llvmunwind dsfmt objconv p7zip zlib libsuitesparse openlibm blastrampoline libmmtk_julia BB_GCC_EXPANDED_PROJECTS=openblas csl BB_CXX_EXPANDED_PROJECTS=gmp llvm clang llvm-tools lld # These are non-BB source-only deps diff --git a/src/Makefile b/src/Makefile index d113eea5422a5..2e976282015d6 100644 --- a/src/Makefile +++ b/src/Makefile @@ -4,17 +4,6 @@ BUILDDIR := . 
include $(JULIAHOME)/Make.inc include $(JULIAHOME)/deps/llvm-ver.make -ifeq ($(USE_MMTK), 1) -CFLAGS = -DMMTKHEAP -CPPFLAGS = -DMMTKHEAP -MMTK_BUILD_TYPE = ${MMTK_BUILD} -MMTK_DIR = ${MMTK_JULIA_DIR} -MMTK_API_DIR_INCLUDE = $(MMTK_DIR)/api -MMTK_JULIA_DIR_INCLUDE = $(MMTK_DIR)/../julia -MMTK_LIB := -L$(MMTK_DIR)/target/$(MMTK_BUILD_TYPE)/ -lmmtk_julia -LDFLAGS += -Wl,-rpath=$(MMTK_DIR)/target/$(MMTK_BUILD_TYPE)/ -endif - JCFLAGS += $(CFLAGS) JCXXFLAGS += $(CXXFLAGS) JCPPFLAGS += $(CPPFLAGS) @@ -30,10 +19,6 @@ FLAGS := \ -I$(LIBUV_INC) -I$(build_includedir) \ -I$(JULIAHOME)/deps/valgrind -ifeq ($(USE_MMTK), 1) -FLAGS += -I$(MMTK_API_DIR_INCLUDE) -I$(MMTK_JULIA_DIR_INCLUDE) -endif - FLAGS += -Wall -Wno-strict-aliasing -fno-omit-frame-pointer -fvisibility=hidden -fno-common \ -Wno-comment -Wpointer-arith -Wundef ifeq ($(USEGCC),1) # GCC bug #25509 (void)__attribute__((warn_unused_result)) @@ -45,6 +30,10 @@ ifeq ($(USECLANG),1) FLAGS += -Wno-return-type-c-linkage endif +ifeq ($(WITH_MMTK), 1) +FLAGS += -I$(MMTK_API_INC) -I$(MMTK_JULIA_INC) +endif + FLAGS += -DJL_BUILD_ARCH='"$(ARCH)"' ifeq ($(OS),WINNT) FLAGS += -DJL_BUILD_UNAME='"NT"' @@ -60,9 +49,10 @@ SRCS := \ jltypes gf typemap smallintset ast builtins module interpreter symbol \ dlload sys init task array staticdata toplevel jl_uv datatype \ simplevector runtime_intrinsics precompile jloptions \ - threading partr stackwalk gc gc-debug gc-pages gc-stacks gc-alloc-profiler method \ - jlapi signal-handling safepoint timing subtype rtutils gc-heap-snapshot \ - crc32c APInt-C processor ircode opaque_closure codegen-stubs coverage runtime_ccall + threading partr stackwalk gc-common gc gc-debug gc-pages gc-stacks gc-alloc-profiler \ + mmtk-gc method jlapi signal-handling safepoint timing subtype rtutils \ + gc-heap-snapshot crc32c APInt-C processor ircode opaque_closure codegen-stubs \ + coverage runtime_ccall RT_LLVMLINK := CG_LLVMLINK := @@ -173,13 +163,8 @@ LIBJULIA_PATH_REL := libjulia endif COMMON_LIBPATHS := 
-L$(build_libdir) -L$(build_shlibdir) -RT_LIBS := $(WHOLE_ARCHIVE) $(LIBUV) $(WHOLE_ARCHIVE) $(LIBUTF8PROC) $(NO_WHOLE_ARCHIVE) $(LIBUNWIND) $(RT_LLVMLINK) $(OSLIBS) -CG_LIBS := $(LIBUNWIND) $(CG_LLVMLINK) $(OSLIBS) - -ifeq ($(USE_MMTK), 1) -CG_LIBS += $(MMTK_LIB) -RT_LIBS += $(MMTK_LIB) -endif +RT_LIBS := $(WHOLE_ARCHIVE) $(LIBUV) $(WHOLE_ARCHIVE) $(LIBUTF8PROC) $(NO_WHOLE_ARCHIVE) $(LIBUNWIND) $(RT_LLVMLINK) $(OSLIBS) $(MMTK_LIB) +CG_LIBS := $(LIBUNWIND) $(CG_LLVMLINK) $(OSLIBS) $(MMTK_LIB) RT_DEBUG_LIBS := $(COMMON_LIBPATHS) $(WHOLE_ARCHIVE) $(BUILDDIR)/flisp/libflisp-debug.a $(WHOLE_ARCHIVE) $(BUILDDIR)/support/libsupport-debug.a -ljulia-debug $(RT_LIBS) CG_DEBUG_LIBS := $(COMMON_LIBPATHS) $(CG_LIBS) -ljulia-debug -ljulia-internal-debug @@ -189,10 +174,13 @@ CG_RELEASE_LIBS := $(COMMON_LIBPATHS) $(CG_LIBS) -ljulia -ljulia-internal OBJS := $(SRCS:%=$(BUILDDIR)/%.o) DOBJS := $(SRCS:%=$(BUILDDIR)/%.dbg.obj) -ifeq ($(USE_MMTK), 1) +ifeq ($(WITH_MMTK), 1) MMTK_SRCS := mmtk_julia -MMTK_OBJS := $(MMTK_SRCS:%=$(MMTK_JULIA_DIR_INCLUDE)/%.o) -MMTK_DOBJS := $(MMTK_SRCS:%=$(MMTK_JULIA_DIR_INCLUDE)/%.o) +MMTK_OBJS := $(MMTK_SRCS:%=$(MMTK_JULIA_INC)/%.o) +MMTK_DOBJS := $(MMTK_SRCS:%=$(MMTK_JULIA_INC)/%.dbg.obj) +else +MMTK_OBJS := +MMTK_DOBJS := endif CODEGEN_OBJS := $(CODEGEN_SRCS:%=$(BUILDDIR)/%.o) @@ -262,10 +250,10 @@ $(BUILDDIR)/%.o : $(SRCDIR)/%.d $(BUILDDIR)/%.dbg.obj : $(SRCDIR)/%.d @$(call PRINT_DTRACE, $(DTRACE) -G -s $< -o $@) -ifeq ($(USE_MMTK), 1) -$(MMTK_JULIA_DIR_INCLUDE)/%.o: $(MMTK_JULIA_DIR_INCLUDE)/%.c $(HEADERS) | $(MMTK_JULIA_DIR_INCLUDE) +ifeq ($(WITH_MMTK), 1) +$(MMTK_JULIA_INC)/%.o: $(MMTK_JULIA_INC)/%.c $(HEADERS) | $(MMTK_JULIA_INC) @$(call PRINT_CC, $(CC) $(JCPPFLAGS) $(JCFLAGS) $(SHIPFLAGS) $(DISABLE_ASSERTIONS) -c $< -o $@) -$(MMTK_JULIA_DIR_INCLUDE)/%.dbg.obj: $(MMTK_JULIA_DIR_INCLUDE)/%.c $(HEADERS) | $(MMTK_JULIA_DIR_INCLUDE) +$(MMTK_JULIA_INC)/%.dbg.obj: $(MMTK_JULIA_INC)/%.c $(HEADERS) | $(MMTK_JULIA_INC) @$(call PRINT_CC, $(CC) $(JCPPFLAGS) 
$(JCFLAGS) $(DEBUGFLAGS) -c $< -o $@) endif @@ -333,6 +321,8 @@ $(BUILDDIR)/disasm.o $(BUILDDIR)/disasm.dbg.obj: $(SRCDIR)/debuginfo.h $(SRCDIR) $(BUILDDIR)/gc-debug.o $(BUILDDIR)/gc-debug.dbg.obj: $(SRCDIR)/gc.h $(BUILDDIR)/gc-pages.o $(BUILDDIR)/gc-pages.dbg.obj: $(SRCDIR)/gc.h $(BUILDDIR)/gc.o $(BUILDDIR)/gc.dbg.obj: $(SRCDIR)/gc.h $(SRCDIR)/gc-heap-snapshot.h $(SRCDIR)/gc-alloc-profiler.h +$(BUILDDIR)/mmtk-gc.o $(BUILDDIR)/mmtk-gc.dbg.obj: $(SRCDIR)/gc.h $(SRCDIR)/gc-heap-snapshot.h $(SRCDIR)/gc-alloc-profiler.h +$(BUILDDIR)/gc-common.o $(BUILDDIR)/gc-common.dbg.obj: $(SRCDIR)/gc.h $(SRCDIR)/gc-heap-snapshot.h $(SRCDIR)/gc-alloc-profiler.h $(BUILDDIR)/gc-heap-snapshot.o $(BUILDDIR)/gc-heap-snapshot.dbg.obj: $(SRCDIR)/gc.h $(SRCDIR)/gc-heap-snapshot.h $(BUILDDIR)/gc-alloc-profiler.o $(BUILDDIR)/gc-alloc-profiler.dbg.obj: $(SRCDIR)/gc.h $(SRCDIR)/gc-alloc-profiler.h $(BUILDDIR)/init.o $(BUILDDIR)/init.dbg.obj: $(SRCDIR)/builtin_proto.h @@ -398,7 +388,6 @@ $(BUILDDIR)/julia_version.h: $(JULIAHOME)/VERSION CXXLD = $(CXX) -shared -ifeq ($(USE_MMTK), 1) $(build_shlibdir)/libjulia-internal.$(JL_MAJOR_MINOR_SHLIB_EXT): $(SRCDIR)/julia.expmap $(OBJS) $(MMTK_OBJS) $(BUILDDIR)/flisp/libflisp.a $(BUILDDIR)/support/libsupport.a $(LIBUV) @$(call PRINT_LINK, $(CXXLD) $(call IMPLIB_FLAGS,$@) $(JCXXFLAGS) $(CXXLDFLAGS) $(SHIPFLAGS) $(OBJS) $(MMTK_OBJS) $(RPATH_LIB) -o $@ \ $(JLDFLAGS) $(JLIBLDFLAGS) $(RT_RELEASE_LIBS) $(call SONAME_FLAGS,libjulia-internal.$(JL_MAJOR_SHLIB_EXT))) @@ -410,19 +399,6 @@ $(build_shlibdir)/libjulia-internal-debug.$(JL_MAJOR_MINOR_SHLIB_EXT): $(SRCDIR) $(JLDFLAGS) $(JLIBLDFLAGS) $(RT_DEBUG_LIBS) $(call SONAME_FLAGS,libjulia-internal-debug.$(JL_MAJOR_SHLIB_EXT))) @$(INSTALL_NAME_CMD)libjulia-internal-debug.$(SHLIB_EXT) $@ $(DSYMUTIL) $@ -else -$(build_shlibdir)/libjulia-internal.$(JL_MAJOR_MINOR_SHLIB_EXT): $(SRCDIR)/julia.expmap $(OBJS) $(BUILDDIR)/flisp/libflisp.a $(BUILDDIR)/support/libsupport.a $(LIBUV) - @$(call PRINT_LINK, $(CXXLD) $(call 
IMPLIB_FLAGS,$@) $(JCXXFLAGS) $(JL_CXXFLAGS) $(CXXLDFLAGS) $(SHIPFLAGS) $(OBJS) $(RPATH_LIB) -o $@ \ - $(JLDFLAGS) $(JLIBLDFLAGS) $(RT_RELEASE_LIBS) $(call SONAME_FLAGS,libjulia-internal.$(JL_MAJOR_SHLIB_EXT))) - @$(INSTALL_NAME_CMD)libjulia-internal.$(SHLIB_EXT) $@ - $(DSYMUTIL) $@ - -$(build_shlibdir)/libjulia-internal-debug.$(JL_MAJOR_MINOR_SHLIB_EXT): $(SRCDIR)/julia.expmap $(DOBJS) $(BUILDDIR)/flisp/libflisp-debug.a $(BUILDDIR)/support/libsupport-debug.a $(LIBUV) - @$(call PRINT_LINK, $(CXXLD) $(call IMPLIB_FLAGS,$@) $(JCXXFLAGS) $(JL_CXXFLAGS) $(CXXLDFLAGS) $(DEBUGFLAGS) $(DOBJS) $(RPATH_LIB) -o $@ \ - $(JLDFLAGS) $(JLIBLDFLAGS) $(RT_DEBUG_LIBS) $(call SONAME_FLAGS,libjulia-internal-debug.$(JL_MAJOR_SHLIB_EXT))) - @$(INSTALL_NAME_CMD)libjulia-internal-debug.$(SHLIB_EXT) $@ - $(DSYMUTIL) $@ -endif ifneq ($(OS), WINNT) $(build_shlibdir)/libjulia-internal.$(JL_MAJOR_SHLIB_EXT) $(build_shlibdir)/libjulia-internal-debug.$(JL_MAJOR_SHLIB_EXT): $(build_shlibdir)/libjulia-internal%.$(JL_MAJOR_SHLIB_EXT): \ @@ -464,20 +440,11 @@ libjulia-codegen-release: $(build_shlibdir)/libjulia-codegen.$(JL_MAJOR_MINOR_SH libjulia-codegen-debug: $(build_shlibdir)/libjulia-codegen-debug.$(JL_MAJOR_MINOR_SHLIB_EXT) libjulia-codegen-debug libjulia-codegen-release: $(PUBLIC_HEADER_TARGETS) -ifeq ($(USE_MMTK), 1) clean: -rm -fr $(build_shlibdir)/libjulia-internal* $(build_shlibdir)/libccalltest* $(build_shlibdir)/libllvmcalltest* -rm -f $(BUILDDIR)/julia_flisp.boot $(BUILDDIR)/julia_flisp.boot.inc -rm -f $(BUILDDIR)/*.dbg.obj $(BUILDDIR)/*.o $(BUILDDIR)/*.dwo $(BUILDDIR)/*.$(SHLIB_EXT) $(BUILDDIR)/*.a - -rm -f $(BUILDDIR)/julia_version.h - -rm -fr $(MMTK_JULIA_DIR_INCLUDE)/*.o -else -clean: - -rm -fr $(build_shlibdir)/libjulia-internal* $(build_shlibdir)/libjulia-codegen* $(build_shlibdir)/libccalltest* $(build_shlibdir)/libllvmcalltest* - -rm -f $(BUILDDIR)/julia_flisp.boot $(BUILDDIR)/julia_flisp.boot.inc $(BUILDDIR)/jl_internal_funcs.inc - -rm -f $(BUILDDIR)/*.dbg.obj $(BUILDDIR)/*.o 
$(BUILDDIR)/*.dwo $(BUILDDIR)/*.$(SHLIB_EXT) $(BUILDDIR)/*.a $(BUILDDIR)/*.h.gen
-	-rm -f $(BUILDDIR)/julia_version.h
-endif
+	-rm -f $(BUILDDIR)/julia_version.h $(BUILDDIR)/jl_internal_funcs.inc $(BUILDDIR)/*.h.gen $(MMTK_OBJS) $(MMTK_DOBJS)
 
 clean-flisp:
 	-$(MAKE) -C $(SRCDIR)/flisp clean BUILDDIR='$(abspath $(BUILDDIR)/flisp)'
diff --git a/src/array.c b/src/array.c
index f515f5d26c024..c6cefbebceb20 100644
--- a/src/array.c
+++ b/src/array.c
@@ -497,7 +497,7 @@ JL_DLLEXPORT jl_value_t *jl_alloc_string(size_t len)
     jl_ptls_t ptls = ct->ptls;
     const size_t allocsz = sz + sizeof(jl_taggedvalue_t);
     if (sz <= GC_MAX_SZCLASS) {
-#ifndef MMTKHEAP
+#ifndef MMTK_GC
         int pool_id = jl_gc_szclass_align8(allocsz);
         jl_gc_pool_t *p = &ptls->heap.norm_pools[pool_id];
         int osize = jl_gc_sizeclasses[pool_id];
@@ -513,7 +513,7 @@ JL_DLLEXPORT jl_value_t *jl_alloc_string(size_t len)
     else {
         if (allocsz < sz) // overflow in adding offs, size was "negative"
             jl_throw(jl_memory_exception);
-#ifndef MMTKHEAP
+#ifndef MMTK_GC
         s = jl_gc_big_alloc_noinline(ptls, allocsz);
 #else
         s = jl_mmtk_gc_alloc_big(ptls, allocsz);
diff --git a/src/gc-common.c b/src/gc-common.c
new file mode 100644
index 0000000000000..f5636c97fe32a
--- /dev/null
+++ b/src/gc-common.c
@@ -0,0 +1,735 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+#include "gc.h"
+
+// NOTE(review): opening extern "C" added — the file's trailing
+// `#ifdef __cplusplus }` close had no matching open (cf. gc.c / gc-debug.c).
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+jl_gc_num_t gc_num = {0};
+size_t last_long_collect_interval;
+int gc_n_threads;
+jl_ptls_t* gc_all_tls_states;
+
+int64_t live_bytes = 0;
+
+JL_DLLEXPORT _Atomic(int) jl_gc_have_pending_finalizers = 0;
+
+// mutex for gc-heap-snapshot.
+jl_mutex_t heapsnapshot_lock;
+
+const uint64_t _jl_buff_tag[3] = {0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull}; // aka 0xHEADER00
+JL_DLLEXPORT uintptr_t jl_get_buff_tag(void)
+{
+    return jl_buff_tag;
+}
+
+// GC knobs and self-measurement variables
+
+int64_t last_gc_total_bytes = 0;
+
+// max_total_memory is a suggestion. We try very hard to stay
+// under this limit, but we will go above it rather than halting.
+#ifdef _P64 +typedef uint64_t memsize_t; +const size_t default_collect_interval = 5600 * 1024 * sizeof(void*); +const size_t max_collect_interval = 1250000000UL; +size_t total_mem; +// We expose this to the user/ci as jl_gc_set_max_memory +memsize_t max_total_memory = (memsize_t) 2 * 1024 * 1024 * 1024 * 1024 * 1024; +#else +typedef uint32_t memsize_t; +const size_t default_collect_interval = 3200 * 1024 * sizeof(void*); +const size_t max_collect_interval = 500000000UL; +// Work really hard to stay within 2GB +// Alternative is to risk running out of address space +// on 32 bit architectures. +memsize_t max_total_memory = (memsize_t) 2 * 1024 * 1024 * 1024; +#endif + + +// finalizers +// --- +uint64_t finalizer_rngState[4]; + +JL_DLLEXPORT void jl_gc_init_finalizer_rng_state(void) +{ + jl_rng_split(finalizer_rngState, jl_current_task->rngState); +} + +void run_finalizer(jl_task_t *ct, void *o, void *ff) +{ + int ptr_finalizer = gc_ptr_tag(o, 1); + o = gc_ptr_clear_tag(o, 3); + if (ptr_finalizer) { + ((void (*)(void*))ff)((void*)o); + return; + } + JL_TRY { + size_t last_age = ct->world_age; + ct->world_age = jl_atomic_load_acquire(&jl_world_counter); + jl_apply_generic((jl_value_t*)ff, (jl_value_t**)&o, 1); + ct->world_age = last_age; + } + JL_CATCH { + jl_printf((JL_STREAM*)STDERR_FILENO, "error in running finalizer: "); + jl_static_show((JL_STREAM*)STDERR_FILENO, jl_current_exception()); + jl_printf((JL_STREAM*)STDERR_FILENO, "\n"); + jlbacktrace(); // written to STDERR_FILENO + } +} + +JL_DLLEXPORT int jl_gc_get_finalizers_inhibited(jl_ptls_t ptls) +{ + if (ptls == NULL) + ptls = jl_current_task->ptls; + return ptls->finalizers_inhibited; +} + +JL_DLLEXPORT void jl_gc_disable_finalizers_internal(void) +{ + jl_ptls_t ptls = jl_current_task->ptls; + ptls->finalizers_inhibited++; +} + +JL_DLLEXPORT void jl_gc_enable_finalizers_internal(void) +{ + jl_task_t *ct = jl_current_task; +#ifdef NDEBUG + ct->ptls->finalizers_inhibited--; +#else + 
jl_gc_enable_finalizers(ct, 1); +#endif +} + +JL_DLLEXPORT void jl_gc_enable_finalizers(jl_task_t *ct, int on) +{ + if (ct == NULL) + ct = jl_current_task; + jl_ptls_t ptls = ct->ptls; + int old_val = ptls->finalizers_inhibited; + int new_val = old_val + (on ? -1 : 1); + if (new_val < 0) { + JL_TRY { + jl_error(""); // get a backtrace + } + JL_CATCH { + jl_printf((JL_STREAM*)STDERR_FILENO, "WARNING: GC finalizers already enabled on this thread.\n"); + // Only print the backtrace once, to avoid spamming the logs + static int backtrace_printed = 0; + if (backtrace_printed == 0) { + backtrace_printed = 1; + jlbacktrace(); // written to STDERR_FILENO + } + } + return; + } + ptls->finalizers_inhibited = new_val; + if (jl_atomic_load_relaxed(&jl_gc_have_pending_finalizers)) { + jl_gc_run_pending_finalizers(ct); + } +} + + +// allocation +// --- + +JL_DLLEXPORT jl_value_t *(jl_gc_alloc)(jl_ptls_t ptls, size_t sz, void *ty) +{ + return jl_gc_alloc_(ptls, sz, ty); +} + +// Instrumented version of jl_gc_big_alloc_inner, called into by +// LLVM-generated code. +JL_DLLEXPORT jl_value_t *jl_gc_big_alloc(jl_ptls_t ptls, size_t sz) +{ + jl_value_t *val = jl_gc_big_alloc_inner(ptls, sz); + + maybe_record_alloc_to_profile(val, sz, jl_gc_unknown_type_tag); + return val; +} + +// This wrapper exists only to prevent `jl_gc_big_alloc_inner` from being +// inlined into its callers. We provide an external-facing interface for +// callers, and inline `jl_gc_big_alloc_inner` into this. (See +// https://github.com/JuliaLang/julia/pull/43868 for more details.) +jl_value_t *jl_gc_big_alloc_noinline(jl_ptls_t ptls, size_t sz) +{ + return jl_gc_big_alloc_inner(ptls, sz); +} + +// Instrumented version of jl_gc_pool_alloc_inner, called into by LLVM-generated code. 
+JL_DLLEXPORT jl_value_t *jl_gc_pool_alloc(jl_ptls_t ptls, int pool_offset, int osize) +{ + jl_value_t *val = jl_gc_pool_alloc_inner(ptls, pool_offset, osize); + + maybe_record_alloc_to_profile(val, osize, jl_gc_unknown_type_tag); + return val; +} + +// This wrapper exists only to prevent `jl_gc_pool_alloc_inner` from being inlined into +// its callers. We provide an external-facing interface for callers, and inline `jl_gc_pool_alloc_inner` +// into this. (See https://github.com/JuliaLang/julia/pull/43868 for more details.) +jl_value_t *jl_gc_pool_alloc_noinline(jl_ptls_t ptls, int pool_offset, int osize) +{ + return jl_gc_pool_alloc_inner(ptls, pool_offset, osize); +} + +int jl_gc_classify_pools(size_t sz, int *osize) +{ + if (sz > GC_MAX_SZCLASS) + return -1; + size_t allocsz = sz + sizeof(jl_taggedvalue_t); + int klass = jl_gc_szclass(allocsz); + *osize = jl_gc_sizeclasses[klass]; + return (int)(intptr_t)(&((jl_ptls_t)0)->heap.norm_pools[klass]); +} + +// TODO: jl_gc_track_malloced_array needed? Eliminate heap.mallocarrays, +// heap.mafreelist, mallocarray_t? +void jl_gc_track_malloced_array(jl_ptls_t ptls, jl_array_t *a) JL_NOTSAFEPOINT +{ + // This is **NOT** a GC safe point. 
+ mallocarray_t *ma; + if (ptls->heap.mafreelist == NULL) { + ma = (mallocarray_t*)malloc_s(sizeof(mallocarray_t)); + } + else { + ma = ptls->heap.mafreelist; + ptls->heap.mafreelist = ma->next; + } + ma->a = a; + ma->next = ptls->heap.mallocarrays; + ptls->heap.mallocarrays = ma; +} + +void jl_gc_count_allocd(size_t sz) JL_NOTSAFEPOINT +{ + jl_ptls_t ptls = jl_current_task->ptls; + jl_atomic_store_relaxed(&ptls->gc_num.allocd, + jl_atomic_load_relaxed(&ptls->gc_num.allocd) + sz); +} + + +// GCNum, statistics manipulation +// --- +void combine_thread_gc_counts(jl_gc_num_t *dest) JL_NOTSAFEPOINT +{ + int gc_n_threads; + jl_ptls_t* gc_all_tls_states; + gc_n_threads = jl_atomic_load_acquire(&jl_n_threads); + gc_all_tls_states = jl_atomic_load_relaxed(&jl_all_tls_states); + for (int i = 0; i < gc_n_threads; i++) { + jl_ptls_t ptls = gc_all_tls_states[i]; + if (ptls) { + dest->allocd += (jl_atomic_load_relaxed(&ptls->gc_num.allocd) + gc_num.interval); + dest->freed += jl_atomic_load_relaxed(&ptls->gc_num.freed); + dest->malloc += jl_atomic_load_relaxed(&ptls->gc_num.malloc); + dest->realloc += jl_atomic_load_relaxed(&ptls->gc_num.realloc); + dest->poolalloc += jl_atomic_load_relaxed(&ptls->gc_num.poolalloc); + dest->bigalloc += jl_atomic_load_relaxed(&ptls->gc_num.bigalloc); + dest->freecall += jl_atomic_load_relaxed(&ptls->gc_num.freecall); + } + } +} + +void reset_thread_gc_counts(void) JL_NOTSAFEPOINT +{ + int gc_n_threads; + jl_ptls_t* gc_all_tls_states; + gc_n_threads = jl_atomic_load_acquire(&jl_n_threads); + gc_all_tls_states = jl_atomic_load_relaxed(&jl_all_tls_states); + for (int i = 0; i < gc_n_threads; i++) { + jl_ptls_t ptls = gc_all_tls_states[i]; + if (ptls) { + memset(&ptls->gc_num, 0, sizeof(ptls->gc_num)); + jl_atomic_store_relaxed(&ptls->gc_num.allocd, -(int64_t)gc_num.interval); + } + } +} + +void jl_gc_reset_alloc_count(void) JL_NOTSAFEPOINT +{ + combine_thread_gc_counts(&gc_num); + live_bytes += (gc_num.deferred_alloc + gc_num.allocd); + 
gc_num.allocd = 0; + gc_num.deferred_alloc = 0; + reset_thread_gc_counts(); +} + +size_t jl_array_nbytes(jl_array_t *a) JL_NOTSAFEPOINT +{ + size_t sz = 0; + int isbitsunion = jl_array_isbitsunion(a); + if (jl_array_ndims(a) == 1) + sz = a->elsize * a->maxsize + ((a->elsize == 1 && !isbitsunion) ? 1 : 0); + else + sz = a->elsize * jl_array_len(a); + if (isbitsunion) + // account for isbits Union array selector bytes + sz += jl_array_len(a); + return sz; +} + + +void gc_premark(jl_ptls_t ptls2) +{ + arraylist_t *remset = ptls2->heap.remset; + ptls2->heap.remset = ptls2->heap.last_remset; + ptls2->heap.last_remset = remset; + ptls2->heap.remset->len = 0; + ptls2->heap.remset_nptr = 0; + // avoid counting remembered objects + // in `perm_scanned_bytes` + size_t len = remset->len; + void **items = remset->items; + for (size_t i = 0; i < len; i++) { + jl_value_t *item = (jl_value_t *)items[i]; + objprofile_count(jl_typeof(item), 2, 0); + jl_astaggedvalue(item)->bits.gc = GC_OLD_MARKED; + } +} + + + +// GC control +// --- + +_Atomic(uint32_t) jl_gc_disable_counter = 1; + +JL_DLLEXPORT int jl_gc_enable(int on) +{ + jl_ptls_t ptls = jl_current_task->ptls; + int prev = !ptls->disable_gc; + ptls->disable_gc = (on == 0); + if (on && !prev) { + // disable -> enable + if (jl_atomic_fetch_add(&jl_gc_disable_counter, -1) == 1) { + gc_num.allocd += gc_num.deferred_alloc; + gc_num.deferred_alloc = 0; + enable_collection(); + } + } + else if (prev && !on) { + disable_collection(); + // enable -> disable + jl_atomic_fetch_add(&jl_gc_disable_counter, 1); + // check if the GC is running and wait for it to finish + jl_gc_safepoint_(ptls); + } + return prev; +} + +JL_DLLEXPORT int jl_gc_is_enabled(void) +{ + jl_ptls_t ptls = jl_current_task->ptls; + return !ptls->disable_gc; +} + +JL_DLLEXPORT void jl_gc_get_total_bytes(int64_t *bytes) JL_NOTSAFEPOINT +{ + jl_gc_num_t num = gc_num; + combine_thread_gc_counts(&num); + // Sync this logic with `base/util.jl:GC_Diff` + *bytes = 
(num.total_allocd + num.deferred_alloc + num.allocd); +} + +JL_DLLEXPORT uint64_t jl_gc_total_hrtime(void) +{ + return gc_num.total_time; +} + +JL_DLLEXPORT jl_gc_num_t jl_gc_num(void) +{ + jl_gc_num_t num = gc_num; + combine_thread_gc_counts(&num); + return num; +} + +JL_DLLEXPORT void jl_gc_reset_stats(void) +{ + gc_num.max_pause = 0; + gc_num.max_memory = 0; + gc_num.max_time_to_safepoint = 0; +} + +// TODO: these were supposed to be thread local +JL_DLLEXPORT int64_t jl_gc_diff_total_bytes(void) JL_NOTSAFEPOINT +{ + int64_t oldtb = last_gc_total_bytes; + int64_t newtb; + jl_gc_get_total_bytes(&newtb); + last_gc_total_bytes = newtb; + return newtb - oldtb; +} + +JL_DLLEXPORT int64_t jl_gc_sync_total_bytes(int64_t offset) JL_NOTSAFEPOINT +{ + int64_t oldtb = last_gc_total_bytes; + int64_t newtb; + jl_gc_get_total_bytes(&newtb); + last_gc_total_bytes = newtb - offset; + return newtb - oldtb; +} + +JL_DLLEXPORT int64_t jl_gc_live_bytes(void) +{ + return live_bytes; +} + +JL_DLLEXPORT void jl_gc_set_max_memory(uint64_t max_mem) +{ + if (max_mem > 0 && max_mem < (uint64_t)1 << (sizeof(memsize_t) * 8 - 1)) + max_total_memory = max_mem; +} + +// callback for passing OOM errors from gmp +JL_DLLEXPORT void jl_throw_out_of_memory_error(void) +{ + jl_throw(jl_memory_exception); +} + +// allocation wrappers that save the size of allocations, to allow using +// jl_gc_counted_* functions with a libc-compatible API. 
+ +JL_DLLEXPORT void *jl_malloc(size_t sz) +{ + int64_t *p = (int64_t *)jl_gc_counted_malloc(sz + JL_SMALL_BYTE_ALIGNMENT); + if (p == NULL) + return NULL; + p[0] = sz; + return (void *)(p + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16 +} + +//_unchecked_calloc does not check for potential overflow of nm*sz +STATIC_INLINE void *_unchecked_calloc(size_t nm, size_t sz) { + size_t nmsz = nm*sz; + int64_t *p = (int64_t *)jl_gc_counted_calloc(nmsz + JL_SMALL_BYTE_ALIGNMENT, 1); + if (p == NULL) + return NULL; + p[0] = nmsz; + return (void *)(p + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16 +} + +JL_DLLEXPORT void *jl_calloc(size_t nm, size_t sz) +{ + if (nm > SSIZE_MAX/sz - JL_SMALL_BYTE_ALIGNMENT) + return NULL; + return _unchecked_calloc(nm, sz); +} + +JL_DLLEXPORT void jl_free(void *p) +{ + if (p != NULL) { + int64_t *pp = (int64_t *)p - 2; + size_t sz = pp[0]; + jl_gc_counted_free_with_size(pp, sz + JL_SMALL_BYTE_ALIGNMENT); + } +} + +JL_DLLEXPORT void *jl_realloc(void *p, size_t sz) +{ + int64_t *pp; + size_t szold; + if (p == NULL) { + pp = NULL; + szold = 0; + } + else { + pp = (int64_t *)p - 2; + szold = pp[0] + JL_SMALL_BYTE_ALIGNMENT; + } + int64_t *pnew = (int64_t *)jl_gc_counted_realloc_with_old_size(pp, szold, sz + JL_SMALL_BYTE_ALIGNMENT); + if (pnew == NULL) + return NULL; + pnew[0] = sz; + return (void *)(pnew + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16 +} + +// allocating blocks for Arrays and Strings + +JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz) +{ + jl_ptls_t ptls = jl_current_task->ptls; + maybe_collect(ptls); + size_t allocsz = LLT_ALIGN(sz, JL_CACHE_BYTE_ALIGNMENT); + if (allocsz < sz) // overflow in adding offs, size was "negative" + jl_throw(jl_memory_exception); + jl_atomic_store_relaxed(&ptls->gc_num.allocd, + jl_atomic_load_relaxed(&ptls->gc_num.allocd) + allocsz); + jl_atomic_store_relaxed(&ptls->gc_num.malloc, + jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1); + int last_errno = errno; +#ifdef _OS_WINDOWS_ + DWORD last_error = 
GetLastError(); +#endif + void *b = malloc_cache_align(allocsz); + if (b == NULL) + jl_throw(jl_memory_exception); +#ifdef _OS_WINDOWS_ + SetLastError(last_error); +#endif + errno = last_errno; + // jl_gc_managed_malloc is currently always used for allocating array buffers. + maybe_record_alloc_to_profile((jl_value_t*)b, sz, (jl_datatype_t*)jl_buff_tag); + return b; +} + +void *gc_managed_realloc_(jl_ptls_t ptls, void *d, size_t sz, size_t oldsz, + int isaligned, jl_value_t *owner, int8_t can_collect) +{ + if (can_collect) + maybe_collect(ptls); + + size_t allocsz = LLT_ALIGN(sz, JL_CACHE_BYTE_ALIGNMENT); + if (allocsz < sz) // overflow in adding offs, size was "negative" + jl_throw(jl_memory_exception); + + // TODO: not needed? gc_cache.*? + if (jl_astaggedvalue(owner)->bits.gc == GC_OLD_MARKED) { + ptls->gc_cache.perm_scanned_bytes += allocsz - oldsz; + live_bytes += allocsz - oldsz; + } + else if (allocsz < oldsz) + jl_atomic_store_relaxed(&ptls->gc_num.freed, + jl_atomic_load_relaxed(&ptls->gc_num.freed) + (oldsz - allocsz)); + else + jl_atomic_store_relaxed(&ptls->gc_num.allocd, + jl_atomic_load_relaxed(&ptls->gc_num.allocd) + (allocsz - oldsz)); + jl_atomic_store_relaxed(&ptls->gc_num.realloc, + jl_atomic_load_relaxed(&ptls->gc_num.realloc) + 1); + + int last_errno = errno; +#ifdef _OS_WINDOWS_ + DWORD last_error = GetLastError(); +#endif + void *b; + if (isaligned) + b = realloc_cache_align(d, allocsz, oldsz); + else + b = realloc(d, allocsz); + if (b == NULL) + jl_throw(jl_memory_exception); +#ifdef _OS_WINDOWS_ + SetLastError(last_error); +#endif + errno = last_errno; + maybe_record_alloc_to_profile((jl_value_t*)b, sz, jl_gc_unknown_type_tag); + return b; +} + +JL_DLLEXPORT void *jl_gc_managed_realloc(void *d, size_t sz, size_t oldsz, + int isaligned, jl_value_t *owner) +{ + jl_ptls_t ptls = jl_current_task->ptls; + return gc_managed_realloc_(ptls, d, sz, oldsz, isaligned, owner, 1); +} + +// Perm gen allocator +// 2M pool +#define GC_PERM_POOL_SIZE (2 * 
1024 * 1024) +// 20k limit for pool allocation. At most 1% fragmentation +#define GC_PERM_POOL_LIMIT (20 * 1024) +uv_mutex_t gc_perm_lock; +static uintptr_t gc_perm_pool = 0; +static uintptr_t gc_perm_end = 0; + +static void *gc_perm_alloc_large(size_t sz, int zero, unsigned align, unsigned offset) JL_NOTSAFEPOINT +{ + // `align` must be power of two + assert(offset == 0 || offset < align); + const size_t malloc_align = sizeof(void*) == 8 ? 16 : 4; + if (align > 1 && (offset != 0 || align > malloc_align)) + sz += align - 1; + int last_errno = errno; +#ifdef _OS_WINDOWS_ + DWORD last_error = GetLastError(); +#endif + void *base = zero ? calloc(1, sz) : malloc(sz); + if (base == NULL) + jl_throw(jl_memory_exception); +#ifdef _OS_WINDOWS_ + SetLastError(last_error); +#endif + errno = last_errno; + jl_may_leak(base); + assert(align > 0); + unsigned diff = (offset - (uintptr_t)base) % align; + return (void*)((char*)base + diff); +} + +STATIC_INLINE void *gc_try_perm_alloc_pool(size_t sz, unsigned align, unsigned offset) JL_NOTSAFEPOINT +{ + uintptr_t pool = LLT_ALIGN(gc_perm_pool + offset, (uintptr_t)align) - offset; + uintptr_t end = pool + sz; + if (end > gc_perm_end) + return NULL; + gc_perm_pool = end; + return (void*)jl_assume(pool); +} + +// **NOT** a safepoint +void *jl_gc_perm_alloc_nolock(size_t sz, int zero, unsigned align, unsigned offset) +{ + // The caller should have acquired `gc_perm_lock` + assert(align < GC_PERM_POOL_LIMIT); +#ifndef MEMDEBUG + if (__unlikely(sz > GC_PERM_POOL_LIMIT)) +#endif + return gc_perm_alloc_large(sz, zero, align, offset); + void *ptr = gc_try_perm_alloc_pool(sz, align, offset); + if (__likely(ptr)) + return ptr; + int last_errno = errno; +#ifdef _OS_WINDOWS_ + DWORD last_error = GetLastError(); + void *pool = VirtualAlloc(NULL, GC_PERM_POOL_SIZE, MEM_COMMIT, PAGE_READWRITE); + SetLastError(last_error); + errno = last_errno; + if (__unlikely(pool == NULL)) + return NULL; +#else + void *pool = mmap(0, GC_PERM_POOL_SIZE, PROT_READ 
| PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + errno = last_errno; + if (__unlikely(pool == MAP_FAILED)) + return NULL; +#endif + gc_perm_pool = (uintptr_t)pool; + gc_perm_end = gc_perm_pool + GC_PERM_POOL_SIZE; + return gc_try_perm_alloc_pool(sz, align, offset); +} + +// **NOT** a safepoint +void *jl_gc_perm_alloc(size_t sz, int zero, unsigned align, unsigned offset) +{ + assert(align < GC_PERM_POOL_LIMIT); +#ifndef MEMDEBUG + if (__unlikely(sz > GC_PERM_POOL_LIMIT)) +#endif + return gc_perm_alloc_large(sz, zero, align, offset); + uv_mutex_lock(&gc_perm_lock); + void *p = jl_gc_perm_alloc_nolock(sz, zero, align, offset); + uv_mutex_unlock(&gc_perm_lock); + return p; +} + +JL_DLLEXPORT void jl_gc_add_finalizer(jl_value_t *v, jl_function_t *f) +{ + jl_ptls_t ptls = jl_current_task->ptls; + jl_gc_add_finalizer_th(ptls, v, f); +} + +JL_DLLEXPORT void jl_finalize(jl_value_t *o) +{ + jl_finalize_th(jl_current_task, o); +} + +JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref(jl_value_t *value) +{ + jl_ptls_t ptls = jl_current_task->ptls; + return jl_gc_new_weakref_th(ptls, value); +} + +JL_DLLEXPORT jl_value_t *jl_gc_allocobj(size_t sz) +{ + jl_ptls_t ptls = jl_current_task->ptls; + return jl_gc_alloc(ptls, sz, NULL); +} + +JL_DLLEXPORT jl_value_t *jl_gc_alloc_0w(void) +{ + jl_ptls_t ptls = jl_current_task->ptls; + return jl_gc_alloc(ptls, 0, NULL); +} + +JL_DLLEXPORT jl_value_t *jl_gc_alloc_1w(void) +{ + jl_ptls_t ptls = jl_current_task->ptls; + return jl_gc_alloc(ptls, sizeof(void*), NULL); +} + +JL_DLLEXPORT jl_value_t *jl_gc_alloc_2w(void) +{ + jl_ptls_t ptls = jl_current_task->ptls; + return jl_gc_alloc(ptls, sizeof(void*) * 2, NULL); +} + +JL_DLLEXPORT jl_value_t *jl_gc_alloc_3w(void) +{ + jl_ptls_t ptls = jl_current_task->ptls; + return jl_gc_alloc(ptls, sizeof(void*) * 3, NULL); +} + +JL_DLLEXPORT size_t jl_gc_max_internal_obj_size(void) +{ + // TODO: meaningful for MMTk? 
+    return GC_MAX_SZCLASS;
+}
+
+JL_DLLEXPORT size_t jl_gc_external_obj_hdr_size(void)
+{
+    return sizeof(bigval_t);
+}
+
+
+JL_DLLEXPORT void * jl_gc_alloc_typed(jl_ptls_t ptls, size_t sz, void *ty)
+{
+    return jl_gc_alloc(ptls, sz, ty);
+}
+
+JL_DLLEXPORT void jl_gc_schedule_foreign_sweepfunc(jl_ptls_t ptls, jl_value_t *obj)
+{
+    // TODO: correct for MMTk?
+    arraylist_push(&ptls->sweep_objs, obj);
+}
+
+
+// gc-debug common functions
+// ---
+
+int gc_slot_to_fieldidx(void *obj, void *slot, jl_datatype_t *vt) JL_NOTSAFEPOINT
+{
+    int nf = (int)jl_datatype_nfields(vt);
+    for (int i = 1; i < nf; i++) {
+        if (slot < (void*)((char*)obj + jl_field_offset(vt, i)))
+            return i - 1;
+    }
+    return nf - 1;
+}
+
+int gc_slot_to_arrayidx(void *obj, void *_slot) JL_NOTSAFEPOINT
+{
+    char *slot = (char*)_slot;
+    jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(obj);
+    char *start = NULL;
+    size_t len = 0;
+    size_t elsize = sizeof(void*);
+    if (vt == jl_module_type) {
+        jl_module_t *m = (jl_module_t*)obj;
+        start = (char*)m->usings.items;
+        len = m->usings.len;
+    }
+    else if (vt == jl_simplevector_type) {
+        start = (char*)jl_svec_data(obj);
+        len = jl_svec_len(obj);
+    }
+    else if (vt->name == jl_array_typename) {
+        jl_array_t *a = (jl_array_t*)obj;
+        start = (char*)a->data;
+        len = jl_array_len(a);
+        elsize = a->elsize;
+    }
+    if (slot < start || slot >= start + elsize * len)
+        return -1;
+    return (slot - start) / elsize;
+}
+
+// NOTE(review): must have external linkage (no `static`) — gc-debug.c's
+// _report_gc_finished still reads gc_logging_enabled after its own
+// definition is deleted by this patch.
+int gc_logging_enabled = 0;
+
+JL_DLLEXPORT void jl_enable_gc_logging(int enable) {
+    gc_logging_enabled = enable;
+}
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/src/gc-debug.c b/src/gc-debug.c
index a233b18d7dcfc..c5ab21a3fb3c1 100644
--- a/src/gc-debug.c
+++ b/src/gc-debug.c
@@ -1,5 +1,7 @@ // This file is a part of Julia.
License is MIT: https://julialang.org/license +#ifndef MMTK_GC + #include "gc.h" #include #include @@ -1231,43 +1233,6 @@ void gc_count_pool(void) jl_safe_printf("************************\n"); } -int gc_slot_to_fieldidx(void *obj, void *slot, jl_datatype_t *vt) JL_NOTSAFEPOINT -{ - int nf = (int)jl_datatype_nfields(vt); - for (int i = 1; i < nf; i++) { - if (slot < (void*)((char*)obj + jl_field_offset(vt, i))) - return i - 1; - } - return nf - 1; -} - -int gc_slot_to_arrayidx(void *obj, void *_slot) JL_NOTSAFEPOINT -{ - char *slot = (char*)_slot; - jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(obj); - char *start = NULL; - size_t len = 0; - size_t elsize = sizeof(void*); - if (vt == jl_module_type) { - jl_module_t *m = (jl_module_t*)obj; - start = (char*)m->usings.items; - len = m->usings.len; - } - else if (vt == jl_simplevector_type) { - start = (char*)jl_svec_data(obj); - len = jl_svec_len(obj); - } - else if (vt->name == jl_array_typename) { - jl_array_t *a = (jl_array_t*)obj; - start = (char*)a->data; - len = jl_array_len(a); - elsize = a->elsize; - } - if (slot < start || slot >= start + elsize * len) - return -1; - return (slot - start) / elsize; -} - // Print a backtrace from the `mq->start` of the mark queue up to `mq->current` // `offset` will be added to `mq->current` for convenience in the debugger. 
NOINLINE void gc_mark_loop_unwind(jl_ptls_t ptls, jl_gc_markqueue_t *mq, int offset) @@ -1292,12 +1257,6 @@ NOINLINE void gc_mark_loop_unwind(jl_ptls_t ptls, jl_gc_markqueue_t *mq, int off jl_set_safe_restore(old_buf); } -static int gc_logging_enabled = 0; - -JL_DLLEXPORT void jl_enable_gc_logging(int enable) { - gc_logging_enabled = enable; -} - void _report_gc_finished(uint64_t pause, uint64_t freed, int full, int recollect) JL_NOTSAFEPOINT { if (!gc_logging_enabled) { return; @@ -1312,3 +1271,5 @@ void _report_gc_finished(uint64_t pause, uint64_t freed, int full, int recollect #ifdef __cplusplus } #endif + +#endif // !MMTK_GC diff --git a/src/gc-pages.c b/src/gc-pages.c index d579eb0cd4fbb..e367334450863 100644 --- a/src/gc-pages.c +++ b/src/gc-pages.c @@ -1,5 +1,7 @@ // This file is a part of Julia. License is MIT: https://julialang.org/license +#ifndef MMTK_GC + #include "gc.h" #ifndef _OS_WINDOWS_ # include @@ -335,3 +337,5 @@ void jl_gc_free_page(void *p) JL_NOTSAFEPOINT #ifdef __cplusplus } #endif + +#endif // !MMTK_GC diff --git a/src/gc.c b/src/gc.c index cab7c37369450..e656fa331be38 100644 --- a/src/gc.c +++ b/src/gc.c @@ -1,5 +1,7 @@ // This file is a part of Julia. License is MIT: https://julialang.org/license +#ifndef MMTK_GC + #include "gc.h" #include "julia_gcext.h" #include "julia_assert.h" @@ -7,10 +9,6 @@ #include // for malloc_trim #endif -#ifdef MMTKHEAP -#include "mmtk_julia.h" -#endif - #ifdef __cplusplus extern "C" { #endif @@ -123,9 +121,6 @@ JL_DLLEXPORT void jl_gc_set_cb_notify_external_free(jl_gc_cb_notify_external_fre static jl_mutex_t finalizers_lock; static uv_mutex_t gc_cache_lock; -// mutex for gc-heap-snapshot. -jl_mutex_t heapsnapshot_lock; - // Flag that tells us whether we need to support conservative marking // of objects. static _Atomic(int) support_conservative_marking = 0; @@ -162,16 +157,6 @@ static _Atomic(int) support_conservative_marking = 0; * finalizers in unmanaged (GC safe) mode. 
*/ -jl_gc_num_t gc_num = {0}; -static size_t last_long_collect_interval; -int gc_n_threads; -jl_ptls_t* gc_all_tls_states; -const uint64_t _jl_buff_tag[3] = {0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull}; // aka 0xHEADER00 -JL_DLLEXPORT uintptr_t jl_get_buff_tag(void) -{ - return jl_buff_tag; -} - pagetable_t memory_map; // List of marked big objects. Not per-thread. Accessed only by master thread. @@ -185,7 +170,6 @@ bigval_t *big_objects_marked = NULL; // `to_finalize` should not have tagged pointers. arraylist_t finalizer_list_marked; arraylist_t to_finalize; -JL_DLLEXPORT _Atomic(int) jl_gc_have_pending_finalizers = 0; NOINLINE uintptr_t gc_get_stack_ptr(void) { @@ -215,31 +199,26 @@ void jl_gc_wait_for_the_world(jl_ptls_t* gc_all_tls_states, int gc_n_threads) } -void jl_gc_wait_for_the_world(jl_ptls_t* gc_all_tls_states, int gc_n_threads); - // malloc wrappers, aligned allocation #if defined(_OS_WINDOWS_) -STATIC_INLINE void *jl_malloc_aligned(size_t sz, size_t align) +inline void *jl_malloc_aligned(size_t sz, size_t align) { return _aligned_malloc(sz ? sz : 1, align); } -STATIC_INLINE void *jl_realloc_aligned(void *p, size_t sz, size_t oldsz, +inline void *jl_realloc_aligned(void *p, size_t sz, size_t oldsz, size_t align) { (void)oldsz; return _aligned_realloc(p, sz ? 
sz : 1, align); } -STATIC_INLINE void jl_free_aligned(void *p) JL_NOTSAFEPOINT +inline void jl_free_aligned(void *p) JL_NOTSAFEPOINT { _aligned_free(p); } #else -STATIC_INLINE void *jl_malloc_aligned(size_t sz, size_t align) +inline void *jl_malloc_aligned(size_t sz, size_t align) { -#ifdef MMTKHEAP - return mmtk_malloc_aligned(sz, align); -#endif #if defined(_P64) || defined(__APPLE__) if (align <= 16) return malloc(sz); @@ -249,17 +228,9 @@ STATIC_INLINE void *jl_malloc_aligned(size_t sz, size_t align) return NULL; return ptr; } -STATIC_INLINE void *jl_realloc_aligned(void *d, size_t sz, size_t oldsz, +inline void *jl_realloc_aligned(void *d, size_t sz, size_t oldsz, size_t align) { -#ifdef MMTKHEAP - void *res = jl_malloc_aligned(sz, align); - if (res != NULL) { - memcpy(res, d, oldsz > sz ? sz : oldsz); - mmtk_free_aligned(d); - } - return res; -#endif #if defined(_P64) || defined(__APPLE__) if (align <= 16) return realloc(d, sz); @@ -271,17 +242,11 @@ STATIC_INLINE void *jl_realloc_aligned(void *d, size_t sz, size_t oldsz, } return b; } -STATIC_INLINE void jl_free_aligned(void *p) JL_NOTSAFEPOINT +inline void jl_free_aligned(void *p) JL_NOTSAFEPOINT { -#ifdef MMTKHEAP - mmtk_free_aligned(p); -#else free(p); -#endif } #endif -#define malloc_cache_align(sz) jl_malloc_aligned(sz, JL_CACHE_BYTE_ALIGNMENT) -#define realloc_cache_align(p, sz, oldsz) jl_realloc_aligned(p, sz, oldsz, JL_CACHE_BYTE_ALIGNMENT) static void schedule_finalization(void *o, void *f) JL_NOTSAFEPOINT { @@ -292,31 +257,6 @@ static void schedule_finalization(void *o, void *f) JL_NOTSAFEPOINT jl_atomic_store_relaxed(&jl_gc_have_pending_finalizers, 1); } -#ifndef MMTKHEAP -static -#endif -void run_finalizer(jl_task_t *ct, void *o, void *ff) -{ - int ptr_finalizer = gc_ptr_tag(o, 1); - o = gc_ptr_clear_tag(o, 3); - if (ptr_finalizer) { - ((void (*)(void*))ff)((void*)o); - return; - } - JL_TRY { - size_t last_age = ct->world_age; - ct->world_age = jl_atomic_load_acquire(&jl_world_counter); - 
jl_apply_generic((jl_value_t*)ff, (jl_value_t**)&o, 1); - ct->world_age = last_age; - } - JL_CATCH { - jl_printf((JL_STREAM*)STDERR_FILENO, "error in running finalizer: "); - jl_static_show((JL_STREAM*)STDERR_FILENO, jl_current_exception()); - jl_printf((JL_STREAM*)STDERR_FILENO, "\n"); - jlbacktrace(); // written to STDERR_FILENO - } -} - // if `need_sync` is true, the `list` is the `finalizers` list of another // thread and we need additional synchronizations static void finalize_object(arraylist_t *list, jl_value_t *o, @@ -404,24 +344,8 @@ static void jl_gc_run_finalizers_in_list(jl_task_t *ct, arraylist_t *list) JL_NO ct->sticky = sticky; } -#ifndef MMTKHEAP -static -#endif -uint64_t finalizer_rngState[4]; - -void jl_rng_split(uint64_t to[4], uint64_t from[4]) JL_NOTSAFEPOINT; - -JL_DLLEXPORT void jl_gc_init_finalizer_rng_state(void) -{ - jl_rng_split(finalizer_rngState, jl_current_task->rngState); -} - static void run_finalizers(jl_task_t *ct) { -#ifdef MMTKHEAP - mmtk_jl_run_finalizers(ct->ptls); - return; -#endif // Racy fast path: // The race here should be OK since the race can only happen if // another thread is writing to it with the lock held. 
In such case, @@ -460,67 +384,12 @@ JL_DLLEXPORT void jl_gc_run_pending_finalizers(jl_task_t *ct) { if (ct == NULL) ct = jl_current_task; -#ifdef MMTKHEAP - mmtk_jl_run_pending_finalizers(ct->ptls); - return; -#endif jl_ptls_t ptls = ct->ptls; if (!ptls->in_finalizer && ptls->locks.len == 0 && ptls->finalizers_inhibited == 0) { run_finalizers(ct); } } -JL_DLLEXPORT int jl_gc_get_finalizers_inhibited(jl_ptls_t ptls) -{ - if (ptls == NULL) - ptls = jl_current_task->ptls; - return ptls->finalizers_inhibited; -} - -JL_DLLEXPORT void jl_gc_disable_finalizers_internal(void) -{ - jl_ptls_t ptls = jl_current_task->ptls; - ptls->finalizers_inhibited++; -} - -JL_DLLEXPORT void jl_gc_enable_finalizers_internal(void) -{ - jl_task_t *ct = jl_current_task; -#ifdef NDEBUG - ct->ptls->finalizers_inhibited--; -#else - jl_gc_enable_finalizers(ct, 1); -#endif -} - -JL_DLLEXPORT void jl_gc_enable_finalizers(jl_task_t *ct, int on) -{ - if (ct == NULL) - ct = jl_current_task; - jl_ptls_t ptls = ct->ptls; - int old_val = ptls->finalizers_inhibited; - int new_val = old_val + (on ? 
-1 : 1); - if (new_val < 0) { - JL_TRY { - jl_error(""); // get a backtrace - } - JL_CATCH { - jl_printf((JL_STREAM*)STDERR_FILENO, "WARNING: GC finalizers already enabled on this thread.\n"); - // Only print the backtrace once, to avoid spamming the logs - static int backtrace_printed = 0; - if (backtrace_printed == 0) { - backtrace_printed = 1; - jlbacktrace(); // written to STDERR_FILENO - } - } - return; - } - ptls->finalizers_inhibited = new_val; - if (jl_atomic_load_relaxed(&jl_gc_have_pending_finalizers)) { - jl_gc_run_pending_finalizers(ct); - } -} - static void schedule_all_finalizers(arraylist_t *flist) JL_NOTSAFEPOINT { void **items = flist->items; @@ -537,6 +406,7 @@ static void schedule_all_finalizers(arraylist_t *flist) JL_NOTSAFEPOINT void jl_gc_run_all_finalizers(jl_task_t *ct) { + if (!ct) return; int gc_n_threads; jl_ptls_t* gc_all_tls_states; gc_n_threads = jl_atomic_load_acquire(&jl_n_threads); @@ -554,10 +424,6 @@ void jl_gc_run_all_finalizers(jl_task_t *ct) void jl_gc_add_finalizer_(jl_ptls_t ptls, void *v, void *f) JL_NOTSAFEPOINT { -#ifdef MMTKHEAP - register_finalizer(v, f, 0); - return; -#endif assert(jl_atomic_load_relaxed(&ptls->gc_state) == 0); arraylist_t *a = &ptls->finalizers; // This acquire load and the release store at the end are used to @@ -586,20 +452,14 @@ void jl_gc_add_finalizer_(jl_ptls_t ptls, void *v, void *f) JL_NOTSAFEPOINT JL_DLLEXPORT void jl_gc_add_ptr_finalizer(jl_ptls_t ptls, jl_value_t *v, void *f) JL_NOTSAFEPOINT { -#ifndef MMTKHEAP jl_gc_add_finalizer_(ptls, (void*)(((uintptr_t)v) | 1), f); -#else - register_finalizer(v, f, 1); -#endif } // schedule f(v) to call at the next quiescent interval (aka after the next safepoint/region on all threads) JL_DLLEXPORT void jl_gc_add_quiescent(jl_ptls_t ptls, void **v, void *f) JL_NOTSAFEPOINT { -#ifndef MMTKHEAP assert(!gc_ptr_tag(v, 3)); jl_gc_add_finalizer_(ptls, (void*)(((uintptr_t)v) | 3), f); -#endif } JL_DLLEXPORT void jl_gc_add_finalizer_th(jl_ptls_t ptls, 
jl_value_t *v, jl_function_t *f) JL_NOTSAFEPOINT @@ -614,10 +474,6 @@ JL_DLLEXPORT void jl_gc_add_finalizer_th(jl_ptls_t ptls, jl_value_t *v, jl_funct JL_DLLEXPORT void jl_finalize_th(jl_task_t *ct, jl_value_t *o) { -#ifdef MMTKHEAP - run_finalizers_for_obj(o); - return; -#endif JL_LOCK_NOGC(&finalizers_lock); // Copy the finalizers into a temporary list so that code in the finalizer // won't change the list as we loop through them. @@ -677,28 +533,6 @@ static void gc_sweep_foreign_objs(void) } } -// GC knobs and self-measurement variables -static int64_t last_gc_total_bytes = 0; - -// max_total_memory is a suggestion. We try very hard to stay -// under this limit, but we will go above it rather than halting. -#ifdef _P64 -typedef uint64_t memsize_t; -static const size_t default_collect_interval = 5600 * 1024 * sizeof(void*); -static const size_t max_collect_interval = 1250000000UL; -static size_t total_mem; -// We expose this to the user/ci as jl_gc_set_max_memory -static memsize_t max_total_memory = (memsize_t) 2 * 1024 * 1024 * 1024 * 1024 * 1024; -#else -typedef uint32_t memsize_t; -static const size_t default_collect_interval = 3200 * 1024 * sizeof(void*); -static const size_t max_collect_interval = 500000000UL; -// Work really hard to stay within 2GB -// Alternative is to risk running out of address space -// on 32 bit architectures. -static memsize_t max_total_memory = (memsize_t) 2 * 1024 * 1024 * 1024; -#endif - // global variables for GC stats // Resetting the object to a young object, this is used when marking the @@ -761,7 +595,7 @@ int prev_sweep_full = 1; #define inc_sat(v,s) v = (v) >= s ? 
s : (v)+1 // Full collection heuristics -static int64_t live_bytes = 0; +extern int64_t live_bytes; static int64_t promoted_bytes = 0; static int64_t last_live_bytes = 0; // live_bytes at last collection static int64_t t_start = 0; // Time GC starts; @@ -977,18 +811,14 @@ void jl_gc_force_mark_old(jl_ptls_t ptls, jl_value_t *v) JL_NOTSAFEPOINT jl_gc_queue_root(v); } -STATIC_INLINE void maybe_collect(jl_ptls_t ptls) +inline void maybe_collect(jl_ptls_t ptls) { -#ifndef MMTKHEAP if (jl_atomic_load_relaxed(&ptls->gc_num.allocd) >= 0 || jl_gc_debug_check_other()) { jl_gc_collect(JL_GC_AUTO); } else { jl_gc_safepoint_(ptls); } -#else - mmtk_gc_poll(ptls); -#endif } // weak references @@ -999,11 +829,7 @@ JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref_th(jl_ptls_t ptls, jl_weakref_t *wr = (jl_weakref_t*)jl_gc_alloc(ptls, sizeof(void*), jl_weakref_type); wr->value = value; // NOTE: wb not needed here -#ifdef MMTKHEAP - mmtk_add_weak_candidate(wr); -#else arraylist_push(&ptls->heap.weak_refs, wr); -#endif return wr; } @@ -1057,7 +883,7 @@ static void sweep_weak_refs(void) // big value list // Size includes the tag and the tag is not cleared!! -STATIC_INLINE jl_value_t *jl_gc_big_alloc_inner(jl_ptls_t ptls, size_t sz) +inline jl_value_t *jl_gc_big_alloc_inner(jl_ptls_t ptls, size_t sz) { maybe_collect(ptls); size_t offs = offsetof(bigval_t, header); @@ -1085,21 +911,6 @@ STATIC_INLINE jl_value_t *jl_gc_big_alloc_inner(jl_ptls_t ptls, size_t sz) return jl_valueof(&v->header); } -// Instrumented version of jl_gc_big_alloc_inner, called into by LLVM-generated code. -JL_DLLEXPORT jl_value_t *jl_gc_big_alloc(jl_ptls_t ptls, size_t sz) -{ - jl_value_t *val = jl_gc_big_alloc_inner(ptls, sz); - maybe_record_alloc_to_profile(val, sz, jl_gc_unknown_type_tag); - return val; -} - -// This wrapper exists only to prevent `jl_gc_big_alloc_inner` from being inlined into -// its callers. We provide an external-facing interface for callers, and inline `jl_gc_big_alloc_inner` -// into this. 
(See https://github.com/JuliaLang/julia/pull/43868 for more details.) -jl_value_t *jl_gc_big_alloc_noinline(jl_ptls_t ptls, size_t sz) { - return jl_gc_big_alloc_inner(ptls, sz); -} - // Sweep list rooted at *pv, removing and freeing any unmarked objects. // Return pointer to last `next` field in the culled list. static bigval_t **sweep_big_list(int sweep_full, bigval_t **pv) JL_NOTSAFEPOINT @@ -1166,108 +977,14 @@ static void sweep_big(jl_ptls_t ptls, int sweep_full) JL_NOTSAFEPOINT gc_time_big_end(); } -// tracking Arrays with malloc'd storage - -void jl_gc_track_malloced_array(jl_ptls_t ptls, jl_array_t *a) JL_NOTSAFEPOINT -{ - // This is **NOT** a GC safe point. - mallocarray_t *ma; - if (ptls->heap.mafreelist == NULL) { - ma = (mallocarray_t*)malloc_s(sizeof(mallocarray_t)); - } - else { - ma = ptls->heap.mafreelist; - ptls->heap.mafreelist = ma->next; - } - ma->a = a; - ma->next = ptls->heap.mallocarrays; - ptls->heap.mallocarrays = ma; -} - -void jl_gc_count_allocd(size_t sz) JL_NOTSAFEPOINT -{ - jl_ptls_t ptls = jl_current_task->ptls; - jl_atomic_store_relaxed(&ptls->gc_num.allocd, - jl_atomic_load_relaxed(&ptls->gc_num.allocd) + sz); -} - -static void combine_thread_gc_counts(jl_gc_num_t *dest) JL_NOTSAFEPOINT -{ - int gc_n_threads; - jl_ptls_t* gc_all_tls_states; - gc_n_threads = jl_atomic_load_acquire(&jl_n_threads); - gc_all_tls_states = jl_atomic_load_relaxed(&jl_all_tls_states); - for (int i = 0; i < gc_n_threads; i++) { - jl_ptls_t ptls = gc_all_tls_states[i]; - if (ptls) { - dest->allocd += (jl_atomic_load_relaxed(&ptls->gc_num.allocd) + gc_num.interval); - dest->freed += jl_atomic_load_relaxed(&ptls->gc_num.freed); - dest->malloc += jl_atomic_load_relaxed(&ptls->gc_num.malloc); - dest->realloc += jl_atomic_load_relaxed(&ptls->gc_num.realloc); - dest->poolalloc += jl_atomic_load_relaxed(&ptls->gc_num.poolalloc); - dest->bigalloc += jl_atomic_load_relaxed(&ptls->gc_num.bigalloc); - dest->freecall += jl_atomic_load_relaxed(&ptls->gc_num.freecall); - } 
- } -} - -static void reset_thread_gc_counts(void) JL_NOTSAFEPOINT -{ - int gc_n_threads; - jl_ptls_t* gc_all_tls_states; - gc_n_threads = jl_atomic_load_acquire(&jl_n_threads); - gc_all_tls_states = jl_atomic_load_relaxed(&jl_all_tls_states); - for (int i = 0; i < gc_n_threads; i++) { - jl_ptls_t ptls = gc_all_tls_states[i]; - if (ptls != NULL) { - memset(&ptls->gc_num, 0, sizeof(ptls->gc_num)); - jl_atomic_store_relaxed(&ptls->gc_num.allocd, -(int64_t)gc_num.interval); - } - } -} - -void jl_gc_reset_alloc_count(void) JL_NOTSAFEPOINT -{ - combine_thread_gc_counts(&gc_num); - live_bytes += (gc_num.deferred_alloc + gc_num.allocd); - gc_num.allocd = 0; - gc_num.deferred_alloc = 0; - reset_thread_gc_counts(); -} - -size_t jl_array_nbytes(jl_array_t *a) JL_NOTSAFEPOINT -{ - size_t sz = 0; - int isbitsunion = jl_array_isbitsunion(a); - if (jl_array_ndims(a) == 1) - sz = a->elsize * a->maxsize + ((a->elsize == 1 && !isbitsunion) ? 1 : 0); - else - sz = a->elsize * jl_array_len(a); - if (isbitsunion) - // account for isbits Union array selector bytes - sz += jl_array_len(a); - return sz; -} - -#ifndef MMTKHEAP -static -#endif -void jl_gc_free_array(jl_array_t *a) JL_NOTSAFEPOINT +static void jl_gc_free_array(jl_array_t *a) JL_NOTSAFEPOINT { if (a->flags.how == 2) { char *d = (char*)a->data - a->offset*a->elsize; -#ifndef MMTKHEAP if (a->flags.isaligned) jl_free_aligned(d); else free(d); -#else - if (a->flags.isaligned) - mmtk_free_aligned(d); - else { - mmtk_free(d); - } -#endif gc_num.freed += jl_array_nbytes(a); gc_num.freecall++; } @@ -1351,7 +1068,7 @@ static NOINLINE jl_taggedvalue_t *add_page(jl_gc_pool_t *p) JL_NOTSAFEPOINT } // Size includes the tag and the tag is not cleared!! 
-STATIC_INLINE jl_value_t *jl_gc_pool_alloc_inner(jl_ptls_t ptls, int pool_offset, +inline jl_value_t *jl_gc_pool_alloc_inner(jl_ptls_t ptls, int pool_offset, int osize) { // Use the pool offset instead of the pool address as the argument @@ -1409,32 +1126,6 @@ STATIC_INLINE jl_value_t *jl_gc_pool_alloc_inner(jl_ptls_t ptls, int pool_offset return jl_valueof(v); } -// Instrumented version of jl_gc_pool_alloc_inner, called into by LLVM-generated code. -JL_DLLEXPORT jl_value_t *jl_gc_pool_alloc(jl_ptls_t ptls, int pool_offset, - int osize) -{ - jl_value_t *val = jl_gc_pool_alloc_inner(ptls, pool_offset, osize); - maybe_record_alloc_to_profile(val, osize, jl_gc_unknown_type_tag); - return val; -} - -// This wrapper exists only to prevent `jl_gc_pool_alloc_inner` from being inlined into -// its callers. We provide an external-facing interface for callers, and inline `jl_gc_pool_alloc_inner` -// into this. (See https://github.com/JuliaLang/julia/pull/43868 for more details.) -jl_value_t *jl_gc_pool_alloc_noinline(jl_ptls_t ptls, int pool_offset, int osize) { - return jl_gc_pool_alloc_inner(ptls, pool_offset, osize); -} - -int jl_gc_classify_pools(size_t sz, int *osize) -{ - if (sz > GC_MAX_SZCLASS) - return -1; - size_t allocsz = sz + sizeof(jl_taggedvalue_t); - int klass = jl_gc_szclass(allocsz); - *osize = jl_gc_sizeclasses[klass]; - return (int)(intptr_t)(&((jl_ptls_t)0)->heap.norm_pools[klass]); -} - // sweep phase int64_t lazy_freed_pages = 0; @@ -1743,7 +1434,6 @@ static void gc_sweep_perm_alloc(void) JL_DLLEXPORT void jl_gc_queue_root(const jl_value_t *ptr) { -#ifndef MMTKHEAP jl_ptls_t ptls = jl_current_task->ptls; jl_taggedvalue_t *o = jl_astaggedvalue(ptr); // The modification of the `gc_bits` is not atomic but it @@ -1753,7 +1443,6 @@ JL_DLLEXPORT void jl_gc_queue_root(const jl_value_t *ptr) o->bits.gc = GC_MARKED; arraylist_push(ptls->heap.remset, (jl_value_t*)ptr); ptls->heap.remset_nptr++; // conservative -#endif } void jl_gc_queue_multiroot(const 
jl_value_t *parent, const jl_value_t *ptr) JL_NOTSAFEPOINT @@ -2639,27 +2328,6 @@ JL_EXTENSION NOINLINE void gc_mark_loop(jl_ptls_t ptls) gc_drain_own_chunkqueue(ptls, &ptls->mark_queue); } -#ifndef MMTKHEAP -static -#endif -void gc_premark(jl_ptls_t ptls2) -{ - arraylist_t *remset = ptls2->heap.remset; - ptls2->heap.remset = ptls2->heap.last_remset; - ptls2->heap.last_remset = remset; - ptls2->heap.remset->len = 0; - ptls2->heap.remset_nptr = 0; - // avoid counting remembered objects - // in `perm_scanned_bytes` - size_t len = remset->len; - void **items = remset->items; - for (size_t i = 0; i < len; i++) { - jl_value_t *item = (jl_value_t *)items[i]; - objprofile_count(jl_typeof(item), 2, 0); - jl_astaggedvalue(item)->bits.gc = GC_OLD_MARKED; - } -} - static void gc_queue_thread_local(jl_gc_markqueue_t *mq, jl_ptls_t ptls2) { jl_task_t *task; @@ -2797,93 +2465,6 @@ static void sweep_finalizer_list(arraylist_t *list) list->len = j; } -// collector entry point and control -static _Atomic(uint32_t) jl_gc_disable_counter = 1; - -JL_DLLEXPORT int jl_gc_enable(int on) -{ - jl_ptls_t ptls = jl_current_task->ptls; - int prev = !ptls->disable_gc; - ptls->disable_gc = (on == 0); - if (on && !prev) { - // disable -> enable - if (jl_atomic_fetch_add(&jl_gc_disable_counter, -1) == 1) { - gc_num.allocd += gc_num.deferred_alloc; - gc_num.deferred_alloc = 0; -#ifdef MMTKHEAP - enable_collection(); -#endif - } - } - else if (prev && !on) { -#ifdef MMTKHEAP - disable_collection(); -#endif - // enable -> disable - jl_atomic_fetch_add(&jl_gc_disable_counter, 1); - // check if the GC is running and wait for it to finish - jl_gc_safepoint_(ptls); - } - return prev; -} - -JL_DLLEXPORT int jl_gc_is_enabled(void) -{ - jl_ptls_t ptls = jl_current_task->ptls; - return !ptls->disable_gc; -} - -JL_DLLEXPORT void jl_gc_get_total_bytes(int64_t *bytes) JL_NOTSAFEPOINT -{ - jl_gc_num_t num = gc_num; - combine_thread_gc_counts(&num); - // Sync this logic with `base/util.jl:GC_Diff` - *bytes = 
(num.total_allocd + num.deferred_alloc + num.allocd); -} - -JL_DLLEXPORT uint64_t jl_gc_total_hrtime(void) -{ - return gc_num.total_time; -} - -JL_DLLEXPORT jl_gc_num_t jl_gc_num(void) -{ - jl_gc_num_t num = gc_num; - combine_thread_gc_counts(&num); - return num; -} - -JL_DLLEXPORT void jl_gc_reset_stats(void) -{ - gc_num.max_pause = 0; - gc_num.max_memory = 0; - gc_num.max_time_to_safepoint = 0; -} - -// TODO: these were supposed to be thread local -JL_DLLEXPORT int64_t jl_gc_diff_total_bytes(void) JL_NOTSAFEPOINT -{ - int64_t oldtb = last_gc_total_bytes; - int64_t newtb; - jl_gc_get_total_bytes(&newtb); - last_gc_total_bytes = newtb; - return newtb - oldtb; -} - -JL_DLLEXPORT int64_t jl_gc_sync_total_bytes(int64_t offset) JL_NOTSAFEPOINT -{ - int64_t oldtb = last_gc_total_bytes; - int64_t newtb; - jl_gc_get_total_bytes(&newtb); - last_gc_total_bytes = newtb - offset; - return newtb - oldtb; -} - -JL_DLLEXPORT int64_t jl_gc_live_bytes(void) -{ - return live_bytes; -} - size_t jl_maxrss(void); // Only one thread should be running in this function @@ -3165,10 +2746,6 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection) jl_atomic_fetch_add((_Atomic(uint64_t)*)&gc_num.deferred_alloc, localbytes); return; } -#ifdef MMTKHEAP - handle_user_collection_request(ptls); - return; -#endif jl_gc_debug_print(); int8_t old_state = jl_atomic_load_relaxed(&ptls->gc_state); @@ -3258,11 +2835,6 @@ void gc_mark_queue_all_roots(jl_ptls_t ptls, jl_gc_markqueue_t *mq) // allocator entry points -JL_DLLEXPORT jl_value_t *(jl_gc_alloc)(jl_ptls_t ptls, size_t sz, void *ty) -{ - return jl_gc_alloc_(ptls, sz, ty); -} - // Per-thread initialization void jl_init_thread_heap(jl_ptls_t ptls) { @@ -3302,10 +2874,6 @@ void jl_init_thread_heap(jl_ptls_t ptls) memset(&ptls->gc_num, 0, sizeof(ptls->gc_num)); jl_atomic_store_relaxed(&ptls->gc_num.allocd, -(int64_t)gc_num.interval); -#ifdef MMTKHEAP - MMTk_Mutator mmtk_mutator = bind_mutator((void *)ptls, ptls->tid); - ptls->mmtk_mutator_ptr 
= ((MMTkMutatorContext*)mmtk_mutator); -#endif } // System-wide initializations @@ -3344,67 +2912,9 @@ void jl_gc_init(void) if (high_water_mark < max_total_memory) max_total_memory = high_water_mark; - -#ifdef MMTKHEAP - long long min_heap_size; - long long max_heap_size; - char* min_size_def = getenv("MMTK_MIN_HSIZE"); - char* min_size_gb = getenv("MMTK_MIN_HSIZE_G"); - - char* max_size_def = getenv("MMTK_MAX_HSIZE"); - char* max_size_gb = getenv("MMTK_MAX_HSIZE_G"); - - // default min heap currently set as Julia's default_collect_interval - if (min_size_def != NULL) { - char *p; - double min_size = strtod(min_size_def, &p); - min_heap_size = (long) 1024 * 1024 * min_size; - } else if (min_size_gb != NULL) { - char *p; - double min_size = strtod(min_size_gb, &p); - min_heap_size = (long) 1024 * 1024 * 1024 * min_size; - } else { - min_heap_size = default_collect_interval; - } - - // default max heap currently set as 70% the free memory in the system - if (max_size_def != NULL) { - char *p; - double max_size = strtod(max_size_def, &p); - max_heap_size = (long) 1024 * 1024 * max_size; - } else if (max_size_gb != NULL) { - char *p; - double max_size = strtod(max_size_gb, &p); - max_heap_size = (long) 1024 * 1024 * 1024 * max_size; - } else { - max_heap_size = uv_get_free_memory() * 70 / 100; - } - - // if only max size is specified initialize MMTk with a fixed size heap - if (max_size_def != NULL || max_size_gb != NULL && (min_size_def == NULL && min_size_gb == NULL)) { - gc_init(0, max_heap_size, &mmtk_upcalls, (sizeof(jl_taggedvalue_t))); - } else { - gc_init(min_heap_size, max_heap_size, &mmtk_upcalls, (sizeof(jl_taggedvalue_t))); - } - -#endif t_start = jl_hrtime(); } -JL_DLLEXPORT void jl_gc_set_max_memory(uint64_t max_mem) -{ - if (max_mem > 0 - && max_mem < (uint64_t)1 << (sizeof(memsize_t) * 8 - 1)) { - max_total_memory = max_mem; - } -} - -// callback for passing OOM errors from gmp -JL_DLLEXPORT void jl_throw_out_of_memory_error(void) -{ - 
jl_throw(jl_memory_exception); -} - // allocation wrappers that track allocation and let collection run JL_DLLEXPORT void *jl_gc_counted_malloc(size_t sz) @@ -3418,9 +2928,6 @@ JL_DLLEXPORT void *jl_gc_counted_malloc(size_t sz) jl_atomic_load_relaxed(&ptls->gc_num.allocd) + sz); jl_atomic_store_relaxed(&ptls->gc_num.malloc, jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1); -#ifdef MMTKHEAP - return mmtk_counted_malloc(sz); -#endif } return malloc(sz); } @@ -3436,9 +2943,6 @@ JL_DLLEXPORT void *jl_gc_counted_calloc(size_t nm, size_t sz) jl_atomic_load_relaxed(&ptls->gc_num.allocd) + nm*sz); jl_atomic_store_relaxed(&ptls->gc_num.malloc, jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1); -#ifdef MMTKHEAP - return mmtk_counted_calloc(nm, sz); -#endif } return calloc(nm, sz); } @@ -3447,18 +2951,14 @@ JL_DLLEXPORT void jl_gc_counted_free_with_size(void *p, size_t sz) { jl_gcframe_t **pgcstack = jl_get_pgcstack(); jl_task_t *ct = jl_current_task; + free(p); if (pgcstack != NULL && ct->world_age) { jl_ptls_t ptls = ct->ptls; jl_atomic_store_relaxed(&ptls->gc_num.freed, jl_atomic_load_relaxed(&ptls->gc_num.freed) + sz); jl_atomic_store_relaxed(&ptls->gc_num.freecall, jl_atomic_load_relaxed(&ptls->gc_num.freecall) + 1); -#ifdef MMTKHEAP - mmtk_free_with_size(p, sz); - return; -#endif } - free(p); } JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size_t sz) @@ -3476,151 +2976,12 @@ JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size jl_atomic_load_relaxed(&ptls->gc_num.allocd) + (sz - old)); jl_atomic_store_relaxed(&ptls->gc_num.realloc, jl_atomic_load_relaxed(&ptls->gc_num.realloc) + 1); -#ifdef MMTKHEAP - return mmtk_realloc_with_old_size(p, sz, old); -#endif } return realloc(p, sz); } -// allocation wrappers that save the size of allocations, to allow using -// jl_gc_counted_* functions with a libc-compatible API. 
- -JL_DLLEXPORT void *jl_malloc(size_t sz) -{ - int64_t *p = (int64_t *)jl_gc_counted_malloc(sz + JL_SMALL_BYTE_ALIGNMENT); - if (p == NULL) - return NULL; - p[0] = sz; - return (void *)(p + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16 -} - -//_unchecked_calloc does not check for potential overflow of nm*sz -STATIC_INLINE void *_unchecked_calloc(size_t nm, size_t sz) { - size_t nmsz = nm*sz; - int64_t *p = (int64_t *)jl_gc_counted_calloc(nmsz + JL_SMALL_BYTE_ALIGNMENT, 1); - if (p == NULL) - return NULL; - p[0] = nmsz; - return (void *)(p + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16 -} - -JL_DLLEXPORT void *jl_calloc(size_t nm, size_t sz) -{ - if (nm > SSIZE_MAX/sz - JL_SMALL_BYTE_ALIGNMENT) - return NULL; - return _unchecked_calloc(nm, sz); -} - -JL_DLLEXPORT void jl_free(void *p) -{ - if (p != NULL) { - int64_t *pp = (int64_t *)p - 2; - size_t sz = pp[0]; - jl_gc_counted_free_with_size(pp, sz + JL_SMALL_BYTE_ALIGNMENT); - } -} - -JL_DLLEXPORT void *jl_realloc(void *p, size_t sz) -{ - int64_t *pp; - size_t szold; - if (p == NULL) { - pp = NULL; - szold = 0; - } - else { - pp = (int64_t *)p - 2; - szold = pp[0] + JL_SMALL_BYTE_ALIGNMENT; - } - int64_t *pnew = (int64_t *)jl_gc_counted_realloc_with_old_size(pp, szold, sz + JL_SMALL_BYTE_ALIGNMENT); - if (pnew == NULL) - return NULL; - pnew[0] = sz; - return (void *)(pnew + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16 -} - -// allocating blocks for Arrays and Strings - -JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz) -{ - jl_ptls_t ptls = jl_current_task->ptls; - maybe_collect(ptls); - size_t allocsz = LLT_ALIGN(sz, JL_CACHE_BYTE_ALIGNMENT); - if (allocsz < sz) // overflow in adding offs, size was "negative" - jl_throw(jl_memory_exception); - jl_atomic_store_relaxed(&ptls->gc_num.allocd, - jl_atomic_load_relaxed(&ptls->gc_num.allocd) + allocsz); - jl_atomic_store_relaxed(&ptls->gc_num.malloc, - jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1); - int last_errno = errno; -#ifdef _OS_WINDOWS_ - DWORD last_error = 
GetLastError(); -#endif - void *b = malloc_cache_align(allocsz); - if (b == NULL) - jl_throw(jl_memory_exception); -#ifdef _OS_WINDOWS_ - SetLastError(last_error); -#endif - errno = last_errno; - // jl_gc_managed_malloc is currently always used for allocating array buffers. - maybe_record_alloc_to_profile((jl_value_t*)b, sz, (jl_datatype_t*)jl_buff_tag); - return b; -} - -static void *gc_managed_realloc_(jl_ptls_t ptls, void *d, size_t sz, size_t oldsz, - int isaligned, jl_value_t *owner, int8_t can_collect) -{ - if (can_collect) - maybe_collect(ptls); - - size_t allocsz = LLT_ALIGN(sz, JL_CACHE_BYTE_ALIGNMENT); - if (allocsz < sz) // overflow in adding offs, size was "negative" - jl_throw(jl_memory_exception); - - if (jl_astaggedvalue(owner)->bits.gc == GC_OLD_MARKED) { - ptls->gc_cache.perm_scanned_bytes += allocsz - oldsz; - live_bytes += allocsz - oldsz; - } - else if (allocsz < oldsz) - jl_atomic_store_relaxed(&ptls->gc_num.freed, - jl_atomic_load_relaxed(&ptls->gc_num.freed) + (oldsz - allocsz)); - else - jl_atomic_store_relaxed(&ptls->gc_num.allocd, - jl_atomic_load_relaxed(&ptls->gc_num.allocd) + (allocsz - oldsz)); - jl_atomic_store_relaxed(&ptls->gc_num.realloc, - jl_atomic_load_relaxed(&ptls->gc_num.realloc) + 1); - - int last_errno = errno; -#ifdef _OS_WINDOWS_ - DWORD last_error = GetLastError(); -#endif - void *b; - if (isaligned) - b = realloc_cache_align(d, allocsz, oldsz); - else - b = realloc(d, allocsz); - if (b == NULL) - jl_throw(jl_memory_exception); -#ifdef _OS_WINDOWS_ - SetLastError(last_error); -#endif - errno = last_errno; - maybe_record_alloc_to_profile((jl_value_t*)b, sz, jl_gc_unknown_type_tag); - return b; -} - -JL_DLLEXPORT void *jl_gc_managed_realloc(void *d, size_t sz, size_t oldsz, - int isaligned, jl_value_t *owner) -{ - jl_ptls_t ptls = jl_current_task->ptls; - return gc_managed_realloc_(ptls, d, sz, oldsz, isaligned, owner, 1); -} - jl_value_t *jl_gc_realloc_string(jl_value_t *s, size_t sz) { -#ifndef MMTKHEAP size_t len = 
jl_string_len(s); if (sz <= len) return s; jl_taggedvalue_t *v = jl_astaggedvalue(s); @@ -3654,148 +3015,6 @@ jl_value_t *jl_gc_realloc_string(jl_value_t *s, size_t sz) jl_value_t *snew = jl_valueof(&newbig->header); *(size_t*)snew = sz; return snew; -#else - size_t len = jl_string_len(s); - jl_value_t *snew = jl_alloc_string(sz); - memcpy(jl_string_data(snew), jl_string_data(s), sz <= len ? sz : len); - return snew; -#endif -} - -// Perm gen allocator -// 2M pool -#define GC_PERM_POOL_SIZE (2 * 1024 * 1024) -// 20k limit for pool allocation. At most 1% fragmentation -#define GC_PERM_POOL_LIMIT (20 * 1024) -uv_mutex_t gc_perm_lock; -static uintptr_t gc_perm_pool = 0; -static uintptr_t gc_perm_end = 0; - -static void *gc_perm_alloc_large(size_t sz, int zero, unsigned align, unsigned offset) JL_NOTSAFEPOINT -{ - // `align` must be power of two - assert(offset == 0 || offset < align); - const size_t malloc_align = sizeof(void*) == 8 ? 16 : 4; - if (align > 1 && (offset != 0 || align > malloc_align)) - sz += align - 1; - int last_errno = errno; -#ifdef _OS_WINDOWS_ - DWORD last_error = GetLastError(); -#endif - void *base = zero ? 
calloc(1, sz) : malloc(sz); - if (base == NULL) - jl_throw(jl_memory_exception); -#ifdef _OS_WINDOWS_ - SetLastError(last_error); -#endif - errno = last_errno; - jl_may_leak(base); - assert(align > 0); - unsigned diff = (offset - (uintptr_t)base) % align; - return (void*)((char*)base + diff); -} - -STATIC_INLINE void *gc_try_perm_alloc_pool(size_t sz, unsigned align, unsigned offset) JL_NOTSAFEPOINT -{ - uintptr_t pool = LLT_ALIGN(gc_perm_pool + offset, (uintptr_t)align) - offset; - uintptr_t end = pool + sz; - if (end > gc_perm_end) - return NULL; - gc_perm_pool = end; - return (void*)jl_assume(pool); -} - -// **NOT** a safepoint -void *jl_gc_perm_alloc_nolock(size_t sz, int zero, unsigned align, unsigned offset) -{ - // The caller should have acquired `gc_perm_lock` - assert(align < GC_PERM_POOL_LIMIT); -#ifndef MEMDEBUG - if (__unlikely(sz > GC_PERM_POOL_LIMIT)) -#endif - return gc_perm_alloc_large(sz, zero, align, offset); - void *ptr = gc_try_perm_alloc_pool(sz, align, offset); - if (__likely(ptr)) - return ptr; - int last_errno = errno; -#ifdef _OS_WINDOWS_ - DWORD last_error = GetLastError(); - void *pool = VirtualAlloc(NULL, GC_PERM_POOL_SIZE, MEM_COMMIT, PAGE_READWRITE); - SetLastError(last_error); - errno = last_errno; - if (__unlikely(pool == NULL)) - return NULL; -#else - void *pool = mmap(0, GC_PERM_POOL_SIZE, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - errno = last_errno; - if (__unlikely(pool == MAP_FAILED)) - return NULL; -#endif - gc_perm_pool = (uintptr_t)pool; - gc_perm_end = gc_perm_pool + GC_PERM_POOL_SIZE; - return gc_try_perm_alloc_pool(sz, align, offset); -} - -// **NOT** a safepoint -void *jl_gc_perm_alloc(size_t sz, int zero, unsigned align, unsigned offset) -{ - assert(align < GC_PERM_POOL_LIMIT); -#ifndef MEMDEBUG - if (__unlikely(sz > GC_PERM_POOL_LIMIT)) -#endif - return gc_perm_alloc_large(sz, zero, align, offset); - uv_mutex_lock(&gc_perm_lock); - void *p = jl_gc_perm_alloc_nolock(sz, zero, align, offset); - 
uv_mutex_unlock(&gc_perm_lock); - return p; -} - -JL_DLLEXPORT void jl_gc_add_finalizer(jl_value_t *v, jl_function_t *f) -{ - jl_ptls_t ptls = jl_current_task->ptls; - jl_gc_add_finalizer_th(ptls, v, f); -} - -JL_DLLEXPORT void jl_finalize(jl_value_t *o) -{ - jl_finalize_th(jl_current_task, o); -} - -JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref(jl_value_t *value) -{ - jl_ptls_t ptls = jl_current_task->ptls; - return jl_gc_new_weakref_th(ptls, value); -} - -JL_DLLEXPORT jl_value_t *jl_gc_allocobj(size_t sz) -{ - jl_ptls_t ptls = jl_current_task->ptls; - return jl_gc_alloc(ptls, sz, NULL); -} - -JL_DLLEXPORT jl_value_t *jl_gc_alloc_0w(void) -{ - jl_ptls_t ptls = jl_current_task->ptls; - return jl_gc_alloc(ptls, 0, NULL); -} - -JL_DLLEXPORT jl_value_t *jl_gc_alloc_1w(void) -{ - jl_ptls_t ptls = jl_current_task->ptls; - return jl_gc_alloc(ptls, sizeof(void*), NULL); -} - -JL_DLLEXPORT jl_value_t *jl_gc_alloc_2w(void) -{ - jl_ptls_t ptls = jl_current_task->ptls; - return jl_gc_alloc(ptls, sizeof(void*) * 2, NULL); -} - -JL_DLLEXPORT jl_value_t *jl_gc_alloc_3w(void) -{ - jl_ptls_t ptls = jl_current_task->ptls; - return jl_gc_alloc(ptls, sizeof(void*) * 3, NULL); } JL_DLLEXPORT int jl_gc_enable_conservative_gc_support(void) @@ -3915,27 +3134,16 @@ JL_DLLEXPORT jl_value_t *jl_gc_internal_obj_base_ptr(void *p) return NULL; } -JL_DLLEXPORT size_t jl_gc_max_internal_obj_size(void) -{ - return GC_MAX_SZCLASS; -} - -JL_DLLEXPORT size_t jl_gc_external_obj_hdr_size(void) -{ - return sizeof(bigval_t); -} - - -JL_DLLEXPORT void * jl_gc_alloc_typed(jl_ptls_t ptls, size_t sz, void *ty) +// added for MMTk integration +void enable_collection(void) { - return jl_gc_alloc(ptls, sz, ty); } - -JL_DLLEXPORT void jl_gc_schedule_foreign_sweepfunc(jl_ptls_t ptls, jl_value_t *obj) +void disable_collection(void) { - arraylist_push(&ptls->sweep_objs, obj); } #ifdef __cplusplus } #endif + +#endif // !MMTK_GC diff --git a/src/gc.h b/src/gc.h index 930f7f3c30594..1db0211eb6c68 100644 --- a/src/gc.h 
+++ b/src/gc.h @@ -4,6 +4,7 @@ allocation and garbage collection . non-moving, precise mark and sweep collector . pool-allocates small objects, keeps big objects on a simple list + MMTk alternative */ #ifndef JL_GC_H @@ -27,36 +28,48 @@ #include "gc-heap-snapshot.h" #include "gc-alloc-profiler.h" -#ifdef __cplusplus -extern "C" { +// interface from and to gc-common.c +extern void maybe_collect(jl_ptls_t ptls); +extern void run_finalizer(jl_task_t *ct, void *o, void *ff); +extern void *jl_malloc_aligned(size_t sz, size_t align); +extern void *jl_gc_counted_calloc(size_t nm, size_t sz); +extern void jl_gc_counted_free_with_size(void *p, size_t sz); +extern void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size_t sz); +extern void *jl_realloc_aligned(void *d, size_t sz, size_t oldsz, size_t align); +extern void jl_gc_add_finalizer_th(jl_ptls_t ptls, jl_value_t *v, jl_function_t *f); +extern void jl_finalize_th(jl_task_t *ct, jl_value_t *o); +extern jl_weakref_t *jl_gc_new_weakref_th(jl_ptls_t ptls, jl_value_t *value); +extern jl_value_t *jl_gc_big_alloc_inner(jl_ptls_t ptls, size_t sz); +extern jl_value_t *jl_gc_pool_alloc_inner(jl_ptls_t ptls, int pool_offset, int osize); +extern void jl_rng_split(uint64_t to[4], uint64_t from[4]); +extern void gc_premark(jl_ptls_t ptls2); +extern void *gc_managed_realloc_(jl_ptls_t ptls, void *d, size_t sz, size_t oldsz, + int isaligned, jl_value_t *owner, int8_t can_collect); +extern size_t jl_array_nbytes(jl_array_t *a); +extern void objprofile_count(void *ty, int old, int sz); + +#define malloc_cache_align(sz) jl_malloc_aligned(sz, JL_CACHE_BYTE_ALIGNMENT) +#define realloc_cache_align(p, sz, oldsz) jl_realloc_aligned(p, sz, oldsz, JL_CACHE_BYTE_ALIGNMENT) + +// common types and globals +#ifdef _P64 +typedef uint64_t memsize_t; +#else +typedef uint32_t memsize_t; #endif -#define GC_PAGE_LG2 14 // log2(size of a page) -#define GC_PAGE_SZ (1 << GC_PAGE_LG2) // 16k -#define GC_PAGE_OFFSET (JL_HEAP_ALIGNMENT - 
(sizeof(jl_taggedvalue_t) % JL_HEAP_ALIGNMENT)) - -#define jl_malloc_tag ((void*)0xdeadaa01) -#define jl_singleton_tag ((void*)0xdeadaa02) - -// Used by GC_DEBUG_ENV -typedef struct { - uint64_t num; - uint64_t next; - uint64_t min; - uint64_t interv; - uint64_t max; - unsigned short random[3]; -} jl_alloc_num_t; - -typedef struct { - int always_full; - int wait_for_debugger; - jl_alloc_num_t pool; - jl_alloc_num_t other; - jl_alloc_num_t print; -} jl_gc_debug_env_t; +extern const size_t default_collect_interval; +extern const size_t max_collect_interval; +extern size_t last_long_collect_interval; +extern size_t total_mem; +extern memsize_t max_total_memory; +extern _Atomic(uint32_t) jl_gc_disable_counter; +extern jl_mutex_t heapsnapshot_lock; +extern uint64_t finalizer_rngState[]; +extern int gc_n_threads; +extern jl_ptls_t* gc_all_tls_states; -// This struct must be kept in sync with the Julia type of the same name in base/timing.jl +// keep in sync with the Julia type of the same name in base/timing.jl typedef struct { int64_t allocd; int64_t deferred_alloc; @@ -82,29 +95,18 @@ typedef struct { uint64_t total_mark_time; } jl_gc_num_t; -typedef enum { - GC_empty_chunk, - GC_objary_chunk, - GC_ary8_chunk, - GC_ary16_chunk, - GC_finlist_chunk, -} gc_chunk_id_t; +extern jl_gc_num_t gc_num; -typedef struct _jl_gc_chunk_t { - gc_chunk_id_t cid; - struct _jl_value_t *parent; - struct _jl_value_t **begin; - struct _jl_value_t **end; - void *elem_begin; - void *elem_end; - uint32_t step; - uintptr_t nptr; -} jl_gc_chunk_t; +// data structure for tracking malloc'd arrays. 
+typedef struct _mallocarray_t { +    jl_array_t *a; +    struct _mallocarray_t *next; +} mallocarray_t; -#define MAX_REFS_AT_ONCE (1 << 16) +extern void combine_thread_gc_counts(jl_gc_num_t *dest); +extern void reset_thread_gc_counts(void); // layout for big (>2k) objects - JL_EXTENSION typedef struct _bigval_t { struct _bigval_t *next; struct _bigval_t **prev; // pointer to the next field of the prev entry @@ -129,12 +131,111 @@ JL_EXTENSION typedef struct _bigval_t { // must be 64-byte aligned here, in 32 & 64 bit modes } bigval_t; -// data structure for tracking malloc'd arrays. +STATIC_INLINE uintptr_t gc_ptr_tag(void *v, uintptr_t mask) JL_NOTSAFEPOINT +{ + return ((uintptr_t)v) & mask; +} -typedef struct _mallocarray_t { - jl_array_t *a; - struct _mallocarray_t *next; -} mallocarray_t; +STATIC_INLINE void *gc_ptr_clear_tag(void *v, uintptr_t mask) JL_NOTSAFEPOINT +{ + return (void*)(((uintptr_t)v) & ~mask); +} + +STATIC_INLINE int gc_marked(uintptr_t bits) JL_NOTSAFEPOINT +{ + return (bits & GC_MARKED) != 0; +} +#ifdef MMTK_GC +#ifdef GC_VERIFY +#error "GC_VERIFY is unsupported with MMTk" +#endif + +#ifdef MEMFENCE +#error "MEMFENCE is unsupported with MMTk" +#endif + +#ifdef GC_DEBUG_ENV +#error "GC_DEBUG_ENV is unsupported with MMTk" +#endif + +#ifdef GC_FINAL_STATS +#error "GC_FINAL_STATS is currently unsupported with MMTk" +#endif + +#ifdef GC_TIME +#error "GC_TIME is currently unsupported with MMTk" +#endif + +#ifdef MEMPROFILE +#error "MEMPROFILE is not supported with MMTk" +#endif + +#ifdef OBJPROFILE +#ifdef MMTK_GC +#warning "OBJPROFILE is unsupported with MMTk; disabling" +#undef OBJPROFILE +#endif +#endif +#endif // MMTK_GC + +#ifdef MMTK_GC +#include "mmtk.h" + +typedef struct { +    char c; +} jl_gc_pagemeta_t; + +#else // !MMTK_GC + +#ifdef __cplusplus +extern "C" { +#endif + +#define GC_PAGE_LG2 14 // log2(size of a page) +#define GC_PAGE_SZ (1 << GC_PAGE_LG2) // 16k +#define GC_PAGE_OFFSET (JL_HEAP_ALIGNMENT - (sizeof(jl_taggedvalue_t) % JL_HEAP_ALIGNMENT)) + +#define jl_malloc_tag 
((void*)0xdeadaa01) +#define jl_singleton_tag ((void*)0xdeadaa02) + +// Used by GC_DEBUG_ENV +typedef struct { + uint64_t num; + uint64_t next; + uint64_t min; + uint64_t interv; + uint64_t max; + unsigned short random[3]; +} jl_alloc_num_t; + +typedef struct { + int always_full; + int wait_for_debugger; + jl_alloc_num_t pool; + jl_alloc_num_t other; + jl_alloc_num_t print; +} jl_gc_debug_env_t; + +typedef enum { + GC_empty_chunk, + GC_objary_chunk, + GC_ary8_chunk, + GC_ary16_chunk, + GC_finlist_chunk, +} gc_chunk_id_t; + +typedef struct _jl_gc_chunk_t { + gc_chunk_id_t cid; + struct _jl_value_t *parent; + struct _jl_value_t **begin; + struct _jl_value_t **end; + void *elem_begin; + void *elem_end; + uint32_t step; + uintptr_t nptr; +} jl_gc_chunk_t; + +#define MAX_REFS_AT_ONCE (1 << 16) // pool page metadata typedef struct { @@ -250,14 +351,11 @@ STATIC_INLINE unsigned ffs_u32(uint32_t bitvec) } #endif -extern jl_gc_num_t gc_num; extern pagetable_t memory_map; extern bigval_t *big_objects_marked; extern arraylist_t finalizer_list_marked; extern arraylist_t to_finalize; extern int64_t lazy_freed_pages; -extern int gc_n_threads; -extern jl_ptls_t* gc_all_tls_states; STATIC_INLINE bigval_t *bigval_header(jl_taggedvalue_t *o) JL_NOTSAFEPOINT { @@ -280,11 +378,6 @@ STATIC_INLINE jl_taggedvalue_t *page_pfl_end(jl_gc_pagemeta_t *p) JL_NOTSAFEPOIN return (jl_taggedvalue_t*)(p->data + p->fl_end_offset); } -STATIC_INLINE int gc_marked(uintptr_t bits) JL_NOTSAFEPOINT -{ - return (bits & GC_MARKED) != 0; -} - STATIC_INLINE int gc_old(uintptr_t bits) JL_NOTSAFEPOINT { return (bits & GC_OLD) != 0; @@ -295,16 +388,6 @@ STATIC_INLINE uintptr_t gc_set_bits(uintptr_t tag, int bits) JL_NOTSAFEPOINT return (tag & ~(uintptr_t)3) | bits; } -STATIC_INLINE uintptr_t gc_ptr_tag(void *v, uintptr_t mask) JL_NOTSAFEPOINT -{ - return ((uintptr_t)v) & mask; -} - -STATIC_INLINE void *gc_ptr_clear_tag(void *v, uintptr_t mask) JL_NOTSAFEPOINT -{ - return (void*)(((uintptr_t)v) & ~mask); -} - 
NOINLINE uintptr_t gc_get_stack_ptr(void); STATIC_INLINE jl_gc_pagemeta_t *page_metadata(void *_data) JL_NOTSAFEPOINT @@ -538,24 +621,6 @@ static inline void gc_scrub(void) } #endif -#ifdef OBJPROFILE -void objprofile_count(void *ty, int old, int sz) JL_NOTSAFEPOINT; -void objprofile_printall(void); -void objprofile_reset(void); -#else -static inline void objprofile_count(void *ty, int old, int sz) JL_NOTSAFEPOINT -{ -} - -static inline void objprofile_printall(void) -{ -} - -static inline void objprofile_reset(void) -{ -} -#endif - #ifdef MEMPROFILE void gc_stats_all_pool(void); void gc_stats_big_obj(void); @@ -567,8 +632,6 @@ void gc_stats_big_obj(void); // For debugging void gc_count_pool(void); -size_t jl_array_nbytes(jl_array_t *a) JL_NOTSAFEPOINT; - JL_DLLEXPORT void jl_enable_gc_logging(int enable); void _report_gc_finished(uint64_t pause, uint64_t freed, int full, int recollect) JL_NOTSAFEPOINT; @@ -576,4 +639,6 @@ void _report_gc_finished(uint64_t pause, uint64_t freed, int full, int recollect } #endif +#endif // !MMTK_GC + #endif diff --git a/src/init.c b/src/init.c index 45d6b8ee98873..2bfdebe00dfaf 100644 --- a/src/init.c +++ b/src/init.c @@ -295,12 +295,7 @@ JL_DLLEXPORT void jl_atexit_hook(int exitcode) JL_NOTSAFEPOINT_ENTER JL_STDOUT = (uv_stream_t*) STDOUT_FILENO; JL_STDERR = (uv_stream_t*) STDERR_FILENO; -#ifndef MMTKHEAP - if (ct) - jl_gc_run_all_finalizers(ct); -#else - mmtk_jl_gc_run_all_finalizers(); -#endif + if (ct) jl_gc_run_all_finalizers(ct); uv_loop_t *loop = jl_global_event_loop(); if (loop != NULL) { @@ -811,11 +806,9 @@ JL_DLLEXPORT void julia_init(JL_IMAGE_SEARCH rel) jl_ptls_t ptls = jl_init_threadtls(0); -#ifdef MMTKHEAP - // start MMTk's GC - initialize_collection((void*) ptls); +#ifdef MMTK_GC + initialize_collection((void *)ptls); #endif - #pragma GCC diagnostic push #if defined(_COMPILER_GCC_) && __GNUC__ >= 12 #pragma GCC diagnostic ignored "-Wdangling-pointer" diff --git a/src/julia.h b/src/julia.h index 2bc1a97b681ed..8a8624360fc7a 
100644 --- a/src/julia.h +++ b/src/julia.h @@ -930,29 +930,25 @@ JL_DLLEXPORT void jl_clear_malloc_data(void); JL_DLLEXPORT void jl_gc_queue_root(const jl_value_t *root) JL_NOTSAFEPOINT; JL_DLLEXPORT void jl_gc_queue_multiroot(const jl_value_t *root, const jl_value_t *stored) JL_NOTSAFEPOINT; +#ifndef MMTK_GC STATIC_INLINE void jl_gc_wb(const void *parent, const void *ptr) JL_NOTSAFEPOINT { -#ifndef MMTKHEAP // parent and ptr isa jl_value_t* if (__unlikely(jl_astaggedvalue(parent)->bits.gc == 3 && // parent is old and not in remset (jl_astaggedvalue(ptr)->bits.gc & 1) == 0)) // ptr is young jl_gc_queue_root((jl_value_t*)parent); -#endif } STATIC_INLINE void jl_gc_wb_back(const void *ptr) JL_NOTSAFEPOINT // ptr isa jl_value_t* { -#ifndef MMTKHEAP // if ptr is old if (__unlikely(jl_astaggedvalue(ptr)->bits.gc == 3)) { jl_gc_queue_root((jl_value_t*)ptr); } -#endif } STATIC_INLINE void jl_gc_multi_wb(const void *parent, const jl_value_t *ptr) JL_NOTSAFEPOINT { -#ifndef MMTKHEAP // ptr is an immutable object if (__likely(jl_astaggedvalue(parent)->bits.gc != 3)) return; // parent is young or in remset @@ -962,9 +958,23 @@ STATIC_INLINE void jl_gc_multi_wb(const void *parent, const jl_value_t *ptr) JL_ const jl_datatype_layout_t *ly = dt->layout; if (ly->npointers) jl_gc_queue_multiroot((jl_value_t*)parent, ptr); -#endif } +#else // MMTK_GC + +STATIC_INLINE void jl_gc_wb(const void *parent, const void *ptr) JL_NOTSAFEPOINT +{ +} + +STATIC_INLINE void jl_gc_wb_back(const void *ptr) JL_NOTSAFEPOINT // ptr isa jl_value_t* +{ +} + +STATIC_INLINE void jl_gc_multi_wb(const void *parent, const jl_value_t *ptr) JL_NOTSAFEPOINT +{ +} +#endif // MMTK_GC + JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz); JL_DLLEXPORT void *jl_gc_managed_realloc(void *d, size_t sz, size_t oldsz, int isaligned, jl_value_t *owner); diff --git a/src/julia_internal.h b/src/julia_internal.h index 5b60be740bfb8..b921c63444e86 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -326,13 
+326,15 @@ extern jl_array_t *jl_all_methods JL_GLOBALLY_ROOTED; JL_DLLEXPORT extern int jl_lineno; JL_DLLEXPORT extern const char *jl_filename; +void enable_collection(void); +void disable_collection(void); jl_value_t *jl_gc_pool_alloc_noinline(jl_ptls_t ptls, int pool_offset, int osize); jl_value_t *jl_gc_big_alloc_noinline(jl_ptls_t ptls, size_t allocsz); -#ifdef MMTKHEAP +#ifdef MMTK_GC JL_DLLEXPORT jl_value_t *jl_mmtk_gc_alloc_default(jl_ptls_t ptls, int pool_offset, int osize, void* ty); JL_DLLEXPORT jl_value_t *jl_mmtk_gc_alloc_big(jl_ptls_t ptls, size_t allocsz); -#endif +#endif // MMTK_GC JL_DLLEXPORT int jl_gc_classify_pools(size_t sz, int *osize) JL_NOTSAFEPOINT; extern uv_mutex_t gc_perm_lock; void *jl_gc_perm_alloc_nolock(size_t sz, int zero, @@ -451,37 +453,50 @@ STATIC_INLINE uint8_t JL_CONST_FUNC jl_gc_szclass_align8(unsigned sz) JL_NOTSAFE #define GC_MAX_SZCLASS (2032-sizeof(void*)) static_assert(ARRAY_CACHE_ALIGN_THRESHOLD > GC_MAX_SZCLASS, ""); +#ifndef MMTK_GC STATIC_INLINE jl_value_t *jl_gc_alloc_(jl_ptls_t ptls, size_t sz, void *ty) { jl_value_t *v; const size_t allocsz = sz + sizeof(jl_taggedvalue_t); if (sz <= GC_MAX_SZCLASS) { -#ifndef MMTKHEAP int pool_id = jl_gc_szclass(allocsz); jl_gc_pool_t *p = &ptls->heap.norm_pools[pool_id]; int osize = jl_gc_sizeclasses[pool_id]; // We call `jl_gc_pool_alloc_noinline` instead of `jl_gc_pool_alloc` to avoid double-counting in // the Allocations Profiler. (See https://github.com/JuliaLang/julia/pull/43868 for more details.) 
v = jl_gc_pool_alloc_noinline(ptls, (char*)p - (char*)ptls, osize); -#else + } + else { + if (allocsz < sz) // overflow in adding offs, size was "negative" + jl_throw(jl_memory_exception); + v = jl_gc_big_alloc_noinline(ptls, allocsz); + } + jl_set_typeof(v, ty); + maybe_record_alloc_to_profile(v, sz, (jl_datatype_t*)ty); + return v; +} + +#else // MMTK_GC + +STATIC_INLINE jl_value_t *jl_gc_alloc_(jl_ptls_t ptls, size_t sz, void *ty) +{ + jl_value_t *v; + const size_t allocsz = sz + sizeof(jl_taggedvalue_t); + if (sz <= GC_MAX_SZCLASS) { int pool_id = jl_gc_szclass(allocsz); int osize = jl_gc_sizeclasses[pool_id]; v = jl_mmtk_gc_alloc_default(ptls, pool_id, osize, ty); -#endif } else { if (allocsz < sz) // overflow in adding offs, size was "negative" jl_throw(jl_memory_exception); -#ifndef MMTKHEAP - v = jl_gc_big_alloc_noinline(ptls, allocsz); -#else v = jl_mmtk_gc_alloc_big(ptls, allocsz); -#endif } jl_set_typeof(v, ty); maybe_record_alloc_to_profile(v, sz, (jl_datatype_t*)ty); return v; } +#endif // MMTK_GC /* Programming style note: When using jl_gc_alloc, do not JL_GC_PUSH it into a * gc frame, until it has been fully initialized. 
An uninitialized value in a @@ -576,24 +591,32 @@ void jl_gc_add_finalizer_(jl_ptls_t ptls, void *v, void *f) JL_NOTSAFEPOINT; void gc_setmark_buf(jl_ptls_t ptls, void *buf, uint8_t, size_t) JL_NOTSAFEPOINT; +#ifndef MMTK_GC STATIC_INLINE void jl_gc_wb_binding(jl_binding_t *bnd, void *val) JL_NOTSAFEPOINT // val isa jl_value_t* { -#ifndef MMTKHEAP jl_gc_wb(bnd, val); -#endif } STATIC_INLINE void jl_gc_wb_buf(void *parent, void *bufptr, size_t minsz) JL_NOTSAFEPOINT // parent isa jl_value_t* { -#ifndef MMTKHEAP // if parent is marked and buf is not if (__unlikely(jl_astaggedvalue(parent)->bits.gc & 1)) { jl_task_t *ct = jl_current_task; gc_setmark_buf(ct->ptls, bufptr, 3, minsz); } -#endif } +#else // MMTK_GC + +STATIC_INLINE void jl_gc_wb_binding(jl_binding_t *bnd, void *val) JL_NOTSAFEPOINT // val isa jl_value_t* +{ +} + +STATIC_INLINE void jl_gc_wb_buf(void *parent, void *bufptr, size_t minsz) JL_NOTSAFEPOINT // parent isa jl_value_t* +{ +} +#endif // MMTK_GC + void jl_gc_debug_print_status(void) JL_NOTSAFEPOINT; JL_DLLEXPORT void jl_gc_debug_critical_error(void) JL_NOTSAFEPOINT; void jl_print_gc_stats(JL_STREAM *s); diff --git a/src/julia_threads.h b/src/julia_threads.h index c15f19e78966f..17d9d0857dc39 100644 --- a/src/julia_threads.h +++ b/src/julia_threads.h @@ -4,7 +4,7 @@ #ifndef JL_THREADS_H #define JL_THREADS_H -#ifdef MMTKHEAP +#ifdef MMTK_GC #include "mmtkMutator.h" #endif @@ -281,7 +281,7 @@ typedef struct _jl_tls_states_t { uint64_t sleep_leave; ) -#ifdef MMTKHEAP +#ifdef MMTK_GC MMTkMutatorContext* mmtk_mutator_ptr; void* cursor; void* limit; diff --git a/src/llvm-final-gc-lowering.cpp b/src/llvm-final-gc-lowering.cpp index 3e2eb3bcdf6ed..5b8eeb49f60ad 100644 --- a/src/llvm-final-gc-lowering.cpp +++ b/src/llvm-final-gc-lowering.cpp @@ -224,12 +224,12 @@ Value *FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F) derefAttr = Attribute::getWithDereferenceableBytes(F.getContext(), sz + sizeof(void*)); } else { -#ifndef MMTKHEAP +#ifndef 
MMTK_GC auto pool_offs = ConstantInt::get(Type::getInt32Ty(F.getContext()), offset); auto pool_osize = ConstantInt::get(Type::getInt32Ty(F.getContext()), osize); newI = builder.CreateCall(poolAllocFunc, { ptls, pool_offs, pool_osize }); derefAttr = Attribute::getWithDereferenceableBytes(F.getContext(), osize); - #else +#else // MMTK_GC auto pool_osize_i32 = ConstantInt::get(Type::getInt32Ty(F.getContext()), osize); auto pool_osize = ConstantInt::get(Type::getInt64Ty(F.getContext()), osize); auto cursor_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()), offsetof(jl_tls_states_t, cursor)); @@ -295,7 +295,7 @@ Value *FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F) phiNode->takeName(target); return phiNode; -#endif +#endif // MMTK_GC } newI->setAttributes(newI->getCalledFunction()->getAttributes()); newI->addRetAttr(derefAttr); diff --git a/src/llvm-pass-helpers.cpp b/src/llvm-pass-helpers.cpp index c46228f13490b..ea390f01010fd 100644 --- a/src/llvm-pass-helpers.cpp +++ b/src/llvm-pass-helpers.cpp @@ -226,13 +226,8 @@ namespace jl_intrinsics { } namespace jl_well_known { -#ifndef MMTKHEAP static const char *GC_BIG_ALLOC_NAME = XSTR(jl_gc_big_alloc); static const char *GC_POOL_ALLOC_NAME = XSTR(jl_gc_pool_alloc); -#else - static const char *GC_BIG_ALLOC_NAME = XSTR(jl_mmtk_gc_alloc_big); - static const char *GC_POOL_ALLOC_NAME = XSTR(jl_mmtk_gc_alloc_default_llvm); -#endif static const char *GC_QUEUE_ROOT_NAME = XSTR(jl_gc_queue_root); using jl_intrinsics::addGCAllocAttributes; diff --git a/src/mmtk-gc.c b/src/mmtk-gc.c new file mode 100644 index 0000000000000..00cd54c9df920 --- /dev/null +++ b/src/mmtk-gc.c @@ -0,0 +1,487 @@ +// This file is a part of Julia. 
License is MIT: https://julialang.org/license + +#ifdef MMTK_GC + +#include "gc.h" +#include "mmtk_julia.h" +#include "julia_gcext.h" + +// callbacks +// --- + +typedef void (*jl_gc_cb_func_t)(void); + +JL_DLLEXPORT void jl_gc_set_cb_root_scanner(jl_gc_cb_root_scanner_t cb, int enable) +{ +} +JL_DLLEXPORT void jl_gc_set_cb_task_scanner(jl_gc_cb_task_scanner_t cb, int enable) +{ +} +JL_DLLEXPORT void jl_gc_set_cb_pre_gc(jl_gc_cb_pre_gc_t cb, int enable) +{ +} +JL_DLLEXPORT void jl_gc_set_cb_post_gc(jl_gc_cb_post_gc_t cb, int enable) +{ +} +JL_DLLEXPORT void jl_gc_set_cb_notify_external_alloc(jl_gc_cb_notify_external_alloc_t cb, int enable) +{ +} +JL_DLLEXPORT void jl_gc_set_cb_notify_external_free(jl_gc_cb_notify_external_free_t cb, int enable) +{ +} + + +inline void maybe_collect(jl_ptls_t ptls) +{ + mmtk_gc_poll(ptls); +} + + +// malloc wrappers, aligned allocation +// --- + +inline void *jl_malloc_aligned(size_t sz, size_t align) +{ + return mmtk_malloc_aligned(sz ? sz : 1, align); // XXX sz +} +inline void *jl_realloc_aligned(void *d, size_t sz, size_t oldsz, + size_t align) +{ + void *res = jl_malloc_aligned(sz, align); + if (res != NULL) { + memcpy(res, d, oldsz > sz ? sz : oldsz); + mmtk_free_aligned(d); + } + return res; +} +inline void jl_free_aligned(void *p) JL_NOTSAFEPOINT +{ + mmtk_free_aligned(p); +} + + +// finalizers +// --- + +JL_DLLEXPORT void jl_gc_run_pending_finalizers(jl_task_t *ct) +{ + if (ct == NULL) + ct = jl_current_task; + mmtk_jl_run_pending_finalizers(ct->ptls); +} + +JL_DLLEXPORT void jl_gc_add_ptr_finalizer(jl_ptls_t ptls, jl_value_t *v, void *f) JL_NOTSAFEPOINT +{ + register_finalizer(v, f, 1); +} + +// schedule f(v) to call at the next quiescent interval (aka after the next safepoint/region on all threads) +JL_DLLEXPORT void jl_gc_add_quiescent(jl_ptls_t ptls, void **v, void *f) JL_NOTSAFEPOINT +{ + /* TODO: unsupported? 
*/ +} + +JL_DLLEXPORT void jl_gc_add_finalizer_th(jl_ptls_t ptls, jl_value_t *v, jl_function_t *f) JL_NOTSAFEPOINT +{ + if (__unlikely(jl_typeis(f, jl_voidpointer_type))) { + jl_gc_add_ptr_finalizer(ptls, v, jl_unbox_voidpointer(f)); + } + else { + register_finalizer(v, f, 0); + } +} + +JL_DLLEXPORT void jl_finalize_th(jl_task_t *ct, jl_value_t *o) +{ + run_finalizers_for_obj(o); +} + +void jl_gc_run_all_finalizers(jl_task_t *ct) +{ + mmtk_jl_gc_run_all_finalizers(); +} + +void jl_gc_add_finalizer_(jl_ptls_t ptls, void *v, void *f) JL_NOTSAFEPOINT +{ + register_finalizer(v, f, 0); +} + + +// weak references +// --- +JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref_th(jl_ptls_t ptls, jl_value_t *value) +{ + jl_weakref_t *wr = (jl_weakref_t*)jl_gc_alloc(ptls, sizeof(void*), jl_weakref_type); + wr->value = value; // NOTE: wb not needed here + mmtk_add_weak_candidate(wr); + return wr; +} + + +// big values +// --- + +// Size includes the tag and the tag is not cleared!! +inline jl_value_t *jl_gc_big_alloc_inner(jl_ptls_t ptls, size_t sz) +{ + // TODO: assertion needed here? + assert(jl_atomic_load_relaxed(&ptls->gc_state) == 0); + // TODO: drop this okay? + // maybe_collect(ptls); + + jl_value_t *v = jl_mmtk_gc_alloc_big(ptls, sz); + // TODO: this is done (without atomic operations) in jl_mmtk_gc_alloc_big; enable + // here when that's edited? + /* + jl_atomic_store_relaxed(&ptls->gc_num.allocd, + jl_atomic_load_relaxed(&ptls->gc_num.allocd) + allocsz); + jl_atomic_store_relaxed(&ptls->gc_num.bigalloc, + jl_atomic_load_relaxed(&ptls->gc_num.bigalloc) + 1); + */ + // TODO: move to jl_mmtk_gc_alloc_big if needed. +/* +#ifdef MEMDEBUG + memset(v, 0xee, allocsz); +#endif +*/ + // TODO: need to set this? have to move to jl_mmtk_gc_alloc_big then. + // v->age = 0; + // TODO: dropping this; confirm okay? `sweep_big` no longer needed? + // gc_big_object_link(v, &ptls->heap.big_objects); + return v; +} + +// Size includes the tag and the tag is not cleared!! 
+inline jl_value_t *jl_gc_pool_alloc_inner(jl_ptls_t ptls, int pool_offset, int osize) +{ + assert(jl_atomic_load_relaxed(&ptls->gc_state) == 0); +#ifdef MEMDEBUG + return jl_gc_big_alloc(ptls, osize); +#endif + // TODO: drop this okay? + // maybe_collect(ptls); + + jl_value_t *v = jl_mmtk_gc_alloc_default(ptls, pool_offset, osize, NULL); + // TODO: this is done (without atomic operations) in jl_mmtk_gc_alloc_default; enable + // here when that's edited? + /* + jl_atomic_store_relaxed(&ptls->gc_num.allocd, + jl_atomic_load_relaxed(&ptls->gc_num.allocd) + osize); + jl_atomic_store_relaxed(&ptls->gc_num.poolalloc, + jl_atomic_load_relaxed(&ptls->gc_num.poolalloc) + 1); + */ + return v; +} + +void jl_gc_free_array(jl_array_t *a) JL_NOTSAFEPOINT +{ + if (a->flags.how == 2) { + char *d = (char*)a->data - a->offset*a->elsize; + if (a->flags.isaligned) + mmtk_free_aligned(d); + else + mmtk_free(d); + gc_num.freed += jl_array_nbytes(a); + gc_num.freecall++; + } +} + + +// roots +// --- + +JL_DLLEXPORT void jl_gc_queue_root(const jl_value_t *ptr) +{ + /* TODO: not needed? */ +} + +// TODO: exported, but not MMTk-specific? +JL_DLLEXPORT void jl_gc_queue_multiroot(const jl_value_t *parent, const jl_value_t *ptr) JL_NOTSAFEPOINT +{ + /* TODO: confirm not needed? 
*/ +} + + +// marking +// --- + +JL_DLLEXPORT int jl_gc_mark_queue_obj(jl_ptls_t ptls, jl_value_t *obj) +{ + return 0; +} +JL_DLLEXPORT void jl_gc_mark_queue_objarray(jl_ptls_t ptls, jl_value_t *parent, + jl_value_t **objs, size_t nobjs) +{ +} + + +// GC control +// --- + +JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection) +{ + jl_task_t *ct = jl_current_task; + jl_ptls_t ptls = ct->ptls; + if (jl_atomic_load_relaxed(&jl_gc_disable_counter)) { + size_t localbytes = jl_atomic_load_relaxed(&ptls->gc_num.allocd) + gc_num.interval; + jl_atomic_store_relaxed(&ptls->gc_num.allocd, -(int64_t)gc_num.interval); + static_assert(sizeof(_Atomic(uint64_t)) == sizeof(gc_num.deferred_alloc), ""); + jl_atomic_fetch_add((_Atomic(uint64_t)*)&gc_num.deferred_alloc, localbytes); + return; + } + handle_user_collection_request(ptls); +} + +// Per-thread initialization +// TODO: remove `norm_pools`, `weak_refs`, etc. from `heap`? +// TODO: remove `gc_cache`? +void jl_init_thread_heap(jl_ptls_t ptls) +{ + jl_thread_heap_t *heap = &ptls->heap; + jl_gc_pool_t *p = heap->norm_pools; + for (int i = 0; i < JL_GC_N_POOLS; i++) { + p[i].osize = jl_gc_sizeclasses[i]; + p[i].freelist = NULL; + p[i].newpages = NULL; + } + arraylist_new(&heap->weak_refs, 0); + arraylist_new(&heap->live_tasks, 0); + heap->mallocarrays = NULL; + heap->mafreelist = NULL; + heap->big_objects = NULL; + heap->remset = &heap->_remset[0]; + heap->last_remset = &heap->_remset[1]; + arraylist_new(heap->remset, 0); + arraylist_new(heap->last_remset, 0); + arraylist_new(&ptls->finalizers, 0); + arraylist_new(&ptls->sweep_objs, 0); + + jl_gc_mark_cache_t *gc_cache = &ptls->gc_cache; + gc_cache->perm_scanned_bytes = 0; + gc_cache->scanned_bytes = 0; + gc_cache->nbig_obj = 0; + + memset(&ptls->gc_num, 0, sizeof(ptls->gc_num)); + jl_atomic_store_relaxed(&ptls->gc_num.allocd, -(int64_t)gc_num.interval); + + MMTk_Mutator mmtk_mutator = bind_mutator((void *)ptls, ptls->tid); + ptls->mmtk_mutator_ptr = 
((MMTkMutatorContext*)mmtk_mutator); +} + +// System-wide initialization +// TODO: remove locks? remove anything else? +void jl_gc_init(void) +{ + if (jl_options.heap_size_hint) + jl_gc_set_max_memory(jl_options.heap_size_hint); + + JL_MUTEX_INIT(&heapsnapshot_lock); + uv_mutex_init(&gc_perm_lock); + + gc_num.interval = default_collect_interval; + last_long_collect_interval = default_collect_interval; + gc_num.allocd = 0; + gc_num.max_pause = 0; + gc_num.max_memory = 0; + +#ifdef _P64 + total_mem = uv_get_total_memory(); + uint64_t constrained_mem = uv_get_constrained_memory(); + if (constrained_mem > 0 && constrained_mem < total_mem) + total_mem = constrained_mem; +#endif + + // We allocate with abandon until we get close to the free memory on the machine. + uint64_t free_mem = uv_get_available_memory(); + uint64_t high_water_mark = free_mem / 10 * 7; // 70% high water mark + + if (high_water_mark < max_total_memory) + max_total_memory = high_water_mark; + + // MMTk-specific + long long min_heap_size; + long long max_heap_size; + char* min_size_def = getenv("MMTK_MIN_HSIZE"); + char* min_size_gb = getenv("MMTK_MIN_HSIZE_G"); + + char* max_size_def = getenv("MMTK_MAX_HSIZE"); + char* max_size_gb = getenv("MMTK_MAX_HSIZE_G"); + + // default min heap currently set as Julia's default_collect_interval + if (min_size_def != NULL) { + char *p; + double min_size = strtod(min_size_def, &p); + min_heap_size = (long) 1024 * 1024 * min_size; + } else if (min_size_gb != NULL) { + char *p; + double min_size = strtod(min_size_gb, &p); + min_heap_size = (long) 1024 * 1024 * 1024 * min_size; + } else { + min_heap_size = default_collect_interval; + } + + // default max heap currently set as 70% the free memory in the system + if (max_size_def != NULL) { + char *p; + double max_size = strtod(max_size_def, &p); + max_heap_size = (long) 1024 * 1024 * max_size; + } else if (max_size_gb != NULL) { + char *p; + double max_size = strtod(max_size_gb, &p); + max_heap_size = (long) 1024 * 
1024 * 1024 * max_size; + } else { + max_heap_size = uv_get_free_memory() * 70 / 100; + } + + // if only max size is specified initialize MMTk with a fixed size heap + if (max_size_def != NULL || (max_size_gb != NULL && (min_size_def == NULL && min_size_gb == NULL))) { + gc_init(0, max_heap_size, &mmtk_upcalls, (sizeof(jl_taggedvalue_t))); + } else { + gc_init(min_heap_size, max_heap_size, &mmtk_upcalls, (sizeof(jl_taggedvalue_t))); + } +} + +// allocation wrappers that track allocation and let collection run + +JL_DLLEXPORT void *jl_gc_counted_malloc(size_t sz) +{ + jl_gcframe_t **pgcstack = jl_get_pgcstack(); + jl_task_t *ct = jl_current_task; + if (pgcstack && ct->world_age) { + jl_ptls_t ptls = ct->ptls; + maybe_collect(ptls); + jl_atomic_store_relaxed(&ptls->gc_num.allocd, + jl_atomic_load_relaxed(&ptls->gc_num.allocd) + sz); + jl_atomic_store_relaxed(&ptls->gc_num.malloc, + jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1); + return mmtk_counted_malloc(sz); + } + return malloc(sz); +} + +JL_DLLEXPORT void *jl_gc_counted_calloc(size_t nm, size_t sz) +{ + jl_gcframe_t **pgcstack = jl_get_pgcstack(); + jl_task_t *ct = jl_current_task; + if (pgcstack && ct->world_age) { + jl_ptls_t ptls = ct->ptls; + maybe_collect(ptls); + jl_atomic_store_relaxed(&ptls->gc_num.allocd, + jl_atomic_load_relaxed(&ptls->gc_num.allocd) + nm*sz); + jl_atomic_store_relaxed(&ptls->gc_num.malloc, + jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1); + return mmtk_counted_calloc(nm, sz); + } + return calloc(nm, sz); +} + +JL_DLLEXPORT void jl_gc_counted_free_with_size(void *p, size_t sz) +{ + jl_gcframe_t **pgcstack = jl_get_pgcstack(); + jl_task_t *ct = jl_current_task; + if (pgcstack && ct->world_age) { + jl_ptls_t ptls = ct->ptls; + jl_atomic_store_relaxed(&ptls->gc_num.freed, + jl_atomic_load_relaxed(&ptls->gc_num.freed) + sz); + jl_atomic_store_relaxed(&ptls->gc_num.freecall, + jl_atomic_load_relaxed(&ptls->gc_num.freecall) + 1); + mmtk_free_with_size(p, sz); + return; + } + free(p); 
+} + +JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size_t sz) +{ + jl_gcframe_t **pgcstack = jl_get_pgcstack(); + jl_task_t *ct = jl_current_task; + if (pgcstack && ct->world_age) { + jl_ptls_t ptls = ct->ptls; + maybe_collect(ptls); + if (sz < old) + jl_atomic_store_relaxed(&ptls->gc_num.freed, + jl_atomic_load_relaxed(&ptls->gc_num.freed) + (old - sz)); + else + jl_atomic_store_relaxed(&ptls->gc_num.allocd, + jl_atomic_load_relaxed(&ptls->gc_num.allocd) + (sz - old)); + jl_atomic_store_relaxed(&ptls->gc_num.realloc, + jl_atomic_load_relaxed(&ptls->gc_num.realloc) + 1); + return mmtk_realloc_with_old_size(p, sz, old); + } + // TODO: correct? + return realloc(p, sz); +} + +jl_value_t *jl_gc_realloc_string(jl_value_t *s, size_t sz) +{ + size_t len = jl_string_len(s); + jl_value_t *snew = jl_alloc_string(sz); + memcpy(jl_string_data(snew), jl_string_data(s), sz <= len ? sz : len); + return snew; +} + +JL_DLLEXPORT int jl_gc_enable_conservative_gc_support(void) +{ + return 0; +} + +JL_DLLEXPORT int jl_gc_conservative_gc_support_enabled(void) +{ + return 0; +} + +// TODO: if this is needed, it can be added in MMTk +JL_DLLEXPORT jl_value_t *jl_gc_internal_obj_base_ptr(void *p) +{ + return NULL; +} + + +// gc-debug functions +// --- + +jl_gc_pagemeta_t *jl_gc_page_metadata(void *data) +{ + return NULL; +} + +JL_DLLEXPORT jl_taggedvalue_t *jl_gc_find_taggedvalue_pool(char *p, size_t *osize_p) +{ + return NULL; +} + +void jl_gc_debug_critical_error(void) JL_NOTSAFEPOINT +{ +} + +void jl_gc_debug_print_status(void) JL_NOTSAFEPOINT +{ + // May not be accurate but should be helpful enough + uint64_t pool_count = gc_num.poolalloc; + uint64_t big_count = gc_num.bigalloc; + jl_safe_printf("Allocations: %" PRIu64 " " + "(Pool: %" PRIu64 "; Big: %" PRIu64 "); GC: %d\n", + pool_count + big_count, pool_count, big_count, gc_num.pause); +} + +void jl_print_gc_stats(JL_STREAM *s) +{ +} + +void objprofile_count(void *ty, int old, int sz) JL_NOTSAFEPOINT +{ 
+} + +void objprofile_printall(void) +{ +} + +void objprofile_reset(void) +{ +} + +#ifdef __cplusplus +} +#endif + +#endif // MMTK_GC diff --git a/src/threading.c b/src/threading.c index 52b3fc2d8c06d..bc31eb1e46bb6 100644 --- a/src/threading.c +++ b/src/threading.c @@ -345,12 +345,12 @@ jl_ptls_t jl_init_threadtls(int16_t tid) #endif ptls->system_id = (jl_thread_t)(uintptr_t)uv_thread_self(); ptls->rngseed = jl_rand(); - if (tid == 0) + if (tid == 0) { ptls->disable_gc = 1; -#ifdef MMTKHEAP - if (tid == 0) +#ifdef MMTK_GC disable_collection(); #endif + } #ifdef _OS_WINDOWS_ if (tid == 0) { if (!DuplicateHandle(GetCurrentProcess(), GetCurrentThread(),