diff --git a/Compiler/src/optimize.jl b/Compiler/src/optimize.jl
index 3532b2043d76f..da4a17c5d6913 100644
--- a/Compiler/src/optimize.jl
+++ b/Compiler/src/optimize.jl
@@ -113,7 +113,9 @@ set_inlineable!(src::CodeInfo, val::Bool) =
 function inline_cost_clamp(x::Int)
     x > MAX_INLINE_COST && return MAX_INLINE_COST
     x < MIN_INLINE_COST && return MIN_INLINE_COST
-    return convert(InlineCostType, x)
+    x = ccall(:jl_encode_inlining_cost, UInt8, (InlineCostType,), x)
+    x = ccall(:jl_decode_inlining_cost, InlineCostType, (UInt8,), x)
+    return x
 end
 
 const SRC_FLAG_DECLARED_INLINE = 0x1
diff --git a/src/codegen.cpp b/src/codegen.cpp
index a736449813608..2a1f044345d08 100644
--- a/src/codegen.cpp
+++ b/src/codegen.cpp
@@ -9900,7 +9900,7 @@ void emit_always_inline(orc::ThreadSafeModule &result_m, jl_codegen_params_t &pa
             src = (jl_code_info_t*)jl_atomic_load_relaxed(&codeinst->inferred);
             jl_method_instance_t *mi = jl_get_ci_mi(codeinst);
             jl_method_t *def = mi->def.method;
-            if (src && (jl_value_t*)src != jl_nothing && jl_is_method(def) && jl_ir_inlining_cost((jl_value_t*)src) < UINT16_MAX)
+            if (src && jl_is_string((jl_value_t*)src) && jl_is_method(def) && jl_ir_inlining_cost((jl_value_t*)src) < UINT16_MAX)
                 src = jl_uncompress_ir(def, codeinst, (jl_value_t*)src);
             if (src && jl_is_code_info(src) && jl_ir_inlining_cost((jl_value_t*)src) < UINT16_MAX) {
                 jl_llvm_functions_t decls = jl_emit_codeinst(result_m, codeinst, src, params); // contains safepoints
diff --git a/src/gc-stock.h b/src/gc-stock.h
index d478ee1366da0..8e27893697f68 100644
--- a/src/gc-stock.h
+++ b/src/gc-stock.h
@@ -365,15 +365,6 @@ STATIC_INLINE jl_gc_pagemeta_t *pop_page_metadata_back(jl_gc_pagemeta_t **ppg) J
     return v;
 }
 
-#ifdef __clang_gcanalyzer__ /* clang may not have __builtin_ffs */
-unsigned ffs_u32(uint32_t bitvec) JL_NOTSAFEPOINT;
-#else
-STATIC_INLINE unsigned ffs_u32(uint32_t bitvec)
-{
-    return __builtin_ffs(bitvec) - 1;
-}
-#endif
-
 extern bigval_t *oldest_generation_of_bigvals;
 extern int64_t buffered_pages;
 extern int gc_first_tid;
diff --git a/src/gf.c b/src/gf.c
index 8205cf70b99c3..8ca5761ce56ff 100644
--- a/src/gf.c
+++ b/src/gf.c
@@ -357,7 +357,7 @@ static int emit_codeinst_and_edges(jl_code_instance_t *codeinst)
             JL_GC_PUSH1(&code);
             jl_method_instance_t *mi = jl_get_ci_mi(codeinst);
             jl_method_t *def = mi->def.method;
-            if (jl_is_string(code) && jl_is_method(def))
+            if (jl_is_method(def))
                 code = (jl_value_t*)jl_uncompress_ir(def, codeinst, (jl_value_t*)code);
             if (jl_is_code_info(code)) {
                 jl_emit_codeinst_to_jit(codeinst, (jl_code_info_t*)code);
diff --git a/src/ircode.c b/src/ircode.c
index 9a94c4c62431a..65130e46edfe0 100644
--- a/src/ircode.c
+++ b/src/ircode.c
@@ -989,7 +989,7 @@ static int codelocs_nstmts(jl_string_t *cl) JL_NOTSAFEPOINT
 
 #define IR_DATASIZE_FLAGS sizeof(uint16_t)
 #define IR_DATASIZE_PURITY sizeof(uint16_t)
-#define IR_DATASIZE_INLINING_COST sizeof(uint16_t)
+#define IR_DATASIZE_INLINING_COST sizeof(uint8_t)
 #define IR_DATASIZE_NSLOTS sizeof(int32_t)
 typedef enum {
     ir_offset_flags = 0,
@@ -1044,7 +1044,7 @@ JL_DLLEXPORT jl_string_t *jl_compress_ir(jl_method_t *m, jl_code_info_t *code)
                                           code->ssaflags);
     write_uint16(s.s, checked_size(flags.packed, IR_DATASIZE_FLAGS));
     write_uint16(s.s, checked_size(code->purity.bits, IR_DATASIZE_PURITY));
-    write_uint16(s.s, checked_size(code->inlining_cost, IR_DATASIZE_INLINING_COST));
+    write_uint8(s.s, checked_size(jl_encode_inlining_cost(code->inlining_cost), IR_DATASIZE_INLINING_COST));
 
     size_t nslots = jl_array_nrows(code->slotflags);
     assert(nslots >= m->nargs && nslots < INT32_MAX); // required by generated functions
@@ -1109,6 +1109,8 @@ JL_DLLEXPORT jl_code_info_t *jl_uncompress_ir(jl_method_t *m, jl_code_instance_t
 {
     if (jl_is_code_info(data))
         return (jl_code_info_t*)data;
+    if (!jl_is_string(data))
+        return (jl_code_info_t*)jl_nothing;
     JL_TIMING(AST_UNCOMPRESS, AST_UNCOMPRESS);
     JL_LOCK(&m->writelock); // protect the roots array (Might GC)
     assert(jl_is_method(m));
@@ -1139,7 +1141,7 @@ JL_DLLEXPORT jl_code_info_t *jl_uncompress_ir(jl_method_t *m, jl_code_instance_t
     code->nospecializeinfer = flags.bits.nospecializeinfer;
     code->isva = flags.bits.isva;
     code->purity.bits = read_uint16(s.s);
-    code->inlining_cost = read_uint16(s.s);
+    code->inlining_cost = jl_decode_inlining_cost(read_uint8(s.s));
 
     size_t nslots = read_int32(s.s);
     code->slotflags = jl_alloc_array_1d(jl_array_uint8_type, nslots);
@@ -1240,12 +1242,46 @@ JL_DLLEXPORT uint8_t jl_ir_flag_has_image_globalref(jl_string_t *data)
     return flags.bits.has_image_globalref;
 }
 
-JL_DLLEXPORT uint16_t jl_ir_inlining_cost(jl_string_t *data)
+// create a compressed u16 value with range 0..3968, 3 bits exponent, 5 bits mantissa, implicit first digit, rounding up, full accuracy over 0..63
+JL_DLLEXPORT uint8_t jl_encode_inlining_cost(uint16_t inlining_cost)
 {
+    unsigned shift = 0;
+    unsigned mantissa;
+    if (inlining_cost <= 0x1f) {
+        mantissa = inlining_cost;
+    }
+    else {
+        while (inlining_cost >> 5 >> shift != 0)
+            shift++;
+        assert(1 <= shift && shift <= 11);
+        mantissa = (inlining_cost >> (shift - 1)) & 0x1f;
+        mantissa += (inlining_cost & ((1 << (shift - 1)) - 1)) != 0; // round up if trailing bits non-zero, overflowing into exp
+    }
+    unsigned r = (shift << 5) + mantissa;
+    if (r > 0xff)
+        r = 0xff;
+    return r;
+}
+
+JL_DLLEXPORT uint16_t jl_decode_inlining_cost(uint8_t inlining_cost)
+{
+    unsigned shift = inlining_cost >> 5;
+    if (inlining_cost == 0xff)
+        return 0xffff;
+    else if (shift == 0)
+        return inlining_cost;
+    else
+        return (0x20 | (inlining_cost & 0x1f)) << (shift - 1);
+}
+
+JL_DLLEXPORT uint16_t jl_ir_inlining_cost(jl_value_t *data)
+{
+    if (jl_is_uint8(data))
+        return jl_decode_inlining_cost(*(uint8_t*)data);
     if (jl_is_code_info(data))
         return ((jl_code_info_t*)data)->inlining_cost;
     assert(jl_is_string(data));
-    uint16_t res = jl_load_unaligned_i16(jl_string_data(data) + ir_offset_inlining_cost);
+    uint16_t res = jl_decode_inlining_cost(*(uint8_t*)(jl_string_data(data) + ir_offset_inlining_cost));
     return res;
 }
 
diff --git a/src/julia.h b/src/julia.h
index 6c1c8af0a788b..12389dd13a9ff 100644
--- a/src/julia.h
+++ b/src/julia.h
@@ -438,10 +438,11 @@ typedef struct _jl_code_instance_t {
     jl_value_t *rettype_const; // inferred constant return value, or null
 
     // Inferred result. When part of the runtime cache, either
-    // - A jl_code_info_t (may be compressed) containing the inferred IR
+    // - A jl_code_info_t (may be compressed as a String) containing the inferred IR
     // - jl_nothing, indicating that inference was completed, but the result was
     // deleted to save space.
-    // - null, indicating that inference was not yet completed or did not succeed
+    // - UInt8, indicating that inference recorded the estimated inlining cost, but deleted the result to save space
+    // - NULL, indicating that inference was not yet completed or did not succeed
     _Atomic(jl_value_t *) inferred;
     _Atomic(jl_debuginfo_t *) debuginfo; // stored information about edges from this object (set once, with a happens-before both source and invoke)
     _Atomic(jl_svec_t *) edges; // forward edge info
@@ -2310,6 +2311,8 @@ JL_DLLEXPORT jl_value_t *jl_uncompress_argname_n(jl_value_t *syms, size_t i);
 JL_DLLEXPORT struct jl_codeloc_t jl_uncompress1_codeloc(jl_value_t *cl, size_t pc) JL_NOTSAFEPOINT;
 JL_DLLEXPORT jl_value_t *jl_compress_codelocs(int32_t firstline, jl_value_t *codelocs, size_t nstmts);
 JL_DLLEXPORT jl_value_t *jl_uncompress_codelocs(jl_value_t *cl, size_t nstmts);
+JL_DLLEXPORT uint8_t jl_encode_inlining_cost(uint16_t inlining_cost) JL_NOTSAFEPOINT;
+JL_DLLEXPORT uint16_t jl_decode_inlining_cost(uint8_t inlining_cost) JL_NOTSAFEPOINT;
 
 JL_DLLEXPORT int jl_is_operator(const char *sym);
 JL_DLLEXPORT int jl_is_unary_operator(const char *sym);
diff --git a/src/precompile_utils.c b/src/precompile_utils.c
index 86bb723443925..491f111ac4746 100644
--- a/src/precompile_utils.c
+++ b/src/precompile_utils.c
@@ -208,7 +208,7 @@ static int precompile_enq_specialization_(jl_method_instance_t *mi, void *closur
             jl_value_t *inferred = jl_atomic_load_relaxed(&codeinst->inferred);
             if (inferred &&
                 (jl_options.compile_enabled == JL_OPTIONS_COMPILE_ALL || inferred == jl_nothing ||
-                 ((jl_is_string(inferred) || jl_is_code_info(inferred)) && jl_ir_inlining_cost(inferred) == UINT16_MAX))) {
+                 ((jl_is_string(inferred) || jl_is_code_info(inferred) || jl_is_uint8(inferred)) && jl_ir_inlining_cost(inferred) == UINT16_MAX))) {
                 do_compile = 1;
             }
             else if (jl_atomic_load_relaxed(&codeinst->invoke) != NULL || jl_atomic_load_relaxed(&codeinst->precompile)) {
diff --git a/src/staticdata.c b/src/staticdata.c
index 92e7f494ad35d..6038b43f1c75d 100644
--- a/src/staticdata.c
+++ b/src/staticdata.c
@@ -901,30 +901,36 @@ static void jl_insert_into_serialization_queue(jl_serializer_state *s, jl_value_
                 }
             }
             jl_value_t *inferred = jl_atomic_load_relaxed(&ci->inferred);
-            if (inferred && inferred != jl_nothing) { // disregard if there is nothing here to delete (e.g. builtins, unspecialized)
+            if (inferred && inferred != jl_nothing && !jl_is_uint8(inferred)) { // disregard if there is nothing here to delete (e.g. builtins, unspecialized)
                 jl_method_t *def = mi->def.method;
                 if (jl_is_method(def)) { // don't delete toplevel code
                     int is_relocatable = !s->incremental || jl_is_code_info(inferred) ||
                         (jl_is_string(inferred) && jl_string_len(inferred) > 0 && jl_string_data(inferred)[jl_string_len(inferred) - 1]);
+                    int discard = 0;
                     if (!is_relocatable) {
-                        inferred = jl_nothing;
+                        discard = 1;
                     }
                     else if (def->source == NULL) {
                         // don't delete code from optimized opaque closures that can't be reconstructed (and builtins)
                     }
                     else if (jl_atomic_load_relaxed(&ci->max_world) != ~(size_t)0 || // delete all code that cannot run
                              jl_atomic_load_relaxed(&ci->invoke) == jl_fptr_const_return) { // delete all code that just returns a constant
-                        inferred = jl_nothing;
+                        discard = 1;
                     }
                     else if (native_functions && // don't delete any code if making a ji file
                              (ci->owner == jl_nothing) && // don't delete code for external interpreters
                              !effects_foldable(jl_atomic_load_relaxed(&ci->ipo_purity_bits)) && // don't delete code we may want for irinterp
                              jl_ir_inlining_cost(inferred) == UINT16_MAX) { // don't delete inlineable code
                         // delete the code now: if we thought it was worth keeping, it would have been converted to object code
-                        inferred = jl_nothing;
+                        discard = 1;
                     }
-                    if (inferred == jl_nothing) {
-                        record_field_change((jl_value_t**)&ci->inferred, jl_nothing);
+                    if (discard) {
+                        // keep only the inlining cost, so inference can later decide if it is worth getting the source back
+                        if (jl_is_string(inferred) || jl_is_code_info(inferred))
+                            inferred = jl_box_uint8(jl_encode_inlining_cost(jl_ir_inlining_cost(inferred)));
+                        else
+                            inferred = jl_nothing;
+                        record_field_change((jl_value_t**)&ci->inferred, inferred);
                     }
                     else if (s->incremental && jl_is_string(inferred)) {
                         // New roots for external methods
@@ -2687,7 +2693,7 @@ static void strip_specializations_(jl_method_instance_t *mi)
     jl_code_instance_t *codeinst = jl_atomic_load_relaxed(&mi->cache);
     while (codeinst) {
         jl_value_t *inferred = jl_atomic_load_relaxed(&codeinst->inferred);
-        if (inferred && inferred != jl_nothing) {
+        if (inferred && inferred != jl_nothing && !jl_is_uint8(inferred)) {
             if (jl_options.strip_ir) {
                 record_field_change((jl_value_t**)&codeinst->inferred, jl_nothing);
             }