Skip to content

Commit

Permalink
staticdata: Some refactors for clarity
Browse files Browse the repository at this point in the history
I was reading this code as part of looking into #52797. To recap, when we serialize
objects for package images, we classify each method as either internal or external,
depending on whether the method extends a function in the ambient set of modules (
system image, other previously loaded package images, etc) or whether it is private
to the module we're currently serializing (in which case we call it internal).

One of my primary confusions in reading this code was that the `new_specializations`
object holds only external code instances in most of the code, but towards the end
of loading, we used to also add any internal code instances that had dependencies on
other external method instances in order to share the code instance validation code
between the two cases.

I don't think this design is that great. Conceptually, internal CodeInstances are
already in the CI cache at the point that validation happens (their max_world is just
set to 1, so they're not eligible to run). We do guard the cache insertion by a check
whether a code instance already exists, which also catches this case, but I think
it's cleaner to just not add any internal code instances to the list and instead
simply re-activate the code instance in situ.

Another issue with the old code that I didn't observe, but I think is possible in theory
is that any CodeInstances that are not part of the cache hierarchy (e.g. because
they were created by external abstract interpreters) should not accidentally get added
to the cache hierarchy by this code path. I think this was possible in the old code,
but I didn't observe it.

With this change, `new_specializations` now always only holds external cis, so rename
it appropriately. While we're at it, also do some other clarity changes.
  • Loading branch information
Keno committed Jan 10, 2024
1 parent 2afc20c commit d59901b
Show file tree
Hide file tree
Showing 4 changed files with 97 additions and 81 deletions.
6 changes: 3 additions & 3 deletions src/precompile_utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -279,7 +279,7 @@ static void *jl_precompile(int all)
return native_code;
}

static void *jl_precompile_worklist(jl_array_t *worklist, jl_array_t *extext_methods, jl_array_t *new_specializations)
static void *jl_precompile_worklist(jl_array_t *worklist, jl_array_t *extext_methods, jl_array_t *new_ext_cis)
{
if (!worklist)
return NULL;
Expand Down Expand Up @@ -310,9 +310,9 @@ static void *jl_precompile_worklist(jl_array_t *worklist, jl_array_t *extext_met
}
}
}
n = jl_array_nrows(new_specializations);
n = jl_array_nrows(new_ext_cis);
for (i = 0; i < n; i++) {
jl_code_instance_t *ci = (jl_code_instance_t*)jl_array_ptr_ref(new_specializations, i);
jl_code_instance_t *ci = (jl_code_instance_t*)jl_array_ptr_ref(new_ext_cis, i);
precompile_enq_specialization_(ci->def, m);
}
void *native_code = jl_precompile_(m, 1);
Expand Down
98 changes: 50 additions & 48 deletions src/staticdata.c
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,8 @@ External links:
#include "valgrind.h"
#include "julia_assert.h"

static const size_t WORLD_AGE_REVALIDATION_SENTINEL = 0x1;

#include "staticdata_utils.c"
#include "precompile_utils.c"

Expand Down Expand Up @@ -501,6 +503,8 @@ typedef struct {
jl_array_t *link_ids_gvars;
jl_array_t *link_ids_external_fnvars;
jl_ptls_t ptls;
// Set (implemented has a hasmap of MethodInstances to themselves) of which MethodInstances have (forward) edges
// to other MethodInstances.
htable_t callers_with_edges;
jl_image_t *image;
int8_t incremental;
Expand Down Expand Up @@ -1596,45 +1600,45 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED
}
else if (jl_is_code_instance(v)) {
assert(f == s->s);

// Handle the native-code pointers
assert(f == s->s);
jl_code_instance_t *m = (jl_code_instance_t*)v;
jl_code_instance_t *newm = (jl_code_instance_t*)&f->buf[reloc_offset];
jl_code_instance_t *ci = (jl_code_instance_t*)v;
jl_code_instance_t *newci = (jl_code_instance_t*)&f->buf[reloc_offset];

if (s->incremental) {
arraylist_push(&s->fixup_objs, (void*)reloc_offset);
if (m->min_world > 1)
newm->min_world = ~(size_t)0; // checks that we reprocess this upon deserialization
if (m->max_world != ~(size_t)0)
newm->max_world = 0;
if (ci->min_world > 1)
newci->min_world = ~(size_t)0; // checks that we reprocess this upon deserialization
if (ci->max_world != ~(size_t)0)
newci->max_world = 0;
else {
if (jl_atomic_load_relaxed(&m->inferred) && ptrhash_has(&s->callers_with_edges, m->def))
newm->max_world = 1; // sentinel value indicating this will need validation
if (m->min_world > 0 && jl_atomic_load_relaxed(&m->inferred) ) {
if (jl_atomic_load_relaxed(&ci->inferred) && ptrhash_has(&s->callers_with_edges, ci->def))
newci->max_world = WORLD_AGE_REVALIDATION_SENTINEL;
if (ci->min_world > 0 && jl_atomic_load_relaxed(&ci->inferred) ) {
// TODO: also check if this object is part of the codeinst cache
// will check on deserialize if this cache entry is still valid
}
}
}
jl_atomic_store_relaxed(&newm->invoke, NULL);
jl_atomic_store_relaxed(&newm->specsigflags, 0);
jl_atomic_store_relaxed(&newm->specptr.fptr, NULL);
jl_atomic_store_relaxed(&newci->invoke, NULL);
jl_atomic_store_relaxed(&newci->specsigflags, 0);
jl_atomic_store_relaxed(&newci->specptr.fptr, NULL);
int8_t fptr_id = JL_API_NULL;
int8_t builtin_id = 0;
if (jl_atomic_load_relaxed(&m->invoke) == jl_fptr_const_return) {
if (jl_atomic_load_relaxed(&ci->invoke) == jl_fptr_const_return) {
fptr_id = JL_API_CONST;
}
else {
if (jl_is_method(m->def->def.method)) {
builtin_id = jl_fptr_id(jl_atomic_load_relaxed(&m->specptr.fptr));
if (jl_is_method(ci->def->def.method)) {
builtin_id = jl_fptr_id(jl_atomic_load_relaxed(&ci->specptr.fptr));
if (builtin_id) { // found in the table of builtins
assert(builtin_id >= 2);
fptr_id = JL_API_BUILTIN;
}
else {
int32_t invokeptr_id = 0;
int32_t specfptr_id = 0;
jl_get_function_id(native_functions, m, &invokeptr_id, &specfptr_id); // see if we generated code for it
jl_get_function_id(native_functions, ci, &invokeptr_id, &specfptr_id); // see if we generated code for it
if (invokeptr_id) {
if (invokeptr_id == -1) {
fptr_id = JL_API_BOXED;
Expand Down Expand Up @@ -1666,7 +1670,7 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED
}
}
}
jl_atomic_store_relaxed(&newm->invoke, NULL); // relocation offset
jl_atomic_store_relaxed(&newci->invoke, NULL); // relocation offset
if (fptr_id != JL_API_NULL) {
assert(fptr_id < BuiltinFunctionTag && "too many functions to serialize");
arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_code_instance_t, invoke))); // relocation location
Expand Down Expand Up @@ -2514,7 +2518,7 @@ JL_DLLEXPORT jl_value_t *jl_as_global_root(jl_value_t *val, int insert)
}

static void jl_prepare_serialization_data(jl_array_t *mod_array, jl_array_t *newly_inferred, uint64_t worklist_key,
/* outputs */ jl_array_t **extext_methods, jl_array_t **new_specializations,
/* outputs */ jl_array_t **extext_methods, jl_array_t **new_ext_cis,
jl_array_t **method_roots_list, jl_array_t **ext_targets, jl_array_t **edges)
{
// extext_methods: [method1, ...], worklist-owned "extending external" methods added to functions owned by modules outside the worklist
Expand All @@ -2526,7 +2530,7 @@ static void jl_prepare_serialization_data(jl_array_t *mod_array, jl_array_t *new
assert(edges_map == NULL);

// Save the inferred code from newly inferred, external methods
*new_specializations = queue_external_cis(newly_inferred);
*new_ext_cis = queue_external_cis(newly_inferred);

// Collect method extensions and edges data
JL_GC_PUSH1(&edges_map);
Expand All @@ -2552,9 +2556,9 @@ static void jl_prepare_serialization_data(jl_array_t *mod_array, jl_array_t *new
*ext_targets = jl_alloc_vec_any(0);
*edges = jl_alloc_vec_any(0);
*method_roots_list = jl_alloc_vec_any(0);
// Collect the new method roots
jl_collect_new_roots(*method_roots_list, *new_specializations, worklist_key);
jl_collect_edges(*edges, *ext_targets, *new_specializations, world);
// Collect the new method roots for external specializations
jl_collect_new_roots(*method_roots_list, *new_ext_cis, worklist_key);
jl_collect_edges(*edges, *ext_targets, *new_ext_cis, world);
}
assert(edges_map == NULL); // jl_collect_edges clears this when done

Expand All @@ -2564,7 +2568,7 @@ static void jl_prepare_serialization_data(jl_array_t *mod_array, jl_array_t *new
// In addition to the system image (where `worklist = NULL`), this can also save incremental images with external linkage
static void jl_save_system_image_to_stream(ios_t *f, jl_array_t *mod_array,
jl_array_t *worklist, jl_array_t *extext_methods,
jl_array_t *new_specializations, jl_array_t *method_roots_list,
jl_array_t *new_ext_cis, jl_array_t *method_roots_list,
jl_array_t *ext_targets, jl_array_t *edges) JL_GC_DISABLED
{
htable_new(&field_replace, 0);
Expand Down Expand Up @@ -2680,7 +2684,7 @@ static void jl_save_system_image_to_stream(ios_t *f, jl_array_t *mod_array,
// Queue method extensions
jl_queue_for_serialization(&s, extext_methods);
// Queue the new specializations
jl_queue_for_serialization(&s, new_specializations);
jl_queue_for_serialization(&s, new_ext_cis);
// Queue the new roots
jl_queue_for_serialization(&s, method_roots_list);
// Queue the edges
Expand Down Expand Up @@ -2836,7 +2840,7 @@ static void jl_save_system_image_to_stream(ios_t *f, jl_array_t *mod_array,
}
jl_write_value(&s, jl_module_init_order);
jl_write_value(&s, extext_methods);
jl_write_value(&s, new_specializations);
jl_write_value(&s, new_ext_cis);
jl_write_value(&s, method_roots_list);
jl_write_value(&s, ext_targets);
jl_write_value(&s, edges);
Expand Down Expand Up @@ -2924,24 +2928,24 @@ JL_DLLEXPORT void jl_create_system_image(void **_native_data, jl_array_t *workli
ff = f;
}

jl_array_t *mod_array = NULL, *extext_methods = NULL, *new_specializations = NULL;
jl_array_t *mod_array = NULL, *extext_methods = NULL, *new_ext_cis = NULL;
jl_array_t *method_roots_list = NULL, *ext_targets = NULL, *edges = NULL;
int64_t checksumpos = 0;
int64_t checksumpos_ff = 0;
int64_t datastartpos = 0;
JL_GC_PUSH6(&mod_array, &extext_methods, &new_specializations, &method_roots_list, &ext_targets, &edges);
JL_GC_PUSH6(&mod_array, &extext_methods, &new_ext_cis, &method_roots_list, &ext_targets, &edges);

if (worklist) {
mod_array = jl_get_loaded_modules(); // __toplevel__ modules loaded in this session (from Base.loaded_modules_array)
// Generate _native_data`
if (_native_data != NULL) {
jl_prepare_serialization_data(mod_array, newly_inferred, jl_worklist_key(worklist),
&extext_methods, &new_specializations, NULL, NULL, NULL);
&extext_methods, &new_ext_cis, NULL, NULL, NULL);
jl_precompile_toplevel_module = (jl_module_t*)jl_array_ptr_ref(worklist, jl_array_len(worklist)-1);
*_native_data = jl_precompile_worklist(worklist, extext_methods, new_specializations);
*_native_data = jl_precompile_worklist(worklist, extext_methods, new_ext_cis);
jl_precompile_toplevel_module = NULL;
extext_methods = NULL;
new_specializations = NULL;
new_ext_cis = NULL;
}
jl_write_header_for_incremental(f, worklist, mod_array, udeps, srctextpos, &checksumpos);
if (emit_split) {
Expand All @@ -2964,7 +2968,7 @@ JL_DLLEXPORT void jl_create_system_image(void **_native_data, jl_array_t *workli
ct->reentrant_timing |= 0b1000;
if (worklist) {
jl_prepare_serialization_data(mod_array, newly_inferred, jl_worklist_key(worklist),
&extext_methods, &new_specializations, &method_roots_list, &ext_targets, &edges);
&extext_methods, &new_ext_cis, &method_roots_list, &ext_targets, &edges);
if (!emit_split) {
write_int32(f, 0); // No clone_targets
write_padding(f, LLT_ALIGN(ios_pos(f), JL_CACHE_BYTE_ALIGNMENT) - ios_pos(f));
Expand All @@ -2976,7 +2980,7 @@ JL_DLLEXPORT void jl_create_system_image(void **_native_data, jl_array_t *workli
}
if (_native_data != NULL)
native_functions = *_native_data;
jl_save_system_image_to_stream(ff, mod_array, worklist, extext_methods, new_specializations, method_roots_list, ext_targets, edges);
jl_save_system_image_to_stream(ff, mod_array, worklist, extext_methods, new_ext_cis, method_roots_list, ext_targets, edges);
if (_native_data != NULL)
native_functions = NULL;
// make sure we don't run any Julia code concurrently before this point
Expand Down Expand Up @@ -3058,7 +3062,7 @@ extern void export_jl_small_typeof(void);
static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl_array_t *depmods, uint64_t checksum,
/* outputs */ jl_array_t **restored, jl_array_t **init_order,
jl_array_t **extext_methods,
jl_array_t **new_specializations, jl_array_t **method_roots_list,
jl_array_t **new_ext_cis, jl_array_t **method_roots_list,
jl_array_t **ext_targets, jl_array_t **edges,
char **base, arraylist_t *ccallable_list, pkgcachesizes *cachesizes) JL_GC_DISABLED
{
Expand Down Expand Up @@ -3120,7 +3124,7 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl
ios_seek(f, LLT_ALIGN(ios_pos(f), 8));
assert(!ios_eof(f));
s.s = f;
uintptr_t offset_restored = 0, offset_init_order = 0, offset_extext_methods = 0, offset_new_specializations = 0, offset_method_roots_list = 0;
uintptr_t offset_restored = 0, offset_init_order = 0, offset_extext_methods = 0, offset_new_ext_cis = 0, offset_method_roots_list = 0;
uintptr_t offset_ext_targets = 0, offset_edges = 0;
if (!s.incremental) {
size_t i;
Expand Down Expand Up @@ -3152,7 +3156,7 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl
offset_restored = jl_read_offset(&s);
offset_init_order = jl_read_offset(&s);
offset_extext_methods = jl_read_offset(&s);
offset_new_specializations = jl_read_offset(&s);
offset_new_ext_cis = jl_read_offset(&s);
offset_method_roots_list = jl_read_offset(&s);
offset_ext_targets = jl_read_offset(&s);
offset_edges = jl_read_offset(&s);
Expand Down Expand Up @@ -3181,16 +3185,14 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl
uint32_t external_fns_begin = read_uint32(f);
jl_read_arraylist(s.s, ccallable_list ? ccallable_list : &s.ccallable_list);
if (s.incremental) {
assert(restored && init_order && extext_methods && new_specializations && method_roots_list && ext_targets && edges);
assert(restored && init_order && extext_methods && new_ext_cis && method_roots_list && ext_targets && edges);
*restored = (jl_array_t*)jl_delayed_reloc(&s, offset_restored);
*init_order = (jl_array_t*)jl_delayed_reloc(&s, offset_init_order);
*extext_methods = (jl_array_t*)jl_delayed_reloc(&s, offset_extext_methods);
*new_specializations = (jl_array_t*)jl_delayed_reloc(&s, offset_new_specializations);
*new_ext_cis = (jl_array_t*)jl_delayed_reloc(&s, offset_new_ext_cis);
*method_roots_list = (jl_array_t*)jl_delayed_reloc(&s, offset_method_roots_list);
*ext_targets = (jl_array_t*)jl_delayed_reloc(&s, offset_ext_targets);
*edges = (jl_array_t*)jl_delayed_reloc(&s, offset_edges);
if (!*new_specializations)
*new_specializations = jl_alloc_vec_any(0);
}
s.s = NULL;

Expand Down Expand Up @@ -3454,8 +3456,6 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl
jl_code_instance_t *ci = (jl_code_instance_t*)obj;
assert(s.incremental);
ci->min_world = world;
if (ci->max_world != 0)
jl_array_ptr_1d_push(*new_specializations, (jl_value_t*)ci);
}
else if (jl_is_globalref(obj)) {
continue; // wait until all the module binding tables have been initialized
Expand Down Expand Up @@ -3601,11 +3601,11 @@ static jl_value_t *jl_restore_package_image_from_stream(void* pkgimage_handle, i
assert(datastartpos > 0 && datastartpos < dataendpos);
needs_permalloc = jl_options.permalloc_pkgimg || needs_permalloc;
jl_value_t *restored = NULL;
jl_array_t *init_order = NULL, *extext_methods = NULL, *new_specializations = NULL, *method_roots_list = NULL, *ext_targets = NULL, *edges = NULL;
jl_array_t *init_order = NULL, *extext_methods = NULL, *new_ext_cis = NULL, *method_roots_list = NULL, *ext_targets = NULL, *edges = NULL;
jl_svec_t *cachesizes_sv = NULL;
char *base;
arraylist_t ccallable_list;
JL_GC_PUSH8(&restored, &init_order, &extext_methods, &new_specializations, &method_roots_list, &ext_targets, &edges, &cachesizes_sv);
JL_GC_PUSH8(&restored, &init_order, &extext_methods, &new_ext_cis, &method_roots_list, &ext_targets, &edges, &cachesizes_sv);

{ // make a permanent in-memory copy of f (excluding the header)
ios_bufmode(f, bm_none);
Expand All @@ -3629,17 +3629,18 @@ static jl_value_t *jl_restore_package_image_from_stream(void* pkgimage_handle, i
ios_close(f);
ios_static_buffer(f, sysimg, len);
pkgcachesizes cachesizes;
jl_restore_system_image_from_stream_(f, image, depmods, checksum, (jl_array_t**)&restored, &init_order, &extext_methods, &new_specializations, &method_roots_list, &ext_targets, &edges, &base, &ccallable_list, &cachesizes);
jl_restore_system_image_from_stream_(f, image, depmods, checksum, (jl_array_t**)&restored, &init_order, &extext_methods, &new_ext_cis, &method_roots_list,
&ext_targets, &edges, &base, &ccallable_list, &cachesizes);
JL_SIGATOMIC_END();

// Insert method extensions
jl_insert_methods(extext_methods);
// No special processing of `new_specializations` is required because recaching handled it
// No special processing of `new_ext_cis` is required because recaching handled it
// Add roots to methods
jl_copy_roots(method_roots_list, jl_worklist_key((jl_array_t*)restored));
// Handle edges
size_t world = jl_atomic_load_acquire(&jl_world_counter);
jl_insert_backedges((jl_array_t*)edges, (jl_array_t*)ext_targets, (jl_array_t*)new_specializations, world); // restore external backedges (needs to be last)
jl_insert_backedges((jl_array_t*)edges, (jl_array_t*)ext_targets, (jl_array_t*)new_ext_cis, world); // restore external backedges (needs to be last)
// reinit ccallables
jl_reinit_ccallable(&ccallable_list, base, pkgimage_handle);
arraylist_free(&ccallable_list);
Expand All @@ -3653,7 +3654,8 @@ static jl_value_t *jl_restore_package_image_from_stream(void* pkgimage_handle, i
jl_svecset(cachesizes_sv, 4, jl_box_long(cachesizes.reloclist));
jl_svecset(cachesizes_sv, 5, jl_box_long(cachesizes.gvarlist));
jl_svecset(cachesizes_sv, 6, jl_box_long(cachesizes.fptrlist));
restored = (jl_value_t*)jl_svec(8, restored, init_order, extext_methods, new_specializations, method_roots_list,
restored = (jl_value_t*)jl_svec(8, restored, init_order, extext_methods,
(jl_value_t*)(new_ext_cis ? new_ext_cis : jl_nothing), method_roots_list,
ext_targets, edges, cachesizes_sv);
}
else {
Expand Down
Loading

0 comments on commit d59901b

Please sign in to comment.