Skip to content

Commit

Permalink
Merge pull request #23898 from JuliaLang/teh/include_hook
Browse files Browse the repository at this point in the history
More extensive record-keeping during module loading/compilation
  • Loading branch information
timholy authored Oct 7, 2017
2 parents b84131f + ef28c11 commit b9a0d1d
Show file tree
Hide file tree
Showing 4 changed files with 141 additions and 34 deletions.
75 changes: 57 additions & 18 deletions base/loading.jl
Original file line number Diff line number Diff line change
Expand Up @@ -201,20 +201,24 @@ const package_locks = Dict{Symbol,Condition}()
# Callbacks take the form (mod::Symbol) -> nothing.
# WARNING: This is an experimental feature and might change later, without deprecation.
const package_callbacks = Any[]
# to notify downstream consumers that a file has been included into a particular module
# Callbacks take the form (mod::Module, filename::String) -> nothing
# WARNING: This is an experimental feature and might change later, without deprecation.
const include_callbacks = Any[]

# used to optionally track dependencies when requiring a module:
const _concrete_dependencies = Any[] # these dependency versions are "set in stone", and the process should try to avoid invalidating them
const _require_dependencies = Any[] # a list of (path, mtime) tuples that are the file dependencies of the module currently being precompiled
const _require_dependencies = Any[] # a list of (mod, path, mtime) tuples that are the file dependencies of the module currently being precompiled
const _track_dependencies = Ref(false) # set this to true to track the list of file dependencies
function _include_dependency(_path::AbstractString)
function _include_dependency(modstring::AbstractString, _path::AbstractString)
prev = source_path(nothing)
if prev === nothing
path = abspath(_path)
else
path = joinpath(dirname(prev), _path)
end
if _track_dependencies[]
push!(_require_dependencies, (path, mtime(path)))
push!(_require_dependencies, (modstring, path, mtime(path)))
end
return path, prev
end
Expand All @@ -230,7 +234,7 @@ This is only needed if your module depends on a file that is not used via `inclu
no effect outside of compilation.
"""
function include_dependency(path::AbstractString)
_include_dependency(path)
_include_dependency("#__external__", path)
return nothing
end

Expand Down Expand Up @@ -518,7 +522,10 @@ end

include_relative(mod::Module, path::AbstractString) = include_relative(mod, String(path))
function include_relative(mod::Module, _path::String)
path, prev = _include_dependency(_path)
path, prev = _include_dependency(string(mod), _path)
for callback in include_callbacks # to preserve order, must come before Core.include
invokelatest(callback, mod, path)
end
tls = task_local_storage()
tls[:SOURCE_PATH] = path
local result
Expand Down Expand Up @@ -671,15 +678,20 @@ function parse_cache_header(f::IO)
end
totbytes = ntoh(read(f, Int64)) # total bytes for file dependencies
# read the list of files
files = Tuple{String,Float64}[]
files = Tuple{String,String,Float64}[]
while true
n = ntoh(read(f, Int32))
n == 0 && break
totbytes -= 4 + n + 8
@assert n >= 0 "EOF while reading cache header" # probably means this wasn't a valid file to be read by Base.parse_cache_header
push!(files, (String(read(f, n)), ntoh(read(f, Float64))))
end
@assert totbytes == 4 "header of cache file appears to be corrupt"
n1 = ntoh(read(f, Int32))
n1 == 0 && break
@assert n1 >= 0 "EOF while reading cache header" # probably means this wasn't a valid file to be read by Base.parse_cache_header
modname = String(read(f, n1))
n2 = ntoh(read(f, Int32))
@assert n2 >= 0 "EOF while reading cache header" # probably means this wasn't a valid file to be read by Base.parse_cache_header
filename = String(read(f, n2))
push!(files, (modname, filename, ntoh(read(f, Float64))))
totbytes -= 8 + n1 + n2 + 8
end
@assert totbytes == 12 "header of cache file appears to be corrupt"
srctextpos = ntoh(read(f, Int64))
# read the list of modules that are required to be present during loading
required_modules = Dict{Symbol,UInt64}()
while true
Expand All @@ -689,7 +701,7 @@ function parse_cache_header(f::IO)
uuid = ntoh(read(f, UInt64)) # module UUID
required_modules[sym] = uuid
end
return modules, files, required_modules
return modules, files, required_modules, srctextpos
end

function parse_cache_header(cachefile::String)
Expand All @@ -704,7 +716,7 @@ end

function cache_dependencies(f::IO)
defs, files, modules = parse_cache_header(f)
return modules, files
return modules, map(mod_fl_mt -> (mod_fl_mt[2], mod_fl_mt[3]), files) # discard the module
end

function cache_dependencies(cachefile::String)
Expand All @@ -717,6 +729,33 @@ function cache_dependencies(cachefile::String)
end
end

function read_dependency_src(io::IO, filename::AbstractString)
modules, files, required_modules, srctextpos = parse_cache_header(io)
srctextpos == 0 && error("no source-text stored in cache file")
seek(io, srctextpos)
while !eof(io)
filenamelen = ntoh(read(io, Int32))
filenamelen == 0 && break
fn = String(read(io, filenamelen))
len = ntoh(read(io, UInt64))
if fn == filename
return String(read(io, len))
end
seek(io, position(io) + len)
end
error(filename, " is not stored in the source-text cache")
end

function read_dependency_src(cachefile::String, filename::AbstractString)
io = open(cachefile, "r")
try
!isvalid_cache_header(io) && throw(ArgumentError("Invalid header in cache file $cachefile."))
return read_dependency_src(io, filename)
finally
close(io)
end
end

function stale_cachefile(modpath::String, cachefile::String)
io = open(cachefile, "r")
try
Expand Down Expand Up @@ -756,11 +795,11 @@ function stale_cachefile(modpath::String, cachefile::String)
end

# now check if this file is fresh relative to its source files
if !samefile(files[1][1], modpath)
DEBUG_LOADING[] && info("JL_DEBUG_LOADING: Rejecting cache file $cachefile because it is for file $(files[1][1])) not file $modpath.")
if !samefile(files[1][2], modpath)
DEBUG_LOADING[] && info("JL_DEBUG_LOADING: Rejecting cache file $cachefile because it is for file $(files[1][2])) not file $modpath.")
return true # cache file was compiled from a different path
end
for (f, ftime_req) in files
for (_, f, ftime_req) in files
# Issue #13606: compensate for Docker images rounding mtimes
# Issue #20837: compensate for GlusterFS truncating mtimes to microseconds
ftime = mtime(f)
Expand Down
86 changes: 72 additions & 14 deletions src/dump.c
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,12 @@ static uint64_t read_uint64(ios_t *s)
return b0 | (b1<<32);
}

static void write_int64(ios_t *s, int64_t i)
{
write_int32(s, (i>>32) & 0xffffffff);
write_int32(s, i & 0xffffffff);
}

static void write_uint16(ios_t *s, uint16_t i)
{
write_uint8(s, (i>> 8) & 0xff);
Expand Down Expand Up @@ -1030,7 +1036,7 @@ static void write_mod_list(ios_t *s, jl_array_t *a)
}

// "magic" string and version header of .ji file
static const int JI_FORMAT_VERSION = 3;
static const int JI_FORMAT_VERSION = 4;
static const char JI_MAGIC[] = "\373jli\r\n\032\n"; // based on PNG signature
static const uint16_t BOM = 0xFEFF; // byte-order marker
static void write_header(ios_t *s)
Expand Down Expand Up @@ -1065,9 +1071,10 @@ static void write_work_list(ios_t *s)

// serialize the global _require_dependencies array of pathnames that
// are include depenencies
static void write_dependency_list(ios_t *s)
static int64_t write_dependency_list(ios_t *s, jl_array_t **udepsp)
{
size_t total_size = 0;
int64_t pos = 0;
static jl_array_t *deps = NULL;
if (!deps)
deps = (jl_array_t*)jl_get_global(jl_base_module, jl_symbol("_require_dependencies"));
Expand All @@ -1080,18 +1087,19 @@ static void write_dependency_list(ios_t *s)
jl_value_t *uniqargs[2] = {unique_func, (jl_value_t*)deps};
size_t last_age = jl_get_ptls_states()->world_age;
jl_get_ptls_states()->world_age = jl_world_counter;
jl_array_t *udeps = deps && unique_func ? (jl_array_t*)jl_apply(uniqargs, 2) : NULL;
jl_array_t *udeps = (*udepsp = deps && unique_func ? (jl_array_t*)jl_apply(uniqargs, 2) : NULL);
jl_get_ptls_states()->world_age = last_age;

JL_GC_PUSH1(&udeps);
if (udeps) {
size_t l = jl_array_len(udeps);
for (size_t i=0; i < l; i++) {
jl_value_t *dep = jl_fieldref(jl_array_ptr_ref(udeps, i), 0);
size_t slen = jl_string_len(dep);
total_size += 4 + slen + 8;
dep = jl_fieldref(jl_array_ptr_ref(udeps, i), 1);
slen += jl_string_len(dep);
total_size += 8 + slen + 8;
}
total_size += 4;
total_size += 4 + 8;
}
// write the total size so that we can quickly seek past all of the
// dependencies if we don't need them
Expand All @@ -1100,15 +1108,22 @@ static void write_dependency_list(ios_t *s)
size_t l = jl_array_len(udeps);
for (size_t i=0; i < l; i++) {
jl_value_t *deptuple = jl_array_ptr_ref(udeps, i);
jl_value_t *dep = jl_fieldref(deptuple, 0);
jl_value_t *dep = jl_fieldref(deptuple, 0); // evaluating module (as string)
size_t slen = jl_string_len(dep);
write_int32(s, slen);
ios_write(s, jl_string_data(dep), slen);
write_float64(s, jl_unbox_float64(jl_fieldref(deptuple, 1)));
dep = jl_fieldref(deptuple, 1); // file abspath
slen = jl_string_len(dep);
write_int32(s, slen);
ios_write(s, jl_string_data(dep), slen);
write_float64(s, jl_unbox_float64(jl_fieldref(deptuple, 2))); // mtime
}
write_int32(s, 0); // terminator, for ease of reading
// write a dummy file position to indicate the beginning of the source-text
pos = ios_pos(s);
write_int64(s, 0);
}
JL_GC_POP();
return pos;
}

// --- deserialize ---
Expand Down Expand Up @@ -2280,18 +2295,18 @@ JL_DLLEXPORT int jl_save_incremental(const char *fname, jl_array_t *worklist)
{
char *tmpfname = strcat(strcpy((char *) alloca(strlen(fname)+8), fname), ".XXXXXX");
ios_t f;
jl_array_t *mod_array;
jl_array_t *mod_array, *udeps = NULL;
if (ios_mkstemp(&f, tmpfname) == NULL) {
jl_printf(JL_STDERR, "Cannot open cache file \"%s\" for writing.\n", tmpfname);
return 1;
}
JL_GC_PUSH1(&mod_array);
JL_GC_PUSH2(&mod_array, &udeps);
mod_array = jl_get_loaded_modules();

serializer_worklist = worklist;
write_header(&f);
write_work_list(&f);
write_dependency_list(&f);
int64_t srctextpos = write_dependency_list(&f, &udeps);
write_mod_list(&f, mod_array); // this can return errors during deserialize,
// best to keep it early (before any actual initialization)

Expand All @@ -2313,7 +2328,6 @@ JL_DLLEXPORT int jl_save_incremental(const char *fname, jl_array_t *worklist)
assert(jl_is_module(m));
jl_collect_lambdas_from_mod(lambdas, m);
}
JL_GC_POP();

jl_collect_backedges(edges);

Expand All @@ -2326,15 +2340,59 @@ JL_DLLEXPORT int jl_save_incremental(const char *fname, jl_array_t *worklist)
jl_serialize_value(&s, worklist);
jl_serialize_value(&s, lambdas);
jl_serialize_value(&s, edges);
jl_finalize_serializer(&s); // done with f
jl_finalize_serializer(&s);
serializer_worklist = NULL;

jl_gc_enable(en);
htable_reset(&edges_map, 0);
htable_reset(&backref_table, 0);
arraylist_free(&reinit_list);

// Write the source-text for the dependent files
if (udeps) {
// Go back and update the source-text position to point to the current position
int64_t posfile = ios_pos(&f);
ios_seek(&f, srctextpos);
write_int64(&f, posfile);
ios_seek_end(&f);
// Each source-text file is written as
// int32: length of abspath
// char*: abspath
// uint64: length of src text
// char*: src text
// At the end we write int32(0) as a terminal sentinel.
len = jl_array_len(udeps);
ios_t srctext;
for (i = 0; i < len; i++) {
jl_value_t *deptuple = jl_array_ptr_ref(udeps, i);
jl_value_t *dep = jl_fieldref(deptuple, 0); // module name
// Dependencies declared with `include_dependency` are excluded
// because these may not be Julia code (and could be huge)
if (strcmp(jl_string_data(dep), "#__external__") != 0) {
dep = jl_fieldref(deptuple, 1); // file abspath
ios_t *srctp = ios_file(&srctext, jl_string_data(dep), 1, 0, 0, 0);
if (!srctp) {
jl_printf(JL_STDERR, "WARNING: could not cache source text for \"%s\".\n",
jl_string_data(dep));
continue;
}
size_t slen = jl_string_len(dep);
write_int32(&f, slen);
ios_write(&f, jl_string_data(dep), slen);
posfile = ios_pos(&f);
write_uint64(&f, 0); // placeholder for length of this file in bytes
uint64_t filelen = (uint64_t) ios_copyall(&f, &srctext);
ios_close(&srctext);
ios_seek(&f, posfile);
write_uint64(&f, filelen);
ios_seek_end(&f);
}
}
}
write_int32(&f, 0); // mark the end of the source text
ios_close(&f);

JL_GC_POP();
if (jl_fs_rename(tmpfname, fname) < 0) {
jl_printf(JL_STDERR, "Cannot write cache file \"%s\".\n", fname);
return 1;
Expand Down
10 changes: 8 additions & 2 deletions test/compile.jl
Original file line number Diff line number Diff line change
Expand Up @@ -204,16 +204,22 @@ try
@test stringmime("text/plain", Base.Docs.doc(Foo.Bar.bar)) == "bar function\n"

modules, deps, required_modules = Base.parse_cache_header(cachefile)
discard_module = mod_fl_mt -> (mod_fl_mt[2], mod_fl_mt[3])
@test modules == Dict(Foo_module => Base.module_uuid(Foo))
@test map(x -> x[1], sort(deps)) == [Foo_file, joinpath(dir, "bar.jl"), joinpath(dir, "foo.jl")]
@test map(x -> x[1], sort(discard_module.(deps))) == [Foo_file, joinpath(dir, "bar.jl"), joinpath(dir, "foo.jl")]
srctxt = Base.read_dependency_src(cachefile, Foo_file)
@test !isempty(srctxt) && srctxt == read(Foo_file, String)
@test_throws ErrorException Base.read_dependency_src(cachefile, "/tmp/nonexistent.txt")
# dependencies declared with `include_dependency` should not be stored
@test_throws ErrorException Base.read_dependency_src(cachefile, joinpath(dir, "foo.jl"))

modules, deps1 = Base.cache_dependencies(cachefile)
@test modules == merge(Dict(s => Base.module_uuid(getfield(Foo, s)) for s in
[:Base, :Core, Foo2_module, FooBase_module, :Main, :Test]),
# plus modules included in the system image
Dict(s => Base.module_uuid(Base.root_module(s)) for s in
[:DelimitedFiles,:Mmap]))
@test deps == deps1
@test discard_module.(deps) == deps1

@test current_task()(0x01, 0x4000, 0x30031234) == 2
@test nothing(0x01, 0x4000, 0x30031234) == 52
Expand Down
4 changes: 4 additions & 0 deletions test/loading.jl
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,11 @@ end
@test @nested_LINE_expansion() == ((@__LINE__() - 4, @__LINE__() - 12), @__LINE__())
@test @nested_LINE_expansion2() == ((@__LINE__() - 5, @__LINE__() - 9), @__LINE__())

loaded_files = String[]
push!(Base.include_callbacks, (mod::Module, fn::String) -> push!(loaded_files, fn))
include("test_sourcepath.jl")
@test length(loaded_files) == 1 && endswith(loaded_files[1], "test_sourcepath.jl")
pop!(Base.include_callbacks)
thefname = "the fname!//\\&\1*"
include_string_test_func = include_string(@__MODULE__, "include_string_test() = @__FILE__", thefname)
@test include_string_test_func() == thefname
Expand Down

0 comments on commit b9a0d1d

Please sign in to comment.