diff --git a/base/loading.jl b/base/loading.jl index f6d2512f2734c..a539fadd26a76 100644 --- a/base/loading.jl +++ b/base/loading.jl @@ -201,12 +201,16 @@ const package_locks = Dict{Symbol,Condition}() # Callbacks take the form (mod::Symbol) -> nothing. # WARNING: This is an experimental feature and might change later, without deprecation. const package_callbacks = Any[] +# to notify downstream consumers that a file has been included into a particular module +# Callbacks take the form (mod::Module, filename::String) -> nothing +# WARNING: This is an experimental feature and might change later, without deprecation. +const include_callbacks = Any[] # used to optionally track dependencies when requiring a module: const _concrete_dependencies = Any[] # these dependency versions are "set in stone", and the process should try to avoid invalidating them -const _require_dependencies = Any[] # a list of (path, mtime) tuples that are the file dependencies of the module currently being precompiled +const _require_dependencies = Any[] # a list of (mod, path, mtime) tuples that are the file dependencies of the module currently being precompiled const _track_dependencies = Ref(false) # set this to true to track the list of file dependencies -function _include_dependency(_path::AbstractString) +function _include_dependency(modstring::AbstractString, _path::AbstractString) prev = source_path(nothing) if prev === nothing path = abspath(_path) @@ -214,7 +218,7 @@ function _include_dependency(_path::AbstractString) path = joinpath(dirname(prev), _path) end if _track_dependencies[] - push!(_require_dependencies, (path, mtime(path))) + push!(_require_dependencies, (modstring, path, mtime(path))) end return path, prev end @@ -230,7 +234,7 @@ This is only needed if your module depends on a file that is not used via `inclu no effect outside of compilation. """ function include_dependency(path::AbstractString) - _include_dependency(path) + _include_dependency("#__external__", path) return nothing end @@ -518,7 +522,10 @@ end include_relative(mod::Module, path::AbstractString) = include_relative(mod, String(path)) function include_relative(mod::Module, _path::String) - path, prev = _include_dependency(_path) + path, prev = _include_dependency(string(mod), _path) + for callback in include_callbacks # to preserve order, must come before Core.include + invokelatest(callback, mod, path) + end tls = task_local_storage() tls[:SOURCE_PATH] = path local result @@ -671,15 +678,20 @@ function parse_cache_header(f::IO) end totbytes = ntoh(read(f, Int64)) # total bytes for file dependencies # read the list of files - files = Tuple{String,Float64}[] + files = Tuple{String,String,Float64}[] while true - n = ntoh(read(f, Int32)) - n == 0 && break - totbytes -= 4 + n + 8 - @assert n >= 0 "EOF while reading cache header" # probably means this wasn't a valid file to be read by Base.parse_cache_header - push!(files, (String(read(f, n)), ntoh(read(f, Float64)))) - end - @assert totbytes == 4 "header of cache file appears to be corrupt" + n1 = ntoh(read(f, Int32)) + n1 == 0 && break + @assert n1 >= 0 "EOF while reading cache header" # probably means this wasn't a valid file to be read by Base.parse_cache_header + modname = String(read(f, n1)) + n2 = ntoh(read(f, Int32)) + @assert n2 >= 0 "EOF while reading cache header" # probably means this wasn't a valid file to be read by Base.parse_cache_header + filename = String(read(f, n2)) + push!(files, (modname, filename, ntoh(read(f, Float64)))) + totbytes -= 8 + n1 + n2 + 8 + end + @assert totbytes == 12 "header of cache file appears to be corrupt" + srctextpos = ntoh(read(f, Int64)) # read the list of modules that are required to be present during loading required_modules = Dict{Symbol,UInt64}() while true @@ -689,7 +701,7 @@ function parse_cache_header(f::IO) uuid = ntoh(read(f, UInt64)) # module UUID required_modules[sym] = uuid end - return modules, files, required_modules + return modules, files, required_modules, srctextpos end function parse_cache_header(cachefile::String) @@ -704,7 +716,7 @@ end function cache_dependencies(f::IO) defs, files, modules = parse_cache_header(f) - return modules, files + return modules, map(mod_fl_mt -> (mod_fl_mt[2], mod_fl_mt[3]), files) # discard the module end function cache_dependencies(cachefile::String) @@ -717,6 +729,33 @@ function cache_dependencies(cachefile::String) end end +function read_dependency_src(io::IO, filename::AbstractString) + modules, files, required_modules, srctextpos = parse_cache_header(io) + srctextpos == 0 && error("no source-text stored in cache file") + seek(io, srctextpos) + while !eof(io) + filenamelen = ntoh(read(io, Int32)) + filenamelen == 0 && break + fn = String(read(io, filenamelen)) + len = ntoh(read(io, UInt64)) + if fn == filename + return String(read(io, len)) + end + seek(io, position(io) + len) + end + error(filename, " is not stored in the source-text cache") +end + +function read_dependency_src(cachefile::String, filename::AbstractString) + io = open(cachefile, "r") + try + !isvalid_cache_header(io) && throw(ArgumentError("Invalid header in cache file $cachefile.")) + return read_dependency_src(io, filename) + finally + close(io) + end +end + function stale_cachefile(modpath::String, cachefile::String) io = open(cachefile, "r") try @@ -756,11 +795,11 @@ function stale_cachefile(modpath::String, cachefile::String) end # now check if this file is fresh relative to its source files - if !samefile(files[1][1], modpath) - DEBUG_LOADING[] && info("JL_DEBUG_LOADING: Rejecting cache file $cachefile because it is for file $(files[1][1])) not file $modpath.") + if !samefile(files[1][2], modpath) + DEBUG_LOADING[] && info("JL_DEBUG_LOADING: Rejecting cache file $cachefile because it is for file $(files[1][2])) not file $modpath.") return true # cache file was compiled from a different path end - for (f, ftime_req) in files + for (_, f, ftime_req) in files # Issue #13606: compensate for Docker images rounding mtimes # Issue #20837: compensate for GlusterFS truncating mtimes to microseconds ftime = mtime(f) diff --git a/src/dump.c b/src/dump.c index 1941730083e88..6e223ab4d6017 100644 --- a/src/dump.c +++ b/src/dump.c @@ -156,6 +156,12 @@ static uint64_t read_uint64(ios_t *s) return b0 | (b1<<32); } +static void write_int64(ios_t *s, int64_t i) +{ + write_int32(s, (i>>32) & 0xffffffff); + write_int32(s, i & 0xffffffff); +} + static void write_uint16(ios_t *s, uint16_t i) { write_uint8(s, (i>> 8) & 0xff); @@ -1030,7 +1036,7 @@ static void write_mod_list(ios_t *s, jl_array_t *a) } // "magic" string and version header of .ji file -static const int JI_FORMAT_VERSION = 3; +static const int JI_FORMAT_VERSION = 4; static const char JI_MAGIC[] = "\373jli\r\n\032\n"; // based on PNG signature static const uint16_t BOM = 0xFEFF; // byte-order marker static void write_header(ios_t *s) @@ -1065,9 +1071,10 @@ static void write_work_list(ios_t *s) // serialize the global _require_dependencies array of pathnames that // are include depenencies -static void write_dependency_list(ios_t *s) +static int64_t write_dependency_list(ios_t *s, jl_array_t **udepsp) { size_t total_size = 0; + int64_t pos = 0; static jl_array_t *deps = NULL; if (!deps) deps = (jl_array_t*)jl_get_global(jl_base_module, jl_symbol("_require_dependencies")); @@ -1080,18 +1087,19 @@ static void write_dependency_list(ios_t *s) jl_value_t *uniqargs[2] = {unique_func, (jl_value_t*)deps}; size_t last_age = jl_get_ptls_states()->world_age; jl_get_ptls_states()->world_age = jl_world_counter; - jl_array_t *udeps = deps && unique_func ? (jl_array_t*)jl_apply(uniqargs, 2) : NULL; + jl_array_t *udeps = (*udepsp = deps && unique_func ? (jl_array_t*)jl_apply(uniqargs, 2) : NULL); jl_get_ptls_states()->world_age = last_age; - JL_GC_PUSH1(&udeps); if (udeps) { size_t l = jl_array_len(udeps); for (size_t i=0; i < l; i++) { jl_value_t *dep = jl_fieldref(jl_array_ptr_ref(udeps, i), 0); size_t slen = jl_string_len(dep); - total_size += 4 + slen + 8; + dep = jl_fieldref(jl_array_ptr_ref(udeps, i), 1); + slen += jl_string_len(dep); + total_size += 8 + slen + 8; } - total_size += 4; + total_size += 4 + 8; } // write the total size so that we can quickly seek past all of the // dependencies if we don't need them @@ -1100,15 +1108,22 @@ static void write_dependency_list(ios_t *s) size_t l = jl_array_len(udeps); for (size_t i=0; i < l; i++) { jl_value_t *deptuple = jl_array_ptr_ref(udeps, i); - jl_value_t *dep = jl_fieldref(deptuple, 0); + jl_value_t *dep = jl_fieldref(deptuple, 0); // evaluating module (as string) size_t slen = jl_string_len(dep); write_int32(s, slen); ios_write(s, jl_string_data(dep), slen); - write_float64(s, jl_unbox_float64(jl_fieldref(deptuple, 1))); + dep = jl_fieldref(deptuple, 1); // file abspath + slen = jl_string_len(dep); + write_int32(s, slen); + ios_write(s, jl_string_data(dep), slen); + write_float64(s, jl_unbox_float64(jl_fieldref(deptuple, 2))); // mtime } write_int32(s, 0); // terminator, for ease of reading + // write a dummy file position to indicate the beginning of the source-text + pos = ios_pos(s); + write_int64(s, 0); } - JL_GC_POP(); + return pos; } // --- deserialize --- @@ -2280,18 +2295,18 @@ JL_DLLEXPORT int jl_save_incremental(const char *fname, jl_array_t *worklist) { char *tmpfname = strcat(strcpy((char *) alloca(strlen(fname)+8), fname), ".XXXXXX"); ios_t f; - jl_array_t *mod_array; + jl_array_t *mod_array, *udeps = NULL; if (ios_mkstemp(&f, tmpfname) == NULL) { jl_printf(JL_STDERR, "Cannot open cache file \"%s\" for writing.\n", tmpfname); return 1; } - JL_GC_PUSH1(&mod_array); + JL_GC_PUSH2(&mod_array, &udeps); mod_array = jl_get_loaded_modules(); serializer_worklist = worklist; write_header(&f); write_work_list(&f); - write_dependency_list(&f); + int64_t srctextpos = write_dependency_list(&f, &udeps); write_mod_list(&f, mod_array); // this can return errors during deserialize, // best to keep it early (before any actual initialization) @@ -2313,7 +2328,6 @@ JL_DLLEXPORT int jl_save_incremental(const char *fname, jl_array_t *worklist) assert(jl_is_module(m)); jl_collect_lambdas_from_mod(lambdas, m); } - JL_GC_POP(); jl_collect_backedges(edges); @@ -2326,15 +2340,59 @@ JL_DLLEXPORT int jl_save_incremental(const char *fname, jl_array_t *worklist) jl_serialize_value(&s, worklist); jl_serialize_value(&s, lambdas); jl_serialize_value(&s, edges); - jl_finalize_serializer(&s); // done with f + jl_finalize_serializer(&s); serializer_worklist = NULL; jl_gc_enable(en); htable_reset(&edges_map, 0); htable_reset(&backref_table, 0); arraylist_free(&reinit_list); + + // Write the source-text for the dependent files + if (udeps) { + // Go back and update the source-text position to point to the current position + int64_t posfile = ios_pos(&f); + ios_seek(&f, srctextpos); + write_int64(&f, posfile); + ios_seek_end(&f); + // Each source-text file is written as + // int32: length of abspath + // char*: abspath + // uint64: length of src text + // char*: src text + // At the end we write int32(0) as a terminal sentinel. + len = jl_array_len(udeps); + ios_t srctext; + for (i = 0; i < len; i++) { + jl_value_t *deptuple = jl_array_ptr_ref(udeps, i); + jl_value_t *dep = jl_fieldref(deptuple, 0); // module name + // Dependencies declared with `include_dependency` are excluded + // because these may not be Julia code (and could be huge) + if (strcmp(jl_string_data(dep), "#__external__") != 0) { + dep = jl_fieldref(deptuple, 1); // file abspath + ios_t *srctp = ios_file(&srctext, jl_string_data(dep), 1, 0, 0, 0); + if (!srctp) { + jl_printf(JL_STDERR, "WARNING: could not cache source text for \"%s\".\n", + jl_string_data(dep)); + continue; + } + size_t slen = jl_string_len(dep); + write_int32(&f, slen); + ios_write(&f, jl_string_data(dep), slen); + posfile = ios_pos(&f); + write_uint64(&f, 0); // placeholder for length of this file in bytes + uint64_t filelen = (uint64_t) ios_copyall(&f, &srctext); + ios_close(&srctext); + ios_seek(&f, posfile); + write_uint64(&f, filelen); + ios_seek_end(&f); + } + } + } + write_int32(&f, 0); // mark the end of the source text ios_close(&f); + JL_GC_POP(); if (jl_fs_rename(tmpfname, fname) < 0) { jl_printf(JL_STDERR, "Cannot write cache file \"%s\".\n", fname); return 1; diff --git a/test/compile.jl b/test/compile.jl index a56862691f923..eae916a096284 100644 --- a/test/compile.jl +++ b/test/compile.jl @@ -204,8 +204,14 @@ try @test stringmime("text/plain", Base.Docs.doc(Foo.Bar.bar)) == "bar function\n" modules, deps, required_modules = Base.parse_cache_header(cachefile) + discard_module = mod_fl_mt -> (mod_fl_mt[2], mod_fl_mt[3]) @test modules == Dict(Foo_module => Base.module_uuid(Foo)) - @test map(x -> x[1], sort(deps)) == [Foo_file, joinpath(dir, "bar.jl"), joinpath(dir, "foo.jl")] + @test map(x -> x[1], sort(discard_module.(deps))) == [Foo_file, joinpath(dir, "bar.jl"), joinpath(dir, "foo.jl")] + srctxt = Base.read_dependency_src(cachefile, Foo_file) + @test !isempty(srctxt) && srctxt == read(Foo_file, String) + @test_throws ErrorException Base.read_dependency_src(cachefile, "/tmp/nonexistent.txt") + # dependencies declared with `include_dependency` should not be stored + @test_throws ErrorException Base.read_dependency_src(cachefile, joinpath(dir, "foo.jl")) modules, deps1 = Base.cache_dependencies(cachefile) @test modules == merge(Dict(s => Base.module_uuid(getfield(Foo, s)) for s in @@ -213,7 +219,7 @@ try # plus modules included in the system image Dict(s => Base.module_uuid(Base.root_module(s)) for s in [:DelimitedFiles,:Mmap])) - @test deps == deps1 + @test discard_module.(deps) == deps1 @test current_task()(0x01, 0x4000, 0x30031234) == 2 @test nothing(0x01, 0x4000, 0x30031234) == 52 diff --git a/test/loading.jl b/test/loading.jl index 4c4809765b6ed..f85be6cac24fd 100644 --- a/test/loading.jl +++ b/test/loading.jl @@ -31,7 +31,11 @@ end @test @nested_LINE_expansion() == ((@__LINE__() - 4, @__LINE__() - 12), @__LINE__()) @test @nested_LINE_expansion2() == ((@__LINE__() - 5, @__LINE__() - 9), @__LINE__()) +loaded_files = String[] +push!(Base.include_callbacks, (mod::Module, fn::String) -> push!(loaded_files, fn)) include("test_sourcepath.jl") +@test length(loaded_files) == 1 && endswith(loaded_files[1], "test_sourcepath.jl") +pop!(Base.include_callbacks) thefname = "the fname!//\\&\1*" include_string_test_func = include_string(@__MODULE__, "include_string_test() = @__FILE__", thefname) @test include_string_test_func() == thefname