diff --git a/src/librustc_codegen_llvm/back/lto.rs b/src/librustc_codegen_llvm/back/lto.rs index 0e4e4e2f983f6..6481ef5c73ce0 100644 --- a/src/librustc_codegen_llvm/back/lto.rs +++ b/src/librustc_codegen_llvm/back/lto.rs @@ -16,15 +16,24 @@ use rustc::hir::def_id::LOCAL_CRATE; use rustc::middle::exported_symbols::SymbolExportLevel; use rustc::session::config::{self, Lto}; use rustc::util::common::time_ext; -use rustc_data_structures::fx::FxHashMap; +use rustc_data_structures::fx::{FxHashSet, FxHashMap}; use rustc_codegen_ssa::{RLIB_BYTECODE_EXTENSION, ModuleCodegen, ModuleKind}; use log::{info, debug}; use std::ffi::{CStr, CString}; +use std::fs::File; +use std::io; +use std::mem; +use std::path::Path; use std::ptr; use std::slice; use std::sync::Arc; +/// We keep track of past LTO imports that were used to produce the current set +/// of compiled object files that we might choose to reuse during this +/// compilation session. +pub const THIN_LTO_IMPORTS_INCR_COMP_FILE_NAME: &str = "thin-lto-past-imports.bin"; + pub fn crate_type_allows_lto(crate_type: config::CrateType) -> bool { match crate_type { config::CrateType::Executable | @@ -472,13 +481,26 @@ fn thin_lto(cgcx: &CodegenContext, info!("thin LTO data created"); - let import_map = if cgcx.incr_comp_session_dir.is_some() { - ThinLTOImports::from_thin_lto_data(data) + let (import_map_path, prev_import_map, curr_import_map) = + if let Some(ref incr_comp_session_dir) = cgcx.incr_comp_session_dir + { + let path = incr_comp_session_dir.join(THIN_LTO_IMPORTS_INCR_COMP_FILE_NAME); + // If previous imports have been deleted, or we get an IO error + // reading the file storing them, then we'll just use `None` as the + // prev_import_map, which will force the code to be recompiled. + let prev = if path.exists() { + ThinLTOImports::load_from_file(&path).ok() + } else { + None + }; + let curr = ThinLTOImports::from_thin_lto_data(data); + (Some(path), prev, curr) } else { // If we don't compile incrementally, we don't need to load the // import data from LLVM. assert!(green_modules.is_empty()); - ThinLTOImports::default() + let curr = ThinLTOImports::default(); + (None, None, curr) }; info!("thin LTO import map loaded"); @@ -502,18 +524,36 @@ fn thin_lto(cgcx: &CodegenContext, for (module_index, module_name) in shared.module_names.iter().enumerate() { let module_name = module_name_to_str(module_name); - // If the module hasn't changed and none of the modules it imports - // from has changed, we can re-use the post-ThinLTO version of the - // module. - if green_modules.contains_key(module_name) { - let imports_all_green = import_map.modules_imported_by(module_name) + // If (1.) the module hasn't changed, and (2.) none of the modules + // it imports from has changed, *and* (3.) the import-set itself has + // not changed from the previous compile when it was last + // ThinLTO'ed, then we can re-use the post-ThinLTO version of the + // module. Otherwise, freshly perform LTO optimization. + // + // This strategy means we can always save the computed imports as + // canon: when we reuse the post-ThinLTO version, condition (3.) + // ensures that the curent import set is the same as the previous + // one. (And of course, when we don't reuse the post-ThinLTO + // version, the current import set *is* the correct one, since we + // are doing the ThinLTO in this current compilation cycle.) + // + // See rust-lang/rust#59535. + if let (Some(prev_import_map), true) = + (prev_import_map.as_ref(), green_modules.contains_key(module_name)) + { + assert!(cgcx.incr_comp_session_dir.is_some()); + + let prev_imports = prev_import_map.modules_imported_by(module_name); + let curr_imports = curr_import_map.modules_imported_by(module_name); + let imports_all_green = curr_imports .iter() .all(|imported_module| green_modules.contains_key(imported_module)); - if imports_all_green { + if imports_all_green && equivalent_as_sets(prev_imports, curr_imports) { let work_product = green_modules[module_name].clone(); copy_jobs.push(work_product); info!(" - {}: re-used", module_name); + assert!(cgcx.incr_comp_session_dir.is_some()); cgcx.cgu_reuse_tracker.set_actual_reuse(module_name, CguReuse::PostLto); continue @@ -527,10 +567,33 @@ fn thin_lto(cgcx: &CodegenContext, })); } + // Save the curent ThinLTO import information for the next compilation + // session, overwriting the previous serialized imports (if any). + if let Some(path) = import_map_path { + if let Err(err) = curr_import_map.save_to_file(&path) { + let msg = format!("Error while writing ThinLTO import data: {}", err); + return Err(write::llvm_err(&diag_handler, &msg)); + } + } + Ok((opt_jobs, copy_jobs)) } } +/// Given two slices, each with no repeat elements. returns true if and only if +/// the two slices have the same contents when considered as sets (i.e. when +/// element order is disregarded). +fn equivalent_as_sets(a: &[String], b: &[String]) -> bool { + // cheap path: unequal lengths means cannot possibly be set equivalent. + if a.len() != b.len() { return false; } + // fast path: before building new things, check if inputs are equivalent as is. + if a == b { return true; } + // slow path: general set comparison. + let a: FxHashSet<&str> = a.iter().map(|s| s.as_str()).collect(); + let b: FxHashSet<&str> = b.iter().map(|s| s.as_str()).collect(); + a == b +} + pub(crate) fn run_pass_manager(cgcx: &CodegenContext, module: &ModuleCodegen, config: &ModuleConfig, @@ -832,6 +895,47 @@ impl ThinLTOImports { self.imports.get(llvm_module_name).map(|v| &v[..]).unwrap_or(&[]) } + fn save_to_file(&self, path: &Path) -> io::Result<()> { + use std::io::Write; + let file = File::create(path)?; + let mut writer = io::BufWriter::new(file); + for (importing_module_name, imported_modules) in &self.imports { + writeln!(writer, "{}", importing_module_name)?; + for imported_module in imported_modules { + writeln!(writer, " {}", imported_module)?; + } + writeln!(writer)?; + } + Ok(()) + } + + fn load_from_file(path: &Path) -> io::Result { + use std::io::BufRead; + let mut imports = FxHashMap::default(); + let mut current_module = None; + let mut current_imports = vec![]; + let file = File::open(path)?; + for line in io::BufReader::new(file).lines() { + let line = line?; + if line.is_empty() { + let importing_module = current_module + .take() + .expect("Importing module not set"); + imports.insert(importing_module, + mem::replace(&mut current_imports, vec![])); + } else if line.starts_with(" ") { + // Space marks an imported module + assert_ne!(current_module, None); + current_imports.push(line.trim().to_string()); + } else { + // Otherwise, beginning of a new module (must be start or follow empty line) + assert_eq!(current_module, None); + current_module = Some(line.trim().to_string()); + } + } + Ok(ThinLTOImports { imports }) + } + /// Loads the ThinLTO import map from ThinLTOData. unsafe fn from_thin_lto_data(data: *const llvm::ThinLTOData) -> ThinLTOImports { unsafe extern "C" fn imported_module_callback(payload: *mut libc::c_void, diff --git a/src/test/incremental/thinlto/cgu_invalidated_when_import_added.rs b/src/test/incremental/thinlto/cgu_invalidated_when_import_added.rs new file mode 100644 index 0000000000000..42168dd273eff --- /dev/null +++ b/src/test/incremental/thinlto/cgu_invalidated_when_import_added.rs @@ -0,0 +1,62 @@ +// revisions: cfail1 cfail2 +// compile-flags: -O -Zhuman-readable-cgu-names -Cllvm-args=-import-instr-limit=10 +// build-pass + +// rust-lang/rust#59535: +// +// This is analgous to cgu_invalidated_when_import_removed.rs, but it covers +// the other direction: +// +// We start with a call-graph like `[A] -> [B -> D] [C]` (where the letters are +// functions and the modules are enclosed in `[]`), and add a new call `D <- C`, +// yielding the new call-graph: `[A] -> [B -> D] <- [C]` +// +// The effect of this is that the compiler previously classfied `D` as internal +// and the import-set of `[A]` to be just `B`. But after adding the `D <- C` call, +// `D` is no longer classified as internal, and the import-set of `[A]` becomes +// both `B` and `D`. +// +// We check this case because an early proposed pull request included an +// assertion that the import-sets monotonically decreased over time, a claim +// which this test case proves to be false. + +fn main() { + foo::foo(); + bar::baz(); +} + +mod foo { + + // In cfail1, ThinLTO decides that foo() does not get inlined into main, and + // instead bar() gets inlined into foo(). + // In cfail2, foo() gets inlined into main. + pub fn foo(){ + bar() + } + + // This function needs to be big so that it does not get inlined by ThinLTO + // but *does* get inlined into foo() when it is declared `internal` in + // cfail1 (alone). + pub fn bar(){ + println!("quux1"); + println!("quux2"); + println!("quux3"); + println!("quux4"); + println!("quux5"); + println!("quux6"); + println!("quux7"); + println!("quux8"); + println!("quux9"); + } +} + +mod bar { + + #[inline(never)] + pub fn baz() { + #[cfg(cfail2)] + { + crate::foo::bar(); + } + } +} diff --git a/src/test/incremental/thinlto/cgu_invalidated_when_import_removed.rs b/src/test/incremental/thinlto/cgu_invalidated_when_import_removed.rs new file mode 100644 index 0000000000000..19ce7b3e148f7 --- /dev/null +++ b/src/test/incremental/thinlto/cgu_invalidated_when_import_removed.rs @@ -0,0 +1,74 @@ +// revisions: cfail1 cfail2 +// compile-flags: -O -Zhuman-readable-cgu-names -Cllvm-args=-import-instr-limit=10 +// build-pass + +// rust-lang/rust#59535: +// +// Consider a call-graph like `[A] -> [B -> D] <- [C]` (where the letters are +// functions and the modules are enclosed in `[]`) +// +// In our specific instance, the earlier compilations were inlining the call +// to`B` into `A`; thus `A` ended up with a external reference to the symbol `D` +// in its object code, to be resolved at subsequent link time. The LTO import +// information provided by LLVM for those runs reflected that information: it +// explicitly says during those runs, `B` definition and `D` declaration were +// imported into `[A]`. +// +// The change between incremental builds was that the call `D <- C` was removed. +// +// That change, coupled with other decisions within `rustc`, made the compiler +// decide to make `D` an internal symbol (since it was no longer accessed from +// other codegen units, this makes sense locally). And then the definition of +// `D` was inlined into `B` and `D` itself was eliminated entirely. +// +// The current LTO import information reported that `B` alone is imported into +// `[A]` for the *current compilation*. So when the Rust compiler surveyed the +// dependence graph, it determined that nothing `[A]` imports changed since the +// last build (and `[A]` itself has not changed either), so it chooses to reuse +// the object code generated during the previous compilation. +// +// But that previous object code has an unresolved reference to `D`, and that +// causes a link time failure! + +fn main() { + foo::foo(); + bar::baz(); +} + +mod foo { + + // In cfail1, foo() gets inlined into main. + // In cfail2, ThinLTO decides that foo() does not get inlined into main, and + // instead bar() gets inlined into foo(). But faulty logic in our incr. + // ThinLTO implementation thought that `main()` is unchanged and thus reused + // the object file still containing a call to the now non-existant bar(). + pub fn foo(){ + bar() + } + + // This function needs to be big so that it does not get inlined by ThinLTO + // but *does* get inlined into foo() once it is declared `internal` in + // cfail2. + pub fn bar(){ + println!("quux1"); + println!("quux2"); + println!("quux3"); + println!("quux4"); + println!("quux5"); + println!("quux6"); + println!("quux7"); + println!("quux8"); + println!("quux9"); + } +} + +mod bar { + + #[inline(never)] + pub fn baz() { + #[cfg(cfail1)] + { + crate::foo::bar(); + } + } +}