diff --git a/src/Cargo.lock b/src/Cargo.lock
index e4d9f6e23945e..defb5b9869d82 100644
--- a/src/Cargo.lock
+++ b/src/Cargo.lock
@@ -1198,6 +1198,15 @@ dependencies = [
  "libc 0.2.43 (registry+https://github.com/rust-lang/crates.io-index)",
 ]
 
+[[package]]
+name = "memmap"
+version = "0.6.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "libc 0.2.43 (registry+https://github.com/rust-lang/crates.io-index)",
+ "winapi 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
 [[package]]
 name = "memoffset"
 version = "0.2.1"
@@ -2029,6 +2038,7 @@ name = "rustc_codegen_llvm"
 version = "0.0.0"
 dependencies = [
  "cc 1.0.22 (registry+https://github.com/rust-lang/crates.io-index)",
+ "memmap 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)",
  "num_cpus 1.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "rustc-demangle 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)",
  "rustc_llvm 0.0.0",
@@ -3151,6 +3161,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 "checksum matches 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "7ffc5c5338469d4d3ea17d269fa8ea3512ad247247c30bd2df69e68309ed0a08"
 "checksum mdbook 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)" = "90b5a8d7e341ceee5db3882a06078d42661ddcfa2b3687319cc5da76ec4e782f"
 "checksum memchr 2.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "a3b4142ab8738a78c51896f704f83c11df047ff1bda9a92a661aa6361552d93d"
+"checksum memmap 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)" = "e2ffa2c986de11a9df78620c01eeaaf27d94d3ff02bf81bfcca953102dd0c6ff"
 "checksum memoffset 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "0f9dc261e2b62d7a622bf416ea3c5245cdd5d9a7fcc428c0d06804dfce1775b3"
 "checksum minifier 0.0.19 (registry+https://github.com/rust-lang/crates.io-index)" = "9908ed7c62f990c21ab41fdca53a864a3ada0da69d8729c4de727b397e27bc11"
 "checksum miniz-sys 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)" = "609ce024854aeb19a0ef7567d348aaa5a746b32fb72e336df7fcc16869d7e2b4"
diff --git a/src/librustc/dep_graph/graph.rs b/src/librustc/dep_graph/graph.rs
index a285399657177..4df0fc443a27c 100644
--- a/src/librustc/dep_graph/graph.rs
+++ b/src/librustc/dep_graph/graph.rs
@@ -878,7 +878,7 @@ pub struct WorkProduct {
     pub saved_files: Vec<(WorkProductFileKind, String)>,
 }
 
-#[derive(Clone, Copy, Debug, RustcEncodable, RustcDecodable)]
+#[derive(Clone, Copy, Debug, RustcEncodable, RustcDecodable, PartialEq)]
 pub enum WorkProductFileKind {
     Object,
     Bytecode,
diff --git a/src/librustc/session/config.rs b/src/librustc/session/config.rs
index ee683e37648f0..ee3fabc58d53f 100644
--- a/src/librustc/session/config.rs
+++ b/src/librustc/session/config.rs
@@ -68,7 +68,7 @@ pub enum OptLevel {
     SizeMin,    // -Oz
 }
 
-#[derive(Clone, Copy, PartialEq, Hash)]
+#[derive(Clone, Copy, PartialEq, Hash, Debug)]
 pub enum Lto {
     /// Don't do any LTO whatsoever
     No,
diff --git a/src/librustc/session/mod.rs b/src/librustc/session/mod.rs
index 00c5369e0644e..778c388c7dec7 100644
--- a/src/librustc/session/mod.rs
+++ b/src/librustc/session/mod.rs
@@ -580,11 +580,6 @@ impl Session {
             return config::Lto::No;
         }
 
-        // Right now ThinLTO isn't compatible with incremental compilation.
-        if self.opts.incremental.is_some() {
-            return config::Lto::No;
-        }
-
         // Now we're in "defaults" territory. By default we enable ThinLTO for
         // optimized compiles (anything greater than O0).
         match self.opts.optimize {
@@ -1177,8 +1172,18 @@ pub fn build_session_(
 // commandline argument, you can do so here.
 fn validate_commandline_args_with_session_available(sess: &Session) {
 
-    if sess.lto() != Lto::No && sess.opts.incremental.is_some() {
-        sess.err("can't perform LTO when compiling incrementally");
+    if sess.opts.incremental.is_some() {
+        match sess.lto() {
+            Lto::Yes |
+            Lto::Thin |
+            Lto::Fat => {
+                sess.err("can't perform LTO when compiling incrementally");
+            }
+            Lto::ThinLocal |
+            Lto::No => {
+                // This is fine
+            }
+        }
     }
 
     // Since we don't know if code in an rlib will be linked to statically or
diff --git a/src/librustc/ty/query/config.rs b/src/librustc/ty/query/config.rs
index b5093d0a1fc95..07b1863e32c6c 100644
--- a/src/librustc/ty/query/config.rs
+++ b/src/librustc/ty/query/config.rs
@@ -722,12 +722,6 @@ impl<'tcx> QueryDescription<'tcx> for queries::codegen_unit<'tcx> {
     }
 }
 
-impl<'tcx> QueryDescription<'tcx> for queries::compile_codegen_unit<'tcx> {
-    fn describe(_tcx: TyCtxt, _: InternedString) -> String {
-        "compile_codegen_unit".to_string()
-    }
-}
-
 impl<'tcx> QueryDescription<'tcx> for queries::output_filenames<'tcx> {
     fn describe(_tcx: TyCtxt, _: CrateNum) -> String {
         "output_filenames".to_string()
diff --git a/src/librustc/ty/query/mod.rs b/src/librustc/ty/query/mod.rs
index 6f61583e49b8e..9485f62c61ca0 100644
--- a/src/librustc/ty/query/mod.rs
+++ b/src/librustc/ty/query/mod.rs
@@ -28,7 +28,7 @@ use middle::lib_features::LibFeatures;
 use middle::lang_items::{LanguageItems, LangItem};
 use middle::exported_symbols::{SymbolExportLevel, ExportedSymbol};
 use mir::interpret::ConstEvalResult;
-use mir::mono::{CodegenUnit, Stats};
+use mir::mono::CodegenUnit;
 use mir;
 use mir::interpret::{GlobalId, Allocation};
 use session::{CompileResult, CrateDisambiguator};
@@ -530,7 +530,6 @@ define_queries! { <'tcx>
             -> (Arc<DefIdSet>, Arc<Vec<Arc<CodegenUnit<'tcx>>>>),
         [] fn is_codegened_item: IsCodegenedItem(DefId) -> bool,
         [] fn codegen_unit: CodegenUnit(InternedString) -> Arc<CodegenUnit<'tcx>>,
-        [] fn compile_codegen_unit: CompileCodegenUnit(InternedString) -> Stats,
     },
 
     Other {
diff --git a/src/librustc_codegen_llvm/Cargo.toml b/src/librustc_codegen_llvm/Cargo.toml
index 28fa49846b736..b711502b14b7f 100644
--- a/src/librustc_codegen_llvm/Cargo.toml
+++ b/src/librustc_codegen_llvm/Cargo.toml
@@ -14,6 +14,7 @@ cc = "1.0.1"
 num_cpus = "1.0"
 rustc-demangle = "0.1.4"
 rustc_llvm = { path = "../librustc_llvm" }
+memmap = "0.6"
 
 [features]
 # This is used to convince Cargo to separately cache builds of `rustc_codegen_llvm`
diff --git a/src/librustc_codegen_llvm/back/lto.rs b/src/librustc_codegen_llvm/back/lto.rs
index 56858a31efd25..c1dda02264ed1 100644
--- a/src/librustc_codegen_llvm/back/lto.rs
+++ b/src/librustc_codegen_llvm/back/lto.rs
@@ -11,21 +11,25 @@
 use back::bytecode::{DecodedBytecode, RLIB_BYTECODE_EXTENSION};
 use back::symbol_export;
 use back::write::{ModuleConfig, with_llvm_pmb, CodegenContext};
-use back::write::{self, DiagnosticHandlers};
+use back::write::{self, DiagnosticHandlers, pre_lto_bitcode_filename};
 use errors::{FatalError, Handler};
 use llvm::archive_ro::ArchiveRO;
 use llvm::{True, False};
 use llvm;
+use memmap;
+use rustc::dep_graph::WorkProduct;
 use rustc::hir::def_id::LOCAL_CRATE;
 use rustc::middle::exported_symbols::SymbolExportLevel;
 use rustc::session::config::{self, Lto};
 use rustc::util::common::time_ext;
+use rustc_data_structures::fx::FxHashMap;
 use time_graph::Timeline;
-use {ModuleCodegen, ModuleLlvm, ModuleKind, ModuleSource};
+use {ModuleCodegen, ModuleLlvm, ModuleKind};
 
 use libc;
 
-use std::ffi::CString;
+use std::ffi::{CStr, CString};
+use std::fs::{self, File};
 use std::ptr;
 use std::slice;
 use std::sync::Arc;
@@ -75,8 +79,8 @@ impl LtoModuleCodegen {
                 let module = module.take().unwrap();
                 {
                     let config = cgcx.config(module.kind);
-                    let llmod = module.llvm().unwrap().llmod();
-                    let tm = &*module.llvm().unwrap().tm;
+                    let llmod = module.module_llvm.llmod();
+                    let tm = &*module.module_llvm.tm;
                     run_pass_manager(cgcx, tm, llmod, config, false);
                     timeline.record("fat-done");
                 }
@@ -97,10 +101,16 @@ impl LtoModuleCodegen {
     }
 }
 
+/// Performs LTO, which in the case of full LTO means merging all modules into
+/// a single one and returning it for further optimizing. For ThinLTO, it will
+/// do the global analysis necessary and return two lists, one of the modules
+/// that need optimization and another for modules that can simply be copied over
+/// from the incr. comp. cache.
 pub(crate) fn run(cgcx: &CodegenContext,
                   modules: Vec<ModuleCodegen>,
+                  cached_modules: Vec<(SerializedModule, WorkProduct)>,
                   timeline: &mut Timeline)
-    -> Result<Vec<LtoModuleCodegen>, FatalError>
+    -> Result<(Vec<LtoModuleCodegen>, Vec<WorkProduct>), FatalError>
 {
     let diag_handler = cgcx.create_diag_handler();
     let export_threshold = match cgcx.lto {
@@ -187,11 +197,20 @@ pub(crate) fn run(cgcx: &CodegenContext,
         }
     }
 
-    let arr = symbol_white_list.iter().map(|c| c.as_ptr()).collect::<Vec<_>>();
+    let symbol_white_list = symbol_white_list.iter()
+                                             .map(|c| c.as_ptr())
+                                             .collect::<Vec<_>>();
     match cgcx.lto {
         Lto::Yes | // `-C lto` == fat LTO by default
         Lto::Fat => {
-            fat_lto(cgcx, &diag_handler, modules, upstream_modules, &arr, timeline)
+            assert!(cached_modules.is_empty());
+            let opt_jobs = fat_lto(cgcx,
+                                  &diag_handler,
+                                  modules,
+                                  upstream_modules,
+                                  &symbol_white_list,
+                                  timeline);
+            opt_jobs.map(|opt_jobs| (opt_jobs, vec![]))
         }
         Lto::Thin |
         Lto::ThinLocal => {
@@ -199,7 +218,13 @@ pub(crate) fn run(cgcx: &CodegenContext,
                 unreachable!("We should never reach this case if the LTO step \
                               is deferred to the linker");
             }
-            thin_lto(&diag_handler, modules, upstream_modules, &arr, timeline)
+            thin_lto(cgcx,
+                     &diag_handler,
+                     modules,
+                     upstream_modules,
+                     cached_modules,
+                     &symbol_white_list,
+                     timeline)
         }
         Lto::No => unreachable!(),
     }
@@ -229,7 +254,7 @@ fn fat_lto(cgcx: &CodegenContext,
         .filter(|&(_, module)| module.kind == ModuleKind::Regular)
         .map(|(i, module)| {
             let cost = unsafe {
-                llvm::LLVMRustModuleCost(module.llvm().unwrap().llmod())
+                llvm::LLVMRustModuleCost(module.module_llvm.llmod())
             };
             (cost, i)
         })
@@ -239,7 +264,7 @@ fn fat_lto(cgcx: &CodegenContext,
     let mut serialized_bitcode = Vec::new();
     {
         let (llcx, llmod) = {
-            let llvm = module.llvm().expect("can't lto pre-codegened modules");
+            let llvm = &module.module_llvm;
             (&llvm.llcx, llvm.llmod())
         };
         info!("using {:?} as a base module", module.name);
@@ -255,8 +280,7 @@ fn fat_lto(cgcx: &CodegenContext,
         // way we know of to do that is to serialize them to a string and them parse
         // them later. Not great but hey, that's why it's "fat" LTO, right?
         for module in modules {
-            let llvm = module.llvm().expect("can't lto pre-codegened modules");
-            let buffer = ModuleBuffer::new(llvm.llmod());
+            let buffer = ModuleBuffer::new(module.module_llvm.llmod());
             let llmod_id = CString::new(&module.name[..]).unwrap();
             serialized_modules.push((SerializedModule::Local(buffer), llmod_id));
         }
@@ -362,16 +386,23 @@ impl Drop for Linker<'a> {
 /// calculating the *index* for ThinLTO. This index will then be shared amongst
 /// all of the `LtoModuleCodegen` units returned below and destroyed once
 /// they all go out of scope.
-fn thin_lto(diag_handler: &Handler,
+fn thin_lto(cgcx: &CodegenContext,
+            diag_handler: &Handler,
             modules: Vec<ModuleCodegen>,
             serialized_modules: Vec<(SerializedModule, CString)>,
+            cached_modules: Vec<(SerializedModule, WorkProduct)>,
             symbol_white_list: &[*const libc::c_char],
             timeline: &mut Timeline)
-    -> Result<Vec<LtoModuleCodegen>, FatalError>
+    -> Result<(Vec<LtoModuleCodegen>, Vec<WorkProduct>), FatalError>
 {
     unsafe {
         info!("going for that thin, thin LTO");
 
+        let green_modules: FxHashMap<_, _> = cached_modules
+            .iter()
+            .map(|&(_, ref wp)| (wp.cgu_name.clone(), wp.clone()))
+            .collect();
+
         let mut thin_buffers = Vec::new();
         let mut module_names = Vec::new();
         let mut thin_modules = Vec::new();
@@ -385,9 +416,24 @@ fn thin_lto(diag_handler: &Handler,
         //        analysis!
         for (i, module) in modules.iter().enumerate() {
             info!("local module: {} - {}", i, module.name);
-            let llvm = module.llvm().expect("can't lto precodegened module");
             let name = CString::new(module.name.clone()).unwrap();
-            let buffer = ThinBuffer::new(llvm.llmod());
+            let buffer = ThinBuffer::new(module.module_llvm.llmod());
+
+            // We emit the module after having serialized it into a ThinBuffer
+            // because only then it will contain the ThinLTO module summary.
+            if let Some(ref incr_comp_session_dir) = cgcx.incr_comp_session_dir {
+                if cgcx.config(module.kind).emit_pre_thin_lto_bc {
+                    let path = incr_comp_session_dir
+                        .join(pre_lto_bitcode_filename(&module.name));
+
+                    fs::write(&path, buffer.data()).unwrap_or_else(|e| {
+                        panic!("Error writing pre-lto-bitcode file `{}`: {}",
+                               path.display(),
+                               e);
+                    });
+                }
+            }
+
             thin_modules.push(llvm::ThinLTOModule {
                 identifier: name.as_ptr(),
                 data: buffer.data().as_ptr(),
@@ -415,8 +461,13 @@ fn thin_lto(diag_handler: &Handler,
         //        looking at upstream modules entirely sometimes (the contents,
         //        we must always unconditionally look at the index).
         let mut serialized = Vec::new();
-        for (module, name) in serialized_modules {
-            info!("foreign module {:?}", name);
+
+        let cached_modules = cached_modules.into_iter().map(|(sm, wp)| {
+            (sm, CString::new(wp.cgu_name).unwrap())
+        });
+
+        for (module, name) in serialized_modules.into_iter().chain(cached_modules) {
+            info!("upstream or cached module {:?}", name);
             thin_modules.push(llvm::ThinLTOModule {
                 identifier: name.as_ptr(),
                 data: module.data().as_ptr(),
@@ -426,6 +477,9 @@ fn thin_lto(diag_handler: &Handler,
             module_names.push(name);
         }
 
+        // Sanity check
+        assert_eq!(thin_modules.len(), module_names.len());
+
         // Delegate to the C++ bindings to create some data here. Once this is a
         // tried-and-true interface we may wish to try to upstream some of this
         // to LLVM itself, right now we reimplement a lot of what they do
@@ -439,10 +493,22 @@ fn thin_lto(diag_handler: &Handler,
             write::llvm_err(&diag_handler, "failed to prepare thin LTO context".to_string())
         })?;
 
-        let data = ThinData(data);
         info!("thin LTO data created");
         timeline.record("data");
 
+        let import_map = if cgcx.incr_comp_session_dir.is_some() {
+            ThinLTOImports::from_thin_lto_data(data)
+        } else {
+            // If we don't compile incrementally, we don't need to load the
+            // import data from LLVM.
+            assert!(green_modules.is_empty());
+            ThinLTOImports::new()
+        };
+        info!("thin LTO import map loaded");
+        timeline.record("import-map-loaded");
+
+        let data = ThinData(data);
+
         // Throw our data in an `Arc` as we'll be sharing it across threads. We
         // also put all memory referenced by the C++ data (buffers, ids, etc)
         // into the arc as well. After this we'll create a thin module
@@ -453,12 +519,38 @@ fn thin_lto(diag_handler: &Handler,
             serialized_modules: serialized,
             module_names,
         });
-        Ok((0..shared.module_names.len()).map(|i| {
-            LtoModuleCodegen::Thin(ThinModule {
+
+        let mut copy_jobs = vec![];
+        let mut opt_jobs = vec![];
+
+        info!("checking which modules can be-reused and which have to be re-optimized.");
+        for (module_index, module_name) in shared.module_names.iter().enumerate() {
+            let module_name = module_name_to_str(module_name);
+
+            // If the module hasn't changed and none of the modules it imports
+            // from has changed, we can re-use the post-ThinLTO version of the
+            // module.
+            if green_modules.contains_key(module_name) {
+                let imports_all_green = import_map.modules_imported_by(module_name)
+                    .iter()
+                    .all(|imported_module| green_modules.contains_key(imported_module));
+
+                if imports_all_green {
+                    let work_product = green_modules[module_name].clone();
+                    copy_jobs.push(work_product);
+                    info!(" - {}: re-used", module_name);
+                    continue
+                }
+            }
+
+            info!(" - {}: re-compiled", module_name);
+            opt_jobs.push(LtoModuleCodegen::Thin(ThinModule {
                 shared: shared.clone(),
-                idx: i,
-            })
-        }).collect())
+                idx: module_index,
+            }));
+        }
+
+        Ok((opt_jobs, copy_jobs))
     }
 }
 
@@ -527,6 +619,7 @@ fn run_pass_manager(cgcx: &CodegenContext,
 pub enum SerializedModule {
     Local(ModuleBuffer),
     FromRlib(Vec<u8>),
+    FromUncompressedFile(memmap::Mmap, File),
 }
 
 impl SerializedModule {
@@ -534,6 +627,7 @@ impl SerializedModule {
         match *self {
             SerializedModule::Local(ref m) => m.data(),
             SerializedModule::FromRlib(ref m) => m,
+            SerializedModule::FromUncompressedFile(ref m, _) => m,
         }
     }
 }
@@ -663,16 +757,16 @@ impl ThinModule {
             write::llvm_err(&diag_handler, msg)
         })? as *const _;
         let module = ModuleCodegen {
-            source: ModuleSource::Codegened(ModuleLlvm {
+            module_llvm: ModuleLlvm {
                 llmod_raw,
                 llcx,
                 tm,
-            }),
+            },
             name: self.name().to_string(),
             kind: ModuleKind::Regular,
         };
         {
-            let llmod = module.llvm().unwrap().llmod();
+            let llmod = module.module_llvm.llmod();
             cgcx.save_temp_bitcode(&module, "thin-lto-input");
 
             // Before we do much else find the "main" `DICompileUnit` that we'll be
@@ -768,7 +862,7 @@ impl ThinModule {
             // little differently.
             info!("running thin lto passes over {}", module.name);
             let config = cgcx.config(module.kind);
-            run_pass_manager(cgcx, module.llvm().unwrap().tm, llmod, config, true);
+            run_pass_manager(cgcx, module.module_llvm.tm, llmod, config, true);
             cgcx.save_temp_bitcode(&module, "thin-lto-after-pm");
             timeline.record("thin-done");
         }
@@ -776,3 +870,61 @@ impl ThinModule {
         Ok(module)
     }
 }
+
+/// Records, for each importing module, the modules it imports from during ThinLTO.
+#[derive(Debug)]
+pub struct ThinLTOImports {
+    // key = llvm name of importing module, value = list of modules it imports from
+    imports: FxHashMap<String, Vec<String>>,
+}
+
+impl ThinLTOImports {
+    /// Creates an empty import map.
+    fn new() -> ThinLTOImports {
+        ThinLTOImports {
+            imports: FxHashMap(),
+        }
+    }
+
+    /// Returns the modules `llvm_module_name` imports from; empty if none are recorded.
+    fn modules_imported_by(&self, llvm_module_name: &str) -> &[String] {
+        self.imports.get(llvm_module_name).map(|v| &v[..]).unwrap_or(&[])
+    }
+
+    /// Load the ThinLTO import map from ThinLTOData.
+    unsafe fn from_thin_lto_data(data: *const llvm::ThinLTOData) -> ThinLTOImports {
+        // Callback handed to `LLVMRustGetThinLTOModuleImports`; each call records
+        // one (importing, imported) pair into the map behind `payload`.
+        unsafe extern "C" fn imported_module_callback(payload: *mut libc::c_void,
+                                                      importing_module_name: *const libc::c_char,
+                                                      imported_module_name: *const libc::c_char) {
+            let map = &mut* (payload as *mut ThinLTOImports);
+            let importing_module_name = CStr::from_ptr(importing_module_name);
+            let importing_module_name = module_name_to_str(&importing_module_name);
+            let imported_module_name = CStr::from_ptr(imported_module_name);
+            let imported_module_name = module_name_to_str(&imported_module_name);
+
+            // A single `entry` lookup creates the import list on first use.
+            map.imports
+               .entry(importing_module_name.to_owned())
+               .or_insert_with(Vec::new)
+               .push(imported_module_name.to_owned());
+        }
+        let mut map = ThinLTOImports::new();
+        llvm::LLVMRustGetThinLTOModuleImports(data,
+                                              imported_module_callback,
+                                              &mut map as *mut _ as *mut libc::c_void);
+        map
+    }
+}
+
+/// Borrows an LLVM module name as a `&str`.
+///
+/// A name that is not valid UTF-8 is reported through `bug!`.
+fn module_name_to_str(c_str: &CStr) -> &str {
+    c_str.to_str().unwrap_or_else(|e| {
+        bug!("Encountered non-utf8 LLVM module name `{}`: {}",
+            c_str.to_string_lossy(),
+            e)
+    })
+}
diff --git a/src/librustc_codegen_llvm/back/write.rs b/src/librustc_codegen_llvm/back/write.rs
index 2373428d68c0d..1c0f89193b209 100644
--- a/src/librustc_codegen_llvm/back/write.rs
+++ b/src/librustc_codegen_llvm/back/write.rs
@@ -10,14 +10,16 @@
 
 use attributes;
 use back::bytecode::{self, RLIB_BYTECODE_EXTENSION};
-use back::lto::{self, ModuleBuffer, ThinBuffer};
+use back::lto::{self, ModuleBuffer, ThinBuffer, SerializedModule};
 use back::link::{self, get_linker, remove};
 use back::command::Command;
 use back::linker::LinkerInfo;
 use back::symbol_export::ExportedSymbols;
 use base;
 use consts;
-use rustc_incremental::{copy_cgu_workproducts_to_incr_comp_cache_dir, in_incr_comp_dir};
+use memmap;
+use rustc_incremental::{copy_cgu_workproducts_to_incr_comp_cache_dir,
+                        in_incr_comp_dir, in_incr_comp_dir_sess};
 use rustc::dep_graph::{WorkProduct, WorkProductId, WorkProductFileKind};
 use rustc::middle::cstore::EncodedMetadata;
 use rustc::session::config::{self, OutputFilenames, OutputType, Passes, Sanitizer, Lto};
@@ -26,7 +28,8 @@ use rustc::util::nodemap::FxHashMap;
 use time_graph::{self, TimeGraph, Timeline};
 use llvm::{self, DiagnosticInfo, PassManager, SMDiagnostic};
 use llvm_util;
-use {CodegenResults, ModuleSource, ModuleCodegen, CompiledModule, ModuleKind};
+use {CodegenResults, ModuleCodegen, CompiledModule, ModuleKind, // ModuleLlvm,
+     CachedModuleCodegen};
 use CrateInfo;
 use rustc::hir::def_id::{CrateNum, LOCAL_CRATE};
 use rustc::ty::TyCtxt;
@@ -84,6 +87,8 @@ pub const TLS_MODEL_ARGS : [(&'static str, llvm::ThreadLocalMode); 4] = [
     ("local-exec", llvm::ThreadLocalMode::LocalExec),
 ];
 
+const PRE_THIN_LTO_BC_EXT: &str = "pre-thin-lto.bc";
+
 pub fn llvm_err(handler: &errors::Handler, msg: String) -> FatalError {
     match llvm::last_error() {
         Some(err) => handler.fatal(&format!("{}: {}", msg, err)),
@@ -223,6 +228,7 @@ pub struct ModuleConfig {
     pgo_use: String,
 
     // Flags indicating which outputs to produce.
+    pub emit_pre_thin_lto_bc: bool,
     emit_no_opt_bc: bool,
     emit_bc: bool,
     emit_bc_compressed: bool,
@@ -260,6 +266,7 @@ impl ModuleConfig {
             pgo_use: String::new(),
 
             emit_no_opt_bc: false,
+            emit_pre_thin_lto_bc: false,
             emit_bc: false,
             emit_bc_compressed: false,
             emit_lto_bc: false,
@@ -392,7 +399,7 @@ impl CodegenContext {
             let cgu = Some(&module.name[..]);
             let path = self.output_filenames.temp_path_ext(&ext, cgu);
             let cstr = path2cstr(&path);
-            let llmod = module.llvm().unwrap().llmod();
+            let llmod = module.module_llvm.llmod();
             llvm::LLVMWriteBitcodeToFile(llmod, cstr.as_ptr());
         }
     }
@@ -495,13 +502,9 @@ unsafe fn optimize(cgcx: &CodegenContext,
                    timeline: &mut Timeline)
     -> Result<(), FatalError>
 {
-    let (llmod, llcx, tm) = match module.source {
-        ModuleSource::Codegened(ref llvm) => (llvm.llmod(), &*llvm.llcx, &*llvm.tm),
-        ModuleSource::Preexisting(_) => {
-            bug!("optimize_and_codegen: called with ModuleSource::Preexisting")
-        }
-    };
-
+    let llmod = module.module_llvm.llmod();
+    let llcx = &*module.module_llvm.llcx;
+    let tm = &*module.module_llvm.tm;
     let _handlers = DiagnosticHandlers::new(cgcx, diag_handler, llcx);
 
     let module_name = module.name.clone();
@@ -627,7 +630,8 @@ unsafe fn optimize(cgcx: &CodegenContext,
 }
 
 fn generate_lto_work(cgcx: &CodegenContext,
-                     modules: Vec<ModuleCodegen>)
+                     modules: Vec<ModuleCodegen>,
+                     import_only_modules: Vec<(SerializedModule, WorkProduct)>)
     -> Vec<(WorkItem, u64)>
 {
     let mut timeline = cgcx.time_graph.as_ref().map(|tg| {
@@ -635,13 +639,22 @@ fn generate_lto_work(cgcx: &CodegenContext,
                  CODEGEN_WORK_PACKAGE_KIND,
                  "generate lto")
     }).unwrap_or(Timeline::noop());
-    let lto_modules = lto::run(cgcx, modules, &mut timeline)
+    let (lto_modules, copy_jobs) = lto::run(cgcx, modules, import_only_modules, &mut timeline)
         .unwrap_or_else(|e| e.raise());
 
-    lto_modules.into_iter().map(|module| {
+    let lto_modules = lto_modules.into_iter().map(|module| {
         let cost = module.cost();
         (WorkItem::LTO(module), cost)
-    }).collect()
+    });
+
+    let copy_jobs = copy_jobs.into_iter().map(|wp| {
+        (WorkItem::CopyPostLtoArtifacts(CachedModuleCodegen {
+            name: wp.cgu_name.clone(),
+            source: wp,
+        }), 0)
+    });
+
+    lto_modules.chain(copy_jobs).collect()
 }
 
 unsafe fn codegen(cgcx: &CodegenContext,
@@ -653,12 +666,9 @@ unsafe fn codegen(cgcx: &CodegenContext,
 {
     timeline.record("codegen");
     {
-        let (llmod, llcx, tm) = match module.source {
-            ModuleSource::Codegened(ref llvm) => (llvm.llmod(), &*llvm.llcx, &*llvm.tm),
-            ModuleSource::Preexisting(_) => {
-                bug!("codegen: called with ModuleSource::Preexisting")
-            }
-        };
+        let llmod = module.module_llvm.llmod();
+        let llcx = &*module.module_llvm.llcx;
+        let tm = &*module.module_llvm.tm;
         let module_name = module.name.clone();
         let module_name = Some(&module_name[..]);
         let handlers = DiagnosticHandlers::new(cgcx, diag_handler, llcx);
@@ -912,6 +922,20 @@ fn need_crate_bitcode_for_rlib(sess: &Session) -> bool {
     sess.opts.output_types.contains_key(&OutputType::Exe)
 }
 
+/// Decides whether pre-ThinLTO bitcode has to be emitted for this session.
+///
+/// That is only the case for incremental compilation in one of the ThinLTO modes.
+fn need_pre_thin_lto_bitcode_for_incr_comp(sess: &Session) -> bool {
+    // `&&` short-circuits, so `sess.lto()` is only consulted for incremental sessions.
+    sess.opts.incremental.is_some() && match sess.lto() {
+        Lto::Thin |
+        Lto::ThinLocal => true,
+        Lto::Yes |
+        Lto::Fat |
+        Lto::No => false,
+    }
+}
+
 pub fn start_async_codegen(tcx: TyCtxt,
                                time_graph: Option<TimeGraph>,
                                metadata: EncodedMetadata,
@@ -970,6 +994,7 @@ pub fn start_async_codegen(tcx: TyCtxt,
     // Save all versions of the bytecode if we're saving our temporaries.
     if sess.opts.cg.save_temps {
         modules_config.emit_no_opt_bc = true;
+        modules_config.emit_pre_thin_lto_bc = true;
         modules_config.emit_bc = true;
         modules_config.emit_lto_bc = true;
         metadata_config.emit_bc = true;
@@ -984,6 +1009,9 @@ pub fn start_async_codegen(tcx: TyCtxt,
         allocator_config.emit_bc_compressed = true;
     }
 
+    modules_config.emit_pre_thin_lto_bc =
+        need_pre_thin_lto_bitcode_for_incr_comp(sess);
+
     modules_config.no_integrated_as = tcx.sess.opts.cg.no_integrated_as ||
         tcx.sess.target.target.options.no_integrated_as;
 
@@ -1056,7 +1084,7 @@ pub fn start_async_codegen(tcx: TyCtxt,
 
 fn copy_all_cgu_workproducts_to_incr_comp_cache_dir(
     sess: &Session,
-    compiled_modules: &CompiledModules
+    compiled_modules: &CompiledModules,
 ) -> FxHashMap<WorkProductId, WorkProduct> {
     let mut work_products = FxHashMap::default();
 
@@ -1064,7 +1092,7 @@ fn copy_all_cgu_workproducts_to_incr_comp_cache_dir(
         return work_products;
     }
 
-    for module in compiled_modules.modules.iter() {
+    for module in compiled_modules.modules.iter().filter(|m| m.kind == ModuleKind::Regular) {
         let mut files = vec![];
 
         if let Some(ref path) = module.object {
@@ -1236,21 +1264,30 @@ fn produce_final_output_artifacts(sess: &Session,
     // These are used in linking steps and will be cleaned up afterward.
 }
 
-pub(crate) fn dump_incremental_data(codegen_results: &CodegenResults) {
-    println!("[incremental] Re-using {} out of {} modules",
-              codegen_results.modules.iter().filter(|m| m.pre_existing).count(),
-              codegen_results.modules.len());
+pub(crate) fn dump_incremental_data(_codegen_results: &CodegenResults) {
+    // FIXME(mw): The re-use report below is disabled for now: incremental LTO
+    //            has made the situation more complicated, and a CGU can now
+    //            have more than two caching states.
+    // println!("[incremental] Re-using {} out of {} modules",
+    //           codegen_results.modules.iter().filter(|m| m.pre_existing).count(),
+    //           codegen_results.modules.len());
+}
 
 enum WorkItem {
+    /// Optimize a newly codegened, totally unoptimized module.
     Optimize(ModuleCodegen),
+    /// Copy the post-LTO artifacts from the incremental cache to the output
+    /// directory.
+    CopyPostLtoArtifacts(CachedModuleCodegen),
+    /// Perform (Thin)LTO on the given module.
     LTO(lto::LtoModuleCodegen),
 }
 
 impl WorkItem {
-    fn kind(&self) -> ModuleKind {
+    fn module_kind(&self) -> ModuleKind {
         match *self {
             WorkItem::Optimize(ref m) => m.kind,
+            WorkItem::CopyPostLtoArtifacts(_) |
             WorkItem::LTO(_) => ModuleKind::Regular,
         }
     }
@@ -1258,6 +1295,7 @@ impl WorkItem {
     fn name(&self) -> String {
         match *self {
             WorkItem::Optimize(ref m) => format!("optimize: {}", m.name),
+            WorkItem::CopyPostLtoArtifacts(ref m) => format!("copy post LTO artifacts: {}", m.name),
             WorkItem::LTO(ref m) => format!("lto: {}", m.name()),
         }
     }
@@ -1273,141 +1311,168 @@ fn execute_work_item(cgcx: &CodegenContext,
                      timeline: &mut Timeline)
     -> Result<WorkItemResult, FatalError>
 {
-    let diag_handler = cgcx.create_diag_handler();
-    let config = cgcx.config(work_item.kind());
-    let module = match work_item {
-        WorkItem::Optimize(module) => module,
-        WorkItem::LTO(mut lto) => {
-            unsafe {
-                let module = lto.optimize(cgcx, timeline)?;
-                let module = codegen(cgcx, &diag_handler, module, config, timeline)?;
-                return Ok(WorkItemResult::Compiled(module))
-            }
+    let module_config = cgcx.config(work_item.module_kind());
+
+    match work_item {
+        WorkItem::Optimize(module) => {
+            execute_optimize_work_item(cgcx, module, module_config, timeline)
         }
-    };
-    let module_name = module.name.clone();
+        WorkItem::CopyPostLtoArtifacts(module) => {
+            execute_copy_from_cache_work_item(cgcx, module, module_config, timeline)
+        }
+        WorkItem::LTO(module) => {
+            execute_lto_work_item(cgcx, module, module_config, timeline)
+        }
+    }
+}
 
-    let pre_existing = match module.source {
-        ModuleSource::Codegened(_) => None,
-        ModuleSource::Preexisting(ref wp) => Some(wp.clone()),
-    };
+fn execute_optimize_work_item(cgcx: &CodegenContext,
+                              module: ModuleCodegen,
+                              module_config: &ModuleConfig,
+                              timeline: &mut Timeline)
+    -> Result<WorkItemResult, FatalError>
+{
+    let diag_handler = cgcx.create_diag_handler();
 
-    if let Some(wp) = pre_existing {
-        let incr_comp_session_dir = cgcx.incr_comp_session_dir
-                                        .as_ref()
-                                        .unwrap();
-        let name = &module.name;
-        let mut object = None;
-        let mut bytecode = None;
-        let mut bytecode_compressed = None;
-        for (kind, saved_file) in wp.saved_files {
-            let obj_out = match kind {
-                WorkProductFileKind::Object => {
-                    let path = cgcx.output_filenames.temp_path(OutputType::Object, Some(name));
-                    object = Some(path.clone());
-                    path
-                }
-                WorkProductFileKind::Bytecode => {
-                    let path = cgcx.output_filenames.temp_path(OutputType::Bitcode, Some(name));
-                    bytecode = Some(path.clone());
-                    path
-                }
-                WorkProductFileKind::BytecodeCompressed => {
-                    let path = cgcx.output_filenames.temp_path(OutputType::Bitcode, Some(name))
-                        .with_extension(RLIB_BYTECODE_EXTENSION);
-                    bytecode_compressed = Some(path.clone());
-                    path
-                }
-            };
-            let source_file = in_incr_comp_dir(&incr_comp_session_dir,
-                                               &saved_file);
-            debug!("copying pre-existing module `{}` from {:?} to {}",
-                   module.name,
-                   source_file,
-                   obj_out.display());
-            match link_or_copy(&source_file, &obj_out) {
-                Ok(_) => { }
-                Err(err) => {
-                    diag_handler.err(&format!("unable to copy {} to {}: {}",
-                                              source_file.display(),
-                                              obj_out.display(),
-                                              err));
-                }
-            }
+    unsafe {
+        optimize(cgcx, &diag_handler, &module, module_config, timeline)?;
+    }
+
+    let linker_does_lto = cgcx.opts.debugging_opts.cross_lang_lto.enabled();
+
+    // After we've done the initial round of optimizations we need to
+    // decide whether to synchronously codegen this module or ship it
+    // back to the coordinator thread for further LTO processing (which
+    // has to wait for all the initial modules to be optimized).
+    //
+    // Here we dispatch based on the `cgcx.lto` and kind of module we're
+    // codegenning...
+    let needs_lto = match cgcx.lto {
+        Lto::No => false,
+
+        // If the linker does LTO, we don't have to do it. Note that we
+        // keep doing full LTO, if it is requested, so as not to break the
+        // assumption that the output will be a single module.
+        Lto::Thin | Lto::ThinLocal if linker_does_lto => false,
+
+        // Here we've got a full crate graph LTO requested. We ignore
+        // this, however, if the crate type is only an rlib as there's
+        // no full crate graph to process, that'll happen later.
+        //
+        // This use case currently comes up primarily for targets that
+        // require LTO so the request for LTO is always unconditionally
+        // passed down to the backend, but we don't actually want to do
+        // anything about it yet until we've got a final product.
+        Lto::Yes | Lto::Fat | Lto::Thin => {
+            cgcx.crate_types.len() != 1 ||
+                cgcx.crate_types[0] != config::CrateType::Rlib
         }
-        assert_eq!(object.is_some(), config.emit_obj);
-        assert_eq!(bytecode.is_some(), config.emit_bc);
-        assert_eq!(bytecode_compressed.is_some(), config.emit_bc_compressed);
-
-        Ok(WorkItemResult::Compiled(CompiledModule {
-            name: module_name,
-            kind: ModuleKind::Regular,
-            pre_existing: true,
-            object,
-            bytecode,
-            bytecode_compressed,
-        }))
-    } else {
-        debug!("llvm-optimizing {:?}", module_name);
 
-        unsafe {
-            optimize(cgcx, &diag_handler, &module, config, timeline)?;
-
-            let linker_does_lto = cgcx.opts.debugging_opts.cross_lang_lto.enabled();
-
-            // After we've done the initial round of optimizations we need to
-            // decide whether to synchronously codegen this module or ship it
-            // back to the coordinator thread for further LTO processing (which
-            // has to wait for all the initial modules to be optimized).
-            //
-            // Here we dispatch based on the `cgcx.lto` and kind of module we're
-            // codegenning...
-            let needs_lto = match cgcx.lto {
-                Lto::No => false,
-
-                // If the linker does LTO, we don't have to do it. Note that we
-                // keep doing full LTO, if it is requested, as not to break the
-                // assumption that the output will be a single module.
-                Lto::Thin | Lto::ThinLocal if linker_does_lto => false,
-
-                // Here we've got a full crate graph LTO requested. We ignore
-                // this, however, if the crate type is only an rlib as there's
-                // no full crate graph to process, that'll happen later.
-                //
-                // This use case currently comes up primarily for targets that
-                // require LTO so the request for LTO is always unconditionally
-                // passed down to the backend, but we don't actually want to do
-                // anything about it yet until we've got a final product.
-                Lto::Yes | Lto::Fat | Lto::Thin => {
-                    cgcx.crate_types.len() != 1 ||
-                        cgcx.crate_types[0] != config::CrateType::Rlib
-                }
+        // When we're automatically doing ThinLTO for multi-codegen-unit
+        // builds we don't actually want to LTO the allocator module if
+        // it shows up. This is due to various linker shenanigans that
+        // we'll encounter later.
+        //
+        // Additionally here's where we also factor in the current LLVM
+        // version. If it doesn't support ThinLTO we skip this.
+        Lto::ThinLocal => {
+            module.kind != ModuleKind::Allocator &&
+                unsafe { llvm::LLVMRustThinLTOAvailable() }
+        }
+    };
 
-                // When we're automatically doing ThinLTO for multi-codegen-unit
-                // builds we don't actually want to LTO the allocator modules if
-                // it shows up. This is due to various linker shenanigans that
-                // we'll encounter later.
-                //
-                // Additionally here's where we also factor in the current LLVM
-                // version. If it doesn't support ThinLTO we skip this.
-                Lto::ThinLocal => {
-                    module.kind != ModuleKind::Allocator &&
-                        llvm::LLVMRustThinLTOAvailable()
-                }
-            };
+    // Metadata modules never participate in LTO regardless of the lto
+    // settings.
+    let needs_lto = needs_lto && module.kind != ModuleKind::Metadata;
 
-            // Metadata modules never participate in LTO regardless of the lto
-            // settings.
-            let needs_lto = needs_lto && module.kind != ModuleKind::Metadata;
+    if needs_lto {
+        Ok(WorkItemResult::NeedsLTO(module))
+    } else {
+        let module = unsafe {
+            codegen(cgcx, &diag_handler, module, module_config, timeline)?
+        };
+        Ok(WorkItemResult::Compiled(module))
+    }
+}
 
-            if needs_lto {
-                Ok(WorkItemResult::NeedsLTO(module))
-            } else {
-                let module = codegen(cgcx, &diag_handler, module, config, timeline)?;
-                Ok(WorkItemResult::Compiled(module))
+fn execute_copy_from_cache_work_item(cgcx: &CodegenContext,
+                                     module: CachedModuleCodegen,
+                                     module_config: &ModuleConfig,
+                                     _: &mut Timeline)
+    -> Result<WorkItemResult, FatalError>
+{
+    let incr_comp_session_dir = cgcx.incr_comp_session_dir
+                                    .as_ref()
+                                    .unwrap();
+    let mut object = None;
+    let mut bytecode = None;
+    let mut bytecode_compressed = None;
+    for (kind, saved_file) in &module.source.saved_files {
+        let obj_out = match kind {
+            WorkProductFileKind::Object => {
+                let path = cgcx.output_filenames.temp_path(OutputType::Object,
+                                                           Some(&module.name));
+                object = Some(path.clone());
+                path
+            }
+            WorkProductFileKind::Bytecode => {
+                let path = cgcx.output_filenames.temp_path(OutputType::Bitcode,
+                                                           Some(&module.name));
+                bytecode = Some(path.clone());
+                path
+            }
+            WorkProductFileKind::BytecodeCompressed => {
+                let path = cgcx.output_filenames.temp_path(OutputType::Bitcode,
+                                                           Some(&module.name))
+                    .with_extension(RLIB_BYTECODE_EXTENSION);
+                bytecode_compressed = Some(path.clone());
+                path
+            }
+        };
+        let source_file = in_incr_comp_dir(&incr_comp_session_dir,
+                                           &saved_file);
+        debug!("copying pre-existing module `{}` from {:?} to {}",
+               module.name,
+               source_file,
+               obj_out.display());
+        match link_or_copy(&source_file, &obj_out) {
+            Ok(_) => { }
+            Err(err) => {
+                let diag_handler = cgcx.create_diag_handler();
+                diag_handler.err(&format!("unable to copy {} to {}: {}",
+                                          source_file.display(),
+                                          obj_out.display(),
+                                          err));
             }
         }
     }
+
+    assert_eq!(object.is_some(), module_config.emit_obj);
+    assert_eq!(bytecode.is_some(), module_config.emit_bc);
+    assert_eq!(bytecode_compressed.is_some(), module_config.emit_bc_compressed);
+
+    Ok(WorkItemResult::Compiled(CompiledModule {
+        name: module.name,
+        kind: ModuleKind::Regular,
+        object,
+        bytecode,
+        bytecode_compressed,
+    }))
+}
+
+fn execute_lto_work_item(cgcx: &CodegenContext,
+                         mut module: lto::LtoModuleCodegen,
+                         module_config: &ModuleConfig,
+                         timeline: &mut Timeline)
+    -> Result<WorkItemResult, FatalError>
+{
+    let diag_handler = cgcx.create_diag_handler();
+
+    unsafe {
+        let module = module.optimize(cgcx, timeline)?;
+        let module = codegen(cgcx, &diag_handler, module, module_config, timeline)?;
+        Ok(WorkItemResult::Compiled(module))
+    }
 }
 
 enum Message {
@@ -1424,6 +1489,10 @@ enum Message {
         llvm_work_item: WorkItem,
         cost: u64,
     },
+    AddImportOnlyModule {
+        module_data: SerializedModule,
+        work_product: WorkProduct,
+    },
     CodegenComplete,
     CodegenItem,
 }
@@ -1703,6 +1772,7 @@ fn start_executing_work(tcx: TyCtxt,
         let mut compiled_metadata_module = None;
         let mut compiled_allocator_module = None;
         let mut needs_lto = Vec::new();
+        let mut lto_import_only_modules = Vec::new();
         let mut started_lto = false;
 
         // This flag tracks whether all items have gone through codegens
@@ -1726,6 +1796,7 @@ fn start_executing_work(tcx: TyCtxt,
               work_items.len() > 0 ||
               running > 0 ||
               needs_lto.len() > 0 ||
+              lto_import_only_modules.len() > 0 ||
               main_thread_worker_state != MainThreadWorkerState::Idle {
 
             // While there are still CGUs to be codegened, the coordinator has
@@ -1749,7 +1820,7 @@ fn start_executing_work(tcx: TyCtxt,
                             worker: get_worker_id(&mut free_worker_ids),
                             .. cgcx.clone()
                         };
-                        maybe_start_llvm_timer(cgcx.config(item.kind()),
+                        maybe_start_llvm_timer(cgcx.config(item.module_kind()),
                                                &mut llvm_start_time);
                         main_thread_worker_state = MainThreadWorkerState::LLVMing;
                         spawn_work(cgcx, item);
@@ -1765,10 +1836,12 @@ fn start_executing_work(tcx: TyCtxt,
                    running == 0 &&
                    main_thread_worker_state == MainThreadWorkerState::Idle {
                     assert!(!started_lto);
-                    assert!(needs_lto.len() > 0);
+                    assert!(needs_lto.len() + lto_import_only_modules.len() > 0);
                     started_lto = true;
                     let modules = mem::replace(&mut needs_lto, Vec::new());
-                    for (work, cost) in generate_lto_work(&cgcx, modules) {
+                    let import_only_modules =
+                        mem::replace(&mut lto_import_only_modules, Vec::new());
+                    for (work, cost) in generate_lto_work(&cgcx, modules, import_only_modules) {
                         let insertion_index = work_items
                             .binary_search_by_key(&cost, |&(_, cost)| cost)
                             .unwrap_or_else(|e| e);
@@ -1789,7 +1862,7 @@ fn start_executing_work(tcx: TyCtxt,
                                 worker: get_worker_id(&mut free_worker_ids),
                                 .. cgcx.clone()
                             };
-                            maybe_start_llvm_timer(cgcx.config(item.kind()),
+                            maybe_start_llvm_timer(cgcx.config(item.module_kind()),
                                                    &mut llvm_start_time);
                             main_thread_worker_state = MainThreadWorkerState::LLVMing;
                             spawn_work(cgcx, item);
@@ -1820,7 +1893,7 @@ fn start_executing_work(tcx: TyCtxt,
             while work_items.len() > 0 && running < tokens.len() {
                 let (item, _) = work_items.pop().unwrap();
 
-                maybe_start_llvm_timer(cgcx.config(item.kind()),
+                maybe_start_llvm_timer(cgcx.config(item.module_kind()),
                                        &mut llvm_start_time);
 
                 let cgcx = CodegenContext {
@@ -1932,10 +2005,17 @@ fn start_executing_work(tcx: TyCtxt,
                     } else {
                         running -= 1;
                     }
-
                     free_worker_ids.push(worker_id);
                     needs_lto.push(result);
                 }
+                Message::AddImportOnlyModule { module_data, work_product } => {
+                    assert!(!started_lto);
+                    assert!(!codegen_done);
+                    assert_eq!(main_thread_worker_state,
+                               MainThreadWorkerState::Codegenning);
+                    lto_import_only_modules.push((module_data, work_product));
+                    main_thread_worker_state = MainThreadWorkerState::Idle;
+                }
                 Message::Done { result: Err(()), worker_id: _ } => {
                     shared_emitter.fatal("aborting due to worker thread failure");
                     // Exit the coordinator thread
@@ -2308,9 +2388,9 @@ impl OngoingCodegen {
             time_graph.dump(&format!("{}-timings", self.crate_name));
         }
 
-        let work_products = copy_all_cgu_workproducts_to_incr_comp_cache_dir(sess,
-                                                                             &compiled_modules);
-
+        let work_products =
+            copy_all_cgu_workproducts_to_incr_comp_cache_dir(sess,
+                                                             &compiled_modules);
         produce_final_output_artifacts(sess,
                                        &compiled_modules,
                                        &self.output_filenames);
@@ -2371,8 +2451,8 @@ impl OngoingCodegen {
 }
 
 pub(crate) fn submit_codegened_module_to_llvm(tcx: TyCtxt,
-                                               module: ModuleCodegen,
-                                               cost: u64) {
+                                              module: ModuleCodegen,
+                                              cost: u64) {
     let llvm_work_item = WorkItem::Optimize(module);
     drop(tcx.tx_to_llvm_workers.lock().send(Box::new(Message::CodegenDone {
         llvm_work_item,
@@ -2380,6 +2460,40 @@ pub(crate) fn submit_codegened_module_to_llvm(tcx: TyCtxt,
     })));
 }
 
+pub(crate) fn submit_post_lto_module_to_llvm(tcx: TyCtxt,
+                                             module: CachedModuleCodegen) {
+    let llvm_work_item = WorkItem::CopyPostLtoArtifacts(module);
+    drop(tcx.tx_to_llvm_workers.lock().send(Box::new(Message::CodegenDone {
+        llvm_work_item,
+        cost: 0,
+    })));
+}
+
+pub(crate) fn submit_pre_lto_module_to_llvm(tcx: TyCtxt,
+                                            module: CachedModuleCodegen) {
+    let filename = pre_lto_bitcode_filename(&module.name);
+    let bc_path = in_incr_comp_dir_sess(tcx.sess, &filename);
+    let file = fs::File::open(&bc_path).unwrap_or_else(|e| {
+        panic!("failed to open bitcode file `{}`: {}", bc_path.display(), e)
+    });
+
+    let mmap = unsafe {
+        memmap::Mmap::map(&file).unwrap_or_else(|e| {
+            panic!("failed to mmap bitcode file `{}`: {}", bc_path.display(), e)
+        })
+    };
+
+    // Schedule the module to be loaded
+    drop(tcx.tx_to_llvm_workers.lock().send(Box::new(Message::AddImportOnlyModule {
+        module_data: SerializedModule::FromUncompressedFile(mmap, file),
+        work_product: module.source,
+    })));
+}
+
+pub(super) fn pre_lto_bitcode_filename(module_name: &str) -> String {
+    format!("{}.{}", module_name, PRE_THIN_LTO_BC_EXT)
+}
+
 fn msvc_imps_needed(tcx: TyCtxt) -> bool {
     // This should never be true (because it's not supported). If it is true,
     // something is wrong with commandline arg validation.
diff --git a/src/librustc_codegen_llvm/base.rs b/src/librustc_codegen_llvm/base.rs
index 009c6da9d8d15..c1f6006e684be 100644
--- a/src/librustc_codegen_llvm/base.rs
+++ b/src/librustc_codegen_llvm/base.rs
@@ -24,9 +24,9 @@
 //!     int) and rec(x=int, y=int, z=int) will have the same llvm::Type.
 
 use super::ModuleLlvm;
-use super::ModuleSource;
 use super::ModuleCodegen;
 use super::ModuleKind;
+use super::CachedModuleCodegen;
 
 use abi;
 use back::write::{self, OngoingCodegen};
@@ -40,12 +40,11 @@ use rustc::middle::cstore::{EncodedMetadata};
 use rustc::ty::{self, Ty, TyCtxt};
 use rustc::ty::layout::{self, Align, TyLayout, LayoutOf};
 use rustc::ty::query::Providers;
-use rustc::dep_graph::{DepNode, DepConstructor};
 use rustc::middle::cstore::{self, LinkagePreference};
 use rustc::middle::exported_symbols;
 use rustc::util::common::{time, print_time_passes_entry};
 use rustc::util::profiling::ProfileCategory;
-use rustc::session::config::{self, DebugInfo, EntryFnType};
+use rustc::session::config::{self, DebugInfo, EntryFnType, Lto};
 use rustc::session::Session;
 use rustc_incremental;
 use allocator;
@@ -698,6 +697,50 @@ pub fn iter_globals(llmod: &'ll llvm::Module) -> ValueIter<'ll> {
     }
 }
 
+#[derive(Debug)]
+enum CguReUsable {
+    PreLto,
+    PostLto,
+    No
+}
+
+fn determine_cgu_reuse<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
+                                 cgu: &CodegenUnit<'tcx>)
+                                 -> CguReUsable {
+    if !tcx.dep_graph.is_fully_enabled() {
+        return CguReUsable::No
+    }
+
+    let work_product_id = &cgu.work_product_id();
+    if tcx.dep_graph.previous_work_product(work_product_id).is_none() {
+        // We don't have anything cached for this CGU. This can happen
+        // if the CGU did not exist in the previous session.
+        return CguReUsable::No
+    }
+
+    // Try to mark the CGU as green. If we can do so, it means that nothing
+    // affecting the LLVM module has changed and we can re-use a cached version.
+    // If we compile with any kind of LTO, this means we can re-use the bitcode
+    // of the Pre-LTO stage (possibly also the Post-LTO version but we'll only
+    // know that later). If we are not doing LTO, there is only one optimized
+    // version of each module, so we re-use that.
+    let dep_node = cgu.codegen_dep_node(tcx);
+    assert!(!tcx.dep_graph.dep_node_exists(&dep_node),
+        "CompileCodegenUnit dep-node for CGU `{}` already exists before marking.",
+        cgu.name());
+
+    if tcx.dep_graph.try_mark_green(tcx, &dep_node).is_some() {
+        // We can re-use either the pre- or the post-thinlto state
+        if tcx.sess.lto() != Lto::No {
+            CguReUsable::PreLto
+        } else {
+            CguReUsable::PostLto
+        }
+    } else {
+        CguReUsable::No
+    }
+}
+
 pub fn codegen_crate<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
                              rx: mpsc::Receiver<Box<dyn Any + Send>>)
                              -> OngoingCodegen {
@@ -734,7 +777,7 @@ pub fn codegen_crate<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
 
     let metadata_module = ModuleCodegen {
         name: metadata_cgu_name,
-        source: ModuleSource::Codegened(metadata_llvm_module),
+        module_llvm: metadata_llvm_module,
         kind: ModuleKind::Metadata,
     };
 
@@ -823,7 +866,7 @@ pub fn codegen_crate<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
 
         Some(ModuleCodegen {
             name: llmod_id,
-            source: ModuleSource::Codegened(modules),
+            module_llvm: modules,
             kind: ModuleKind::Allocator,
         })
     } else {
@@ -851,48 +894,40 @@ pub fn codegen_crate<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
         ongoing_codegen.wait_for_signal_to_codegen_item();
         ongoing_codegen.check_for_errors(tcx.sess);
 
-        // First, if incremental compilation is enabled, we try to re-use the
-        // codegen unit from the cache.
-        if tcx.dep_graph.is_fully_enabled() {
-            let cgu_id = cgu.work_product_id();
-
-            // Check whether there is a previous work-product we can
-            // re-use.  Not only must the file exist, and the inputs not
-            // be dirty, but the hash of the symbols we will generate must
-            // be the same.
-            if let Some(buf) = tcx.dep_graph.previous_work_product(&cgu_id) {
-                let dep_node = &DepNode::new(tcx,
-                    DepConstructor::CompileCodegenUnit(cgu.name().clone()));
-
-                // We try to mark the DepNode::CompileCodegenUnit green. If we
-                // succeed it means that none of the dependencies has changed
-                // and we can safely re-use.
-                if let Some(dep_node_index) = tcx.dep_graph.try_mark_green(tcx, dep_node) {
-                    let module = ModuleCodegen {
-                        name: cgu.name().to_string(),
-                        source: ModuleSource::Preexisting(buf),
-                        kind: ModuleKind::Regular,
-                    };
-                    tcx.dep_graph.mark_loaded_from_cache(dep_node_index, true);
-                    write::submit_codegened_module_to_llvm(tcx, module, 0);
-                    // Continue to next cgu, this one is done.
-                    continue
-                }
-            } else {
-                // This can happen if files were  deleted from the cache
-                // directory for some reason. We just re-compile then.
+        let loaded_from_cache = match determine_cgu_reuse(tcx, &cgu) {
+            CguReUsable::No => {
+                let _timing_guard = time_graph.as_ref().map(|time_graph| {
+                    time_graph.start(write::CODEGEN_WORKER_TIMELINE,
+                                     write::CODEGEN_WORK_PACKAGE_KIND,
+                                     &format!("codegen {}", cgu.name()))
+                });
+                let start_time = Instant::now();
+                let stats = compile_codegen_unit(tcx, *cgu.name());
+                all_stats.extend(stats);
+                total_codegen_time += start_time.elapsed();
+                false
             }
-        }
+            CguReUsable::PreLto => {
+                write::submit_pre_lto_module_to_llvm(tcx, CachedModuleCodegen {
+                    name: cgu.name().to_string(),
+                    source: cgu.work_product(tcx),
+                });
+                true
+            }
+            CguReUsable::PostLto => {
+                write::submit_post_lto_module_to_llvm(tcx, CachedModuleCodegen {
+                    name: cgu.name().to_string(),
+                    source: cgu.work_product(tcx),
+                });
+                true
+            }
+        };
 
-        let _timing_guard = time_graph.as_ref().map(|time_graph| {
-            time_graph.start(write::CODEGEN_WORKER_TIMELINE,
-                             write::CODEGEN_WORK_PACKAGE_KIND,
-                             &format!("codegen {}", cgu.name()))
-        });
-        let start_time = Instant::now();
-        all_stats.extend(tcx.compile_codegen_unit(*cgu.name()));
-        total_codegen_time += start_time.elapsed();
-        ongoing_codegen.check_for_errors(tcx.sess);
+        if tcx.dep_graph.is_fully_enabled() {
+            let dep_node = cgu.codegen_dep_node(tcx);
+            let dep_node_index = tcx.dep_graph.dep_node_index_of(&dep_node);
+            tcx.dep_graph.mark_loaded_from_cache(dep_node_index, loaded_from_cache);
+        }
     }
 
     ongoing_codegen.codegen_finished(tcx);
@@ -1156,11 +1191,15 @@ fn is_codegened_item(tcx: TyCtxt, id: DefId) -> bool {
 }
 
 fn compile_codegen_unit<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
-                                  cgu: InternedString) -> Stats {
-    let cgu = tcx.codegen_unit(cgu);
-
+                                  cgu_name: InternedString)
+                                  -> Stats {
     let start_time = Instant::now();
-    let (stats, module) = module_codegen(tcx, cgu);
+
+    let dep_node = tcx.codegen_unit(cgu_name).codegen_dep_node(tcx);
+    let ((stats, module), _) = tcx.dep_graph.with_task(dep_node,
+                                                       tcx,
+                                                       cgu_name,
+                                                       module_codegen);
     let time_to_codegen = start_time.elapsed();
 
     // We assume that the cost to run LLVM on a CGU is proportional to
@@ -1169,23 +1208,23 @@ fn compile_codegen_unit<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
                time_to_codegen.subsec_nanos() as u64;
 
     write::submit_codegened_module_to_llvm(tcx,
-                                            module,
-                                            cost);
+                                           module,
+                                           cost);
     return stats;
 
     fn module_codegen<'a, 'tcx>(
         tcx: TyCtxt<'a, 'tcx, 'tcx>,
-        cgu: Arc<CodegenUnit<'tcx>>)
+        cgu_name: InternedString)
         -> (Stats, ModuleCodegen)
     {
-        let cgu_name = cgu.name().to_string();
+        let cgu = tcx.codegen_unit(cgu_name);
 
         // Instantiate monomorphizations without filling out definitions yet...
-        let llvm_module = ModuleLlvm::new(tcx.sess, &cgu_name);
+        let llvm_module = ModuleLlvm::new(tcx.sess, &cgu_name.as_str());
         let stats = {
             let cx = CodegenCx::new(tcx, cgu, &llvm_module);
             let mono_items = cx.codegen_unit
-                                 .items_in_deterministic_order(cx.tcx);
+                               .items_in_deterministic_order(cx.tcx);
             for &(mono_item, (linkage, visibility)) in &mono_items {
                 mono_item.predefine(&cx, linkage, visibility);
             }
@@ -1234,8 +1273,8 @@ fn compile_codegen_unit<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
         };
 
         (stats, ModuleCodegen {
-            name: cgu_name,
-            source: ModuleSource::Codegened(llvm_module),
+            name: cgu_name.to_string(),
+            module_llvm: llvm_module,
             kind: ModuleKind::Regular,
         })
     }
@@ -1254,7 +1293,6 @@ pub fn provide(providers: &mut Providers) {
             .cloned()
             .unwrap_or_else(|| panic!("failed to find cgu with name {:?}", name))
     };
-    providers.compile_codegen_unit = compile_codegen_unit;
 
     provide_extern(providers);
 }
diff --git a/src/librustc_codegen_llvm/lib.rs b/src/librustc_codegen_llvm/lib.rs
index 31eeb5633fba9..dcdd8c1f6e9f9 100644
--- a/src/librustc_codegen_llvm/lib.rs
+++ b/src/librustc_codegen_llvm/lib.rs
@@ -66,13 +66,13 @@ extern crate rustc_errors as errors;
 extern crate serialize;
 extern crate cc; // Used to locate MSVC
 extern crate tempfile;
+extern crate memmap;
 
 use back::bytecode::RLIB_BYTECODE_EXTENSION;
 
 pub use llvm_util::target_features;
-
 use std::any::Any;
-use std::path::PathBuf;
+use std::path::{PathBuf};
 use std::sync::mpsc;
 use rustc_data_structures::sync::Lrc;
 
@@ -100,7 +100,7 @@ mod back {
     mod command;
     pub mod linker;
     pub mod link;
-    mod lto;
+    pub mod lto;
     pub mod symbol_export;
     pub mod write;
     mod rpath;
@@ -273,10 +273,15 @@ struct ModuleCodegen {
     /// as the crate name and disambiguator.
     /// We currently generate these names via CodegenUnit::build_cgu_name().
     name: String,
-    source: ModuleSource,
+    module_llvm: ModuleLlvm,
     kind: ModuleKind,
 }
 
+struct CachedModuleCodegen {
+    name: String,
+    source: WorkProduct,
+}
+
 #[derive(Copy, Clone, Debug, PartialEq)]
 enum ModuleKind {
     Regular,
@@ -285,22 +290,11 @@ enum ModuleKind {
 }
 
 impl ModuleCodegen {
-    fn llvm(&self) -> Option<&ModuleLlvm> {
-        match self.source {
-            ModuleSource::Codegened(ref llvm) => Some(llvm),
-            ModuleSource::Preexisting(_) => None,
-        }
-    }
-
     fn into_compiled_module(self,
-                                emit_obj: bool,
-                                emit_bc: bool,
-                                emit_bc_compressed: bool,
-                                outputs: &OutputFilenames) -> CompiledModule {
-        let pre_existing = match self.source {
-            ModuleSource::Preexisting(_) => true,
-            ModuleSource::Codegened(_) => false,
-        };
+                            emit_obj: bool,
+                            emit_bc: bool,
+                            emit_bc_compressed: bool,
+                            outputs: &OutputFilenames) -> CompiledModule {
         let object = if emit_obj {
             Some(outputs.temp_path(OutputType::Object, Some(&self.name)))
         } else {
@@ -321,7 +315,6 @@ impl ModuleCodegen {
         CompiledModule {
             name: self.name.clone(),
             kind: self.kind,
-            pre_existing,
             object,
             bytecode,
             bytecode_compressed,
@@ -333,20 +326,11 @@ impl ModuleCodegen {
 struct CompiledModule {
     name: String,
     kind: ModuleKind,
-    pre_existing: bool,
     object: Option<PathBuf>,
     bytecode: Option<PathBuf>,
     bytecode_compressed: Option<PathBuf>,
 }
 
-enum ModuleSource {
-    /// Copy the `.o` files or whatever from the incr. comp. directory.
-    Preexisting(WorkProduct),
-
-    /// Rebuild from this LLVM module.
-    Codegened(ModuleLlvm),
-}
-
 struct ModuleLlvm {
     llcx: &'static mut llvm::Context,
     llmod_raw: *const llvm::Module,
diff --git a/src/librustc_codegen_llvm/llvm/ffi.rs b/src/librustc_codegen_llvm/llvm/ffi.rs
index 51b0299e63f46..6c2601bf1ef12 100644
--- a/src/librustc_codegen_llvm/llvm/ffi.rs
+++ b/src/librustc_codegen_llvm/llvm/ffi.rs
@@ -363,6 +363,10 @@ extern { pub type ThinLTOData; }
 /// LLVMRustThinLTOBuffer
 extern { pub type ThinLTOBuffer; }
 
+/// LLVMRustModuleNameCallback: fn(payload, importing module name, imported module name)
+pub type ThinLTOModuleNameCallback =
+    unsafe extern "C" fn(*mut c_void, *const c_char, *const c_char);
+
 /// LLVMRustThinLTOModule
 #[repr(C)]
 pub struct ThinLTOModule {
@@ -1622,6 +1626,11 @@ extern "C" {
         Data: &ThinLTOData,
         Module: &Module,
     ) -> bool;
+    pub fn LLVMRustGetThinLTOModuleImports(
+        Data: *const ThinLTOData,
+        ModuleNameCallback: ThinLTOModuleNameCallback,
+        CallbackPayload: *mut c_void,
+    );
     pub fn LLVMRustFreeThinLTOData(Data: &'static mut ThinLTOData);
     pub fn LLVMRustParseBitcodeForThinLTO(
         Context: &Context,
diff --git a/src/librustc_incremental/lib.rs b/src/librustc_incremental/lib.rs
index e100b49c7f244..4ffd726c1d47c 100644
--- a/src/librustc_incremental/lib.rs
+++ b/src/librustc_incremental/lib.rs
@@ -44,6 +44,7 @@ pub use persist::copy_cgu_workproducts_to_incr_comp_cache_dir;
 pub use persist::save_dep_graph;
 pub use persist::save_work_product_index;
 pub use persist::in_incr_comp_dir;
+pub use persist::in_incr_comp_dir_sess;
 pub use persist::prepare_session_directory;
 pub use persist::finalize_session_directory;
 pub use persist::delete_workproduct_files;
diff --git a/src/librustc_incremental/persist/mod.rs b/src/librustc_incremental/persist/mod.rs
index e1f00db56d5cb..17d36ba3fa7f4 100644
--- a/src/librustc_incremental/persist/mod.rs
+++ b/src/librustc_incremental/persist/mod.rs
@@ -23,6 +23,7 @@ mod file_format;
 pub use self::fs::finalize_session_directory;
 pub use self::fs::garbage_collect_session_directories;
 pub use self::fs::in_incr_comp_dir;
+pub use self::fs::in_incr_comp_dir_sess;
 pub use self::fs::prepare_session_directory;
 pub use self::load::dep_graph_tcx_init;
 pub use self::load::load_dep_graph;
diff --git a/src/librustc_mir/monomorphize/partitioning.rs b/src/librustc_mir/monomorphize/partitioning.rs
index c480fa4124665..fd094ffc1cf4f 100644
--- a/src/librustc_mir/monomorphize/partitioning.rs
+++ b/src/librustc_mir/monomorphize/partitioning.rs
@@ -103,7 +103,7 @@
 //! inlining, even when they are not marked #[inline].
 
 use monomorphize::collector::InliningMap;
-use rustc::dep_graph::WorkProductId;
+use rustc::dep_graph::{WorkProductId, WorkProduct, DepNode, DepConstructor};
 use rustc::hir::CodegenFnAttrFlags;
 use rustc::hir::def_id::{DefId, LOCAL_CRATE, CRATE_DEF_INDEX};
 use rustc::hir::map::DefPathData;
@@ -150,6 +150,15 @@ pub trait CodegenUnitExt<'tcx> {
         WorkProductId::from_cgu_name(&self.name().as_str())
     }
 
+    /// Fetches this CGU's entry from `previous_work_product`; panics when it is absent.
+    fn work_product(&self, tcx: TyCtxt) -> WorkProduct {
+        let id = self.work_product_id();
+        match tcx.dep_graph.previous_work_product(&id) {
+            Some(product) => product,
+            None => panic!("Could not find work-product for CGU `{}`", self.name()),
+        }
+    }
+
     fn items_in_deterministic_order<'a>(&self,
                                         tcx: TyCtxt<'a, 'tcx, 'tcx>)
                                         -> Vec<(MonoItem<'tcx>,
@@ -194,6 +203,13 @@ pub trait CodegenUnitExt<'tcx> {
         items.sort_by_cached_key(|&(i, _)| item_sort_key(tcx, i));
         items
     }
+
+    /// Builds the `DepNode` obtained from `DepConstructor::CompileCodegenUnit`
+    /// applied to a clone of this codegen unit's name.
+    fn codegen_dep_node(&self, tcx: TyCtxt<'_, 'tcx, 'tcx>) -> DepNode {
+        let cgu_name = self.name().clone();
+        DepNode::new(tcx, DepConstructor::CompileCodegenUnit(cgu_name))
+    }
 }
 
 impl<'tcx> CodegenUnitExt<'tcx> for CodegenUnit<'tcx> {
diff --git a/src/rustllvm/PassWrapper.cpp b/src/rustllvm/PassWrapper.cpp
index 09befdaae37c5..5c4bb61781ed1 100644
--- a/src/rustllvm/PassWrapper.cpp
+++ b/src/rustllvm/PassWrapper.cpp
@@ -1123,6 +1123,28 @@ LLVMRustPrepareThinLTOImport(const LLVMRustThinLTOData *Data, LLVMModuleRef M) {
   return true;
 }
 
+extern "C" typedef void (*LLVMRustModuleNameCallback)(void*, // payload
+                                                      const char*, // importing module name
+                                                      const char*); // imported module name
+
+// Walks `data->ImportLists` and invokes `module_name_callback` once per
+// (importing module, imported module) pair, forwarding `callback_payload`.
+// Both names are handed over as null-terminated C strings.
+extern "C" void
+LLVMRustGetThinLTOModuleImports(const LLVMRustThinLTOData *data,
+                                LLVMRustModuleNameCallback module_name_callback,
+                                void* callback_payload) {
+  for (const auto& importer_entry : data->ImportLists) {
+    // `str()` copies the key so that `c_str()` below is null-terminated.
+    const std::string importer_name = importer_entry.getKey().str();
+    for (const auto& imported_entry : importer_entry.getValue()) {
+      const std::string imported_name = imported_entry.getKey().str();
+      module_name_callback(callback_payload, importer_name.c_str(),
+                           imported_name.c_str());
+    }
+  }
+}
+
 // This struct and various functions are sort of a hack right now, but the
 // problem is that we've got in-memory LLVM modules after we generate and
 // optimize all codegen-units for one compilation in rustc. To be compatible
@@ -1288,6 +1310,18 @@ LLVMRustPrepareThinLTOImport(const LLVMRustThinLTOData *Data, LLVMModuleRef M) {
   report_fatal_error("ThinLTO not available");
 }
 
+// Stub for builds where ThinLTO is unavailable. The name and signature have
+// to match the real `LLVMRustGetThinLTOModuleImports` (and its declaration in
+// `llvm/ffi.rs`). The callback type is spelled out inline in case the
+// `LLVMRustModuleNameCallback` typedef is not visible here (TODO confirm).
+extern "C" void
+LLVMRustGetThinLTOModuleImports(
+    const LLVMRustThinLTOData *Data,
+    void (*ModuleNameCallback)(void *, const char *, const char *),
+    void *CallbackPayload) {
+  report_fatal_error("ThinLTO not available");
+}
+
 extern "C" void
 LLVMRustFreeThinLTOData(LLVMRustThinLTOData *Data) {
   report_fatal_error("ThinLTO not available");
diff --git a/src/tools/tidy/src/deps.rs b/src/tools/tidy/src/deps.rs
index 68a4b7898575d..b9549968db4ba 100644
--- a/src/tools/tidy/src/deps.rs
+++ b/src/tools/tidy/src/deps.rs
@@ -94,6 +94,7 @@ static WHITELIST: &'static [Crate] = &[
     Crate("log"),
     Crate("log_settings"),
     Crate("memchr"),
+    Crate("memmap"),
     Crate("memoffset"),
     Crate("miniz-sys"),
     Crate("nodrop"),