diff --git a/compiler/rustc_codegen_gcc/src/back/lto.rs b/compiler/rustc_codegen_gcc/src/back/lto.rs
index dda777a540027..9a9040708ef89 100644
--- a/compiler/rustc_codegen_gcc/src/back/lto.rs
+++ b/compiler/rustc_codegen_gcc/src/back/lto.rs
@@ -30,6 +30,7 @@ use rustc_codegen_ssa::back::write::{CodegenContext, FatLtoInput, SharedEmitter}
 use rustc_codegen_ssa::traits::*;
 use rustc_codegen_ssa::{ModuleCodegen, ModuleKind, looks_like_rust_object_file};
 use rustc_data_structures::memmap::Mmap;
+use rustc_data_structures::profiling::SelfProfilerRef;
 use rustc_errors::{DiagCtxt, DiagCtxtHandle};
 use rustc_log::tracing::info;
 use rustc_middle::bug;
@@ -112,6 +113,7 @@ fn save_as_file(obj: &[u8], path: &Path) -> Result<(), LtoBitcodeFromRlib> {
 /// for further optimization.
 pub(crate) fn run_fat(
     cgcx: &CodegenContext,
+    prof: &SelfProfilerRef,
     shared_emitter: &SharedEmitter,
     each_linked_rlib_for_lto: &[PathBuf],
     modules: Vec<FatLtoInput<GccCodegenBackend>>,
@@ -123,6 +125,7 @@ pub(crate) fn run_fat(
     lto_data.symbols_below_threshold.iter().map(|c| c.as_ptr()).collect::<Vec<_>>();*/
     fat_lto(
         cgcx,
+        prof,
         dcx,
         modules,
         lto_data.upstream_modules,
@@ -133,13 +136,14 @@ pub(crate) fn run_fat(
 
 fn fat_lto(
     cgcx: &CodegenContext,
+    prof: &SelfProfilerRef,
     _dcx: DiagCtxtHandle<'_>,
     modules: Vec<FatLtoInput<GccCodegenBackend>>,
     mut serialized_modules: Vec<(SerializedModule<ModuleBuffer>, CString)>,
     tmp_path: TempDir,
     //symbols_below_threshold: &[String],
 ) -> ModuleCodegen<GccContext> {
-    let _timer = cgcx.prof.generic_activity("GCC_fat_lto_build_monolithic_module");
+    let _timer = prof.generic_activity("GCC_fat_lto_build_monolithic_module");
     info!("going for a fat lto");
 
     // Sort out all our lists of incoming modules into two lists.
@@ -223,8 +227,7 @@ fn fat_lto(
         // We add the object files and save in should_combine_object_files that we should combine
         // them into a single object file when compiling later.
         for (bc_decoded, name) in serialized_modules {
-            let _timer = cgcx
-                .prof
+            let _timer = prof
                 .generic_activity_with_arg_recorder("GCC_fat_lto_link_module", |recorder| {
                     recorder.record_arg(format!("{:?}", name))
                 });
@@ -284,6 +287,7 @@ impl ModuleBufferMethods for ModuleBuffer {
 /// can simply be copied over from the incr. comp. cache.
 pub(crate) fn run_thin(
     cgcx: &CodegenContext,
+    prof: &SelfProfilerRef,
     dcx: DiagCtxtHandle<'_>,
     each_linked_rlib_for_lto: &[PathBuf],
     modules: Vec<(String, ThinBuffer)>,
@@ -298,6 +302,7 @@ pub(crate) fn run_thin(
     }
     thin_lto(
         cgcx,
+        prof,
         dcx,
         modules,
         lto_data.upstream_modules,
@@ -345,7 +350,8 @@ pub(crate) fn prepare_thin(module: ModuleCodegen<GccContext>) -> (String, ThinBu
 /// all of the `LtoModuleCodegen` units returned below and destroyed once
 /// they all go out of scope.
 fn thin_lto(
-    cgcx: &CodegenContext,
+    _cgcx: &CodegenContext,
+    prof: &SelfProfilerRef,
     _dcx: DiagCtxtHandle<'_>,
     modules: Vec<(String, ThinBuffer)>,
     serialized_modules: Vec<(SerializedModule<ModuleBuffer>, CString)>,
@@ -353,7 +359,7 @@ fn thin_lto(
     cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>,
     //_symbols_below_threshold: &[String],
 ) -> (Vec<ThinModule<GccCodegenBackend>>, Vec<WorkProduct>) {
-    let _timer = cgcx.prof.generic_activity("LLVM_thin_lto_global_analysis");
+    let _timer = prof.generic_activity("GCC_thin_lto_global_analysis");
     info!("going for that thin, thin LTO");
 
     /*let green_modules: FxHashMap<_, _> =
diff --git a/compiler/rustc_codegen_gcc/src/back/write.rs b/compiler/rustc_codegen_gcc/src/back/write.rs
index 5e96447234758..ddf13558027bd 100644
--- a/compiler/rustc_codegen_gcc/src/back/write.rs
+++ b/compiler/rustc_codegen_gcc/src/back/write.rs
@@ -6,6 +6,7 @@ use rustc_codegen_ssa::back::write::{
     BitcodeSection, CodegenContext, EmitObj, ModuleConfig, SharedEmitter,
 };
 use rustc_codegen_ssa::{CompiledModule, ModuleCodegen};
+use rustc_data_structures::profiling::SelfProfilerRef;
 use rustc_errors::DiagCtxt;
 use rustc_fs_util::link_or_copy;
 use rustc_log::tracing::debug;
@@ -18,6 +19,7 @@ use crate::{GccContext, LtoMode};
 
 pub(crate) fn codegen(
     cgcx: &CodegenContext,
+    prof: &SelfProfilerRef,
     shared_emitter: &SharedEmitter,
     module: ModuleCodegen<GccContext>,
     config: &ModuleConfig,
@@ -25,7 +27,7 @@ pub(crate) fn codegen(
     let dcx = DiagCtxt::new(Box::new(shared_emitter.clone()));
     let dcx = dcx.handle();
 
-    let _timer = cgcx.prof.generic_activity_with_arg("GCC_module_codegen", &*module.name);
+    let _timer = prof.generic_activity_with_arg("GCC_module_codegen", &*module.name);
     {
         let context = &module.module_llvm.context;
 
@@ -44,9 +46,8 @@ pub(crate) fn codegen(
         );
 
         if config.bitcode_needed() {
-            let _timer = cgcx
-                .prof
-                .generic_activity_with_arg("GCC_module_codegen_make_bitcode", &*module.name);
+            let _timer =
+                prof.generic_activity_with_arg("GCC_module_codegen_make_bitcode", &*module.name);
 
             // TODO(antoyo)
             /*if let Some(bitcode_filename) = bc_out.file_name() {
@@ -58,8 +59,7 @@ pub(crate) fn codegen(
             }*/
 
             if config.emit_bc || config.emit_obj == EmitObj::Bitcode {
-                let _timer = cgcx
-                    .prof
+                let _timer = prof
                     .generic_activity_with_arg("GCC_module_codegen_emit_bitcode", &*module.name);
                 if lto_supported {
                     context.add_command_line_option("-flto=auto");
@@ -70,8 +70,7 @@ pub(crate) fn codegen(
             }
 
             if config.emit_obj == EmitObj::ObjectCode(BitcodeSection::Full) {
-                let _timer = cgcx
-                    .prof
+                let _timer = prof
                     .generic_activity_with_arg("GCC_module_codegen_embed_bitcode", &*module.name);
                 if lto_supported {
                     // TODO(antoyo): maybe we should call embed_bitcode to have the proper iOS fixes?
@@ -98,7 +97,7 @@ pub(crate) fn codegen(
 
         if config.emit_asm {
             let _timer =
-                cgcx.prof.generic_activity_with_arg("GCC_module_codegen_emit_asm", &*module.name);
+                prof.generic_activity_with_arg("GCC_module_codegen_emit_asm", &*module.name);
             let path = cgcx.output_filenames.temp_path_for_cgu(
                 OutputType::Assembly,
                 &module.name,
@@ -109,9 +108,8 @@ pub(crate) fn codegen(
 
         match config.emit_obj {
             EmitObj::ObjectCode(_) => {
-                let _timer = cgcx
-                    .prof
-                    .generic_activity_with_arg("GCC_module_codegen_emit_obj", &*module.name);
+                let _timer =
+                    prof.generic_activity_with_arg("GCC_module_codegen_emit_obj", &*module.name);
                 if env::var("CG_GCCJIT_DUMP_MODULE_NAMES").as_deref() == Ok("1") {
                     println!("Module {}", module.name);
                 }
diff --git a/compiler/rustc_codegen_gcc/src/lib.rs b/compiler/rustc_codegen_gcc/src/lib.rs
index d490650c37f76..24a065d69ecac 100644
--- a/compiler/rustc_codegen_gcc/src/lib.rs
+++ b/compiler/rustc_codegen_gcc/src/lib.rs
@@ -90,6 +90,7 @@ use rustc_codegen_ssa::target_features::cfg_target_feature;
 use rustc_codegen_ssa::traits::{CodegenBackend, ExtraBackendMethods, WriteBackendMethods};
 use rustc_codegen_ssa::{CodegenResults, CompiledModule, ModuleCodegen, TargetConfig};
 use rustc_data_structures::fx::FxIndexMap;
+use rustc_data_structures::profiling::SelfProfilerRef;
 use rustc_data_structures::sync::IntoDynSyncSend;
 use rustc_errors::DiagCtxtHandle;
 use rustc_middle::dep_graph::{WorkProduct, WorkProductId};
@@ -427,6 +428,7 @@ impl WriteBackendMethods for GccCodegenBackend {
 
     fn run_and_optimize_fat_lto(
         cgcx: &CodegenContext,
+        prof: &SelfProfilerRef,
         shared_emitter: &SharedEmitter,
         _tm_factory: TargetMachineFactoryFn<Self>,
         // FIXME(bjorn3): Limit LTO exports to these symbols
@@ -434,11 +436,12 @@ impl WriteBackendMethods for GccCodegenBackend {
         each_linked_rlib_for_lto: &[PathBuf],
         modules: Vec<FatLtoInput<Self>>,
     ) -> ModuleCodegen<Self::Module> {
-        back::lto::run_fat(cgcx, shared_emitter, each_linked_rlib_for_lto, modules)
+        back::lto::run_fat(cgcx, prof, shared_emitter, each_linked_rlib_for_lto, modules)
     }
 
     fn run_thin_lto(
         cgcx: &CodegenContext,
+        prof: &SelfProfilerRef,
         dcx: DiagCtxtHandle<'_>,
         // FIXME(bjorn3): Limit LTO exports to these symbols
         _exported_symbols_for_lto: &[String],
@@ -446,7 +449,7 @@ impl WriteBackendMethods for GccCodegenBackend {
         modules: Vec<(String, Self::ThinBuffer)>,
         cached_modules: Vec<(SerializedModule<Self::ModuleBuffer>, WorkProduct)>,
     ) -> (Vec<ThinModule<Self>>, Vec<WorkProduct>) {
-        back::lto::run_thin(cgcx, dcx, each_linked_rlib_for_lto, modules, cached_modules)
+        back::lto::run_thin(cgcx, prof, dcx, each_linked_rlib_for_lto, modules, cached_modules)
     }
 
     fn print_pass_timings(&self) {
@@ -459,6 +462,7 @@ impl WriteBackendMethods for GccCodegenBackend {
 
     fn optimize(
         _cgcx: &CodegenContext,
+        _prof: &SelfProfilerRef,
         _shared_emitter: &SharedEmitter,
         module: &mut ModuleCodegen<Self::Module>,
         config: &ModuleConfig,
@@ -468,6 +472,7 @@ impl WriteBackendMethods for GccCodegenBackend {
 
     fn optimize_thin(
         cgcx: &CodegenContext,
+        _prof: &SelfProfilerRef,
         _shared_emitter: &SharedEmitter,
         _tm_factory: TargetMachineFactoryFn<Self>,
         thin: ThinModule<Self>,
@@ -477,11 +482,12 @@ impl WriteBackendMethods for GccCodegenBackend {
 
     fn codegen(
         cgcx: &CodegenContext,
+        prof: &SelfProfilerRef,
         shared_emitter: &SharedEmitter,
         module: ModuleCodegen<Self::Module>,
         config: &ModuleConfig,
     ) -> CompiledModule {
-        back::write::codegen(cgcx, shared_emitter, module, config)
+        back::write::codegen(cgcx, prof, shared_emitter, module, config)
     }
 
     fn prepare_thin(module: ModuleCodegen<Self::Module>) -> (String, Self::ThinBuffer) {
diff --git a/compiler/rustc_codegen_llvm/src/back/lto.rs b/compiler/rustc_codegen_llvm/src/back/lto.rs
index 5bd856a3ac435..5d272d10930b1 100644
--- a/compiler/rustc_codegen_llvm/src/back/lto.rs
+++ b/compiler/rustc_codegen_llvm/src/back/lto.rs
@@ -16,6 +16,7 @@ use rustc_codegen_ssa::traits::*;
 use rustc_codegen_ssa::{ModuleCodegen, ModuleKind, looks_like_rust_object_file};
 use rustc_data_structures::fx::FxHashMap;
 use rustc_data_structures::memmap::Mmap;
+use rustc_data_structures::profiling::SelfProfilerRef;
 use rustc_errors::{DiagCtxt, DiagCtxtHandle};
 use rustc_hir::attrs::SanitizerSet;
 use rustc_middle::bug;
@@ -152,6 +153,7 @@ fn get_bitcode_slice_from_object_data<'a>(
 /// for further optimization.
 pub(crate) fn run_fat(
     cgcx: &CodegenContext,
+    prof: &SelfProfilerRef,
     shared_emitter: &SharedEmitter,
     tm_factory: TargetMachineFactoryFn<LlvmCodegenBackend>,
     exported_symbols_for_lto: &[String],
@@ -166,6 +168,7 @@ pub(crate) fn run_fat(
         symbols_below_threshold.iter().map(|c| c.as_ptr()).collect::<Vec<_>>();
     fat_lto(
         cgcx,
+        prof,
         dcx,
         shared_emitter,
         tm_factory,
@@ -180,6 +183,7 @@ pub(crate) fn run_fat(
 /// can simply be copied over from the incr. comp. cache.
 pub(crate) fn run_thin(
     cgcx: &CodegenContext,
+    prof: &SelfProfilerRef,
     dcx: DiagCtxtHandle<'_>,
     exported_symbols_for_lto: &[String],
     each_linked_rlib_for_lto: &[PathBuf],
@@ -196,7 +200,7 @@ pub(crate) fn run_thin(
                       is deferred to the linker"
         );
     }
-    thin_lto(cgcx, dcx, modules, upstream_modules, cached_modules, &symbols_below_threshold)
+    thin_lto(cgcx, prof, dcx, modules, upstream_modules, cached_modules, &symbols_below_threshold)
 }
 
 pub(crate) fn prepare_thin(module: ModuleCodegen<ModuleLlvm>) -> (String, ThinBuffer) {
@@ -207,6 +211,7 @@ pub(crate) fn prepare_thin(module: ModuleCodegen<ModuleLlvm>) -> (String, ThinBu
 
 fn fat_lto(
     cgcx: &CodegenContext,
+    prof: &SelfProfilerRef,
     dcx: DiagCtxtHandle<'_>,
     shared_emitter: &SharedEmitter,
     tm_factory: TargetMachineFactoryFn<LlvmCodegenBackend>,
@@ -214,7 +219,7 @@ fn fat_lto(
     mut serialized_modules: Vec<(SerializedModule<ModuleBuffer>, CString)>,
     symbols_below_threshold: &[*const libc::c_char],
 ) -> ModuleCodegen<ModuleLlvm> {
-    let _timer = cgcx.prof.generic_activity("LLVM_fat_lto_build_monolithic_module");
+    let _timer = prof.generic_activity("LLVM_fat_lto_build_monolithic_module");
     info!("going for a fat lto");
 
     // Sort out all our lists of incoming modules into two lists.
@@ -303,8 +308,7 @@ fn fat_lto(
         // above, this is all mostly handled in C++.
         let mut linker = Linker::new(llmod);
         for (bc_decoded, name) in serialized_modules {
-            let _timer = cgcx
-                .prof
+            let _timer = prof
                 .generic_activity_with_arg_recorder("LLVM_fat_lto_link_module", |recorder| {
                     recorder.record_arg(format!("{name:?}"))
                 });
@@ -394,13 +398,14 @@ impl Drop for Linker<'_> {
 /// they all go out of scope.
 fn thin_lto(
     cgcx: &CodegenContext,
+    prof: &SelfProfilerRef,
     dcx: DiagCtxtHandle<'_>,
     modules: Vec<(String, ThinBuffer)>,
     serialized_modules: Vec<(SerializedModule<ModuleBuffer>, CString)>,
     cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>,
     symbols_below_threshold: &[*const libc::c_char],
 ) -> (Vec<ThinModule<LlvmCodegenBackend>>, Vec<WorkProduct>) {
-    let _timer = cgcx.prof.generic_activity("LLVM_thin_lto_global_analysis");
+    let _timer = prof.generic_activity("LLVM_thin_lto_global_analysis");
     unsafe {
         info!("going for that thin, thin LTO");
 
@@ -598,11 +603,12 @@ pub(crate) fn enable_autodiff_settings(ad: &[config::AutoDiff]) {
 
 pub(crate) fn run_pass_manager(
     cgcx: &CodegenContext,
+    prof: &SelfProfilerRef,
     dcx: DiagCtxtHandle<'_>,
     module: &mut ModuleCodegen<ModuleLlvm>,
     thin: bool,
 ) {
-    let _timer = cgcx.prof.generic_activity_with_arg("LLVM_lto_optimize", &*module.name);
+    let _timer = prof.generic_activity_with_arg("LLVM_lto_optimize", &*module.name);
     let config = &cgcx.module_config;
 
     // Now we have one massive module inside of llmod. Time to run the
@@ -628,7 +634,7 @@ pub(crate) fn run_pass_manager(
     };
 
     unsafe {
-        write::llvm_optimize(cgcx, dcx, module, None, config, opt_level, opt_stage, stage);
+        write::llvm_optimize(cgcx, prof, dcx, module, None, config, opt_level, opt_stage, stage);
     }
 
     if cfg!(feature = "llvm_enzyme") && enable_ad && !thin {
@@ -636,7 +642,9 @@ pub(crate) fn run_pass_manager(
         let stage = write::AutodiffStage::PostAD;
         if !config.autodiff.contains(&config::AutoDiff::NoPostopt) {
             unsafe {
-                write::llvm_optimize(cgcx, dcx, module, None, config, opt_level, opt_stage, stage);
+                write::llvm_optimize(
+                    cgcx, prof, dcx, module, None, config, opt_level, opt_stage, stage,
+                );
             }
         }
 
@@ -739,6 +747,7 @@ impl Drop for ThinBuffer {
 
 pub(crate) fn optimize_thin_module(
     cgcx: &CodegenContext,
+    prof: &SelfProfilerRef,
     shared_emitter: &SharedEmitter,
     tm_factory: TargetMachineFactoryFn<LlvmCodegenBackend>,
     thin_module: ThinModule<LlvmCodegenBackend>,
@@ -773,8 +782,7 @@ pub(crate) fn optimize_thin_module(
         // You can find some more comments about these functions in the LLVM
         // bindings we've got (currently `PassWrapper.cpp`)
         {
-            let _timer =
-                cgcx.prof.generic_activity_with_arg("LLVM_thin_lto_rename", thin_module.name());
+            let _timer = prof.generic_activity_with_arg("LLVM_thin_lto_rename", thin_module.name());
             unsafe {
                 llvm::LLVMRustPrepareThinLTORename(thin_module.shared.data.0, llmod, target.raw())
             };
@@ -782,9 +790,8 @@ pub(crate) fn optimize_thin_module(
         }
 
         {
-            let _timer = cgcx
-                .prof
-                .generic_activity_with_arg("LLVM_thin_lto_resolve_weak", thin_module.name());
+            let _timer =
+                prof.generic_activity_with_arg("LLVM_thin_lto_resolve_weak", thin_module.name());
             if unsafe { !llvm::LLVMRustPrepareThinLTOResolveWeak(thin_module.shared.data.0, llmod) }
             {
                 write::llvm_err(dcx, LlvmError::PrepareThinLtoModule);
@@ -793,9 +800,8 @@ pub(crate) fn optimize_thin_module(
         }
 
         {
-            let _timer = cgcx
-                .prof
-                .generic_activity_with_arg("LLVM_thin_lto_internalize", thin_module.name());
+            let _timer =
+                prof.generic_activity_with_arg("LLVM_thin_lto_internalize", thin_module.name());
             if unsafe { !llvm::LLVMRustPrepareThinLTOInternalize(thin_module.shared.data.0, llmod) }
             {
                 write::llvm_err(dcx, LlvmError::PrepareThinLtoModule);
@@ -804,8 +810,7 @@ pub(crate) fn optimize_thin_module(
         }
 
         {
-            let _timer =
-                cgcx.prof.generic_activity_with_arg("LLVM_thin_lto_import", thin_module.name());
+            let _timer = prof.generic_activity_with_arg("LLVM_thin_lto_import", thin_module.name());
             if unsafe {
                 !llvm::LLVMRustPrepareThinLTOImport(thin_module.shared.data.0, llmod, target.raw())
             } {
@@ -821,7 +826,7 @@ pub(crate) fn optimize_thin_module(
         // little differently.
         {
             info!("running thin lto passes over {}", module.name);
-            run_pass_manager(cgcx, dcx, &mut module, true);
+            run_pass_manager(cgcx, prof, dcx, &mut module, true);
             save_temp_bitcode(cgcx, &module, "thin-lto-after-pm");
         }
     }
diff --git a/compiler/rustc_codegen_llvm/src/back/write.rs b/compiler/rustc_codegen_llvm/src/back/write.rs
index 2bb5b5db5e485..3e3ccd39e674c 100644
--- a/compiler/rustc_codegen_llvm/src/back/write.rs
+++ b/compiler/rustc_codegen_llvm/src/back/write.rs
@@ -560,6 +560,7 @@ pub(crate) enum AutodiffStage {
 
 pub(crate) unsafe fn llvm_optimize(
     cgcx: &CodegenContext,
+    prof: &SelfProfilerRef,
     dcx: DiagCtxtHandle<'_>,
     module: &ModuleCodegen<ModuleLlvm>,
     thin_lto_buffer: Option<&mut *mut llvm::ThinLTOBuffer>,
@@ -756,10 +757,9 @@ pub(crate) unsafe fn llvm_optimize(
         }
     }
 
-    let mut llvm_profiler = cgcx
-        .prof
+    let mut llvm_profiler = prof
         .llvm_recording_enabled()
-        .then(|| LlvmSelfProfiler::new(cgcx.prof.get_self_profiler().unwrap()));
+        .then(|| LlvmSelfProfiler::new(prof.get_self_profiler().unwrap()));
 
     let llvm_selfprofiler =
         llvm_profiler.as_mut().map(|s| s as *mut _ as *mut c_void).unwrap_or(std::ptr::null_mut());
@@ -878,7 +878,7 @@ pub(crate) unsafe fn llvm_optimize(
                 &out_obj,
                 None,
                 llvm::FileType::ObjectFile,
-                &cgcx.prof,
+                prof,
                 true,
             );
             // We ignore cgcx.save_temps here and unconditionally always keep our `host.out` artifact.
@@ -892,11 +892,12 @@ pub(crate) unsafe fn llvm_optimize(
 // Unsafe due to LLVM calls.
 pub(crate) fn optimize(
     cgcx: &CodegenContext,
+    prof: &SelfProfilerRef,
     shared_emitter: &SharedEmitter,
     module: &mut ModuleCodegen<ModuleLlvm>,
     config: &ModuleConfig,
 ) {
-    let _timer = cgcx.prof.generic_activity_with_arg("LLVM_module_optimize", &*module.name);
+    let _timer = prof.generic_activity_with_arg("LLVM_module_optimize", &*module.name);
 
     let dcx = DiagCtxt::new(Box::new(shared_emitter.clone()));
     let dcx = dcx.handle();
@@ -943,6 +944,7 @@ pub(crate) fn optimize(
         unsafe {
             llvm_optimize(
                 cgcx,
+                prof,
                 dcx,
                 module,
                 thin_lto_buffer.as_mut(),
@@ -964,12 +966,12 @@ pub(crate) fn optimize(
                 && let Some(thin_link_bitcode_filename) = bc_summary_out.file_name()
             {
                 let summary_data = thin_lto_buffer.thin_link_data();
-                cgcx.prof.artifact_size(
+                prof.artifact_size(
                     "llvm_bitcode_summary",
                     thin_link_bitcode_filename.to_string_lossy(),
                     summary_data.len() as u64,
                 );
-                let _timer = cgcx.prof.generic_activity_with_arg(
+                let _timer = prof.generic_activity_with_arg(
                     "LLVM_module_codegen_emit_bitcode_summary",
                     &*module.name,
                 );
@@ -983,11 +985,12 @@ pub(crate) fn optimize(
 
 pub(crate) fn codegen(
     cgcx: &CodegenContext,
+    prof: &SelfProfilerRef,
     shared_emitter: &SharedEmitter,
     module: ModuleCodegen<ModuleLlvm>,
     config: &ModuleConfig,
 ) -> CompiledModule {
-    let _timer = cgcx.prof.generic_activity_with_arg("LLVM_module_codegen", &*module.name);
+    let _timer = prof.generic_activity_with_arg("LLVM_module_codegen", &*module.name);
 
     let dcx = DiagCtxt::new(Box::new(shared_emitter.clone()));
     let dcx = dcx.handle();
@@ -1026,18 +1029,17 @@ pub(crate) fn codegen(
         if config.bitcode_needed() {
             if config.emit_bc || config.emit_obj == EmitObj::Bitcode {
                 let thin = {
-                    let _timer = cgcx.prof.generic_activity_with_arg(
+                    let _timer = prof.generic_activity_with_arg(
                         "LLVM_module_codegen_make_bitcode",
                         &*module.name,
                     );
                     ThinBuffer::new(llmod, config.emit_thin_lto)
                 };
                 let data = thin.data();
-                let _timer = cgcx
-                    .prof
+                let _timer = prof
                     .generic_activity_with_arg("LLVM_module_codegen_emit_bitcode", &*module.name);
                 if let Some(bitcode_filename) = bc_out.file_name() {
-                    cgcx.prof.artifact_size(
+                    prof.artifact_size(
                         "llvm_bitcode",
                         bitcode_filename.to_string_lossy(),
                         data.len() as u64,
@@ -1049,8 +1051,7 @@ pub(crate) fn codegen(
             }
 
             if config.embed_bitcode() && module.kind == ModuleKind::Regular {
-                let _timer = cgcx
-                    .prof
+                let _timer = prof
                     .generic_activity_with_arg("LLVM_module_codegen_embed_bitcode", &*module.name);
                 let thin_bc =
                     module.thin_lto_buffer.as_deref().expect("cannot find embedded bitcode");
@@ -1060,7 +1061,7 @@ pub(crate) fn codegen(
 
         if config.emit_ir {
             let _timer =
-                cgcx.prof.generic_activity_with_arg("LLVM_module_codegen_emit_ir", &*module.name);
+                prof.generic_activity_with_arg("LLVM_module_codegen_emit_ir", &*module.name);
             let out = cgcx.output_filenames.temp_path_for_cgu(
                 OutputType::LlvmAssembly,
                 &module.name,
@@ -1098,7 +1099,7 @@ pub(crate) fn codegen(
                 unsafe { llvm::LLVMRustPrintModule(llmod, out_c.as_ptr(), demangle_callback) };
 
             if result == llvm::LLVMRustResult::Success {
-                record_artifact_size(&cgcx.prof, "llvm_ir", &out);
+                record_artifact_size(prof, "llvm_ir", &out);
             }
 
             result
@@ -1108,7 +1109,7 @@ pub(crate) fn codegen(
 
         if config.emit_asm {
             let _timer =
-                cgcx.prof.generic_activity_with_arg("LLVM_module_codegen_emit_asm", &*module.name);
+                prof.generic_activity_with_arg("LLVM_module_codegen_emit_asm", &*module.name);
             let path = cgcx.output_filenames.temp_path_for_cgu(
                 OutputType::Assembly,
                 &module.name,
@@ -1132,16 +1133,15 @@ pub(crate) fn codegen(
                 &path,
                 None,
                 llvm::FileType::AssemblyFile,
-                &cgcx.prof,
+                prof,
                 config.verify_llvm_ir,
             );
         }
 
         match config.emit_obj {
             EmitObj::ObjectCode(_) => {
-                let _timer = cgcx
-                    .prof
-                    .generic_activity_with_arg("LLVM_module_codegen_emit_obj", &*module.name);
+                let _timer =
+                    prof.generic_activity_with_arg("LLVM_module_codegen_emit_obj", &*module.name);
 
                 let dwo_out = cgcx
                     .output_filenames
@@ -1168,7 +1168,7 @@ pub(crate) fn codegen(
                     &obj_out,
                     dwo_out,
                     llvm::FileType::ObjectFile,
-                    &cgcx.prof,
+                    prof,
                     config.verify_llvm_ir,
                 );
             }
@@ -1188,7 +1188,7 @@ pub(crate) fn codegen(
             EmitObj::None => {}
         }
 
-        record_llvm_cgu_instructions_stats(&cgcx.prof, &module.name, llmod);
+        record_llvm_cgu_instructions_stats(prof, &module.name, llmod);
     }
 
     // `.dwo` files are only emitted if:
diff --git a/compiler/rustc_codegen_llvm/src/lib.rs b/compiler/rustc_codegen_llvm/src/lib.rs
index 577c8a98b18c6..c92de64a3349f 100644
--- a/compiler/rustc_codegen_llvm/src/lib.rs
+++ b/compiler/rustc_codegen_llvm/src/lib.rs
@@ -35,6 +35,7 @@ use rustc_codegen_ssa::back::write::{
 use rustc_codegen_ssa::traits::*;
 use rustc_codegen_ssa::{CodegenResults, CompiledModule, ModuleCodegen, TargetConfig};
 use rustc_data_structures::fx::FxIndexMap;
+use rustc_data_structures::profiling::SelfProfilerRef;
 use rustc_errors::{DiagCtxt, DiagCtxtHandle};
 use rustc_metadata::EncodedMetadata;
 use rustc_middle::dep_graph::{WorkProduct, WorkProductId};
@@ -163,6 +164,7 @@ impl WriteBackendMethods for LlvmCodegenBackend {
     }
     fn run_and_optimize_fat_lto(
         cgcx: &CodegenContext,
+        prof: &SelfProfilerRef,
         shared_emitter: &SharedEmitter,
         tm_factory: TargetMachineFactoryFn<LlvmCodegenBackend>,
         exported_symbols_for_lto: &[String],
@@ -171,6 +173,7 @@ impl WriteBackendMethods for LlvmCodegenBackend {
     ) -> ModuleCodegen<Self::Module> {
         let mut module = back::lto::run_fat(
             cgcx,
+            prof,
             shared_emitter,
             tm_factory,
             exported_symbols_for_lto,
@@ -180,12 +183,13 @@ impl WriteBackendMethods for LlvmCodegenBackend {
 
         let dcx = DiagCtxt::new(Box::new(shared_emitter.clone()));
         let dcx = dcx.handle();
-        back::lto::run_pass_manager(cgcx, dcx, &mut module, false);
+        back::lto::run_pass_manager(cgcx, prof, dcx, &mut module, false);
 
         module
     }
     fn run_thin_lto(
         cgcx: &CodegenContext,
+        prof: &SelfProfilerRef,
         dcx: DiagCtxtHandle<'_>,
         exported_symbols_for_lto: &[String],
         each_linked_rlib_for_lto: &[PathBuf],
@@ -194,6 +198,7 @@ impl WriteBackendMethods for LlvmCodegenBackend {
     ) -> (Vec<ThinModule<Self>>, Vec<WorkProduct>) {
         back::lto::run_thin(
             cgcx,
+            prof,
             dcx,
             exported_symbols_for_lto,
             each_linked_rlib_for_lto,
@@ -203,27 +208,30 @@ impl WriteBackendMethods for LlvmCodegenBackend {
     }
     fn optimize(
         cgcx: &CodegenContext,
+        prof: &SelfProfilerRef,
         shared_emitter: &SharedEmitter,
         module: &mut ModuleCodegen<Self::Module>,
         config: &ModuleConfig,
     ) {
-        back::write::optimize(cgcx, shared_emitter, module, config)
+        back::write::optimize(cgcx, prof, shared_emitter, module, config)
     }
     fn optimize_thin(
         cgcx: &CodegenContext,
+        prof: &SelfProfilerRef,
         shared_emitter: &SharedEmitter,
         tm_factory: TargetMachineFactoryFn<LlvmCodegenBackend>,
         thin: ThinModule<Self>,
     ) -> ModuleCodegen<Self::Module> {
-        back::lto::optimize_thin_module(cgcx, shared_emitter, tm_factory, thin)
+        back::lto::optimize_thin_module(cgcx, prof, shared_emitter, tm_factory, thin)
     }
     fn codegen(
         cgcx: &CodegenContext,
+        prof: &SelfProfilerRef,
         shared_emitter: &SharedEmitter,
         module: ModuleCodegen<Self::Module>,
         config: &ModuleConfig,
     ) -> CompiledModule {
-        back::write::codegen(cgcx, shared_emitter, module, config)
+        back::write::codegen(cgcx, prof, shared_emitter, module, config)
     }
     fn prepare_thin(module: ModuleCodegen<Self::Module>) -> (String, Self::ThinBuffer) {
         back::lto::prepare_thin(module)
diff --git a/compiler/rustc_codegen_ssa/src/back/write.rs b/compiler/rustc_codegen_ssa/src/back/write.rs
index c814f8db521cf..87a043fbdf245 100644
--- a/compiler/rustc_codegen_ssa/src/back/write.rs
+++ b/compiler/rustc_codegen_ssa/src/back/write.rs
@@ -23,6 +23,7 @@ use rustc_hir::find_attr;
 use rustc_incremental::{
     copy_cgu_workproduct_to_incr_comp_cache_dir, in_incr_comp_dir, in_incr_comp_dir_sess,
 };
+use rustc_macros::{Decodable, Encodable};
 use rustc_metadata::fs::copy_to_stdout;
 use rustc_middle::bug;
 use rustc_middle::dep_graph::{WorkProduct, WorkProductId};
@@ -50,7 +51,7 @@ use crate::{
 const PRE_LTO_BC_EXT: &str = "pre-lto.bc";
 
 /// What kind of object file to emit.
-#[derive(Clone, Copy, PartialEq)]
+#[derive(Clone, Copy, PartialEq, Encodable, Decodable)]
 pub enum EmitObj {
     // No object file.
     None,
@@ -64,7 +65,7 @@ pub enum EmitObj {
 }
 
 /// What kind of llvm bitcode section to embed in an object file.
-#[derive(Clone, Copy, PartialEq)]
+#[derive(Clone, Copy, PartialEq, Encodable, Decodable)]
 pub enum BitcodeSection {
     // No bitcode section.
     None,
@@ -74,6 +75,7 @@ pub enum BitcodeSection {
 }
 
 /// Module-specific configuration for `optimize_and_codegen`.
+#[derive(Encodable, Decodable)]
 pub struct ModuleConfig {
     /// Names of additional optimization passes to run.
     pub passes: Vec<String>,
@@ -319,10 +321,9 @@ pub type TargetMachineFactoryFn<B> = Arc<
 >;
 
 /// Additional resources used by optimize_and_codegen (not module specific)
-#[derive(Clone)]
+#[derive(Clone, Encodable, Decodable)]
 pub struct CodegenContext {
     // Resources needed when running LTO
-    pub prof: SelfProfilerRef,
     pub lto: Lto,
     pub use_linker_plugin_lto: bool,
     pub dylib_lto: bool,
@@ -363,16 +364,18 @@ pub struct CodegenContext {
 
 fn generate_thin_lto_work<B: ExtraBackendMethods>(
     cgcx: &CodegenContext,
+    prof: &SelfProfilerRef,
     dcx: DiagCtxtHandle<'_>,
     exported_symbols_for_lto: &[String],
     each_linked_rlib_for_lto: &[PathBuf],
     needs_thin_lto: Vec<(String, B::ThinBuffer)>,
     import_only_modules: Vec<(SerializedModule<B::ModuleBuffer>, WorkProduct)>,
 ) -> Vec<(ThinLtoWorkItem<B>, u64)> {
-    let _prof_timer = cgcx.prof.generic_activity("codegen_thin_generate_lto_work");
+    let _prof_timer = prof.generic_activity("codegen_thin_generate_lto_work");
 
     let (lto_modules, copy_jobs) = B::run_thin_lto(
         cgcx,
+        prof,
         dcx,
         exported_symbols_for_lto,
         each_linked_rlib_for_lto,
@@ -841,12 +844,13 @@ pub(crate) fn compute_per_cgu_lto_type(
 
 fn execute_optimize_work_item<B: ExtraBackendMethods>(
     cgcx: &CodegenContext,
+    prof: &SelfProfilerRef,
     shared_emitter: SharedEmitter,
     mut module: ModuleCodegen<B::Module>,
 ) -> WorkItemResult<B> {
-    let _timer = cgcx.prof.generic_activity_with_arg("codegen_module_optimize", &*module.name);
+    let _timer = prof.generic_activity_with_arg("codegen_module_optimize", &*module.name);
 
-    B::optimize(cgcx, &shared_emitter, &mut module, &cgcx.module_config);
+    B::optimize(cgcx, prof, &shared_emitter, &mut module, &cgcx.module_config);
 
     // After we've done the initial round of optimizations we need to
     // decide whether to synchronously codegen this module or ship it
@@ -867,7 +871,7 @@ fn execute_optimize_work_item<B: ExtraBackendMethods>(
 
     match lto_type {
         ComputedLtoType::No => {
-            let module = B::codegen(cgcx, &shared_emitter, module, &cgcx.module_config);
+            let module = B::codegen(cgcx, prof, &shared_emitter, module, &cgcx.module_config);
             WorkItemResult::Finished(module)
         }
         ComputedLtoType::Thin => {
@@ -897,12 +901,12 @@ fn execute_optimize_work_item<B: ExtraBackendMethods>(
 
 fn execute_copy_from_cache_work_item(
     cgcx: &CodegenContext,
+    prof: &SelfProfilerRef,
     shared_emitter: SharedEmitter,
     module: CachedModuleCodegen,
 ) -> CompiledModule {
-    let _timer = cgcx
-        .prof
-        .generic_activity_with_arg("codegen_copy_artifacts_from_incr_cache", &*module.name);
+    let _timer =
+        prof.generic_activity_with_arg("codegen_copy_artifacts_from_incr_cache", &*module.name);
 
     let dcx = DiagCtxt::new(Box::new(shared_emitter));
     let dcx = dcx.handle();
@@ -985,6 +989,7 @@ fn execute_copy_from_cache_work_item(
 
 fn do_fat_lto<B: ExtraBackendMethods>(
     cgcx: &CodegenContext,
+    prof: &SelfProfilerRef,
     shared_emitter: SharedEmitter,
     tm_factory: TargetMachineFactoryFn<B>,
     exported_symbols_for_lto: &[String],
@@ -992,7 +997,7 @@ fn do_fat_lto<B: ExtraBackendMethods>(
     mut needs_fat_lto: Vec<FatLtoInput<B>>,
     import_only_modules: Vec<(SerializedModule<B::ModuleBuffer>, WorkProduct)>,
 ) -> CompiledModule {
-    let _timer = cgcx.prof.verbose_generic_activity("LLVM_fatlto");
+    let _timer = prof.verbose_generic_activity("LLVM_fatlto");
 
     let dcx = DiagCtxt::new(Box::new(shared_emitter.clone()));
     let dcx = dcx.handle();
@@ -1005,17 +1010,19 @@ fn do_fat_lto<B: ExtraBackendMethods>(
 
     let module = B::run_and_optimize_fat_lto(
         cgcx,
+        prof,
         &shared_emitter,
         tm_factory,
         exported_symbols_for_lto,
         each_linked_rlib_for_lto,
         needs_fat_lto,
     );
-    B::codegen(cgcx, &shared_emitter, module, &cgcx.module_config)
+    B::codegen(cgcx, prof, &shared_emitter, module, &cgcx.module_config)
 }
 
-fn do_thin_lto<'a, B: ExtraBackendMethods>(
-    cgcx: &'a CodegenContext,
+fn do_thin_lto<B: ExtraBackendMethods>(
+    cgcx: &CodegenContext,
+    prof: &SelfProfilerRef,
     shared_emitter: SharedEmitter,
     tm_factory: TargetMachineFactoryFn<B>,
     exported_symbols_for_lto: Arc<Vec<String>>,
@@ -1026,7 +1033,7 @@ fn do_thin_lto<'a, B: ExtraBackendMethods>(
         WorkProduct,
     )>,
 ) -> Vec<CompiledModule> {
-    let _timer = cgcx.prof.verbose_generic_activity("LLVM_thinlto");
+    let _timer = prof.verbose_generic_activity("LLVM_thinlto");
 
     let dcx = DiagCtxt::new(Box::new(shared_emitter.clone()));
     let dcx = dcx.handle();
@@ -1056,6 +1063,7 @@ fn do_thin_lto<'a, B: ExtraBackendMethods>(
     // we don't worry about tokens.
     for (work, cost) in generate_thin_lto_work::<B>(
         cgcx,
+        prof,
         dcx,
         &exported_symbols_for_lto,
         &each_linked_rlib_for_lto,
@@ -1100,6 +1108,7 @@ fn do_thin_lto<'a, B: ExtraBackendMethods>(
             {
                 spawn_thin_lto_work(
                     &cgcx,
+                    prof,
                     shared_emitter.clone(),
                     Arc::clone(&tm_factory),
                     coordinator_send.clone(),
@@ -1166,14 +1175,15 @@ fn do_thin_lto<'a, B: ExtraBackendMethods>(
 
 fn execute_thin_lto_work_item<B: ExtraBackendMethods>(
     cgcx: &CodegenContext,
+    prof: &SelfProfilerRef,
     shared_emitter: SharedEmitter,
     tm_factory: TargetMachineFactoryFn<B>,
     module: lto::ThinModule<B>,
 ) -> CompiledModule {
-    let _timer = cgcx.prof.generic_activity_with_arg("codegen_module_perform_lto", module.name());
+    let _timer = prof.generic_activity_with_arg("codegen_module_perform_lto", module.name());
 
-    let module = B::optimize_thin(cgcx, &shared_emitter, tm_factory, module);
-    B::codegen(cgcx, &shared_emitter, module, &cgcx.module_config)
+    let module = B::optimize_thin(cgcx, prof, &shared_emitter, tm_factory, module);
+    B::codegen(cgcx, prof, &shared_emitter, module, &cgcx.module_config)
 }
 
 /// Messages sent to the coordinator.
@@ -1273,6 +1283,7 @@ fn start_executing_work<B: ExtraBackendMethods>(
     coordinator_send: Sender<Message<B>>,
 ) -> thread::JoinHandle<Result<MaybeLtoModules<B>, ()>> {
     let sess = tcx.sess;
+    let prof = sess.prof.clone();
 
     let mut each_linked_rlib_for_lto = Vec::new();
     let mut each_linked_rlib_file_for_lto = Vec::new();
@@ -1323,7 +1334,6 @@ fn start_executing_work<B: ExtraBackendMethods>(
         fewer_names: sess.fewer_names(),
         save_temps: sess.opts.cg.save_temps,
         time_trace: sess.opts.unstable_opts.llvm_time_trace,
-        prof: sess.prof.clone(),
         remark: sess.opts.cg.remark.clone(),
         remark_dir,
         incr_comp_session_dir: sess.incr_comp_session_dir_opt().map(|r| r.clone()),
@@ -1524,7 +1534,7 @@ fn start_executing_work<B: ExtraBackendMethods>(
         let mut llvm_start_time: Option<VerboseTimingGuard<'_>> = None;
 
         if let Some(allocator_module) = &mut allocator_module {
-            B::optimize(&cgcx, &shared_emitter, allocator_module, &allocator_config);
+            B::optimize(&cgcx, &prof, &shared_emitter, allocator_module, &allocator_config);
         }
 
         // Run the message loop while there's still anything that needs message
@@ -1564,6 +1574,7 @@ fn start_executing_work<B: ExtraBackendMethods>(
                         main_thread_state = MainThreadState::Lending;
                         spawn_work(
                             &cgcx,
+                            &prof,
                             shared_emitter.clone(),
                             coordinator_send.clone(),
                             &mut llvm_start_time,
@@ -1588,6 +1599,7 @@ fn start_executing_work<B: ExtraBackendMethods>(
                             main_thread_state = MainThreadState::Lending;
                             spawn_work(
                                 &cgcx,
+                                &prof,
                                 shared_emitter.clone(),
                                 coordinator_send.clone(),
                                 &mut llvm_start_time,
@@ -1630,6 +1642,7 @@ fn start_executing_work<B: ExtraBackendMethods>(
                 {
                     spawn_work(
                         &cgcx,
+                        &prof,
                         shared_emitter.clone(),
                         coordinator_send.clone(),
                         &mut llvm_start_time,
@@ -1788,6 +1801,7 @@ fn start_executing_work<B: ExtraBackendMethods>(
             if cgcx.lto == Lto::ThinLocal {
                 compiled_modules.extend(do_thin_lto::<B>(
                     &cgcx,
+                    &prof,
                     shared_emitter.clone(),
                     tm_factory,
                     exported_symbols_for_lto,
@@ -1814,7 +1828,7 @@ fn start_executing_work<B: ExtraBackendMethods>(
         Ok(MaybeLtoModules::NoLto {
             modules: compiled_modules,
             allocator_module: allocator_module.map(|allocator_module| {
-                B::codegen(&cgcx, &shared_emitter, allocator_module, &allocator_config)
+                B::codegen(&cgcx, &prof, &shared_emitter, allocator_module, &allocator_config)
             }),
         })
     })
@@ -1883,23 +1897,25 @@ fn start_executing_work<B: ExtraBackendMethods>(
 pub(crate) struct WorkerFatalError;
 
 fn spawn_work<'a, B: ExtraBackendMethods>(
-    cgcx: &'a CodegenContext,
+    cgcx: &CodegenContext,
+    prof: &'a SelfProfilerRef,
     shared_emitter: SharedEmitter,
     coordinator_send: Sender<Message<B>>,
     llvm_start_time: &mut Option<VerboseTimingGuard<'a>>,
     work: WorkItem<B>,
 ) {
     if llvm_start_time.is_none() {
-        *llvm_start_time = Some(cgcx.prof.verbose_generic_activity("LLVM_passes"));
+        *llvm_start_time = Some(prof.verbose_generic_activity("LLVM_passes"));
     }
 
     let cgcx = cgcx.clone();
+    let prof = prof.clone();
 
     B::spawn_named_thread(cgcx.time_trace, work.short_description(), move || {
         let result = std::panic::catch_unwind(AssertUnwindSafe(|| match work {
-            WorkItem::Optimize(m) => execute_optimize_work_item(&cgcx, shared_emitter, m),
+            WorkItem::Optimize(m) => execute_optimize_work_item(&cgcx, &prof, shared_emitter, m),
             WorkItem::CopyPostLtoArtifacts(m) => WorkItemResult::Finished(
-                execute_copy_from_cache_work_item(&cgcx, shared_emitter, m),
+                execute_copy_from_cache_work_item(&cgcx, &prof, shared_emitter, m),
             ),
         }));
 
@@ -1920,22 +1936,24 @@ fn spawn_work<'a, B: ExtraBackendMethods>(
     .expect("failed to spawn work thread");
 }
 
-fn spawn_thin_lto_work<'a, B: ExtraBackendMethods>(
-    cgcx: &'a CodegenContext,
+fn spawn_thin_lto_work<B: ExtraBackendMethods>(
+    cgcx: &CodegenContext,
+    prof: &SelfProfilerRef,
     shared_emitter: SharedEmitter,
     tm_factory: TargetMachineFactoryFn<B>,
     coordinator_send: Sender<ThinLtoMessage>,
     work: ThinLtoWorkItem<B>,
 ) {
     let cgcx = cgcx.clone();
+    let prof = prof.clone();
 
     B::spawn_named_thread(cgcx.time_trace, work.short_description(), move || {
         let result = std::panic::catch_unwind(AssertUnwindSafe(|| match work {
             ThinLtoWorkItem::CopyPostLtoArtifacts(m) => {
-                execute_copy_from_cache_work_item(&cgcx, shared_emitter, m)
+                execute_copy_from_cache_work_item(&cgcx, &prof, shared_emitter, m)
             }
             ThinLtoWorkItem::ThinLto(m) => {
-                execute_thin_lto_work_item(&cgcx, shared_emitter, tm_factory, m)
+                execute_thin_lto_work_item(&cgcx, &prof, shared_emitter, tm_factory, m)
             }
         }));
 
@@ -2183,6 +2201,7 @@ impl<B: ExtraBackendMethods> OngoingCodegen<B> {
                 CompiledModules {
                     modules: vec![do_fat_lto(
                         &cgcx,
+                        &sess.prof,
                         shared_emitter,
                         tm_factory,
                         &exported_symbols_for_lto,
@@ -2209,6 +2228,7 @@ impl<B: ExtraBackendMethods> OngoingCodegen<B> {
                 CompiledModules {
                     modules: do_thin_lto::<B>(
                         &cgcx,
+                        &sess.prof,
                         shared_emitter,
                         tm_factory,
                         exported_symbols_for_lto,
diff --git a/compiler/rustc_codegen_ssa/src/traits/write.rs b/compiler/rustc_codegen_ssa/src/traits/write.rs
index 0232ba39ac1f3..5f5d0ac5d9fc4 100644
--- a/compiler/rustc_codegen_ssa/src/traits/write.rs
+++ b/compiler/rustc_codegen_ssa/src/traits/write.rs
@@ -1,5 +1,6 @@
 use std::path::PathBuf;
 
+use rustc_data_structures::profiling::SelfProfilerRef;
 use rustc_errors::DiagCtxtHandle;
 use rustc_middle::dep_graph::WorkProduct;
 
@@ -20,6 +21,7 @@ pub trait WriteBackendMethods: Clone + 'static {
     /// if necessary and running any further optimizations
     fn run_and_optimize_fat_lto(
         cgcx: &CodegenContext,
+        prof: &SelfProfilerRef,
         shared_emitter: &SharedEmitter,
         tm_factory: TargetMachineFactoryFn<Self>,
         exported_symbols_for_lto: &[String],
@@ -31,6 +33,7 @@ pub trait WriteBackendMethods: Clone + 'static {
     /// can simply be copied over from the incr. comp. cache.
     fn run_thin_lto(
         cgcx: &CodegenContext,
+        prof: &SelfProfilerRef,
         dcx: DiagCtxtHandle<'_>,
         exported_symbols_for_lto: &[String],
         each_linked_rlib_for_lto: &[PathBuf],
@@ -41,18 +44,21 @@ pub trait WriteBackendMethods: Clone + 'static {
     fn print_statistics(&self);
     fn optimize(
         cgcx: &CodegenContext,
+        prof: &SelfProfilerRef,
         shared_emitter: &SharedEmitter,
         module: &mut ModuleCodegen<Self::Module>,
         config: &ModuleConfig,
     );
     fn optimize_thin(
         cgcx: &CodegenContext,
+        prof: &SelfProfilerRef,
         shared_emitter: &SharedEmitter,
         tm_factory: TargetMachineFactoryFn<Self>,
         thin: ThinModule<Self>,
     ) -> ModuleCodegen<Self::Module>;
     fn codegen(
         cgcx: &CodegenContext,
+        prof: &SelfProfilerRef,
         shared_emitter: &SharedEmitter,
         module: ModuleCodegen<Self::Module>,
         config: &ModuleConfig,
diff --git a/compiler/rustc_session/src/config.rs b/compiler/rustc_session/src/config.rs
index 5e2671ef4ef6b..b278a6179fe7f 100644
--- a/compiler/rustc_session/src/config.rs
+++ b/compiler/rustc_session/src/config.rs
@@ -88,7 +88,7 @@ pub enum CFProtection {
     Full,
 }
 
-#[derive(Clone, Copy, Debug, PartialEq, Hash, HashStable_Generic)]
+#[derive(Clone, Copy, Debug, PartialEq, Hash, HashStable_Generic, Encodable, Decodable)]
 pub enum OptLevel {
     /// `-Copt-level=0`
     No,
@@ -108,7 +108,7 @@ pub enum OptLevel {
 /// and taking other command line options into account.
 ///
 /// Note that linker plugin-based LTO is a different mechanism entirely.
-#[derive(Clone, PartialEq)]
+#[derive(Clone, PartialEq, Encodable, Decodable)]
 pub enum Lto {
     /// Don't do any LTO whatsoever.
     No,
@@ -190,7 +190,7 @@ pub enum CoverageLevel {
 }
 
 // The different settings that the `-Z offload` flag can have.
-#[derive(Clone, PartialEq, Hash, Debug)]
+#[derive(Clone, PartialEq, Hash, Debug, Encodable, Decodable)]
 pub enum Offload {
     /// Entry point for `std::offload`, enables kernel compilation for a gpu device
     Device,
@@ -201,7 +201,7 @@ pub enum Offload {
 }
 
 /// The different settings that the `-Z autodiff` flag can have.
-#[derive(Clone, PartialEq, Hash, Debug)]
+#[derive(Clone, PartialEq, Hash, Debug, Encodable, Decodable)]
 pub enum AutoDiff {
     /// Enable the autodiff opt pipeline
     Enable,
@@ -528,7 +528,7 @@ impl FmtDebug {
     }
 }
 
-#[derive(Clone, PartialEq, Hash, Debug)]
+#[derive(Clone, PartialEq, Hash, Debug, Encodable, Decodable)]
 pub enum SwitchWithOptPath {
     Enabled(Option<PathBuf>),
     Disabled,
@@ -583,7 +583,7 @@ pub enum MirStripDebugInfo {
 /// DWARF provides a mechanism which allows the linker to skip the sections which don't require
 /// link-time relocation - either by putting those sections in DWARF object files, or by keeping
 /// them in the object file in such a way that the linker will skip them.
-#[derive(Clone, Copy, Debug, PartialEq, Hash)]
+#[derive(Clone, Copy, Debug, PartialEq, Hash, Encodable, Decodable)]
 pub enum SplitDwarfKind {
     /// Sections which do not require relocation are written into object file but ignored by the
     /// linker.
@@ -1539,7 +1539,7 @@ pub enum EntryFnType {
 
 pub use rustc_hir::attrs::CrateType;
 
-#[derive(Clone, Hash, Debug, PartialEq, Eq)]
+#[derive(Clone, Hash, Debug, PartialEq, Eq, Encodable, Decodable)]
 pub enum Passes {
     Some(Vec<String>),
     All,
diff --git a/compiler/rustc_target/src/spec/mod.rs b/compiler/rustc_target/src/spec/mod.rs
index 537185f536ab1..2d2f15651c431 100644
--- a/compiler/rustc_target/src/spec/mod.rs
+++ b/compiler/rustc_target/src/spec/mod.rs
@@ -1082,7 +1082,7 @@ crate::target_spec_enum! {
 }
 
 crate::target_spec_enum! {
-    #[derive(Default)]
+    #[derive(Default, Encodable, Decodable)]
     pub enum SplitDebuginfo {
         /// Split debug-information is disabled, meaning that on supported platforms
         /// you can find all debug information in the executable itself. This is