Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
91 changes: 63 additions & 28 deletions src/eval.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,14 @@ use rustc_abi::ExternAbi;
use rustc_data_structures::fx::{FxHashMap, FxHashSet};
use rustc_hir::def::Namespace;
use rustc_hir::def_id::DefId;
use rustc_middle::ty::layout::LayoutCx;
use rustc_middle::ty::layout::{HasTyCtxt, HasTypingEnv, LayoutCx};
use rustc_middle::ty::{self, Ty, TyCtxt};
use rustc_session::config::EntryFnType;

use crate::concurrency::GenmcCtx;
use crate::concurrency::thread::TlsAllocAction;
use crate::diagnostics::report_leaks;
use crate::shims::tls;
use crate::shims::{ctor, tls};
use crate::*;

#[derive(Copy, Clone, Debug)]
Expand Down Expand Up @@ -216,9 +216,15 @@ impl Default for MiriConfig {
}

/// The state of the main thread. Implementation detail of `on_main_stack_empty`.
#[derive(Default, Debug)]
#[derive(Debug)]
enum MainThreadState<'tcx> {
#[default]
GlobalCtors {
ctor_state: ctor::GlobalCtorState<'tcx>,
entry_id: DefId,
entry_type: MiriEntryFnType,
argc: ImmTy<'tcx>,
argv: ImmTy<'tcx>,
},
Running,
TlsDtors(tls::TlsDtorsState<'tcx>),
Yield {
Expand All @@ -234,6 +240,15 @@ impl<'tcx> MainThreadState<'tcx> {
) -> InterpResult<'tcx, Poll<()>> {
use MainThreadState::*;
match self {
GlobalCtors { ctor_state, entry_id, entry_type, argc, argv } => {
match ctor_state.on_stack_empty(this)? {
Poll::Pending => {} // just keep going
Poll::Ready(()) => {
call_main(this, *entry_id, *entry_type, argc.clone(), argv.clone())?;
*self = Running;
}
}
}
Running => {
*self = TlsDtors(Default::default());
}
Expand Down Expand Up @@ -309,26 +324,6 @@ pub fn create_ecx<'tcx>(
MiriMachine::new(config, layout_cx, genmc_ctx),
);

// Some parts of initialization require a full `InterpCx`.
MiriMachine::late_init(&mut ecx, config, {
let mut state = MainThreadState::default();
// Cannot capture anything GC-relevant here.
Box::new(move |m| state.on_main_stack_empty(m))
})?;

// Make sure we have MIR. We check MIR for some stable monomorphic function in libcore.
let sentinel =
helpers::try_resolve_path(tcx, &["core", "ascii", "escape_default"], Namespace::ValueNS);
if !matches!(sentinel, Some(s) if tcx.is_mir_available(s.def.def_id())) {
tcx.dcx().fatal(
"the current sysroot was built without `-Zalways-encode-mir`, or libcore seems missing. \
Use `cargo miri setup` to prepare a sysroot that is suitable for Miri."
);
}

// Setup first stack frame.
let entry_instance = ty::Instance::mono(tcx, entry_id);

// First argument is constructed later, because it's skipped for `miri_start`.

// Second argument (argc): length of `config.args`.
Expand Down Expand Up @@ -395,11 +390,51 @@ pub fn create_ecx<'tcx>(
ImmTy::from_immediate(imm, layout)
};

// Some parts of initialization require a full `InterpCx`.
MiriMachine::late_init(&mut ecx, config, {
let mut state = MainThreadState::GlobalCtors {
entry_id,
entry_type,
argc,
argv,
ctor_state: ctor::GlobalCtorState::default(),
};

// Cannot capture anything GC-relevant here.
Box::new(move |m| state.on_main_stack_empty(m))
})?;

// Make sure we have MIR. We check MIR for some stable monomorphic function in libcore.
let sentinel =
helpers::try_resolve_path(tcx, &["core", "ascii", "escape_default"], Namespace::ValueNS);
if !matches!(sentinel, Some(s) if tcx.is_mir_available(s.def.def_id())) {
tcx.dcx().fatal(
"the current sysroot was built without `-Zalways-encode-mir`, or libcore seems missing. \
Use `cargo miri setup` to prepare a sysroot that is suitable for Miri."
);
}

interp_ok(ecx)
}

// Call the entry function.
fn call_main<'tcx>(
ecx: &mut MiriInterpCx<'tcx>,
entry_id: DefId,
entry_type: MiriEntryFnType,
argc: ImmTy<'tcx>,
argv: ImmTy<'tcx>,
) -> InterpResult<'tcx, ()> {
let tcx = ecx.tcx();

// Setup first stack frame.
let entry_instance = ty::Instance::mono(tcx, entry_id);

// Return place (in static memory so that it does not count as leak).
let ret_place = ecx.allocate(ecx.machine.layouts.isize, MiriMemoryKind::Machine.into())?;
ecx.machine.main_fn_ret_place = Some(ret_place.clone());
// Call start function.

// Call start function.
match entry_type {
MiriEntryFnType::Rustc(EntryFnType::Main { .. }) => {
let start_id = tcx.lang_items().start_fn().unwrap_or_else(|| {
Expand All @@ -409,7 +444,7 @@ pub fn create_ecx<'tcx>(
let main_ret_ty = main_ret_ty.no_bound_vars().unwrap();
let start_instance = ty::Instance::try_resolve(
tcx,
typing_env,
ecx.typing_env(),
start_id,
tcx.mk_args(&[ty::GenericArg::from(main_ret_ty)]),
)
Expand All @@ -427,7 +462,7 @@ pub fn create_ecx<'tcx>(
ExternAbi::Rust,
&[
ImmTy::from_scalar(
Scalar::from_pointer(main_ptr, &ecx),
Scalar::from_pointer(main_ptr, ecx),
// FIXME use a proper fn ptr type
ecx.machine.layouts.const_raw_ptr,
),
Expand All @@ -450,7 +485,7 @@ pub fn create_ecx<'tcx>(
}
}

interp_ok(ecx)
interp_ok(())
}

/// Evaluates the entry function specified by `entry_id`.
Expand Down
9 changes: 6 additions & 3 deletions src/helpers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1235,8 +1235,11 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
interp_ok(())
}

/// Lookup an array of immediates stored as a linker section of name `name`.
fn lookup_link_section(&mut self, name: &str) -> InterpResult<'tcx, Vec<ImmTy<'tcx>>> {
/// Lookup an array of immediates from any linker sections matching the provided predicate.
fn lookup_link_section(
&mut self,
include_name: impl Fn(&str) -> bool,
) -> InterpResult<'tcx, Vec<ImmTy<'tcx>>> {
let this = self.eval_context_mut();
let tcx = this.tcx.tcx;

Expand All @@ -1247,7 +1250,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
let Some(link_section) = attrs.link_section else {
return interp_ok(());
};
if link_section.as_str() == name {
if include_name(link_section.as_str()) {
let instance = ty::Instance::mono(tcx, def_id);
let const_val = this.eval_global(instance).unwrap_or_else(|err| {
panic!(
Expand Down
94 changes: 94 additions & 0 deletions src/shims/ctor.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
//! Implement global constructors.
use std::task::Poll;

use rustc_abi::ExternAbi;
use rustc_target::spec::BinaryFormat;

use crate::*;

/// Progress tracker for running the program's global constructors.
///
/// This is an opaque wrapper around the private state enum; the derived `Default`
/// starts it in the `Init` phase, and `on_stack_empty` advances it one step at a time.
#[derive(Default, Debug)]
pub struct GlobalCtorState<'tcx>(GlobalCtorStatePriv<'tcx>);

/// Internal phases of global-constructor execution.
#[derive(Default, Debug)]
enum GlobalCtorStatePriv<'tcx> {
    /// Starting phase: the constructor list has not been looked up yet.
    #[default]
    Init,
    /// The constructor function pointers that still have to be called.
    Ctors(Vec<ImmTy<'tcx>>),
    /// Nothing is left to run.
    Done,
}

impl<'tcx> GlobalCtorState<'tcx> {
    /// Advances global-constructor execution by one step.
    ///
    /// Returns `Poll::Ready(())` once the state is `Done`, and `Poll::Pending` after every
    /// other step:
    /// - `Init`: collects the constructor pointers from the target's link section and
    ///   moves to `Ctors` (or straight to `Done` for binary formats without ctor support).
    /// - `Ctors`: pops one pointer off the end of the list and schedules a call to it;
    ///   once the list is empty, moves to `Done`.
    ///
    /// Errors raised while looking up sections, resolving a function pointer, or setting
    /// up the call are propagated to the caller.
    pub fn on_stack_empty(
        &mut self,
        this: &mut MiriInterpCx<'tcx>,
    ) -> InterpResult<'tcx, Poll<()>> {
        use GlobalCtorStatePriv::*;

        let next_state = match &mut self.0 {
            // Everything has run; report completion without touching the state.
            Done => return interp_ok(Poll::Ready(())),

            Init => {
                let this = this.eval_context_mut();

                // Lookup constructors from the relevant magic link section.
                match this.tcx.sess.target.binary_format {
                    // Read the CRT library section on Windows.
                    BinaryFormat::Coff =>
                        Ctors(this.lookup_link_section(|section| section == ".CRT$XCU")?),

                    // Read the `__mod_init_func` section on macOS.
                    BinaryFormat::MachO =>
                        Ctors(this.lookup_link_section(|section| {
                            // The name has the form `segment,section[,type]`.
                            let mut fields = section.splitn(3, ',');
                            matches!(
                                (fields.next(), fields.next(), fields.next()),
                                (
                                    Some("__DATA"),
                                    Some("__mod_init_func"),
                                    // The `mod_init_funcs` directive ensures that the
                                    // `S_MOD_INIT_FUNC_POINTERS` flag is set on the section,
                                    // but it is not strictly required.
                                    None | Some("mod_init_funcs"),
                                )
                            )
                        })?),

                    // Read the standard `.init_array` section on platforms that use ELF, or WASM,
                    // which supports the same linker directive.
                    // FIXME: Add support for `.init_array.N`.
                    BinaryFormat::Elf | BinaryFormat::Wasm =>
                        Ctors(this.lookup_link_section(|section| section == ".init_array")?),

                    // Other platforms have no global ctor support.
                    _ => Done,
                }
            }

            Ctors(remaining) =>
                match remaining.pop() {
                    // No more constructors to run.
                    None => Done,
                    Some(ctor) => {
                        let this = this.eval_context_mut();

                        let fn_ptr = ctor.to_scalar().to_pointer(this)?;
                        let ctor_fn = this.get_ptr_fn(fn_ptr)?.as_instance()?;

                        // The signature of this function is `unsafe extern "C" fn()`.
                        this.call_function(
                            ctor_fn,
                            ExternAbi::C { unwind: false },
                            &[],
                            None,
                            ReturnContinuation::Stop { cleanup: true },
                        )?;

                        // We stay in this state (but `remaining` got shorter).
                        return interp_ok(Poll::Pending);
                    }
                },
        };

        self.0 = next_state;
        interp_ok(Poll::Pending)
    }
}
1 change: 1 addition & 0 deletions src/shims/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ mod wasi;
mod windows;
mod x86;

pub mod ctor;
pub mod env;
pub mod extern_static;
pub mod foreign_items;
Expand Down
2 changes: 1 addition & 1 deletion src/shims/tls.rs
Original file line number Diff line number Diff line change
Expand Up @@ -302,7 +302,7 @@ trait EvalContextPrivExt<'tcx>: crate::MiriInterpCxExt<'tcx> {

// Windows has a special magic linker section that is run on certain events.
// We don't support most of that, but just enough to make thread-local dtors in `std` work.
interp_ok(this.lookup_link_section(".CRT$XLB")?)
interp_ok(this.lookup_link_section(|section| section == ".CRT$XLB")?)
}

fn schedule_windows_tls_dtor(&mut self, dtor: ImmTy<'tcx>) -> InterpResult<'tcx> {
Expand Down
43 changes: 43 additions & 0 deletions tests/pass/shims/ctor.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
use std::sync::atomic::{AtomicUsize, Ordering};

/// Number of times `ctor` has been invoked so far.
static COUNT: AtomicUsize = AtomicUsize::new(0);

/// Constructor body: records one invocation in `COUNT`.
unsafe extern "C" fn ctor() {
    // The previous value returned by `fetch_add` is not needed.
    let _previous = COUNT.fetch_add(1, Ordering::Relaxed);
}

macro_rules! ctor {
($ident:ident = $ctor:ident) => {
#[cfg_attr(
all(any(
target_os = "linux",
target_os = "android",
target_os = "dragonfly",
target_os = "freebsd",
target_os = "haiku",
target_os = "illumos",
target_os = "netbsd",
target_os = "openbsd",
target_os = "solaris",
target_os = "none",
target_family = "wasm",
)),
link_section = ".init_array"
)]
#[cfg_attr(windows, link_section = ".CRT$XCU")]
#[cfg_attr(
any(target_os = "macos", target_os = "ios"),
link_section = "__DATA,__mod_init_func"
Copy link
Member

@bjorn3 bjorn3 Jul 11, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
link_section = "__DATA,__mod_init_func"
link_section = "__DATA,__mod_init_func,mod_init_funcs"

Mach-O requires the S_MOD_INIT_FUNC_POINTERS section flag (mod_init_funcs in link_section). LLVM was accidentally adding this for __DATA,__mod_init_func making it work without explicitly adding ,mod_init_funcs, but I don't know if it guarantees this. See also rust-lang/rustc_codegen_cranelift#1588

Copy link
Member Author

@ibraheemdev ibraheemdev Jul 11, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

From the Mach-O reference, it looks like mod_init_funcs is not strictly required for the S_MOD_INIT_FUNC_POINTERS flag to be set. It's possible that this is outdated and retained for backwards compatibility.

S_MOD_INIT_FUNC_POINTERS—This section contains pointers to module initialization functions.
The standard tools create __DATA,__mod_init_func sections of this type

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The docs seem to say that S_MOD_INIT_FUNC_POINTERS indicates init functions, which sounds to me like the flag is required to designate a section of init functions?

Copy link
Member Author

@ibraheemdev ibraheemdev Jul 16, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe the flag is implied on __DATA,__mod_init_func sections, but I'm not sure. LLVM seems to implement it that way and the ecosystem relies on it, so I think it makes sense for Miri to follow.

For this specific test I don't think it matters much whether we specify the flag given that we support it either way, but I can add it if you prefer.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If it is questionable whether the version without the extra tag should work (as opposed to "happens to work due to undocumented LLVM accidents"), then at the very least we need a FIXME comment here clarifying this.

the ecosystem relies on it

Do you have a link for that?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The ctor and inventory crates do not set it explicitly.

I can add a comment to the code. I'm not even sure where to find the official Mach-O reference, all I can find are outdated mirrors.

Copy link
Member

@bjorn3 bjorn3 Jul 16, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LLVM currently implies the flag for __DATA,__mod_init_func (https://github.com/llvm/llvm-project/blob/2e8e254d18f51b6ca898bf0b1e4d12109b5b16c7/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp#L1292-L1304), but I don't know if it actually guarantees this. The implication is because it creates a section with this name with the flag set somewhere and then merges the attributes of all definitions of the same section.

Edit: llvm/llvm-project@cb307a2 seems to have introduced this behavior as part of moving away from using the .mod_init_func assembly directive which implied this flag.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have added a comment, that should suffice for now -- thanks for the links!

)]
#[used]
static $ident: unsafe extern "C" fn() = $ctor;
};
}

ctor! { CTOR1 = ctor }
ctor! { CTOR2 = ctor }
ctor! { CTOR3 = ctor }

/// Checks that each of the three registered constructors has run by the time `main` starts.
fn main() {
    let ctor_runs = COUNT.load(Ordering::Relaxed);
    assert_eq!(ctor_runs, 3, "ctors did not run");
}
Loading