Skip to content

Commit

Permalink
In resume instruction, update StackLimits from generated code (#223)
Browse files Browse the repository at this point in the history
This PR is part of a series that implements native stack switching.

This particular PR moves one particular aspect out of the `tc_resume`
libcall: Updating the `StackLimits` object of the parent of the
continuation being resumed.

Doing this *inside* the libcall had practical reasons: We needed access
to the updated `last_wasm_exit_pc`/`last_wasm_exit_fp` value in the
`VMRuntimeLimits` in order to copy them into the `StackLimits`. However,
these values are only updated by the libcall mechanism itself, so they
are only available inside the libcall implementation itself.

This PR obtains the corresponding values using code generated for
`resume`, before the actual `tc_resume` libcall happens, and writes
them into the `StackLimits`.

1. We use the `get_frame_pointer` instruction to obtain a value for
`last_wasm_exit_fp`.
2. The value for `last_wasm_exit_pc` is obtained using a new CLIF
instruction, `get_instruction_pointer`. All this does is give us some
instruction pointer that is guaranteed to be associated with the current
Wasm instruction being translated (i.e., `resume` in our case). While
this means that we will write slightly different values for
`last_wasm_exit_pc` into the `StackLimits` than before, this difference
does not matter at all for backtrace creation. `last_wasm_exit_pc` is
never used for control flow (i.e., it is never branched to); all that
matters is which Wasm instruction it is associated with.

I consider this to be a workaround. Once native stack switching is fully
rolled out, it would be nice to overhaul the whole backtrace generation
mechanism in the longer term. But this PR's goal is to make it possible
to move to native stack switching with as few changes to backtrace
creation as possible.
  • Loading branch information
frank-emrich authored Sep 11, 2024
1 parent 8a25022 commit 46da60f
Show file tree
Hide file tree
Showing 11 changed files with 146 additions and 41 deletions.
26 changes: 26 additions & 0 deletions cranelift/codegen/meta/src/shared/instructions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1322,6 +1322,32 @@ pub(crate) fn define(
.operands_out(vec![Operand::new("addr", iAddr)]),
);

// NOTE(frank-emrich)
// This is only used as part of a temporary workaround while our
// cross-continuation implementation of backtraces is built around the
// assumption that we "exit" Wasm on resume.
// The returned instruction pointer is never used for actual control flow
// (i.e., we never branch to it), but only for the construction of
// backtraces.
//
// We conservatively give it all kinds of side-effects to avoid it being
// moved around too much, but all that matters anyway is that during the
// Wasm -> CLIF translation, this CLIF instruction is associated with the
// current Wasm instruction.
ig.push(
Inst::new(
"get_instruction_pointer",
r#"
Get the instruction pointer at this instruction.
"#,
&formats.nullary,
)
.operands_out(vec![Operand::new("addr", iAddr)])
.other_side_effects()
.can_load()
.can_store(),
);

ig.push(
Inst::new(
"iconst",
Expand Down
3 changes: 3 additions & 0 deletions cranelift/codegen/src/isa/x64/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -729,6 +729,9 @@
;; this program point.
(Unwind (inst UnwindInst))

;; Writes the current PC into dst
(GetRip (dst WritableGpr))

;; A pseudoinstruction that just keeps a value alive.
(DummyUse (reg Reg))))

Expand Down
9 changes: 9 additions & 0 deletions cranelift/codegen/src/isa/x64/inst/emit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4348,6 +4348,15 @@ pub(crate) fn emit(
Inst::DummyUse { .. } => {
// Nothing.
}

Inst::GetRip { dst } => {
let here = sink.get_label();
sink.bind_label(here, state.ctrl_plane_mut());
let amode = Amode::RipRelative { target: here };
let dst = dst.map(|gpr| Reg::from(gpr));
let inst = Inst::lea(amode, dst);
inst.emit(sink, info, state);
}
}

state.clear_post_insn();
Expand Down
9 changes: 9 additions & 0 deletions cranelift/codegen/src/isa/x64/inst/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@ impl Inst {
| Inst::CoffTlsGetAddr { .. }
| Inst::Unwind { .. }
| Inst::DummyUse { .. }
| Inst::GetRip { .. }
| Inst::AluConstOp { .. } => smallvec![],

Inst::AluRmRVex { op, .. } => op.available_from(),
Expand Down Expand Up @@ -1889,6 +1890,10 @@ impl PrettyPrint for Inst {
let reg = pretty_print_reg(*reg, 8);
format!("dummy_use {reg}")
}

Inst::GetRip { .. } => {
format!("get_rip")
}
}
}
}
Expand Down Expand Up @@ -2538,6 +2543,10 @@ fn x64_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) {
Inst::DummyUse { reg } => {
collector.reg_use(reg);
}

Inst::GetRip { dst } => {
collector.reg_def(dst);
}
}
}

Expand Down
5 changes: 5 additions & 0 deletions cranelift/codegen/src/isa/x64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -3410,6 +3410,11 @@
(Amode.ImmReg 8 (x64_rbp) (mem_flags_trusted))
(ExtKind.None)))

;; Rules for `get_instruction_pointer`: emit the `GetRip` pseudo-instruction
;; (which writes the current PC into its destination) with a temporary
;; writable GPR as destination, and return that register as the result.
(rule (lower (get_instruction_pointer))
(let ((dst WritableGpr (temp_writable_gpr))
(_ Unit (emit (MInst.GetRip dst))))
(value_reg dst)))

;; Rules for `jump` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower_branch (jump _) (single_target target))
Expand Down
2 changes: 2 additions & 0 deletions cranelift/codegen/src/isa/x64/pcc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -903,6 +903,8 @@ pub(crate) fn check(
Inst::Unwind { .. } | Inst::DummyUse { .. } => Ok(()),

Inst::StackSwitchBasic { .. } => Err(PccError::UnimplementedInst),

Inst::GetRip { .. } => Err(PccError::UnimplementedInst),
}
}

Expand Down
1 change: 1 addition & 0 deletions cranelift/interpreter/src/step.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1283,6 +1283,7 @@ where
Opcode::X86Pmulhrsw => unimplemented!("X86Pmulhrsw"),
Opcode::X86Pmaddubsw => unimplemented!("X86Pmaddubsw"),
Opcode::X86Cvtt2dq => unimplemented!("X86Cvtt2dq"),
Opcode::GetInstructionPointer => unimplemented!("GetInstructionPointer"),
Opcode::StackSwitch => unimplemented!("StackSwitch"),
})
}
Expand Down
103 changes: 88 additions & 15 deletions crates/cranelift/src/wasmfx/optimized.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1078,11 +1078,18 @@ pub(crate) mod typed_continuation_helpers {
/// Overwrites the `last_wasm_entry_sp` field of the `StackLimits`
/// object associated with this stack chain by loading the corresponding
/// field from the `VMRuntimeLimits`.
/// If `load_stack_limit` is true, we do the same for the `stack_limit`
/// field.
/// If `wasm_exit_fp`/`wasm_exit_pc` values are provided, we use them to
/// overwrite the respective fields in the `StackLimits`.
pub fn load_limits_from_vmcontext<'a>(
&self,
env: &mut crate::func_environ::FuncEnvironment<'a>,
builder: &mut FunctionBuilder,
vmruntime_limits_ptr: ir::Value,
load_stack_limit: bool,
wasm_exit_fp: Option<ir::Value>,
wasm_exit_pc: Option<ir::Value>,
) {
use wasmtime_continuations::offsets as o;

Expand All @@ -1091,18 +1098,49 @@ pub(crate) mod typed_continuation_helpers {
let memflags = ir::MemFlags::trusted();
let pointer_size = self.pointer_type.bytes() as u8;

let last_wasm_entry_sp = builder.ins().load(
self.pointer_type,
memflags,
vmruntime_limits_ptr,
let mut copy = |runtime_limits_offset, stack_limits_offset| {
let from_vm_runtime_limits = builder.ins().load(
self.pointer_type,
memflags,
vmruntime_limits_ptr,
runtime_limits_offset,
);
builder.ins().store(
memflags,
from_vm_runtime_limits,
stack_limits_ptr,
stack_limits_offset as i32,
);
};
copy(
pointer_size.vmruntime_limits_last_wasm_entry_sp(),
o::stack_limits::LAST_WASM_ENTRY_SP,
);
builder.ins().store(
memflags,
last_wasm_entry_sp,
stack_limits_ptr,
o::stack_limits::LAST_WASM_ENTRY_SP as i32,
);

if load_stack_limit {
copy(
pointer_size.vmruntime_limits_stack_limit(),
o::stack_limits::STACK_LIMIT,
);
}

wasm_exit_fp.inspect(|wasm_exit_fp| {
builder.ins().store(
memflags,
*wasm_exit_fp,
stack_limits_ptr,
o::stack_limits::LAST_WASM_EXIT_FP as i32,
);
});

wasm_exit_pc.inspect(|wasm_exit_pc| {
builder.ins().store(
memflags,
*wasm_exit_pc,
stack_limits_ptr,
o::stack_limits::LAST_WASM_EXIT_PC as i32,
);
});
}
}
}
Expand Down Expand Up @@ -1502,19 +1540,48 @@ pub(crate) fn translate_resume<'a>(
// See the comment on `wasmtime_continuations::StackChain` for a
// description of the invariants that we maintain for the various stack
// limits.
let parent_stacks_limit_pointer = parent_stack_chain.get_stack_limits_ptr(env, builder);

// We mark `resume_contref` to be invoked
let co = tc::VMContRef::new(resume_contref, env.pointer_type());
co.set_state(builder, wasmtime_continuations::State::Invoked);

// We update the `StackLimits` of the parent of the continuation to be resumed
// as well as the `VMRuntimeLimits`.
// See the comment on `wasmtime_continuations::StackChain` for a description
// of the invariants that we maintain for the various stack limits.
// NOTE(frank-emrich) The `last_wasm_exit_pc` field in the `StackLimits`
// of the active continuation is only used for the purposes of backtrace
// creation; it does not affect control flow at all.
// All that matters is that it must contain an arbitrary PC that
// Wasmtime has associated with the current Wasm `resume` instruction
// being translated. Previously, the value for this field was obtained
// inside the `tc_resume` libcall: Entering the libcall would automatically
// set `last_wasm_exit_pc` in the `VMRuntimeLimits` to the
// return address of the libcall, which would indeed be a PC within the
// translation of `resume`. We now set the value of `last_wasm_exit_pc`
// directly in generated code by using the `get_instruction_pointer` CLIF
// instruction.
let vm_runtime_limits_ptr = vmctx.load_vm_runtime_limits_ptr(env, builder);
let last_wasm_exit_fp = builder.ins().get_frame_pointer(env.pointer_type());
let last_wasm_exit_pc = builder.ins().get_instruction_pointer(env.pointer_type());
parent_stack_chain.load_limits_from_vmcontext(
env,
builder,
vm_runtime_limits_ptr,
true,
Some(last_wasm_exit_fp),
Some(last_wasm_exit_pc),
);
let resume_stackchain =
tc::StackChain::from_continuation(builder, resume_contref, env.pointer_type());
resume_stackchain.write_limits_to_vmcontext(env, builder, vm_runtime_limits_ptr);

call_builtin!(
builder,
env,
let result =
tc_resume(
resume_contref,
parent_stacks_limit_pointer)
resume_contref)
);

emit_debug_println!(
Expand All @@ -1526,7 +1593,6 @@ pub(crate) fn translate_resume<'a>(

// Now the parent contref (or main stack) is active again
vmctx.store_stack_chain(env, builder, &parent_stack_chain);
let vm_runtime_limits_ptr = vmctx.load_vm_runtime_limits_ptr(env, builder);

// Extract the result and signal bit.
let result = ControlEffect::new(result);
Expand Down Expand Up @@ -1559,7 +1625,14 @@ pub(crate) fn translate_resume<'a>(
// We store parts of the VMRuntimeLimits into the continuation that just suspended.
let suspended_chain =
tc::StackChain::from_continuation(builder, resume_contref, env.pointer_type());
suspended_chain.load_limits_from_vmcontext(env, builder, vm_runtime_limits_ptr);
suspended_chain.load_limits_from_vmcontext(
env,
builder,
vm_runtime_limits_ptr,
false,
None,
None,
);

// Afterwards (!), restore parts of the VMRuntimeLimits from the
// parent of the suspended continuation (which is now active).
Expand Down
2 changes: 1 addition & 1 deletion crates/environ/src/builtin.rs
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ macro_rules! foreach_builtin_function {
tc_cont_new(vmctx: vmctx, r: pointer, param_count: i32, result_count: i32) -> pointer;
// Resumes a continuation. The result value is of type
// wasmtime_continuations::SwitchDirection.
tc_resume(vmctx: vmctx, contref: pointer, parent_stack_limits: pointer) -> pointer;
tc_resume(vmctx: vmctx, contref: pointer) -> pointer;
// Suspends a continuation.
tc_suspend(vmctx: vmctx, tag: pointer);

Expand Down
20 changes: 1 addition & 19 deletions crates/wasmtime/src/runtime/vm/continuation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -298,7 +298,6 @@ pub mod optimized {
pub fn resume(
instance: &mut Instance,
contref: *mut VMContRef,
parent_stack_limits: *mut StackLimits,
) -> Result<ControlEffect, TrapReason> {
let cont = unsafe {
contref.as_ref().ok_or_else(|| {
Expand Down Expand Up @@ -335,22 +334,6 @@ pub mod optimized {
}
}

// See the comment on `wasmtime_continuations::StackChain` for a description
// of the invariants that we maintain for the various stack limits.
unsafe {
let runtime_limits = &**instance.runtime_limits();

(*parent_stack_limits).stack_limit = *runtime_limits.stack_limit.get();
(*parent_stack_limits).last_wasm_entry_sp = *runtime_limits.last_wasm_entry_sp.get();
// These last two values were only just updated in the `runtime_limits`
// because we entered the current libcall.
(*parent_stack_limits).last_wasm_exit_fp = *runtime_limits.last_wasm_exit_fp.get();
(*parent_stack_limits).last_wasm_exit_pc = *runtime_limits.last_wasm_exit_pc.get();

*runtime_limits.stack_limit.get() = (*contref).limits.stack_limit;
*runtime_limits.last_wasm_entry_sp.get() = (*contref).limits.last_wasm_entry_sp;
}

Ok(cont.stack.resume())
}

Expand Down Expand Up @@ -934,7 +917,7 @@ pub mod stack_chain {
#[cfg(feature = "wasmfx_baseline")]
pub mod optimized {
use crate::runtime::vm::{Instance, TrapReason};
pub use wasmtime_continuations::{ControlEffect, StackLimits};
pub use wasmtime_continuations::ControlEffect;

pub type VMContRef = super::baseline::VMContRef;

Expand Down Expand Up @@ -964,7 +947,6 @@ pub mod optimized {
pub fn resume(
_instance: &mut Instance,
_contref: *mut VMContRef,
_parent_stack_limits: *mut StackLimits,
) -> Result<ControlEffect, TrapReason> {
panic!("attempt to execute continuation::optimized::resume with `typed_continuation_baseline_implementation` toggled!")
}
Expand Down
7 changes: 1 addition & 6 deletions crates/wasmtime/src/runtime/vm/libcalls.rs
Original file line number Diff line number Diff line change
Expand Up @@ -931,15 +931,10 @@ fn tc_cont_new(
Ok(ans.cast::<u8>())
}

fn tc_resume(
instance: &mut Instance,
contref: *mut u8,
parent_stack_limits: *mut u8,
) -> Result<*mut u8, TrapReason> {
fn tc_resume(instance: &mut Instance, contref: *mut u8) -> Result<*mut u8, TrapReason> {
crate::vm::continuation::optimized::resume(
instance,
contref.cast::<crate::vm::continuation::optimized::VMContRef>(),
parent_stack_limits.cast::<crate::vm::continuation::optimized::StackLimits>(),
)
.map(|reason| reason.into())
}
Expand Down

0 comments on commit 46da60f

Please sign in to comment.