diff --git a/lib/compiler-singlepass/src/emitter_x64.rs b/lib/compiler-singlepass/src/emitter_x64.rs index a4c0ceecdda..17e1818d6c1 100644 --- a/lib/compiler-singlepass/src/emitter_x64.rs +++ b/lib/compiler-singlepass/src/emitter_x64.rs @@ -117,6 +117,7 @@ pub trait Emitter { fn emit_xchg(&mut self, sz: Size, src: Location, dst: Location); fn emit_lock_xadd(&mut self, sz: Size, src: Location, dst: Location); fn emit_lock_cmpxchg(&mut self, sz: Size, src: Location, dst: Location); + fn emit_rep_stosq(&mut self); fn emit_btc_gpr_imm8_32(&mut self, src: u8, dst: GPR); fn emit_btc_gpr_imm8_64(&mut self, src: u8, dst: GPR); @@ -1176,6 +1177,9 @@ impl Emitter for Assembler { } } + fn emit_rep_stosq(&mut self) { + dynasm!(self ; rep stosq); + } fn emit_btc_gpr_imm8_32(&mut self, src: u8, dst: GPR) { dynasm!(self ; btc Rd(dst as u8), BYTE src as i8); } diff --git a/lib/compiler-singlepass/src/machine.rs b/lib/compiler-singlepass/src/machine.rs index 17374f8184b..7f446744cd7 100644 --- a/lib/compiler-singlepass/src/machine.rs +++ b/lib/compiler-singlepass/src/machine.rs @@ -3,9 +3,12 @@ use crate::emitter_x64::*; use crate::x64_decl::{new_machine_state, X64Register}; use smallvec::smallvec; use smallvec::SmallVec; +use std::cmp; use std::collections::HashSet; use wasmer_compiler::wasmparser::Type as WpType; +const NATIVE_PAGE_SIZE: usize = 4096; + struct MachineStackOffset(usize); pub struct Machine { @@ -447,11 +450,6 @@ impl Machine { } } - // Initialize all normal locals to zero. - for i in n_params..n { - a.emit_mov(Size::S64, Location::Imm32(0), locations[i]); - } - // Load vmctx into R15. a.emit_mov( Size::S64, @@ -459,6 +457,41 @@ impl Machine { Location::GPR(GPR::R15), ); + // Stack probe. + // + // `rep stosq` writes from low addresses to high addresses and may skip the stack guard page, + // so here we probe the stack explicitly when needed. 
+ for i in (n_params..n).step_by(NATIVE_PAGE_SIZE / 8).skip(1) { + a.emit_mov(Size::S64, Location::Imm32(0), locations[i]); + } + + // Initialize all normal locals to zero. + let mut init_stack_loc_cnt = 0; + let mut last_stack_loc = Location::Memory(GPR::RBP, i32::MAX); + for i in n_params..n { + match locations[i] { + Location::Memory(_, _) => { + init_stack_loc_cnt += 1; + last_stack_loc = cmp::min(last_stack_loc, locations[i]); + } + Location::GPR(_) => { + a.emit_mov(Size::S64, Location::Imm32(0), locations[i]); + } + _ => unreachable!(), + } + } + if init_stack_loc_cnt > 0 { + // The instruction sequence below takes up to 24 bytes, so it is smaller than per-slot `mov`s once more than 2 slots are initialized. + a.emit_mov( + Size::S64, + Location::Imm64(init_stack_loc_cnt as u64), + Location::GPR(GPR::RCX), + ); + a.emit_xor(Size::S64, Location::GPR(GPR::RAX), Location::GPR(GPR::RAX)); + a.emit_lea(Size::S64, last_stack_loc, Location::GPR(GPR::RDI)); + a.emit_rep_stosq(); + } + // Add the size of all locals allocated to stack. self.stack_offset.0 += static_area_size - callee_saved_regs_size;