From 77458752f5e38f843c9f7029763ec770bbf35daa Mon Sep 17 00:00:00 2001 From: Syrus Date: Thu, 3 Dec 2020 21:17:46 -0800 Subject: [PATCH 1/6] Added address map instructions for traps in singlepass --- lib/compiler-singlepass/src/address_map.rs | 23 +++++ lib/compiler-singlepass/src/codegen_x64.rs | 109 +++++++++++++++++---- lib/compiler-singlepass/src/compiler.rs | 3 +- lib/compiler-singlepass/src/lib.rs | 1 + 4 files changed, 115 insertions(+), 21 deletions(-) create mode 100644 lib/compiler-singlepass/src/address_map.rs diff --git a/lib/compiler-singlepass/src/address_map.rs b/lib/compiler-singlepass/src/address_map.rs new file mode 100644 index 00000000000..5253066a66f --- /dev/null +++ b/lib/compiler-singlepass/src/address_map.rs @@ -0,0 +1,23 @@ +// This file contains code from external sources. +// Attributions: https://github.com/wasmerio/wasmer/blob/master/ATTRIBUTIONS.md + +use wasmer_compiler::{FunctionAddressMap, FunctionBodyData, InstructionAddressMap, SourceLoc}; + +pub fn get_function_address_map<'data>( + instructions: Vec, + data: &FunctionBodyData<'data>, + body_len: usize, +) -> FunctionAddressMap { + // Generate artificial srcloc for function start/end to identify boundary + // within module. It will wrap around if byte code is larger than 4 GB. + let start_srcloc = SourceLoc::new(data.module_offset as u32); + let end_srcloc = SourceLoc::new((data.module_offset + data.data.len()) as u32); + + FunctionAddressMap { + instructions, + start_srcloc, + end_srcloc, + body_offset: 0, + body_len, + } +} diff --git a/lib/compiler-singlepass/src/codegen_x64.rs b/lib/compiler-singlepass/src/codegen_x64.rs index 150c5788610..dd0a06d6f31 100644 --- a/lib/compiler-singlepass/src/codegen_x64.rs +++ b/lib/compiler-singlepass/src/codegen_x64.rs @@ -1,3 +1,4 @@ +use crate::address_map::get_function_address_map; use crate::{common_decl::*, config::Singlepass, emitter_x64::*, machine::Machine, x64_decl::*}; use dynasmrt::{x64::Assembler, DynamicLabel}; use smallvec::{smallvec, SmallVec}; @@ -8,8 +9,8 @@ use wasmer_compiler::wasmparser::{ }; use wasmer_compiler::{ CompiledFunction, CompiledFunctionFrameInfo, CustomSection, CustomSectionProtection, - FunctionBody, Relocation, RelocationKind, RelocationTarget, SectionBody, SectionIndex, - TrapInformation, + FunctionBody, FunctionBodyData, InstructionAddressMap, Relocation, RelocationKind, + RelocationTarget, SectionBody, SectionIndex, SourceLoc, TrapInformation, }; use wasmer_types::{ entity::{EntityRef, PrimaryMap, SecondaryMap}, @@ -80,6 +81,12 @@ pub struct FuncGen<'a> { /// A set of special labels for trapping. special_labels: SpecialLabelSet, + + /// The source location for the current operator + src_loc: u32, + + /// All the InstructionAddressMap + instructions_address_map: Vec, } struct SpecialLabelSet { @@ -251,6 +258,11 @@ struct I2O1 { } impl<'a> FuncGen<'a> { + /// Set the source location of the Wasm to the given offset + pub fn set_srcloc(&mut self, offset: u32) { + self.src_loc = offset; + } + fn get_location_released(&mut self, loc: Location) -> Location { self.machine.release_locations(&mut self.assembler, &[loc]); loc @@ -306,6 +318,11 @@ impl<'a> FuncGen<'a> { for i in begin..end { self.trap_table.offset_to_code.insert(i, code); } + self.instructions_address_map.push(InstructionAddressMap { + srcloc: SourceLoc::new(self.src_loc), + code_offset: begin, + code_len: end - begin, + }); ret } @@ -313,6 +330,11 @@ impl<'a> FuncGen<'a> { fn mark_address_with_trap_code(&mut self, code: TrapCode) { let offset = self.assembler.get_offset().0; self.trap_table.offset_to_code.insert(offset, code); + self.instructions_address_map.push(InstructionAddressMap { + srcloc: SourceLoc::new(self.src_loc), + code_offset: offset, + code_len: 2, // TODO: Check this length + }); } /// Canonicalizes the floating point value at `input` into `output`. @@ -379,16 +401,28 @@ impl<'a> FuncGen<'a> { Location::Imm64(_) | Location::Imm32(_) => { self.assembler.emit_mov(sz, loc, Location::GPR(GPR::RCX)); // must not be used during div (rax, rdx) self.mark_trappable(); + let offset = self.assembler.get_offset().0; + self.instructions_address_map.push(InstructionAddressMap { + srcloc: SourceLoc::new(self.src_loc), + code_offset: offset, + code_len: 2, // TODO: Check this length + }); self.trap_table .offset_to_code - .insert(self.assembler.get_offset().0, TrapCode::IntegerOverflow); + .insert(offset, TrapCode::IntegerOverflow); op(&mut self.assembler, sz, Location::GPR(GPR::RCX)); } _ => { self.mark_trappable(); + let offset = self.assembler.get_offset().0; + self.instructions_address_map.push(InstructionAddressMap { + srcloc: SourceLoc::new(self.src_loc), + code_offset: offset, + code_len: 2, // TODO: Check this length + }); self.trap_table .offset_to_code - .insert(self.assembler.get_offset().0, TrapCode::IntegerOverflow); + .insert(offset, TrapCode::IntegerOverflow); op(&mut self.assembler, sz, loc); } } @@ -1473,16 +1507,28 @@ impl<'a> FuncGen<'a> { ); self.assembler.emit_label(trap_overflow); + let offset = self.assembler.get_offset().0; + self.instructions_address_map.push(InstructionAddressMap { + srcloc: SourceLoc::new(self.src_loc), + code_offset: offset, + code_len: 2, // TODO: Check this length + }); self.trap_table .offset_to_code - .insert(self.assembler.get_offset().0, TrapCode::IntegerOverflow); + .insert(offset, TrapCode::IntegerOverflow); self.assembler.emit_ud2(); self.assembler.emit_label(trap_badconv); - self.trap_table.offset_to_code.insert( - self.assembler.get_offset().0, - TrapCode::BadConversionToInteger, - ); + + let offset = self.assembler.get_offset().0; + self.instructions_address_map.push(InstructionAddressMap { + srcloc: SourceLoc::new(self.src_loc), + code_offset: offset, + code_len: 2, // TODO: Check this length + }); + self.trap_table + .offset_to_code + .insert(offset, TrapCode::BadConversionToInteger); self.assembler.emit_ud2(); self.assembler.emit_label(end); @@ -1622,16 +1668,27 @@ impl<'a> FuncGen<'a> { ); self.assembler.emit_label(trap_overflow); + let offset = self.assembler.get_offset().0; + self.instructions_address_map.push(InstructionAddressMap { + srcloc: SourceLoc::new(self.src_loc), + code_offset: offset, + code_len: 2, // TODO: Check this length + }); self.trap_table .offset_to_code - .insert(self.assembler.get_offset().0, TrapCode::IntegerOverflow); + .insert(offset, TrapCode::IntegerOverflow); self.assembler.emit_ud2(); self.assembler.emit_label(trap_badconv); - self.trap_table.offset_to_code.insert( - self.assembler.get_offset().0, - TrapCode::BadConversionToInteger, - ); + let offset = self.assembler.get_offset().0; + self.instructions_address_map.push(InstructionAddressMap { + srcloc: SourceLoc::new(self.src_loc), + code_offset: offset, + code_len: 2, // TODO: Check this length + }); + self.trap_table + .offset_to_code + .insert(offset, TrapCode::BadConversionToInteger); self.assembler.emit_ud2(); self.assembler.emit_label(end); @@ -1819,6 +1876,8 @@ impl<'a> FuncGen<'a> { trap_table: TrapTable::default(), relocations: vec![], special_labels, + src_loc: 0, + instructions_address_map: vec![], }; fg.emit_head()?; Ok(fg) @@ -6128,10 +6187,15 @@ impl<'a> FuncGen<'a> { } Operator::Unreachable => { self.mark_trappable(); - self.trap_table.offset_to_code.insert( - self.assembler.get_offset().0, - TrapCode::UnreachableCodeReached, - ); + let offset = self.assembler.get_offset().0; + self.instructions_address_map.push(InstructionAddressMap { + srcloc: SourceLoc::new(self.src_loc), + code_offset: offset, + code_len: 2, // TODO: Check this length + }); + self.trap_table + .offset_to_code + .insert(offset, TrapCode::UnreachableCodeReached); self.assembler.emit_ud2(); self.unreachable_depth = 1; } @@ -8125,7 +8189,7 @@ impl<'a> FuncGen<'a> { Ok(()) } - pub fn finalize(mut self) -> CompiledFunction { + pub fn finalize(mut self, data: &FunctionBodyData) -> CompiledFunction { // Generate actual code for special labels. self.assembler .emit_label(self.special_labels.integer_division_by_zero); @@ -8153,6 +8217,11 @@ impl<'a> FuncGen<'a> { // Notify the assembler backend to generate necessary code at end of function. self.assembler.finalize_function(); + + let body_len = self.assembler.get_offset().0; + let instructions_address_map = self.instructions_address_map; + let address_map = get_function_address_map(instructions_address_map, data, body_len); + CompiledFunction { body: FunctionBody { body: self.assembler.finalize().unwrap().to_vec(), @@ -8170,7 +8239,7 @@ impl<'a> FuncGen<'a> { trap_code: code, }) .collect(), - ..Default::default() + address_map, }, } } diff --git a/lib/compiler-singlepass/src/compiler.rs b/lib/compiler-singlepass/src/compiler.rs index 2c90a6d3a04..72107a9f3d6 100644 --- a/lib/compiler-singlepass/src/compiler.rs +++ b/lib/compiler-singlepass/src/compiler.rs @@ -106,11 +106,12 @@ impl Compiler for SinglepassCompiler { .map_err(to_compile_error)?; while generator.has_control_frames() { + generator.set_srcloc(reader.original_position() as u32); let op = reader.read_operator().map_err(to_compile_error)?; generator.feed_operator(op).map_err(to_compile_error)?; } - Ok(generator.finalize()) + Ok(generator.finalize(&input)) }) .collect::, CompileError>>()? .into_iter() diff --git a/lib/compiler-singlepass/src/lib.rs b/lib/compiler-singlepass/src/lib.rs index e549e7d1551..202c0b9da38 100644 --- a/lib/compiler-singlepass/src/lib.rs +++ b/lib/compiler-singlepass/src/lib.rs @@ -8,6 +8,7 @@ //! Compared to Cranelift and LLVM, Singlepass compiles much faster but has worse //! runtime performance. +mod address_map; mod codegen_x64; mod common_decl; mod compiler; From 55d001dab9877647715d1f413b550b9d642f2daf Mon Sep 17 00:00:00 2001 From: Syrus Date: Thu, 3 Dec 2020 21:17:55 -0800 Subject: [PATCH 2/6] Debug frameinfos --- lib/engine-jit/src/artifact.rs | 4 +--- lib/engine/src/trap/frame_info.rs | 16 +++++++++++++--- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/lib/engine-jit/src/artifact.rs b/lib/engine-jit/src/artifact.rs index e9d91e10bdc..b1cc5c610a2 100644 --- a/lib/engine-jit/src/artifact.rs +++ b/lib/engine-jit/src/artifact.rs @@ -99,9 +99,7 @@ impl JITArtifact { .collect::>() .into_boxed_slice(); - let frame_infos = compilation - .get_frame_info() - .values() + let frame_infos = dbg!(compilation.get_frame_info().values()) .map(|frame_info| SerializableFunctionFrameInfo::Processed(frame_info.clone())) .collect::>(); diff --git a/lib/engine/src/trap/frame_info.rs b/lib/engine/src/trap/frame_info.rs index 9d5585a03dc..67bc22adb65 100644 --- a/lib/engine/src/trap/frame_info.rs +++ b/lib/engine/src/trap/frame_info.rs @@ -108,19 +108,22 @@ impl GlobalFrameInfo { /// Returns an object if this `pc` is known to some previously registered /// module, or returns `None` if no information can be found. pub fn lookup_frame_info(&self, pc: usize) -> Option { + dbg!(pc); let module = self.module_info(pc)?; + println!("lookup_frame_info::module_info success"); let func = module.function_info(pc)?; + println!("lookup_frame_info::function_info success"); // Use our relative position from the start of the function to find the // machine instruction that corresponds to `pc`, which then allows us to // map that to a wasm original source location. - let rel_pos = pc - func.start; + let rel_pos = dbg!(pc - func.start); let instr_map = &module .processed_function_frame_info(func.local_index) .address_map; - let pos = match instr_map + let pos = match dbg!(instr_map .instructions - .binary_search_by_key(&rel_pos, |map| map.code_offset) + .binary_search_by_key(&rel_pos, |map| map.code_offset)) { // Exact hit! Ok(pos) => Some(pos), @@ -144,6 +147,8 @@ impl GlobalFrameInfo { } }; + dbg!(pos); + // In debug mode for now assert that we found a mapping for `pc` within // the function, because otherwise something is buggy along the way and // not accounting for all the instructions. This isn't super critical @@ -277,6 +282,11 @@ pub fn register( assert!(*prev_end < min); } + println!( + "Frame info ranges for this Module info are ({}, {})", + min, max + ); + // ... then insert our range and assert nothing was there previously let prev = info.ranges.insert( max, From 917eadb7846af12c45b60e474fa32b30a4eaeab1 Mon Sep 17 00:00:00 2001 From: Syrus Date: Thu, 3 Dec 2020 21:37:29 -0800 Subject: [PATCH 3/6] Improve debugging of FunctionInfo --- lib/engine/src/trap/frame_info.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/engine/src/trap/frame_info.rs b/lib/engine/src/trap/frame_info.rs index 67bc22adb65..0487018ca1e 100644 --- a/lib/engine/src/trap/frame_info.rs +++ b/lib/engine/src/trap/frame_info.rs @@ -97,6 +97,7 @@ impl ModuleInfoFrameInfo { } } +#[derive(Debug)] struct FunctionInfo { start: usize, local_index: LocalFunctionIndex, @@ -111,7 +112,7 @@ impl GlobalFrameInfo { dbg!(pc); let module = self.module_info(pc)?; println!("lookup_frame_info::module_info success"); - let func = module.function_info(pc)?; + let func = dbg!(module.function_info(pc)?); println!("lookup_frame_info::function_info success"); // Use our relative position from the start of the function to find the From 2178f27a470dd6c7b05cb8a9a5a2529ecf2c9d66 Mon Sep 17 00:00:00 2001 From: Syrus Date: Thu, 3 Dec 2020 22:09:40 -0800 Subject: [PATCH 4/6] Added stackoverflow traps on function calls in singlepass --- lib/compiler-singlepass/src/codegen_x64.rs | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/lib/compiler-singlepass/src/codegen_x64.rs b/lib/compiler-singlepass/src/codegen_x64.rs index dd0a06d6f31..cf7ea8f1336 100644 --- a/lib/compiler-singlepass/src/codegen_x64.rs +++ b/lib/compiler-singlepass/src/codegen_x64.rs @@ -5224,6 +5224,15 @@ impl<'a> FuncGen<'a> { self.emit_call_sysv( |this| { + let offset = this.assembler.get_offset().0; + this.instructions_address_map.push(InstructionAddressMap { + srcloc: SourceLoc::new(this.src_loc), + code_offset: offset, + code_len: 3, + }); + this.trap_table + .offset_to_code + .insert(offset, TrapCode::StackOverflow); this.assembler.emit_call_location(Location::GPR(GPR::RAX)); }, params.iter().copied(), @@ -5417,6 +5426,15 @@ impl<'a> FuncGen<'a> { ), ); } else { + let offset = this.assembler.get_offset().0; + this.instructions_address_map.push(InstructionAddressMap { + srcloc: SourceLoc::new(this.src_loc), + code_offset: offset, + code_len: 3, + }); + this.trap_table + .offset_to_code + .insert(offset, TrapCode::StackOverflow); this.assembler.emit_call_location(Location::Memory( GPR::RAX, vmcaller_checked_anyfunc_func_ptr as i32, From cc0d595f7690a787460044e4e3dc28adbde9b7b6 Mon Sep 17 00:00:00 2001 From: Syrus Date: Thu, 3 Dec 2020 23:02:09 -0800 Subject: [PATCH 5/6] Fixed last tests in Linux --- lib/compiler-singlepass/src/codegen_x64.rs | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/lib/compiler-singlepass/src/codegen_x64.rs b/lib/compiler-singlepass/src/codegen_x64.rs index cf7ea8f1336..47fce9dd948 100644 --- a/lib/compiler-singlepass/src/codegen_x64.rs +++ b/lib/compiler-singlepass/src/codegen_x64.rs @@ -1811,6 +1811,17 @@ impl<'a> FuncGen<'a> { // TODO: Full preemption by explicit signal checking + // We insert set StackOverflow as the default trap that can happen + // anywhere in the function prologue for sake of simplicity. + self.instructions_address_map.push(InstructionAddressMap { + srcloc: SourceLoc::new(self.src_loc), + code_offset: 0, + code_len: self.assembler.get_offset().0, + }); + self.trap_table + .offset_to_code + .insert(0, TrapCode::StackOverflow); + if self.machine.state.wasm_inst_offset != std::usize::MAX { return Err(CodegenError { message: "emit_head: wasm_inst_offset not std::usize::MAX".to_string(), From 6eb897db0a4278e3f9d7147874b25e6e8572ccc3 Mon Sep 17 00:00:00 2001 From: Syrus Date: Thu, 3 Dec 2020 23:03:49 -0800 Subject: [PATCH 6/6] Revert "Debug frameinfos" This reverts commit 55d001dab9877647715d1f413b550b9d642f2daf. # Conflicts: # lib/engine/src/trap/frame_info.rs --- lib/engine-jit/src/artifact.rs | 4 +++- lib/engine/src/trap/frame_info.rs | 18 ++++-------------- 2 files changed, 7 insertions(+), 15 deletions(-) diff --git a/lib/engine-jit/src/artifact.rs b/lib/engine-jit/src/artifact.rs index b1cc5c610a2..e9d91e10bdc 100644 --- a/lib/engine-jit/src/artifact.rs +++ b/lib/engine-jit/src/artifact.rs @@ -99,7 +99,9 @@ impl JITArtifact { .collect::>() .into_boxed_slice(); - let frame_infos = dbg!(compilation.get_frame_info().values()) + let frame_infos = compilation + .get_frame_info() + .values() .map(|frame_info| SerializableFunctionFrameInfo::Processed(frame_info.clone())) .collect::>(); diff --git a/lib/engine/src/trap/frame_info.rs b/lib/engine/src/trap/frame_info.rs index 0487018ca1e..f929f39ff94 100644 --- a/lib/engine/src/trap/frame_info.rs +++ b/lib/engine/src/trap/frame_info.rs @@ -109,22 +109,19 @@ impl GlobalFrameInfo { /// Returns an object if this `pc` is known to some previously registered /// module, or returns `None` if no information can be found. pub fn lookup_frame_info(&self, pc: usize) -> Option { - dbg!(pc); let module = self.module_info(pc)?; - println!("lookup_frame_info::module_info success"); - let func = dbg!(module.function_info(pc)?); - println!("lookup_frame_info::function_info success"); + let func = module.function_info(pc)?; // Use our relative position from the start of the function to find the // machine instruction that corresponds to `pc`, which then allows us to // map that to a wasm original source location. - let rel_pos = dbg!(pc - func.start); + let rel_pos = pc - func.start; let instr_map = &module .processed_function_frame_info(func.local_index) .address_map; - let pos = match dbg!(instr_map + let pos = match instr_map .instructions - .binary_search_by_key(&rel_pos, |map| map.code_offset)) + .binary_search_by_key(&rel_pos, |map| map.code_offset) { // Exact hit! Ok(pos) => Some(pos), @@ -148,8 +145,6 @@ impl GlobalFrameInfo { } }; - dbg!(pos); - // In debug mode for now assert that we found a mapping for `pc` within // the function, because otherwise something is buggy along the way and // not accounting for all the instructions. This isn't super critical @@ -283,11 +278,6 @@ pub fn register( assert!(*prev_end < min); } - println!( - "Frame info ranges for this Module info are ({}, {})", - min, max - ); - // ... then insert our range and assert nothing was there previously let prev = info.ranges.insert( max,