From 8ea7bfd56371cc4666d02baafd79f20d754851f6 Mon Sep 17 00:00:00 2001 From: losfair Date: Mon, 16 Mar 2020 00:43:19 +0800 Subject: [PATCH 01/19] NaN canonicalization by tracking values. --- lib/singlepass-backend/src/codegen_x64.rs | 1236 ++++++++++++++++----- 1 file changed, 943 insertions(+), 293 deletions(-) diff --git a/lib/singlepass-backend/src/codegen_x64.rs b/lib/singlepass-backend/src/codegen_x64.rs index 5903508fca1..1869b193509 100644 --- a/lib/singlepass-backend/src/codegen_x64.rs +++ b/lib/singlepass-backend/src/codegen_x64.rs @@ -40,7 +40,7 @@ use wasmer_runtime_core::{ typed_func::{Trampoline, Wasm}, types::{ FuncIndex, FuncSig, GlobalIndex, LocalFuncIndex, LocalOrImport, MemoryIndex, SigIndex, - TableIndex, Type, + TableIndex, Type, ImportedGlobalIndex, LocalGlobalIndex, }, vm::{self, LocalGlobal, LocalTable, INTERNALS_SIZE}, wasmparser::{MemoryImmediate, Operator, Type as WpType, TypeOrFuncType as WpTypeOrFuncType}, @@ -220,8 +220,12 @@ pub struct X64FunctionCode { returns: SmallVec<[WpType; 1]>, locals: Vec, num_params: usize, - num_locals: usize, + local_types: Vec, value_stack: Vec, + + /// Metadata about floating point values on the stack. + fp_stack: Vec, + control_stack: Vec, machine: Machine, unreachable_depth: usize, @@ -231,6 +235,92 @@ pub struct X64FunctionCode { exception_table: Option, } +/// Metadata about a floating-point value. +#[derive(Copy, Clone, Debug)] +struct FloatValue { + /// Do we need to canonicalize the value before its bit pattern is observable? If so, how? + canonicalization: Option, + + /// Corresponding depth in the main value stack. 
+ depth: usize, +} + +impl FloatValue { + fn new(depth: usize) -> Self { + FloatValue { canonicalization: None, depth } + } + + fn cncl_f32(depth: usize) -> Self { + FloatValue { canonicalization: Some(CanonicalizeType::F32), depth } + } + + fn cncl_f64(depth: usize) -> Self { + FloatValue { canonicalization: Some(CanonicalizeType::F64), depth } + } + + fn promote(self, depth: usize) -> FloatValue { + FloatValue { + canonicalization: match self.canonicalization { + Some(_) => Some(CanonicalizeType::F64), + None => None, + }, + depth, + } + } + + fn demote(self, depth: usize) -> FloatValue { + FloatValue { + canonicalization: match self.canonicalization { + Some(_) => Some(CanonicalizeType::F32), + None => None, + }, + depth, + } + } +} + +#[derive(Copy, Clone, Debug)] +enum CanonicalizeType { + F32, + F64, +} + +trait PopMany { + fn peek1(&self) -> Result<&T, CodegenError>; + fn pop1(&mut self) -> Result; + fn pop2(&mut self) -> Result<(T, T), CodegenError>; +} + +impl PopMany for Vec { + fn peek1(&self) -> Result<&T, CodegenError> { + match self.last() { + Some(x) => Ok(x), + None => Err(CodegenError { + message: "peek1() expects at least 1 element".into(), + }), + } + } + fn pop1(&mut self) -> Result { + match self.pop() { + Some(x) => Ok(x), + None => Err(CodegenError { + message: "pop1() expects at least 1 element".into(), + }), + } + } + fn pop2(&mut self) -> Result<(T, T), CodegenError> { + if self.len() < 2 { + return Err(CodegenError { + message: "pop2() expects at least 2 elements".into(), + }); + } + + let right = self.pop().unwrap(); + let left = self.pop().unwrap(); + Ok((left, right)) + } +} + enum FuncPtrInner {} #[repr(transparent)] #[derive(Copy, Clone, Debug)] @@ -282,6 +372,7 @@ pub struct ControlFrame { pub if_else: IfElseState, pub returns: SmallVec<[WpType; 1]>, pub value_stack_depth: usize, + pub fp_stack_depth: usize, pub state: MachineState, pub state_diff_id: usize, } @@ -738,9 +829,10 @@ impl ModuleCodeGenerator breakpoints: 
Some(breakpoints), returns: smallvec![], locals: vec![], + local_types: vec![], num_params: 0, - num_locals: 0, value_stack: vec![], + fp_stack: vec![], control_stack: vec![], machine, unreachable_depth: 0, @@ -1103,6 +1195,67 @@ impl X64FunctionCode { ret } + /// Canonicalizes the floating point value at `input` into `output`. + fn canonicalize_nan( + a: &mut Assembler, + m: &mut Machine, + sz: Size, + input: Location, + output: Location, + ) { + let tmp1 = m.acquire_temp_xmm().unwrap(); + let tmp2 = m.acquire_temp_xmm().unwrap(); + let tmp3 = m.acquire_temp_xmm().unwrap(); + let tmpg1 = m.acquire_temp_gpr().unwrap(); + + Self::emit_relaxed_binop( + a, + m, + Assembler::emit_mov, + sz, + input, + Location::XMM(tmp1), + ); + + match sz { + Size::S32 => { + a.emit_vcmpunordss(tmp1, XMMOrMemory::XMM(tmp1), tmp2); + a.emit_mov( + Size::S32, + Location::Imm32(0x7FC0_0000), // Canonical NaN + Location::GPR(tmpg1), + ); + a.emit_mov(Size::S64, Location::GPR(tmpg1), Location::XMM(tmp3)); + a.emit_vblendvps(tmp2, XMMOrMemory::XMM(tmp3), tmp1, tmp1); + }, + Size::S64 => { + a.emit_vcmpunordsd(tmp1, XMMOrMemory::XMM(tmp1), tmp2); + a.emit_mov( + Size::S64, + Location::Imm64(0x7FF8_0000_0000_0000), // Canonical NaN + Location::GPR(tmpg1), + ); + a.emit_mov(Size::S64, Location::GPR(tmpg1), Location::XMM(tmp3)); + a.emit_vblendvpd(tmp2, XMMOrMemory::XMM(tmp3), tmp1, tmp1); + }, + _ => unreachable!(), + } + + Self::emit_relaxed_binop( + a, + m, + Assembler::emit_mov, + sz, + Location::XMM(tmp1), + output, + ); + + m.release_temp_gpr(tmpg1); + m.release_temp_xmm(tmp3); + m.release_temp_xmm(tmp2); + m.release_temp_xmm(tmp1); + } + /// Moves `loc` to a valid location for `div`/`idiv`. 
fn emit_relaxed_xdiv( a: &mut Assembler, @@ -2506,14 +2659,16 @@ impl FunctionCodeGenerator for X64FunctionCode { Ok(()) } - fn feed_param(&mut self, _ty: WpType) -> Result<(), CodegenError> { + fn feed_param(&mut self, ty: WpType) -> Result<(), CodegenError> { self.num_params += 1; - self.num_locals += 1; + self.local_types.push(ty); Ok(()) } - fn feed_local(&mut self, _ty: WpType, n: usize, _loc: u32) -> Result<(), CodegenError> { - self.num_locals += n; + fn feed_local(&mut self, ty: WpType, n: usize, _loc: u32) -> Result<(), CodegenError> { + for _ in 0..n { + self.local_types.push(ty); + } Ok(()) } @@ -2550,7 +2705,7 @@ impl FunctionCodeGenerator for X64FunctionCode { self.locals = self .machine - .init_locals(a, self.num_locals, self.num_params); + .init_locals(a, self.local_types.len(), self.num_params); self.machine.state.register_values [X64Register::GPR(Machine::get_vmctx_reg()).to_index().0] = MachineValue::Vmctx; @@ -2578,6 +2733,7 @@ impl FunctionCodeGenerator for X64FunctionCode { if_else: IfElseState::None, returns: self.returns.clone(), value_stack_depth: 0, + fp_stack_depth: 0, state: self.machine.state.clone(), state_diff_id, }); @@ -2797,10 +2953,17 @@ impl FunctionCodeGenerator for X64FunctionCode { Location::Memory(tmp, (local_index.index() as i32) * 8), Location::GPR(tmp), ); + let ty = type_to_wp_type(module_info.globals[local_index].desc.ty); + match ty { + WpType::F32 | WpType::F64 => { + self.fp_stack.push(FloatValue::new(self.value_stack.len() - 1)); + } + _ => {} + } self.machine.acquire_locations( a, &[( - type_to_wp_type(module_info.globals[local_index].desc.ty), + ty, MachineValue::WasmStack(self.value_stack.len()), )], false, @@ -2820,10 +2983,17 @@ impl FunctionCodeGenerator for X64FunctionCode { Location::Memory(tmp, (import_index.index() as i32) * 8), Location::GPR(tmp), ); + let ty = type_to_wp_type(module_info.imported_globals[import_index].1.ty); + match ty { + WpType::F32 | WpType::F64 => { + 
self.fp_stack.push(FloatValue::new(self.value_stack.len() - 1)); + } + _ => {} + } self.machine.acquire_locations( a, &[( - type_to_wp_type(module_info.imported_globals[import_index].1.ty), + ty, MachineValue::WasmStack(self.value_stack.len()), )], false, @@ -2850,7 +3020,7 @@ impl FunctionCodeGenerator for X64FunctionCode { let tmp = self.machine.acquire_temp_gpr().unwrap(); - if global_index < module_info.imported_globals.len() { + let ty = if global_index < module_info.imported_globals.len() { a.emit_mov( Size::S64, Location::Memory( @@ -2859,6 +3029,7 @@ impl FunctionCodeGenerator for X64FunctionCode { ), Location::GPR(tmp), ); + type_to_wp_type(module_info.imported_globals[ImportedGlobalIndex::new(global_index)].1.ty) } else { global_index -= module_info.imported_globals.len(); if global_index >= module_info.globals.len() { @@ -2874,21 +3045,44 @@ impl FunctionCodeGenerator for X64FunctionCode { ), Location::GPR(tmp), ); - } + type_to_wp_type(module_info.globals[LocalGlobalIndex::new(global_index)].desc.ty) + }; a.emit_mov( Size::S64, Location::Memory(tmp, (global_index as i32) * 8), Location::GPR(tmp), ); - Self::emit_relaxed_binop( - a, - &mut self.machine, - Assembler::emit_mov, - Size::S64, - loc, - Location::Memory(tmp, LocalGlobal::offset_data() as i32), - ); - + match ty { + WpType::F32 | WpType::F64 => { + let fp = self.fp_stack.pop1()?; + if a.arch_supports_canonicalize_nan() && fp.canonicalization.is_some() { + Self::canonicalize_nan(a, &mut self.machine, match ty { + WpType::F32 => Size::S32, + WpType::F64 => Size::S64, + _ => unreachable!(), + }, loc, Location::Memory(tmp, LocalGlobal::offset_data() as i32)); + } else { + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + Location::Memory(tmp, LocalGlobal::offset_data() as i32), + ); + } + } + _ => { + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + Location::Memory(tmp, LocalGlobal::offset_data() as 
i32), + ); + } + } self.machine.release_temp_gpr(tmp); } Operator::LocalGet { local_index } => { @@ -2907,33 +3101,87 @@ impl FunctionCodeGenerator for X64FunctionCode { ret, ); self.value_stack.push(ret); + match self.local_types[local_index] { + WpType::F32 | WpType::F64 => { + self.fp_stack.push(FloatValue::new(self.value_stack.len() - 1)); + } + _ => {} + } } Operator::LocalSet { local_index } => { let local_index = local_index as usize; let loc = get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); - Self::emit_relaxed_binop( - a, - &mut self.machine, - Assembler::emit_mov, - Size::S64, - loc, - self.locals[local_index], - ); + match self.local_types[local_index] { + WpType::F32 | WpType::F64 => { + let fp = self.fp_stack.pop1()?; + if a.arch_supports_canonicalize_nan() && fp.canonicalization.is_some() { + Self::canonicalize_nan(a, &mut self.machine, match self.local_types[local_index] { + WpType::F32 => Size::S32, + WpType::F64 => Size::S64, + _ => unreachable!(), + }, loc, self.locals[local_index]); + } else { + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + self.locals[local_index], + ); + } + } + _ => { + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + self.locals[local_index], + ); + } + } + + } Operator::LocalTee { local_index } => { let local_index = local_index as usize; let loc = *self.value_stack.last().unwrap(); - Self::emit_relaxed_binop( - a, - &mut self.machine, - Assembler::emit_mov, - Size::S64, - loc, - self.locals[local_index], - ); + match self.local_types[local_index] { + WpType::F32 | WpType::F64 => { + let fp = self.fp_stack.peek1()?; + if a.arch_supports_canonicalize_nan() && fp.canonicalization.is_some() { + Self::canonicalize_nan(a, &mut self.machine, match self.local_types[local_index] { + WpType::F32 => Size::S32, + WpType::F64 => Size::S64, + _ => unreachable!(), + }, loc, self.locals[local_index]); + } else { 
+ Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + self.locals[local_index], + ); + } + } + _ => { + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + self.locals[local_index], + ); + } + } } Operator::I32Const { value } => { self.value_stack.push(Location::Imm32(value as u32)); @@ -3905,36 +4153,55 @@ impl FunctionCodeGenerator for X64FunctionCode { Operator::F32Const { value } => { self.value_stack.push(Location::Imm32(value.bits())); + self.fp_stack.push(FloatValue::new(self.value_stack.len() - 1)); self.machine .state .wasm_stack .push(WasmAbstractValue::Const(value.bits() as u64)); } - Operator::F32Add => Self::emit_fp_binop_avx( - a, - &mut self.machine, - &mut self.value_stack, - Assembler::emit_vaddss, - )?, - Operator::F32Sub => Self::emit_fp_binop_avx( - a, - &mut self.machine, - &mut self.value_stack, - Assembler::emit_vsubss, - )?, - Operator::F32Mul => Self::emit_fp_binop_avx( - a, - &mut self.machine, - &mut self.value_stack, - Assembler::emit_vmulss, - )?, - Operator::F32Div => Self::emit_fp_binop_avx( - a, - &mut self.machine, - &mut self.value_stack, - Assembler::emit_vdivss, - )?, + Operator::F32Add => { + self.fp_stack.pop2()?; + self.fp_stack.push(FloatValue::cncl_f32(self.value_stack.len() - 2)); + Self::emit_fp_binop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vaddss, + )?; + }, + Operator::F32Sub => { + self.fp_stack.pop2()?; + self.fp_stack.push(FloatValue::cncl_f32(self.value_stack.len() - 2)); + Self::emit_fp_binop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vsubss, + )? + }, + Operator::F32Mul => { + self.fp_stack.pop2()?; + self.fp_stack.push(FloatValue::cncl_f32(self.value_stack.len() - 2)); + Self::emit_fp_binop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vmulss, + )? 
+ }, + Operator::F32Div => { + self.fp_stack.pop2()?; + self.fp_stack.push(FloatValue::cncl_f32(self.value_stack.len() - 2)); + Self::emit_fp_binop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vdivss, + )? + }, Operator::F32Max => { + self.fp_stack.pop2()?; + self.fp_stack.push(FloatValue::new(self.value_stack.len() - 2)); if !a.arch_supports_canonicalize_nan() { Self::emit_fp_binop_avx( a, @@ -4058,6 +4325,8 @@ impl FunctionCodeGenerator for X64FunctionCode { } } Operator::F32Min => { + self.fp_stack.pop2()?; + self.fp_stack.push(FloatValue::new(self.value_stack.len() - 2)); if !a.arch_supports_canonicalize_nan() { Self::emit_fp_binop_avx( a, @@ -4186,74 +4455,114 @@ impl FunctionCodeGenerator for X64FunctionCode { self.machine.release_temp_xmm(tmp1); } } - Operator::F32Eq => Self::emit_fp_cmpop_avx( - a, - &mut self.machine, - &mut self.value_stack, - Assembler::emit_vcmpeqss, - )?, - Operator::F32Ne => Self::emit_fp_cmpop_avx( - a, - &mut self.machine, - &mut self.value_stack, - Assembler::emit_vcmpneqss, - )?, - Operator::F32Lt => Self::emit_fp_cmpop_avx( - a, - &mut self.machine, - &mut self.value_stack, - Assembler::emit_vcmpltss, - )?, - Operator::F32Le => Self::emit_fp_cmpop_avx( - a, - &mut self.machine, - &mut self.value_stack, - Assembler::emit_vcmpless, - )?, - Operator::F32Gt => Self::emit_fp_cmpop_avx( - a, - &mut self.machine, - &mut self.value_stack, - Assembler::emit_vcmpgtss, - )?, - Operator::F32Ge => Self::emit_fp_cmpop_avx( - a, - &mut self.machine, - &mut self.value_stack, - Assembler::emit_vcmpgess, - )?, - Operator::F32Nearest => Self::emit_fp_unop_avx( - a, - &mut self.machine, - &mut self.value_stack, - Assembler::emit_vroundss_nearest, - )?, - Operator::F32Floor => Self::emit_fp_unop_avx( - a, - &mut self.machine, - &mut self.value_stack, - Assembler::emit_vroundss_floor, - )?, - Operator::F32Ceil => Self::emit_fp_unop_avx( - a, - &mut self.machine, - &mut self.value_stack, - Assembler::emit_vroundss_ceil, - 
)?, - Operator::F32Trunc => Self::emit_fp_unop_avx( - a, - &mut self.machine, - &mut self.value_stack, - Assembler::emit_vroundss_trunc, - )?, - Operator::F32Sqrt => Self::emit_fp_unop_avx( - a, - &mut self.machine, - &mut self.value_stack, - Assembler::emit_vsqrtss, - )?, + Operator::F32Eq => { + self.fp_stack.pop2()?; + Self::emit_fp_cmpop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vcmpeqss, + )? + }, + Operator::F32Ne => { + self.fp_stack.pop2()?; + Self::emit_fp_cmpop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vcmpneqss, + )? + }, + Operator::F32Lt => { + self.fp_stack.pop2()?; + Self::emit_fp_cmpop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vcmpltss, + )? + }, + Operator::F32Le => { + self.fp_stack.pop2()?; + Self::emit_fp_cmpop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vcmpless, + )? + }, + Operator::F32Gt => { + self.fp_stack.pop2()?; + Self::emit_fp_cmpop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vcmpgtss, + )? + }, + Operator::F32Ge => { + self.fp_stack.pop2()?; + Self::emit_fp_cmpop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vcmpgess, + )? + }, + Operator::F32Nearest => { + self.fp_stack.pop1()?; + self.fp_stack.push(FloatValue::cncl_f32(self.value_stack.len() - 1)); + Self::emit_fp_unop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vroundss_nearest, + )? + }, + Operator::F32Floor => { + self.fp_stack.pop1()?; + self.fp_stack.push(FloatValue::cncl_f32(self.value_stack.len() - 1)); + Self::emit_fp_unop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vroundss_floor, + )? + }, + Operator::F32Ceil => { + self.fp_stack.pop1()?; + self.fp_stack.push(FloatValue::cncl_f32(self.value_stack.len() - 1)); + Self::emit_fp_unop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vroundss_ceil, + )? 
+ }, + Operator::F32Trunc => { + self.fp_stack.pop1()?; + self.fp_stack.push(FloatValue::cncl_f32(self.value_stack.len() - 1)); + Self::emit_fp_unop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vroundss_trunc, + )? + }, + Operator::F32Sqrt => { + self.fp_stack.pop1()?; + self.fp_stack.push(FloatValue::cncl_f32(self.value_stack.len() - 1)); + Self::emit_fp_unop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vsqrtss, + )? + }, Operator::F32Copysign => { + // Preserve canonicalization state. + let loc_b = get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); let loc_a = @@ -4286,6 +4595,8 @@ impl FunctionCodeGenerator for X64FunctionCode { } Operator::F32Abs => { + // Preserve canonicalization state. + let loc = get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); let ret = self.machine.acquire_locations( @@ -4306,6 +4617,8 @@ impl FunctionCodeGenerator for X64FunctionCode { } Operator::F32Neg => { + // Preserve canonicalization state. 
+ let loc = get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); let ret = self.machine.acquire_locations( @@ -4346,36 +4659,56 @@ impl FunctionCodeGenerator for X64FunctionCode { Operator::F64Const { value } => { self.value_stack.push(Location::Imm64(value.bits())); + self.fp_stack.push(FloatValue::new(self.value_stack.len() - 1)); self.machine .state .wasm_stack .push(WasmAbstractValue::Const(value.bits())); } - Operator::F64Add => Self::emit_fp_binop_avx( - a, - &mut self.machine, - &mut self.value_stack, - Assembler::emit_vaddsd, - )?, - Operator::F64Sub => Self::emit_fp_binop_avx( - a, - &mut self.machine, - &mut self.value_stack, - Assembler::emit_vsubsd, - )?, - Operator::F64Mul => Self::emit_fp_binop_avx( - a, - &mut self.machine, - &mut self.value_stack, - Assembler::emit_vmulsd, - )?, - Operator::F64Div => Self::emit_fp_binop_avx( - a, - &mut self.machine, - &mut self.value_stack, - Assembler::emit_vdivsd, - )?, + Operator::F64Add => { + self.fp_stack.pop2()?; + self.fp_stack.push(FloatValue::cncl_f64(self.value_stack.len() - 2)); + Self::emit_fp_binop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vaddsd, + )? + }, + Operator::F64Sub => { + self.fp_stack.pop2()?; + self.fp_stack.push(FloatValue::cncl_f64(self.value_stack.len() - 2)); + Self::emit_fp_binop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vsubsd, + )? + }, + Operator::F64Mul => { + self.fp_stack.pop2()?; + self.fp_stack.push(FloatValue::cncl_f64(self.value_stack.len() - 2)); + Self::emit_fp_binop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vmulsd, + )? + }, + Operator::F64Div => { + self.fp_stack.pop2()?; + self.fp_stack.push(FloatValue::cncl_f64(self.value_stack.len() - 2)); + Self::emit_fp_binop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vdivsd, + )? 
+ }, Operator::F64Max => { + self.fp_stack.pop2()?; + self.fp_stack.push(FloatValue::new(self.value_stack.len() - 2)); + if !a.arch_supports_canonicalize_nan() { Self::emit_fp_binop_avx( a, @@ -4499,6 +4832,9 @@ impl FunctionCodeGenerator for X64FunctionCode { } } Operator::F64Min => { + self.fp_stack.pop2()?; + self.fp_stack.push(FloatValue::new(self.value_stack.len() - 2)); + if !a.arch_supports_canonicalize_nan() { Self::emit_fp_binop_avx( a, @@ -4627,74 +4963,114 @@ impl FunctionCodeGenerator for X64FunctionCode { self.machine.release_temp_xmm(tmp1); } } - Operator::F64Eq => Self::emit_fp_cmpop_avx( - a, - &mut self.machine, - &mut self.value_stack, - Assembler::emit_vcmpeqsd, - )?, - Operator::F64Ne => Self::emit_fp_cmpop_avx( - a, - &mut self.machine, - &mut self.value_stack, - Assembler::emit_vcmpneqsd, - )?, - Operator::F64Lt => Self::emit_fp_cmpop_avx( - a, - &mut self.machine, - &mut self.value_stack, - Assembler::emit_vcmpltsd, - )?, - Operator::F64Le => Self::emit_fp_cmpop_avx( - a, - &mut self.machine, - &mut self.value_stack, - Assembler::emit_vcmplesd, - )?, - Operator::F64Gt => Self::emit_fp_cmpop_avx( - a, - &mut self.machine, - &mut self.value_stack, - Assembler::emit_vcmpgtsd, - )?, - Operator::F64Ge => Self::emit_fp_cmpop_avx( - a, - &mut self.machine, - &mut self.value_stack, - Assembler::emit_vcmpgesd, - )?, - Operator::F64Nearest => Self::emit_fp_unop_avx( - a, - &mut self.machine, - &mut self.value_stack, - Assembler::emit_vroundsd_nearest, - )?, - Operator::F64Floor => Self::emit_fp_unop_avx( - a, - &mut self.machine, - &mut self.value_stack, - Assembler::emit_vroundsd_floor, - )?, - Operator::F64Ceil => Self::emit_fp_unop_avx( - a, - &mut self.machine, - &mut self.value_stack, - Assembler::emit_vroundsd_ceil, - )?, - Operator::F64Trunc => Self::emit_fp_unop_avx( - a, - &mut self.machine, - &mut self.value_stack, - Assembler::emit_vroundsd_trunc, - )?, - Operator::F64Sqrt => Self::emit_fp_unop_avx( - a, - &mut self.machine, - &mut 
self.value_stack, - Assembler::emit_vsqrtsd, - )?, + Operator::F64Eq => { + self.fp_stack.pop2()?; + Self::emit_fp_cmpop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vcmpeqsd, + )? + }, + Operator::F64Ne => { + self.fp_stack.pop2()?; + Self::emit_fp_cmpop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vcmpneqsd, + )? + }, + Operator::F64Lt => { + self.fp_stack.pop2()?; + Self::emit_fp_cmpop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vcmpltsd, + )? + }, + Operator::F64Le => { + self.fp_stack.pop2()?; + Self::emit_fp_cmpop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vcmplesd, + )? + }, + Operator::F64Gt => { + self.fp_stack.pop2()?; + Self::emit_fp_cmpop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vcmpgtsd, + )? + }, + Operator::F64Ge => { + self.fp_stack.pop2()?; + Self::emit_fp_cmpop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vcmpgesd, + )? + }, + Operator::F64Nearest => { + self.fp_stack.pop1()?; + self.fp_stack.push(FloatValue::cncl_f64(self.value_stack.len() - 1)); + Self::emit_fp_unop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vroundsd_nearest, + )? + }, + Operator::F64Floor => { + self.fp_stack.pop1()?; + self.fp_stack.push(FloatValue::cncl_f64(self.value_stack.len() - 1)); + Self::emit_fp_unop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vroundsd_floor, + )? + }, + Operator::F64Ceil => { + self.fp_stack.pop1()?; + self.fp_stack.push(FloatValue::cncl_f64(self.value_stack.len() - 1)); + Self::emit_fp_unop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vroundsd_ceil, + )? 
+ }, + Operator::F64Trunc => { + self.fp_stack.pop1()?; + self.fp_stack.push(FloatValue::cncl_f64(self.value_stack.len() - 1)); + Self::emit_fp_unop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vroundsd_trunc, + )? + }, + Operator::F64Sqrt => { + self.fp_stack.pop1()?; + self.fp_stack.push(FloatValue::cncl_f64(self.value_stack.len() - 1)); + Self::emit_fp_unop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vsqrtsd, + )? + }, Operator::F64Copysign => { + // Preserve canonicalization state. + let loc_b = get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); let loc_a = @@ -4736,6 +5112,8 @@ impl FunctionCodeGenerator for X64FunctionCode { } Operator::F64Abs => { + // Preserve canonicalization state. + let loc = get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); let ret = self.machine.acquire_locations( @@ -4762,6 +5140,8 @@ impl FunctionCodeGenerator for X64FunctionCode { } Operator::F64Neg => { + // Preserve canonicalization state. + let loc = get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); let ret = self.machine.acquire_locations( @@ -4799,18 +5179,26 @@ impl FunctionCodeGenerator for X64FunctionCode { } } - Operator::F64PromoteF32 => Self::emit_fp_unop_avx( - a, - &mut self.machine, - &mut self.value_stack, - Assembler::emit_vcvtss2sd, - )?, - Operator::F32DemoteF64 => Self::emit_fp_unop_avx( - a, - &mut self.machine, - &mut self.value_stack, - Assembler::emit_vcvtsd2ss, - )?, + Operator::F64PromoteF32 => { + let fp = self.fp_stack.pop1()?; + self.fp_stack.push(fp.promote(self.value_stack.len() - 1)); + Self::emit_fp_unop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vcvtss2sd, + )? 
+ }, + Operator::F32DemoteF64 => { + let fp = self.fp_stack.pop1()?; + self.fp_stack.push(fp.demote(self.value_stack.len() - 1)); + Self::emit_fp_unop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vcvtsd2ss, + )? + }, Operator::I32ReinterpretF32 => { let loc = @@ -4821,16 +5209,21 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); + let fp = self.fp_stack.pop1()?; - if loc != ret { - Self::emit_relaxed_binop( - a, - &mut self.machine, - Assembler::emit_mov, - Size::S32, - loc, - ret, - ); + if !a.arch_supports_canonicalize_nan() || fp.canonicalization.is_none() { + if loc != ret { + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S32, + loc, + ret, + ); + } + } else { + Self::canonicalize_nan(a, &mut self.machine, Size::S32, loc, ret); } } Operator::F32ReinterpretI32 => { @@ -4842,6 +5235,7 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); + self.fp_stack.push(FloatValue::new(self.value_stack.len() - 1)); if loc != ret { Self::emit_relaxed_binop( @@ -4864,16 +5258,21 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); + let fp = self.fp_stack.pop1()?; - if loc != ret { - Self::emit_relaxed_binop( - a, - &mut self.machine, - Assembler::emit_mov, - Size::S64, - loc, - ret, - ); + if !a.arch_supports_canonicalize_nan() || fp.canonicalization.is_none() { + if loc != ret { + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + ret, + ); + } + } else { + Self::canonicalize_nan(a, &mut self.machine, Size::S64, loc, ret); } } Operator::F64ReinterpretI64 => { @@ -4885,6 +5284,7 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); + self.fp_stack.push(FloatValue::new(self.value_stack.len() - 1)); if loc != ret { Self::emit_relaxed_binop( @@ -4907,6 +5307,7 @@ impl FunctionCodeGenerator for X64FunctionCode { 
false, )[0]; self.value_stack.push(ret); + self.fp_stack.pop1()?; if a.arch_has_itruncf() { let tmp_out = self.machine.acquire_temp_gpr().unwrap(); @@ -4967,6 +5368,7 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); + self.fp_stack.pop1()?; let tmp_out = self.machine.acquire_temp_gpr().unwrap(); let tmp_in = self.machine.acquire_temp_xmm().unwrap(); @@ -5018,6 +5420,7 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); + self.fp_stack.pop1()?; if a.arch_has_itruncf() { let tmp_out = self.machine.acquire_temp_gpr().unwrap(); @@ -5078,6 +5481,7 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); + self.fp_stack.pop1()?; let tmp_out = self.machine.acquire_temp_gpr().unwrap(); let tmp_in = self.machine.acquire_temp_xmm().unwrap(); @@ -5136,6 +5540,7 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); + self.fp_stack.pop1()?; if a.arch_has_itruncf() { let tmp_out = self.machine.acquire_temp_gpr().unwrap(); @@ -5196,6 +5601,7 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); + self.fp_stack.pop1()?; let tmp_out = self.machine.acquire_temp_gpr().unwrap(); let tmp_in = self.machine.acquire_temp_xmm().unwrap(); @@ -5254,6 +5660,7 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); + self.fp_stack.pop1()?; if a.arch_has_itruncf() { let tmp_out = self.machine.acquire_temp_gpr().unwrap(); @@ -5338,6 +5745,7 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); + self.fp_stack.pop1()?; let tmp_out = self.machine.acquire_temp_gpr().unwrap(); let tmp_in = self.machine.acquire_temp_xmm().unwrap(); @@ -5415,6 +5823,7 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); + self.fp_stack.pop1()?; if a.arch_has_itruncf() { let tmp_out = 
self.machine.acquire_temp_gpr().unwrap(); @@ -5476,6 +5885,7 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); + self.fp_stack.pop1()?; let tmp_out = self.machine.acquire_temp_gpr().unwrap(); let tmp_in = self.machine.acquire_temp_xmm().unwrap(); @@ -5528,6 +5938,7 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); + self.fp_stack.pop1()?; if a.arch_has_itruncf() { let tmp_out = self.machine.acquire_temp_gpr().unwrap(); @@ -5594,6 +6005,7 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); + self.fp_stack.pop1()?; let tmp_out = self.machine.acquire_temp_gpr().unwrap(); let tmp_in = self.machine.acquire_temp_xmm().unwrap(); @@ -5657,6 +6069,7 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); + self.fp_stack.pop1()?; if a.arch_has_itruncf() { let tmp_out = self.machine.acquire_temp_gpr().unwrap(); @@ -5718,6 +6131,7 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); + self.fp_stack.pop1()?; let tmp_out = self.machine.acquire_temp_gpr().unwrap(); let tmp_in = self.machine.acquire_temp_xmm().unwrap(); @@ -5776,6 +6190,7 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); + self.fp_stack.pop1()?; if a.arch_has_itruncf() { let tmp_out = self.machine.acquire_temp_gpr().unwrap(); @@ -5861,6 +6276,7 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); + self.fp_stack.pop1()?; let tmp_out = self.machine.acquire_temp_gpr().unwrap(); let tmp_in = self.machine.acquire_temp_xmm().unwrap(); @@ -5938,6 +6354,7 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); + self.fp_stack.push(FloatValue::new(self.value_stack.len() - 1)); // Converting i32 to f32 never results in NaN. 
if a.arch_has_fconverti() { let tmp_out = self.machine.acquire_temp_xmm().unwrap(); @@ -5982,6 +6399,8 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); + self.fp_stack.push(FloatValue::new(self.value_stack.len() - 1)); // Converting i32 to f32 never results in NaN. + if a.arch_has_fconverti() { let tmp_out = self.machine.acquire_temp_xmm().unwrap(); let tmp_in = self.machine.acquire_temp_gpr().unwrap(); @@ -6025,6 +6444,8 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); + self.fp_stack.push(FloatValue::new(self.value_stack.len() - 1)); // Converting i64 to f32 never results in NaN. + if a.arch_has_fconverti() { let tmp_out = self.machine.acquire_temp_xmm().unwrap(); let tmp_in = self.machine.acquire_temp_gpr().unwrap(); @@ -6068,6 +6489,8 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); + self.fp_stack.push(FloatValue::new(self.value_stack.len() - 1)); // Converting i64 to f32 never results in NaN. + if a.arch_has_fconverti() { let tmp_out = self.machine.acquire_temp_xmm().unwrap(); let tmp_in = self.machine.acquire_temp_gpr().unwrap(); @@ -6128,6 +6551,7 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); + self.fp_stack.push(FloatValue::new(self.value_stack.len() - 1)); // Converting i32 to f64 never results in NaN. if a.arch_has_fconverti() { let tmp_out = self.machine.acquire_temp_xmm().unwrap(); @@ -6172,6 +6596,7 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); + self.fp_stack.push(FloatValue::new(self.value_stack.len() - 1)); // Converting i32 to f64 never results in NaN. 
if a.arch_has_fconverti() { let tmp_out = self.machine.acquire_temp_xmm().unwrap(); @@ -6216,6 +6641,7 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); + self.fp_stack.push(FloatValue::new(self.value_stack.len() - 1)); // Converting i64 to f64 never results in NaN. if a.arch_has_fconverti() { let tmp_out = self.machine.acquire_temp_xmm().unwrap(); @@ -6260,6 +6686,7 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); + self.fp_stack.push(FloatValue::new(self.value_stack.len() - 1)); // Converting i64 to f64 never results in NaN. if a.arch_has_fconverti() { let tmp_out = self.machine.acquire_temp_xmm().unwrap(); @@ -6339,6 +6766,14 @@ impl FunctionCodeGenerator for X64FunctionCode { self.machine.release_locations_only_osr_state(params.len()); + while let Some(x) = self.fp_stack.last() { + if x.depth >= self.value_stack.len() { + self.fp_stack.pop().unwrap(); + } else { + break; + } + } + Self::emit_call_sysv_label( a, &mut self.machine, @@ -6362,6 +6797,7 @@ impl FunctionCodeGenerator for X64FunctionCode { match return_types[0] { WpType::F32 | WpType::F64 => { a.emit_mov(Size::S64, Location::XMM(XMM::XMM0), ret); + self.fp_stack.push(FloatValue::new(self.value_stack.len() - 1)); } _ => { a.emit_mov(Size::S64, Location::GPR(GPR::RAX), ret); @@ -6390,6 +6826,14 @@ impl FunctionCodeGenerator for X64FunctionCode { .collect(); self.machine.release_locations_only_regs(¶ms); + while let Some(x) = self.fp_stack.last() { + if x.depth >= self.value_stack.len() { + self.fp_stack.pop().unwrap(); + } else { + break; + } + } + let table_base = self.machine.acquire_temp_gpr().unwrap(); let table_count = self.machine.acquire_temp_gpr().unwrap(); let sigidx = self.machine.acquire_temp_gpr().unwrap(); @@ -6508,6 +6952,7 @@ impl FunctionCodeGenerator for X64FunctionCode { match return_types[0] { WpType::F32 | WpType::F64 => { a.emit_mov(Size::S64, Location::XMM(XMM::XMM0), ret); + 
self.fp_stack.push(FloatValue::new(self.value_stack.len() - 1)); } _ => { a.emit_mov(Size::S64, Location::GPR(GPR::RAX), ret); @@ -6536,6 +6981,7 @@ impl FunctionCodeGenerator for X64FunctionCode { } }, value_stack_depth: self.value_stack.len(), + fp_stack_depth: self.fp_stack.len(), state: self.machine.state.clone(), state_diff_id: Self::get_state_diff( &self.machine, @@ -6559,19 +7005,43 @@ impl FunctionCodeGenerator for X64FunctionCode { if !was_unreachable && frame.returns.len() > 0 { let loc = *self.value_stack.last().unwrap(); - Self::emit_relaxed_binop( - a, - &mut self.machine, - Assembler::emit_mov, - Size::S64, - loc, - Location::GPR(GPR::RAX), - ); + match frame.returns[0] { + WpType::F32 | WpType::F64 => { + let fp = self.fp_stack.peek1()?; + if a.arch_supports_canonicalize_nan() && fp.canonicalization.is_some() { + Self::canonicalize_nan(a, &mut self.machine, match frame.returns[0] { + WpType::F32 => Size::S32, + WpType::F64 => Size::S64, + _ => unreachable!(), + }, loc, Location::GPR(GPR::RAX)); + } else { + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + Location::GPR(GPR::RAX), + ); + } + } + _ => { + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + Location::GPR(GPR::RAX), + ); + } + } } let released: &[Location] = &self.value_stack[frame.value_stack_depth..]; self.machine.release_locations(a, released); self.value_stack.truncate(frame.value_stack_depth); + self.fp_stack.truncate(frame.fp_stack_depth); match frame.if_else { IfElseState::If(label) => { @@ -6593,6 +7063,13 @@ impl FunctionCodeGenerator for X64FunctionCode { get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); let v_a = get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let cncl: Option<(Option, Option)> = if self.fp_stack.len() >= 2 && self.fp_stack[self.fp_stack.len() - 2].depth == self.value_stack.len() && 
self.fp_stack[self.fp_stack.len() - 1].depth == self.value_stack.len() + 1 { + let (left, right) = self.fp_stack.pop2()?; + self.fp_stack.push(FloatValue::new(self.value_stack.len())); + Some((left.canonicalization, right.canonicalization)) + } else { + None + }; let ret = self.machine.acquire_locations( a, &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], @@ -6612,27 +7089,49 @@ impl FunctionCodeGenerator for X64FunctionCode { cond, ); a.emit_jmp(Condition::Equal, zero_label); - if v_a != ret { - Self::emit_relaxed_binop( - a, - &mut self.machine, - Assembler::emit_mov, - Size::S64, - v_a, - ret, - ); + match cncl { + Some((Some(fp), _)) if a.arch_supports_canonicalize_nan() => { + let sz = match fp { + CanonicalizeType::F32 => Size::S32, + CanonicalizeType::F64 => Size::S64, + }; + Self::canonicalize_nan(a, &mut self.machine, sz, v_a, ret); + } + _ => { + if v_a != ret { + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + v_a, + ret, + ); + } + } } a.emit_jmp(Condition::None, end_label); a.emit_label(zero_label); - if v_b != ret { - Self::emit_relaxed_binop( - a, - &mut self.machine, - Assembler::emit_mov, - Size::S64, - v_b, - ret, - ); + match cncl { + Some((_, Some(fp))) if a.arch_supports_canonicalize_nan() => { + let sz = match fp { + CanonicalizeType::F32 => Size::S32, + CanonicalizeType::F64 => Size::S64, + }; + Self::canonicalize_nan(a, &mut self.machine, sz, v_b, ret); + } + _ => { + if v_b != ret { + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + v_b, + ret, + ); + } + } } a.emit_label(end_label); } @@ -6651,6 +7150,7 @@ impl FunctionCodeGenerator for X64FunctionCode { } }, value_stack_depth: self.value_stack.len(), + fp_stack_depth: self.fp_stack.len(), state: self.machine.state.clone(), state_diff_id: Self::get_state_diff( &self.machine, @@ -6680,6 +7180,7 @@ impl FunctionCodeGenerator for X64FunctionCode { } }, value_stack_depth: 
self.value_stack.len(), + fp_stack_depth: self.fp_stack.len(), state: self.machine.state.clone(), state_diff_id, }); @@ -6844,6 +7345,7 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); + self.fp_stack.push(FloatValue::new(self.value_stack.len() - 1)); Self::emit_memory_op( module_info, @@ -7162,6 +7664,7 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); + self.fp_stack.push(FloatValue::new(self.value_stack.len() - 1)); Self::emit_memory_op( module_info, @@ -7568,14 +8071,37 @@ impl FunctionCodeGenerator for X64FunctionCode { }); } let loc = *self.value_stack.last().unwrap(); - Self::emit_relaxed_binop( - a, - &mut self.machine, - Assembler::emit_mov, - Size::S64, - loc, - Location::GPR(GPR::RAX), - ); + match frame.returns[0] { + WpType::F32 | WpType::F64 => { + let fp = self.fp_stack.peek1()?; + if a.arch_supports_canonicalize_nan() && fp.canonicalization.is_some() { + Self::canonicalize_nan(a, &mut self.machine, match frame.returns[0] { + WpType::F32 => Size::S32, + WpType::F64 => Size::S64, + _ => unreachable!(), + }, loc, Location::GPR(GPR::RAX)); + } else { + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + Location::GPR(GPR::RAX), + ); + } + } + _ => { + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + Location::GPR(GPR::RAX), + ); + } + } } let released = &self.value_stack[frame.value_stack_depth..]; self.machine.release_locations_keep_state(a, released); @@ -7592,8 +8118,33 @@ impl FunctionCodeGenerator for X64FunctionCode { }); } let loc = *self.value_stack.last().unwrap(); - a.emit_mov(Size::S64, loc, Location::GPR(GPR::RAX)); + + match frame.returns[0] { + WpType::F32 | WpType::F64 => { + let fp = self.fp_stack.peek1()?; + if a.arch_supports_canonicalize_nan() && fp.canonicalization.is_some() { + Self::canonicalize_nan(a, &mut self.machine, match frame.returns[0] { + 
WpType::F32 => Size::S32, + WpType::F64 => Size::S64, + _ => unreachable!(), + }, loc, Location::GPR(GPR::RAX)); + } else { + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + Location::GPR(GPR::RAX), + ); + } + } + _ => { + a.emit_mov(Size::S64, loc, Location::GPR(GPR::RAX)); + } + } } + let released = &self.value_stack[frame.value_stack_depth..]; self.machine.release_locations_keep_state(a, released); a.emit_jmp(Condition::None, frame.label); @@ -7622,7 +8173,30 @@ impl FunctionCodeGenerator for X64FunctionCode { }); } let loc = *self.value_stack.last().unwrap(); - a.emit_mov(Size::S64, loc, Location::GPR(GPR::RAX)); + match frame.returns[0] { + WpType::F32 | WpType::F64 => { + let fp = self.fp_stack.peek1()?; + if a.arch_supports_canonicalize_nan() && fp.canonicalization.is_some() { + Self::canonicalize_nan(a, &mut self.machine, match frame.returns[0] { + WpType::F32 => Size::S32, + WpType::F64 => Size::S64, + _ => unreachable!(), + }, loc, Location::GPR(GPR::RAX)); + } else { + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + Location::GPR(GPR::RAX), + ); + } + } + _ => { + a.emit_mov(Size::S64, loc, Location::GPR(GPR::RAX)); + } + } } let released = &self.value_stack[frame.value_stack_depth..]; self.machine.release_locations_keep_state(a, released); @@ -7673,7 +8247,30 @@ impl FunctionCodeGenerator for X64FunctionCode { }); } let loc = *self.value_stack.last().unwrap(); - a.emit_mov(Size::S64, loc, Location::GPR(GPR::RAX)); + match frame.returns[0] { + WpType::F32 | WpType::F64 => { + let fp = self.fp_stack.peek1()?; + if a.arch_supports_canonicalize_nan() && fp.canonicalization.is_some() { + Self::canonicalize_nan(a, &mut self.machine, match frame.returns[0] { + WpType::F32 => Size::S32, + WpType::F64 => Size::S64, + _ => unreachable!(), + }, loc, Location::GPR(GPR::RAX)); + } else { + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + 
Size::S64, + loc, + Location::GPR(GPR::RAX), + ); + } + } + _ => { + a.emit_mov(Size::S64, loc, Location::GPR(GPR::RAX)); + } + } } let released = &self.value_stack[frame.value_stack_depth..]; self.machine.release_locations_keep_state(a, released); @@ -7691,7 +8288,30 @@ impl FunctionCodeGenerator for X64FunctionCode { }); } let loc = *self.value_stack.last().unwrap(); - a.emit_mov(Size::S64, loc, Location::GPR(GPR::RAX)); + match frame.returns[0] { + WpType::F32 | WpType::F64 => { + let fp = self.fp_stack.peek1()?; + if a.arch_supports_canonicalize_nan() && fp.canonicalization.is_some() { + Self::canonicalize_nan(a, &mut self.machine, match frame.returns[0] { + WpType::F32 => Size::S32, + WpType::F64 => Size::S64, + _ => unreachable!(), + }, loc, Location::GPR(GPR::RAX)); + } else { + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + Location::GPR(GPR::RAX), + ); + } + } + _ => { + a.emit_mov(Size::S64, loc, Location::GPR(GPR::RAX)); + } + } } let released = &self.value_stack[frame.value_stack_depth..]; self.machine.release_locations_keep_state(a, released); @@ -7712,14 +8332,37 @@ impl FunctionCodeGenerator for X64FunctionCode { if !was_unreachable && frame.returns.len() > 0 { let loc = *self.value_stack.last().unwrap(); - Self::emit_relaxed_binop( - a, - &mut self.machine, - Assembler::emit_mov, - Size::S64, - loc, - Location::GPR(GPR::RAX), - ); + match frame.returns[0] { + WpType::F32 | WpType::F64 => { + let fp = self.fp_stack.peek1()?; + if a.arch_supports_canonicalize_nan() && fp.canonicalization.is_some() { + Self::canonicalize_nan(a, &mut self.machine, match frame.returns[0] { + WpType::F32 => Size::S32, + WpType::F64 => Size::S64, + _ => unreachable!(), + }, loc, Location::GPR(GPR::RAX)); + } else { + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + Location::GPR(GPR::RAX), + ); + } + } + _ => { + Self::emit_relaxed_binop( + a, + &mut self.machine, + 
Assembler::emit_mov, + Size::S64, + loc, + Location::GPR(GPR::RAX), + ); + } + } } if self.control_stack.len() == 0 { @@ -7744,6 +8387,7 @@ impl FunctionCodeGenerator for X64FunctionCode { let released = &self.value_stack[frame.value_stack_depth..]; self.machine.release_locations(a, released); self.value_stack.truncate(frame.value_stack_depth); + self.fp_stack.truncate(frame.fp_stack_depth); if !frame.loop_like { a.emit_label(frame.label); @@ -7769,6 +8413,12 @@ impl FunctionCodeGenerator for X64FunctionCode { )[0]; a.emit_mov(Size::S64, Location::GPR(GPR::RAX), loc); self.value_stack.push(loc); + match frame.returns[0]{ + WpType::F32 | WpType::F64 => { + self.fp_stack.push(FloatValue::new(self.value_stack.len() - 1)); // we already canonicalized at the `Br*` instruction or here previously. + } + _ => {} + } } } } From 5d26d92d9bea0856763c354a4c11a8598a0e99bb Mon Sep 17 00:00:00 2001 From: losfair Date: Mon, 16 Mar 2020 23:40:02 +0800 Subject: [PATCH 02/19] Add switch to turn on/off NaN canonicalization. --- lib/runtime-core/src/backend.rs | 4 + lib/singlepass-backend/src/codegen_x64.rs | 515 ++++++++++++++-------- lib/spectests/tests/spectest.rs | 5 + 3 files changed, 340 insertions(+), 184 deletions(-) diff --git a/lib/runtime-core/src/backend.rs b/lib/runtime-core/src/backend.rs index 4aca2d2a71c..49b4161e48c 100644 --- a/lib/runtime-core/src/backend.rs +++ b/lib/runtime-core/src/backend.rs @@ -132,6 +132,10 @@ pub struct CompilerConfig { /// When enabled there can be a small amount of runtime performance overhead. pub full_preemption: bool, + /// Whether to enable spec-compliant NaN canonicalization at all places. + /// Enabling this increases runtime overhead. + pub nan_canonicalization: bool, + pub features: Features, // Target info. Presently only supported by LLVM. 
diff --git a/lib/singlepass-backend/src/codegen_x64.rs b/lib/singlepass-backend/src/codegen_x64.rs index 1869b193509..dbfb66c431a 100644 --- a/lib/singlepass-backend/src/codegen_x64.rs +++ b/lib/singlepass-backend/src/codegen_x64.rs @@ -39,8 +39,8 @@ use wasmer_runtime_core::{ structures::{Map, TypedIndex}, typed_func::{Trampoline, Wasm}, types::{ - FuncIndex, FuncSig, GlobalIndex, LocalFuncIndex, LocalOrImport, MemoryIndex, SigIndex, - TableIndex, Type, ImportedGlobalIndex, LocalGlobalIndex, + FuncIndex, FuncSig, GlobalIndex, ImportedGlobalIndex, LocalFuncIndex, LocalGlobalIndex, + LocalOrImport, MemoryIndex, SigIndex, TableIndex, Type, }, vm::{self, LocalGlobal, LocalTable, INTERNALS_SIZE}, wasmparser::{MemoryImmediate, Operator, Type as WpType, TypeOrFuncType as WpTypeOrFuncType}, @@ -247,15 +247,24 @@ struct FloatValue { impl FloatValue { fn new(depth: usize) -> Self { - FloatValue { canonicalization: None, depth } + FloatValue { + canonicalization: None, + depth, + } } fn cncl_f32(depth: usize) -> Self { - FloatValue { canonicalization: Some(CanonicalizeType::F32), depth } + FloatValue { + canonicalization: Some(CanonicalizeType::F32), + depth, + } } fn cncl_f64(depth: usize) -> Self { - FloatValue { canonicalization: Some(CanonicalizeType::F64), depth } + FloatValue { + canonicalization: Some(CanonicalizeType::F64), + depth, + } } fn promote(self, depth: usize) -> FloatValue { @@ -736,6 +745,7 @@ struct CodegenConfig { enforce_stack_check: bool, track_state: bool, full_preemption: bool, + nan_canonicalization: bool, } impl ModuleCodeGenerator @@ -1113,6 +1123,7 @@ impl ModuleCodeGenerator enforce_stack_check: config.enforce_stack_check, track_state: config.track_state, full_preemption: config.full_preemption, + nan_canonicalization: config.nan_canonicalization, })); Ok(()) } @@ -1208,15 +1219,8 @@ impl X64FunctionCode { let tmp3 = m.acquire_temp_xmm().unwrap(); let tmpg1 = m.acquire_temp_gpr().unwrap(); - Self::emit_relaxed_binop( - a, - m, - 
Assembler::emit_mov, - sz, - input, - Location::XMM(tmp1), - ); - + Self::emit_relaxed_binop(a, m, Assembler::emit_mov, sz, input, Location::XMM(tmp1)); + match sz { Size::S32 => { a.emit_vcmpunordss(tmp1, XMMOrMemory::XMM(tmp1), tmp2); @@ -1227,7 +1231,7 @@ impl X64FunctionCode { ); a.emit_mov(Size::S64, Location::GPR(tmpg1), Location::XMM(tmp3)); a.emit_vblendvps(tmp2, XMMOrMemory::XMM(tmp3), tmp1, tmp1); - }, + } Size::S64 => { a.emit_vcmpunordsd(tmp1, XMMOrMemory::XMM(tmp1), tmp2); a.emit_mov( @@ -1237,18 +1241,11 @@ impl X64FunctionCode { ); a.emit_mov(Size::S64, Location::GPR(tmpg1), Location::XMM(tmp3)); a.emit_vblendvpd(tmp2, XMMOrMemory::XMM(tmp3), tmp1, tmp1); - }, + } _ => unreachable!(), } - Self::emit_relaxed_binop( - a, - m, - Assembler::emit_mov, - sz, - Location::XMM(tmp1), - output, - ); + Self::emit_relaxed_binop(a, m, Assembler::emit_mov, sz, Location::XMM(tmp1), output); m.release_temp_gpr(tmpg1); m.release_temp_xmm(tmp3); @@ -2956,16 +2953,14 @@ impl FunctionCodeGenerator for X64FunctionCode { let ty = type_to_wp_type(module_info.globals[local_index].desc.ty); match ty { WpType::F32 | WpType::F64 => { - self.fp_stack.push(FloatValue::new(self.value_stack.len() - 1)); + self.fp_stack + .push(FloatValue::new(self.value_stack.len() - 1)); } _ => {} } self.machine.acquire_locations( a, - &[( - ty, - MachineValue::WasmStack(self.value_stack.len()), - )], + &[(ty, MachineValue::WasmStack(self.value_stack.len()))], false, )[0] } @@ -2986,16 +2981,14 @@ impl FunctionCodeGenerator for X64FunctionCode { let ty = type_to_wp_type(module_info.imported_globals[import_index].1.ty); match ty { WpType::F32 | WpType::F64 => { - self.fp_stack.push(FloatValue::new(self.value_stack.len() - 1)); + self.fp_stack + .push(FloatValue::new(self.value_stack.len() - 1)); } _ => {} } self.machine.acquire_locations( a, - &[( - ty, - MachineValue::WasmStack(self.value_stack.len()), - )], + &[(ty, MachineValue::WasmStack(self.value_stack.len()))], false, )[0] } @@ -3029,7 
+3022,11 @@ impl FunctionCodeGenerator for X64FunctionCode { ), Location::GPR(tmp), ); - type_to_wp_type(module_info.imported_globals[ImportedGlobalIndex::new(global_index)].1.ty) + type_to_wp_type( + module_info.imported_globals[ImportedGlobalIndex::new(global_index)] + .1 + .ty, + ) } else { global_index -= module_info.imported_globals.len(); if global_index >= module_info.globals.len() { @@ -3045,7 +3042,11 @@ impl FunctionCodeGenerator for X64FunctionCode { ), Location::GPR(tmp), ); - type_to_wp_type(module_info.globals[LocalGlobalIndex::new(global_index)].desc.ty) + type_to_wp_type( + module_info.globals[LocalGlobalIndex::new(global_index)] + .desc + .ty, + ) }; a.emit_mov( Size::S64, @@ -3055,12 +3056,21 @@ impl FunctionCodeGenerator for X64FunctionCode { match ty { WpType::F32 | WpType::F64 => { let fp = self.fp_stack.pop1()?; - if a.arch_supports_canonicalize_nan() && fp.canonicalization.is_some() { - Self::canonicalize_nan(a, &mut self.machine, match ty { - WpType::F32 => Size::S32, - WpType::F64 => Size::S64, - _ => unreachable!(), - }, loc, Location::Memory(tmp, LocalGlobal::offset_data() as i32)); + if a.arch_supports_canonicalize_nan() + && self.config.nan_canonicalization + && fp.canonicalization.is_some() + { + Self::canonicalize_nan( + a, + &mut self.machine, + match ty { + WpType::F32 => Size::S32, + WpType::F64 => Size::S64, + _ => unreachable!(), + }, + loc, + Location::Memory(tmp, LocalGlobal::offset_data() as i32), + ); } else { Self::emit_relaxed_binop( a, @@ -3103,7 +3113,8 @@ impl FunctionCodeGenerator for X64FunctionCode { self.value_stack.push(ret); match self.local_types[local_index] { WpType::F32 | WpType::F64 => { - self.fp_stack.push(FloatValue::new(self.value_stack.len() - 1)); + self.fp_stack + .push(FloatValue::new(self.value_stack.len() - 1)); } _ => {} } @@ -3116,12 +3127,21 @@ impl FunctionCodeGenerator for X64FunctionCode { match self.local_types[local_index] { WpType::F32 | WpType::F64 => { let fp = self.fp_stack.pop1()?; - if 
a.arch_supports_canonicalize_nan() && fp.canonicalization.is_some() { - Self::canonicalize_nan(a, &mut self.machine, match self.local_types[local_index] { - WpType::F32 => Size::S32, - WpType::F64 => Size::S64, - _ => unreachable!(), - }, loc, self.locals[local_index]); + if a.arch_supports_canonicalize_nan() + && self.config.nan_canonicalization + && fp.canonicalization.is_some() + { + Self::canonicalize_nan( + a, + &mut self.machine, + match self.local_types[local_index] { + WpType::F32 => Size::S32, + WpType::F64 => Size::S64, + _ => unreachable!(), + }, + loc, + self.locals[local_index], + ); } else { Self::emit_relaxed_binop( a, @@ -3144,8 +3164,6 @@ impl FunctionCodeGenerator for X64FunctionCode { ); } } - - } Operator::LocalTee { local_index } => { let local_index = local_index as usize; @@ -3154,12 +3172,21 @@ impl FunctionCodeGenerator for X64FunctionCode { match self.local_types[local_index] { WpType::F32 | WpType::F64 => { let fp = self.fp_stack.peek1()?; - if a.arch_supports_canonicalize_nan() && fp.canonicalization.is_some() { - Self::canonicalize_nan(a, &mut self.machine, match self.local_types[local_index] { - WpType::F32 => Size::S32, - WpType::F64 => Size::S64, - _ => unreachable!(), - }, loc, self.locals[local_index]); + if a.arch_supports_canonicalize_nan() + && self.config.nan_canonicalization + && fp.canonicalization.is_some() + { + Self::canonicalize_nan( + a, + &mut self.machine, + match self.local_types[local_index] { + WpType::F32 => Size::S32, + WpType::F64 => Size::S64, + _ => unreachable!(), + }, + loc, + self.locals[local_index], + ); } else { Self::emit_relaxed_binop( a, @@ -4153,7 +4180,8 @@ impl FunctionCodeGenerator for X64FunctionCode { Operator::F32Const { value } => { self.value_stack.push(Location::Imm32(value.bits())); - self.fp_stack.push(FloatValue::new(self.value_stack.len() - 1)); + self.fp_stack + .push(FloatValue::new(self.value_stack.len() - 1)); self.machine .state .wasm_stack @@ -4161,47 +4189,52 @@ impl 
FunctionCodeGenerator for X64FunctionCode { } Operator::F32Add => { self.fp_stack.pop2()?; - self.fp_stack.push(FloatValue::cncl_f32(self.value_stack.len() - 2)); + self.fp_stack + .push(FloatValue::cncl_f32(self.value_stack.len() - 2)); Self::emit_fp_binop_avx( a, &mut self.machine, &mut self.value_stack, Assembler::emit_vaddss, )?; - }, + } Operator::F32Sub => { self.fp_stack.pop2()?; - self.fp_stack.push(FloatValue::cncl_f32(self.value_stack.len() - 2)); + self.fp_stack + .push(FloatValue::cncl_f32(self.value_stack.len() - 2)); Self::emit_fp_binop_avx( a, &mut self.machine, &mut self.value_stack, Assembler::emit_vsubss, )? - }, + } Operator::F32Mul => { self.fp_stack.pop2()?; - self.fp_stack.push(FloatValue::cncl_f32(self.value_stack.len() - 2)); + self.fp_stack + .push(FloatValue::cncl_f32(self.value_stack.len() - 2)); Self::emit_fp_binop_avx( a, &mut self.machine, &mut self.value_stack, Assembler::emit_vmulss, )? - }, + } Operator::F32Div => { self.fp_stack.pop2()?; - self.fp_stack.push(FloatValue::cncl_f32(self.value_stack.len() - 2)); + self.fp_stack + .push(FloatValue::cncl_f32(self.value_stack.len() - 2)); Self::emit_fp_binop_avx( a, &mut self.machine, &mut self.value_stack, Assembler::emit_vdivss, )? - }, + } Operator::F32Max => { self.fp_stack.pop2()?; - self.fp_stack.push(FloatValue::new(self.value_stack.len() - 2)); + self.fp_stack + .push(FloatValue::new(self.value_stack.len() - 2)); if !a.arch_supports_canonicalize_nan() { Self::emit_fp_binop_avx( a, @@ -4326,7 +4359,8 @@ impl FunctionCodeGenerator for X64FunctionCode { } Operator::F32Min => { self.fp_stack.pop2()?; - self.fp_stack.push(FloatValue::new(self.value_stack.len() - 2)); + self.fp_stack + .push(FloatValue::new(self.value_stack.len() - 2)); if !a.arch_supports_canonicalize_nan() { Self::emit_fp_binop_avx( a, @@ -4463,7 +4497,7 @@ impl FunctionCodeGenerator for X64FunctionCode { &mut self.value_stack, Assembler::emit_vcmpeqss, )? 
- }, + } Operator::F32Ne => { self.fp_stack.pop2()?; Self::emit_fp_cmpop_avx( @@ -4472,7 +4506,7 @@ impl FunctionCodeGenerator for X64FunctionCode { &mut self.value_stack, Assembler::emit_vcmpneqss, )? - }, + } Operator::F32Lt => { self.fp_stack.pop2()?; Self::emit_fp_cmpop_avx( @@ -4481,7 +4515,7 @@ impl FunctionCodeGenerator for X64FunctionCode { &mut self.value_stack, Assembler::emit_vcmpltss, )? - }, + } Operator::F32Le => { self.fp_stack.pop2()?; Self::emit_fp_cmpop_avx( @@ -4490,7 +4524,7 @@ impl FunctionCodeGenerator for X64FunctionCode { &mut self.value_stack, Assembler::emit_vcmpless, )? - }, + } Operator::F32Gt => { self.fp_stack.pop2()?; Self::emit_fp_cmpop_avx( @@ -4499,7 +4533,7 @@ impl FunctionCodeGenerator for X64FunctionCode { &mut self.value_stack, Assembler::emit_vcmpgtss, )? - }, + } Operator::F32Ge => { self.fp_stack.pop2()?; Self::emit_fp_cmpop_avx( @@ -4508,57 +4542,62 @@ impl FunctionCodeGenerator for X64FunctionCode { &mut self.value_stack, Assembler::emit_vcmpgess, )? - }, + } Operator::F32Nearest => { self.fp_stack.pop1()?; - self.fp_stack.push(FloatValue::cncl_f32(self.value_stack.len() - 1)); + self.fp_stack + .push(FloatValue::cncl_f32(self.value_stack.len() - 1)); Self::emit_fp_unop_avx( a, &mut self.machine, &mut self.value_stack, Assembler::emit_vroundss_nearest, )? - }, + } Operator::F32Floor => { self.fp_stack.pop1()?; - self.fp_stack.push(FloatValue::cncl_f32(self.value_stack.len() - 1)); + self.fp_stack + .push(FloatValue::cncl_f32(self.value_stack.len() - 1)); Self::emit_fp_unop_avx( a, &mut self.machine, &mut self.value_stack, Assembler::emit_vroundss_floor, )? - }, + } Operator::F32Ceil => { self.fp_stack.pop1()?; - self.fp_stack.push(FloatValue::cncl_f32(self.value_stack.len() - 1)); + self.fp_stack + .push(FloatValue::cncl_f32(self.value_stack.len() - 1)); Self::emit_fp_unop_avx( a, &mut self.machine, &mut self.value_stack, Assembler::emit_vroundss_ceil, )? 
- }, + } Operator::F32Trunc => { self.fp_stack.pop1()?; - self.fp_stack.push(FloatValue::cncl_f32(self.value_stack.len() - 1)); + self.fp_stack + .push(FloatValue::cncl_f32(self.value_stack.len() - 1)); Self::emit_fp_unop_avx( a, &mut self.machine, &mut self.value_stack, Assembler::emit_vroundss_trunc, )? - }, + } Operator::F32Sqrt => { self.fp_stack.pop1()?; - self.fp_stack.push(FloatValue::cncl_f32(self.value_stack.len() - 1)); + self.fp_stack + .push(FloatValue::cncl_f32(self.value_stack.len() - 1)); Self::emit_fp_unop_avx( a, &mut self.machine, &mut self.value_stack, Assembler::emit_vsqrtss, )? - }, + } Operator::F32Copysign => { // Preserve canonicalization state. @@ -4659,7 +4698,8 @@ impl FunctionCodeGenerator for X64FunctionCode { Operator::F64Const { value } => { self.value_stack.push(Location::Imm64(value.bits())); - self.fp_stack.push(FloatValue::new(self.value_stack.len() - 1)); + self.fp_stack + .push(FloatValue::new(self.value_stack.len() - 1)); self.machine .state .wasm_stack @@ -4667,48 +4707,53 @@ impl FunctionCodeGenerator for X64FunctionCode { } Operator::F64Add => { self.fp_stack.pop2()?; - self.fp_stack.push(FloatValue::cncl_f64(self.value_stack.len() - 2)); + self.fp_stack + .push(FloatValue::cncl_f64(self.value_stack.len() - 2)); Self::emit_fp_binop_avx( a, &mut self.machine, &mut self.value_stack, Assembler::emit_vaddsd, )? - }, + } Operator::F64Sub => { self.fp_stack.pop2()?; - self.fp_stack.push(FloatValue::cncl_f64(self.value_stack.len() - 2)); + self.fp_stack + .push(FloatValue::cncl_f64(self.value_stack.len() - 2)); Self::emit_fp_binop_avx( a, &mut self.machine, &mut self.value_stack, Assembler::emit_vsubsd, )? - }, + } Operator::F64Mul => { self.fp_stack.pop2()?; - self.fp_stack.push(FloatValue::cncl_f64(self.value_stack.len() - 2)); + self.fp_stack + .push(FloatValue::cncl_f64(self.value_stack.len() - 2)); Self::emit_fp_binop_avx( a, &mut self.machine, &mut self.value_stack, Assembler::emit_vmulsd, )? 
- }, + } Operator::F64Div => { self.fp_stack.pop2()?; - self.fp_stack.push(FloatValue::cncl_f64(self.value_stack.len() - 2)); + self.fp_stack + .push(FloatValue::cncl_f64(self.value_stack.len() - 2)); Self::emit_fp_binop_avx( a, &mut self.machine, &mut self.value_stack, Assembler::emit_vdivsd, )? - }, + } Operator::F64Max => { self.fp_stack.pop2()?; - self.fp_stack.push(FloatValue::new(self.value_stack.len() - 2)); - + self.fp_stack + .push(FloatValue::new(self.value_stack.len() - 2)); + if !a.arch_supports_canonicalize_nan() { Self::emit_fp_binop_avx( a, @@ -4833,8 +4878,9 @@ impl FunctionCodeGenerator for X64FunctionCode { } Operator::F64Min => { self.fp_stack.pop2()?; - self.fp_stack.push(FloatValue::new(self.value_stack.len() - 2)); - + self.fp_stack + .push(FloatValue::new(self.value_stack.len() - 2)); + if !a.arch_supports_canonicalize_nan() { Self::emit_fp_binop_avx( a, @@ -4971,7 +5017,7 @@ impl FunctionCodeGenerator for X64FunctionCode { &mut self.value_stack, Assembler::emit_vcmpeqsd, )? - }, + } Operator::F64Ne => { self.fp_stack.pop2()?; Self::emit_fp_cmpop_avx( @@ -4980,7 +5026,7 @@ impl FunctionCodeGenerator for X64FunctionCode { &mut self.value_stack, Assembler::emit_vcmpneqsd, )? - }, + } Operator::F64Lt => { self.fp_stack.pop2()?; Self::emit_fp_cmpop_avx( @@ -4989,7 +5035,7 @@ impl FunctionCodeGenerator for X64FunctionCode { &mut self.value_stack, Assembler::emit_vcmpltsd, )? - }, + } Operator::F64Le => { self.fp_stack.pop2()?; Self::emit_fp_cmpop_avx( @@ -4998,7 +5044,7 @@ impl FunctionCodeGenerator for X64FunctionCode { &mut self.value_stack, Assembler::emit_vcmplesd, )? - }, + } Operator::F64Gt => { self.fp_stack.pop2()?; Self::emit_fp_cmpop_avx( @@ -5007,7 +5053,7 @@ impl FunctionCodeGenerator for X64FunctionCode { &mut self.value_stack, Assembler::emit_vcmpgtsd, )? 
- }, + } Operator::F64Ge => { self.fp_stack.pop2()?; Self::emit_fp_cmpop_avx( @@ -5016,57 +5062,62 @@ impl FunctionCodeGenerator for X64FunctionCode { &mut self.value_stack, Assembler::emit_vcmpgesd, )? - }, + } Operator::F64Nearest => { self.fp_stack.pop1()?; - self.fp_stack.push(FloatValue::cncl_f64(self.value_stack.len() - 1)); + self.fp_stack + .push(FloatValue::cncl_f64(self.value_stack.len() - 1)); Self::emit_fp_unop_avx( a, &mut self.machine, &mut self.value_stack, Assembler::emit_vroundsd_nearest, )? - }, + } Operator::F64Floor => { self.fp_stack.pop1()?; - self.fp_stack.push(FloatValue::cncl_f64(self.value_stack.len() - 1)); + self.fp_stack + .push(FloatValue::cncl_f64(self.value_stack.len() - 1)); Self::emit_fp_unop_avx( a, &mut self.machine, &mut self.value_stack, Assembler::emit_vroundsd_floor, )? - }, + } Operator::F64Ceil => { self.fp_stack.pop1()?; - self.fp_stack.push(FloatValue::cncl_f64(self.value_stack.len() - 1)); + self.fp_stack + .push(FloatValue::cncl_f64(self.value_stack.len() - 1)); Self::emit_fp_unop_avx( a, &mut self.machine, &mut self.value_stack, Assembler::emit_vroundsd_ceil, )? - }, + } Operator::F64Trunc => { self.fp_stack.pop1()?; - self.fp_stack.push(FloatValue::cncl_f64(self.value_stack.len() - 1)); + self.fp_stack + .push(FloatValue::cncl_f64(self.value_stack.len() - 1)); Self::emit_fp_unop_avx( a, &mut self.machine, &mut self.value_stack, Assembler::emit_vroundsd_trunc, )? - }, + } Operator::F64Sqrt => { self.fp_stack.pop1()?; - self.fp_stack.push(FloatValue::cncl_f64(self.value_stack.len() - 1)); + self.fp_stack + .push(FloatValue::cncl_f64(self.value_stack.len() - 1)); Self::emit_fp_unop_avx( a, &mut self.machine, &mut self.value_stack, Assembler::emit_vsqrtsd, )? - }, + } Operator::F64Copysign => { // Preserve canonicalization state. @@ -5188,7 +5239,7 @@ impl FunctionCodeGenerator for X64FunctionCode { &mut self.value_stack, Assembler::emit_vcvtss2sd, )? 
- }, + } Operator::F32DemoteF64 => { let fp = self.fp_stack.pop1()?; self.fp_stack.push(fp.demote(self.value_stack.len() - 1)); @@ -5198,7 +5249,7 @@ impl FunctionCodeGenerator for X64FunctionCode { &mut self.value_stack, Assembler::emit_vcvtsd2ss, )? - }, + } Operator::I32ReinterpretF32 => { let loc = @@ -5211,7 +5262,10 @@ impl FunctionCodeGenerator for X64FunctionCode { self.value_stack.push(ret); let fp = self.fp_stack.pop1()?; - if !a.arch_supports_canonicalize_nan() || fp.canonicalization.is_none() { + if !a.arch_supports_canonicalize_nan() + || !self.config.nan_canonicalization + || fp.canonicalization.is_none() + { if loc != ret { Self::emit_relaxed_binop( a, @@ -5235,7 +5289,8 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); - self.fp_stack.push(FloatValue::new(self.value_stack.len() - 1)); + self.fp_stack + .push(FloatValue::new(self.value_stack.len() - 1)); if loc != ret { Self::emit_relaxed_binop( @@ -5260,7 +5315,10 @@ impl FunctionCodeGenerator for X64FunctionCode { self.value_stack.push(ret); let fp = self.fp_stack.pop1()?; - if !a.arch_supports_canonicalize_nan() || fp.canonicalization.is_none() { + if !a.arch_supports_canonicalize_nan() + || !self.config.nan_canonicalization + || fp.canonicalization.is_none() + { if loc != ret { Self::emit_relaxed_binop( a, @@ -5284,7 +5342,8 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); - self.fp_stack.push(FloatValue::new(self.value_stack.len() - 1)); + self.fp_stack + .push(FloatValue::new(self.value_stack.len() - 1)); if loc != ret { Self::emit_relaxed_binop( @@ -6354,7 +6413,8 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); - self.fp_stack.push(FloatValue::new(self.value_stack.len() - 1)); // Converting i32 to f32 never results in NaN. + self.fp_stack + .push(FloatValue::new(self.value_stack.len() - 1)); // Converting i32 to f32 never results in NaN. 
if a.arch_has_fconverti() { let tmp_out = self.machine.acquire_temp_xmm().unwrap(); @@ -6399,7 +6459,8 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); - self.fp_stack.push(FloatValue::new(self.value_stack.len() - 1)); // Converting i32 to f32 never results in NaN. + self.fp_stack + .push(FloatValue::new(self.value_stack.len() - 1)); // Converting i32 to f32 never results in NaN. if a.arch_has_fconverti() { let tmp_out = self.machine.acquire_temp_xmm().unwrap(); @@ -6444,7 +6505,8 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); - self.fp_stack.push(FloatValue::new(self.value_stack.len() - 1)); // Converting i64 to f32 never results in NaN. + self.fp_stack + .push(FloatValue::new(self.value_stack.len() - 1)); // Converting i64 to f32 never results in NaN. if a.arch_has_fconverti() { let tmp_out = self.machine.acquire_temp_xmm().unwrap(); @@ -6489,7 +6551,8 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); - self.fp_stack.push(FloatValue::new(self.value_stack.len() - 1)); // Converting i64 to f32 never results in NaN. + self.fp_stack + .push(FloatValue::new(self.value_stack.len() - 1)); // Converting i64 to f32 never results in NaN. if a.arch_has_fconverti() { let tmp_out = self.machine.acquire_temp_xmm().unwrap(); @@ -6551,7 +6614,8 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); - self.fp_stack.push(FloatValue::new(self.value_stack.len() - 1)); // Converting i32 to f64 never results in NaN. + self.fp_stack + .push(FloatValue::new(self.value_stack.len() - 1)); // Converting i32 to f64 never results in NaN. 
if a.arch_has_fconverti() { let tmp_out = self.machine.acquire_temp_xmm().unwrap(); @@ -6596,7 +6660,8 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); - self.fp_stack.push(FloatValue::new(self.value_stack.len() - 1)); // Converting i32 to f64 never results in NaN. + self.fp_stack + .push(FloatValue::new(self.value_stack.len() - 1)); // Converting i32 to f64 never results in NaN. if a.arch_has_fconverti() { let tmp_out = self.machine.acquire_temp_xmm().unwrap(); @@ -6641,7 +6706,8 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); - self.fp_stack.push(FloatValue::new(self.value_stack.len() - 1)); // Converting i64 to f64 never results in NaN. + self.fp_stack + .push(FloatValue::new(self.value_stack.len() - 1)); // Converting i64 to f64 never results in NaN. if a.arch_has_fconverti() { let tmp_out = self.machine.acquire_temp_xmm().unwrap(); @@ -6686,7 +6752,8 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); - self.fp_stack.push(FloatValue::new(self.value_stack.len() - 1)); // Converting i64 to f64 never results in NaN. + self.fp_stack + .push(FloatValue::new(self.value_stack.len() - 1)); // Converting i64 to f64 never results in NaN. 
if a.arch_has_fconverti() { let tmp_out = self.machine.acquire_temp_xmm().unwrap(); @@ -6797,7 +6864,8 @@ impl FunctionCodeGenerator for X64FunctionCode { match return_types[0] { WpType::F32 | WpType::F64 => { a.emit_mov(Size::S64, Location::XMM(XMM::XMM0), ret); - self.fp_stack.push(FloatValue::new(self.value_stack.len() - 1)); + self.fp_stack + .push(FloatValue::new(self.value_stack.len() - 1)); } _ => { a.emit_mov(Size::S64, Location::GPR(GPR::RAX), ret); @@ -6952,7 +7020,8 @@ impl FunctionCodeGenerator for X64FunctionCode { match return_types[0] { WpType::F32 | WpType::F64 => { a.emit_mov(Size::S64, Location::XMM(XMM::XMM0), ret); - self.fp_stack.push(FloatValue::new(self.value_stack.len() - 1)); + self.fp_stack + .push(FloatValue::new(self.value_stack.len() - 1)); } _ => { a.emit_mov(Size::S64, Location::GPR(GPR::RAX), ret); @@ -7008,12 +7077,21 @@ impl FunctionCodeGenerator for X64FunctionCode { match frame.returns[0] { WpType::F32 | WpType::F64 => { let fp = self.fp_stack.peek1()?; - if a.arch_supports_canonicalize_nan() && fp.canonicalization.is_some() { - Self::canonicalize_nan(a, &mut self.machine, match frame.returns[0] { - WpType::F32 => Size::S32, - WpType::F64 => Size::S64, - _ => unreachable!(), - }, loc, Location::GPR(GPR::RAX)); + if a.arch_supports_canonicalize_nan() + && self.config.nan_canonicalization + && fp.canonicalization.is_some() + { + Self::canonicalize_nan( + a, + &mut self.machine, + match frame.returns[0] { + WpType::F32 => Size::S32, + WpType::F64 => Size::S64, + _ => unreachable!(), + }, + loc, + Location::GPR(GPR::RAX), + ); } else { Self::emit_relaxed_binop( a, @@ -7063,13 +7141,18 @@ impl FunctionCodeGenerator for X64FunctionCode { get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); let v_a = get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); - let cncl: Option<(Option, Option)> = if self.fp_stack.len() >= 2 && self.fp_stack[self.fp_stack.len() - 2].depth == 
self.value_stack.len() && self.fp_stack[self.fp_stack.len() - 1].depth == self.value_stack.len() + 1 { - let (left, right) = self.fp_stack.pop2()?; - self.fp_stack.push(FloatValue::new(self.value_stack.len())); - Some((left.canonicalization, right.canonicalization)) - } else { - None - }; + let cncl: Option<(Option, Option)> = + if self.fp_stack.len() >= 2 + && self.fp_stack[self.fp_stack.len() - 2].depth == self.value_stack.len() + && self.fp_stack[self.fp_stack.len() - 1].depth + == self.value_stack.len() + 1 + { + let (left, right) = self.fp_stack.pop2()?; + self.fp_stack.push(FloatValue::new(self.value_stack.len())); + Some((left.canonicalization, right.canonicalization)) + } else { + None + }; let ret = self.machine.acquire_locations( a, &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], @@ -7090,7 +7173,10 @@ impl FunctionCodeGenerator for X64FunctionCode { ); a.emit_jmp(Condition::Equal, zero_label); match cncl { - Some((Some(fp), _)) if a.arch_supports_canonicalize_nan() => { + Some((Some(fp), _)) + if a.arch_supports_canonicalize_nan() + && self.config.nan_canonicalization => + { let sz = match fp { CanonicalizeType::F32 => Size::S32, CanonicalizeType::F64 => Size::S64, @@ -7113,7 +7199,10 @@ impl FunctionCodeGenerator for X64FunctionCode { a.emit_jmp(Condition::None, end_label); a.emit_label(zero_label); match cncl { - Some((_, Some(fp))) if a.arch_supports_canonicalize_nan() => { + Some((_, Some(fp))) + if a.arch_supports_canonicalize_nan() + && self.config.nan_canonicalization => + { let sz = match fp { CanonicalizeType::F32 => Size::S32, CanonicalizeType::F64 => Size::S64, @@ -7345,7 +7434,8 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); - self.fp_stack.push(FloatValue::new(self.value_stack.len() - 1)); + self.fp_stack + .push(FloatValue::new(self.value_stack.len() - 1)); Self::emit_memory_op( module_info, @@ -7664,7 +7754,8 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; 
self.value_stack.push(ret); - self.fp_stack.push(FloatValue::new(self.value_stack.len() - 1)); + self.fp_stack + .push(FloatValue::new(self.value_stack.len() - 1)); Self::emit_memory_op( module_info, @@ -8074,12 +8165,21 @@ impl FunctionCodeGenerator for X64FunctionCode { match frame.returns[0] { WpType::F32 | WpType::F64 => { let fp = self.fp_stack.peek1()?; - if a.arch_supports_canonicalize_nan() && fp.canonicalization.is_some() { - Self::canonicalize_nan(a, &mut self.machine, match frame.returns[0] { - WpType::F32 => Size::S32, - WpType::F64 => Size::S64, - _ => unreachable!(), - }, loc, Location::GPR(GPR::RAX)); + if a.arch_supports_canonicalize_nan() + && self.config.nan_canonicalization + && fp.canonicalization.is_some() + { + Self::canonicalize_nan( + a, + &mut self.machine, + match frame.returns[0] { + WpType::F32 => Size::S32, + WpType::F64 => Size::S64, + _ => unreachable!(), + }, + loc, + Location::GPR(GPR::RAX), + ); } else { Self::emit_relaxed_binop( a, @@ -8122,12 +8222,21 @@ impl FunctionCodeGenerator for X64FunctionCode { match frame.returns[0] { WpType::F32 | WpType::F64 => { let fp = self.fp_stack.peek1()?; - if a.arch_supports_canonicalize_nan() && fp.canonicalization.is_some() { - Self::canonicalize_nan(a, &mut self.machine, match frame.returns[0] { - WpType::F32 => Size::S32, - WpType::F64 => Size::S64, - _ => unreachable!(), - }, loc, Location::GPR(GPR::RAX)); + if a.arch_supports_canonicalize_nan() + && self.config.nan_canonicalization + && fp.canonicalization.is_some() + { + Self::canonicalize_nan( + a, + &mut self.machine, + match frame.returns[0] { + WpType::F32 => Size::S32, + WpType::F64 => Size::S64, + _ => unreachable!(), + }, + loc, + Location::GPR(GPR::RAX), + ); } else { Self::emit_relaxed_binop( a, @@ -8176,12 +8285,21 @@ impl FunctionCodeGenerator for X64FunctionCode { match frame.returns[0] { WpType::F32 | WpType::F64 => { let fp = self.fp_stack.peek1()?; - if a.arch_supports_canonicalize_nan() && fp.canonicalization.is_some() { 
- Self::canonicalize_nan(a, &mut self.machine, match frame.returns[0] { - WpType::F32 => Size::S32, - WpType::F64 => Size::S64, - _ => unreachable!(), - }, loc, Location::GPR(GPR::RAX)); + if a.arch_supports_canonicalize_nan() + && self.config.nan_canonicalization + && fp.canonicalization.is_some() + { + Self::canonicalize_nan( + a, + &mut self.machine, + match frame.returns[0] { + WpType::F32 => Size::S32, + WpType::F64 => Size::S64, + _ => unreachable!(), + }, + loc, + Location::GPR(GPR::RAX), + ); } else { Self::emit_relaxed_binop( a, @@ -8250,12 +8368,21 @@ impl FunctionCodeGenerator for X64FunctionCode { match frame.returns[0] { WpType::F32 | WpType::F64 => { let fp = self.fp_stack.peek1()?; - if a.arch_supports_canonicalize_nan() && fp.canonicalization.is_some() { - Self::canonicalize_nan(a, &mut self.machine, match frame.returns[0] { - WpType::F32 => Size::S32, - WpType::F64 => Size::S64, - _ => unreachable!(), - }, loc, Location::GPR(GPR::RAX)); + if a.arch_supports_canonicalize_nan() + && self.config.nan_canonicalization + && fp.canonicalization.is_some() + { + Self::canonicalize_nan( + a, + &mut self.machine, + match frame.returns[0] { + WpType::F32 => Size::S32, + WpType::F64 => Size::S64, + _ => unreachable!(), + }, + loc, + Location::GPR(GPR::RAX), + ); } else { Self::emit_relaxed_binop( a, @@ -8291,12 +8418,21 @@ impl FunctionCodeGenerator for X64FunctionCode { match frame.returns[0] { WpType::F32 | WpType::F64 => { let fp = self.fp_stack.peek1()?; - if a.arch_supports_canonicalize_nan() && fp.canonicalization.is_some() { - Self::canonicalize_nan(a, &mut self.machine, match frame.returns[0] { - WpType::F32 => Size::S32, - WpType::F64 => Size::S64, - _ => unreachable!(), - }, loc, Location::GPR(GPR::RAX)); + if a.arch_supports_canonicalize_nan() + && self.config.nan_canonicalization + && fp.canonicalization.is_some() + { + Self::canonicalize_nan( + a, + &mut self.machine, + match frame.returns[0] { + WpType::F32 => Size::S32, + WpType::F64 => 
Size::S64, + _ => unreachable!(), + }, + loc, + Location::GPR(GPR::RAX), + ); } else { Self::emit_relaxed_binop( a, @@ -8335,12 +8471,21 @@ impl FunctionCodeGenerator for X64FunctionCode { match frame.returns[0] { WpType::F32 | WpType::F64 => { let fp = self.fp_stack.peek1()?; - if a.arch_supports_canonicalize_nan() && fp.canonicalization.is_some() { - Self::canonicalize_nan(a, &mut self.machine, match frame.returns[0] { - WpType::F32 => Size::S32, - WpType::F64 => Size::S64, - _ => unreachable!(), - }, loc, Location::GPR(GPR::RAX)); + if a.arch_supports_canonicalize_nan() + && self.config.nan_canonicalization + && fp.canonicalization.is_some() + { + Self::canonicalize_nan( + a, + &mut self.machine, + match frame.returns[0] { + WpType::F32 => Size::S32, + WpType::F64 => Size::S64, + _ => unreachable!(), + }, + loc, + Location::GPR(GPR::RAX), + ); } else { Self::emit_relaxed_binop( a, @@ -8413,9 +8558,11 @@ impl FunctionCodeGenerator for X64FunctionCode { )[0]; a.emit_mov(Size::S64, Location::GPR(GPR::RAX), loc); self.value_stack.push(loc); - match frame.returns[0]{ + match frame.returns[0] { WpType::F32 | WpType::F64 => { - self.fp_stack.push(FloatValue::new(self.value_stack.len() - 1)); // we already canonicalized at the `Br*` instruction or here previously. + self.fp_stack + .push(FloatValue::new(self.value_stack.len() - 1)); + // we already canonicalized at the `Br*` instruction or here previously. 
} _ => {} } diff --git a/lib/spectests/tests/spectest.rs b/lib/spectests/tests/spectest.rs index 409a9d90829..c3d1ecce723 100644 --- a/lib/spectests/tests/spectest.rs +++ b/lib/spectests/tests/spectest.rs @@ -336,6 +336,7 @@ mod tests { simd: true, threads: true, }, + nan_canonicalization: true, ..Default::default() }; let module = compile_with_config(&module.into_vec(), config) @@ -774,6 +775,7 @@ mod tests { simd: true, threads: true, }, + nan_canonicalization: true, ..Default::default() }; compile_with_config(&module.into_vec(), config) @@ -826,6 +828,7 @@ mod tests { simd: true, threads: true, }, + nan_canonicalization: true, ..Default::default() }; compile_with_config(&module.into_vec(), config) @@ -877,6 +880,7 @@ mod tests { simd: true, threads: true, }, + nan_canonicalization: true, ..Default::default() }; let module = compile_with_config(&module.into_vec(), config) @@ -972,6 +976,7 @@ mod tests { simd: true, threads: true, }, + nan_canonicalization: true, ..Default::default() }; let module = compile_with_config(&module.into_vec(), config) From da07ed5011a816a3c52beed107c2ca80ed5657e2 Mon Sep 17 00:00:00 2001 From: losfair Date: Mon, 16 Mar 2020 23:40:28 +0800 Subject: [PATCH 03/19] Add clif/llvm NaN spectest failure excludes. --- lib/spectests/spectests/wasmer.wast | 96 ++++++++++++++++++++++++++++- lib/spectests/tests/excludes.txt | 30 +++++++++ 2 files changed, 125 insertions(+), 1 deletion(-) diff --git a/lib/spectests/spectests/wasmer.wast b/lib/spectests/spectests/wasmer.wast index cf3841f9bfc..24183e2ecaf 100644 --- a/lib/spectests/spectests/wasmer.wast +++ b/lib/spectests/spectests/wasmer.wast @@ -28,6 +28,100 @@ (call_indirect (type $out-i32)) (return) ) + + ;; NaN canonicalization tests. + ;; Things that are covered by spectests canonicalization (`fabs`, `fneg`, `fcopysign`, `reinterpret`, `const`) won't be duplicated here. 
+ + (func (export "nan-canonicalization-f32-add") (param i32) (result i32) + (i32.reinterpret_f32 (f32.add (f32.reinterpret_i32 (get_local 0)) (f32.const 0))) + ) + (func (export "nan-canonicalization-f32-sub") (param i32) (result i32) + (i32.reinterpret_f32 (f32.sub (f32.reinterpret_i32 (get_local 0)) (f32.const 0))) + ) + (func (export "nan-canonicalization-f32-mul") (param i32) (result i32) + (i32.reinterpret_f32 (f32.mul (f32.reinterpret_i32 (get_local 0)) (f32.const 0))) + ) + (func (export "nan-canonicalization-f32-div") (param i32) (result i32) + (i32.reinterpret_f32 (f32.div (f32.reinterpret_i32 (get_local 0)) (f32.const 1))) + ) + (func (export "nan-canonicalization-f32-max") (param i32) (result i32) + (i32.reinterpret_f32 (f32.max (f32.reinterpret_i32 (get_local 0)) (f32.const 1))) + ) + (func (export "nan-canonicalization-f32-min") (param i32) (result i32) + (i32.reinterpret_f32 (f32.min (f32.reinterpret_i32 (get_local 0)) (f32.const 1))) + ) + (func (export "nan-canonicalization-f32-nearest") (param i32) (result i32) + (i32.reinterpret_f32 (f32.nearest (f32.reinterpret_i32 (get_local 0)))) + ) + (func (export "nan-canonicalization-f32-floor") (param i32) (result i32) + (i32.reinterpret_f32 (f32.floor (f32.reinterpret_i32 (get_local 0)))) + ) + (func (export "nan-canonicalization-f32-ceil") (param i32) (result i32) + (i32.reinterpret_f32 (f32.ceil (f32.reinterpret_i32 (get_local 0)))) + ) + (func (export "nan-canonicalization-f32-trunc") (param i32) (result i32) + (i32.reinterpret_f32 (f32.trunc (f32.reinterpret_i32 (get_local 0)))) + ) + (func (export "nan-canonicalization-f32-sqrt") (param i32) (result i32) + (i32.reinterpret_f32 (f32.sqrt (f32.reinterpret_i32 (get_local 0)))) + ) + + (func (export "nan-canonicalization-f64-add") (param i64) (result i64) + (i64.reinterpret_f64 (f64.add (f64.reinterpret_i64 (get_local 0)) (f64.const 0))) + ) + (func (export "nan-canonicalization-f64-sub") (param i64) (result i64) + (i64.reinterpret_f64 (f64.sub 
(f64.reinterpret_i64 (get_local 0)) (f64.const 0))) + ) + (func (export "nan-canonicalization-f64-mul") (param i64) (result i64) + (i64.reinterpret_f64 (f64.mul (f64.reinterpret_i64 (get_local 0)) (f64.const 0))) + ) + (func (export "nan-canonicalization-f64-div") (param i64) (result i64) + (i64.reinterpret_f64 (f64.div (f64.reinterpret_i64 (get_local 0)) (f64.const 1))) + ) + (func (export "nan-canonicalization-f64-max") (param i64) (result i64) + (i64.reinterpret_f64 (f64.max (f64.reinterpret_i64 (get_local 0)) (f64.const 1))) + ) + (func (export "nan-canonicalization-f64-min") (param i64) (result i64) + (i64.reinterpret_f64 (f64.min (f64.reinterpret_i64 (get_local 0)) (f64.const 1))) + ) + (func (export "nan-canonicalization-f64-nearest") (param i64) (result i64) + (i64.reinterpret_f64 (f64.nearest (f64.reinterpret_i64 (get_local 0)))) + ) + (func (export "nan-canonicalization-f64-floor") (param i64) (result i64) + (i64.reinterpret_f64 (f64.floor (f64.reinterpret_i64 (get_local 0)))) + ) + (func (export "nan-canonicalization-f64-ceil") (param i64) (result i64) + (i64.reinterpret_f64 (f64.ceil (f64.reinterpret_i64 (get_local 0)))) + ) + (func (export "nan-canonicalization-f64-trunc") (param i64) (result i64) + (i64.reinterpret_f64 (f64.trunc (f64.reinterpret_i64 (get_local 0)))) + ) + (func (export "nan-canonicalization-f64-sqrt") (param i64) (result i64) + (i64.reinterpret_f64 (f64.sqrt (f64.reinterpret_i64 (get_local 0)))) + ) ) -(assert_return (invoke "call-indirect-from-spilled-stack") (i32.const 0x132)) \ No newline at end of file +(assert_return (invoke "call-indirect-from-spilled-stack") (i32.const 0x132)) +(assert_return (invoke "nan-canonicalization-f32-add" (i32.const 0x7fc00001)) (i32.const 0x7fc00000)) +(assert_return (invoke "nan-canonicalization-f32-sub" (i32.const 0x7fc00001)) (i32.const 0x7fc00000)) +(assert_return (invoke "nan-canonicalization-f32-mul" (i32.const 0x7fc00001)) (i32.const 0x7fc00000)) +(assert_return (invoke 
"nan-canonicalization-f32-div" (i32.const 0x7fc00001)) (i32.const 0x7fc00000)) +(assert_return (invoke "nan-canonicalization-f32-max" (i32.const 0x7fc00001)) (i32.const 0x7fc00000)) +(assert_return (invoke "nan-canonicalization-f32-min" (i32.const 0x7fc00001)) (i32.const 0x7fc00000)) +(assert_return (invoke "nan-canonicalization-f32-nearest" (i32.const 0x7fc00001)) (i32.const 0x7fc00000)) +(assert_return (invoke "nan-canonicalization-f32-floor" (i32.const 0x7fc00001)) (i32.const 0x7fc00000)) +(assert_return (invoke "nan-canonicalization-f32-ceil" (i32.const 0x7fc00001)) (i32.const 0x7fc00000)) +(assert_return (invoke "nan-canonicalization-f32-trunc" (i32.const 0x7fc00001)) (i32.const 0x7fc00000)) +(assert_return (invoke "nan-canonicalization-f32-sqrt" (i32.const 0x7fc00001)) (i32.const 0x7fc00000)) + +(assert_return (invoke "nan-canonicalization-f64-add" (i64.const 0x7ff8000000000001)) (i64.const 0x7ff8000000000000)) +(assert_return (invoke "nan-canonicalization-f64-sub" (i64.const 0x7ff8000000000001)) (i64.const 0x7ff8000000000000)) +(assert_return (invoke "nan-canonicalization-f64-mul" (i64.const 0x7ff8000000000001)) (i64.const 0x7ff8000000000000)) +(assert_return (invoke "nan-canonicalization-f64-div" (i64.const 0x7ff8000000000001)) (i64.const 0x7ff8000000000000)) +(assert_return (invoke "nan-canonicalization-f64-max" (i64.const 0x7ff8000000000001)) (i64.const 0x7ff8000000000000)) +(assert_return (invoke "nan-canonicalization-f64-min" (i64.const 0x7ff8000000000001)) (i64.const 0x7ff8000000000000)) +(assert_return (invoke "nan-canonicalization-f64-nearest" (i64.const 0x7ff8000000000001)) (i64.const 0x7ff8000000000000)) +(assert_return (invoke "nan-canonicalization-f64-floor" (i64.const 0x7ff8000000000001)) (i64.const 0x7ff8000000000000)) +(assert_return (invoke "nan-canonicalization-f64-ceil" (i64.const 0x7ff8000000000001)) (i64.const 0x7ff8000000000000)) +(assert_return (invoke "nan-canonicalization-f64-trunc" (i64.const 0x7ff8000000000001)) (i64.const 
0x7ff8000000000000)) +(assert_return (invoke "nan-canonicalization-f64-sqrt" (i64.const 0x7ff8000000000001)) (i64.const 0x7ff8000000000000)) diff --git a/lib/spectests/tests/excludes.txt b/lib/spectests/tests/excludes.txt index 8d1ccdaa7a9..d64f5e25936 100644 --- a/lib/spectests/tests/excludes.txt +++ b/lib/spectests/tests/excludes.txt @@ -266,6 +266,30 @@ clif:fail:data.wast:266:windows # AssertUnlinkable - caught panic Any clif:fail:data.wast:186:windows # AssertUnlinkable - caught panic Any clif:fail:data.wast:194:windows # AssertUnlinkable - caught panic Any +# NaN canonicalization +clif:fail:wasmer.wast:105 # AssertReturn - result I32(2143289345) ("0x7fc00001") does not match expected I32(2143289344) ("0x7fc00000") +clif:fail:wasmer.wast:106 # AssertReturn - result I32(2143289345) ("0x7fc00001") does not match expected I32(2143289344) ("0x7fc00000") +clif:fail:wasmer.wast:107 # AssertReturn - result I32(2143289345) ("0x7fc00001") does not match expected I32(2143289344) ("0x7fc00000") +clif:fail:wasmer.wast:108 # AssertReturn - result I32(2143289345) ("0x7fc00001") does not match expected I32(2143289344) ("0x7fc00000") +clif:fail:wasmer.wast:109 # AssertReturn - result I32(2143289345) ("0x7fc00001") does not match expected I32(2143289344) ("0x7fc00000") +clif:fail:wasmer.wast:110 # AssertReturn - result I32(2143289345) ("0x7fc00001") does not match expected I32(2143289344) ("0x7fc00000") +clif:fail:wasmer.wast:111 # AssertReturn - result I32(2143289345) ("0x7fc00001") does not match expected I32(2143289344) ("0x7fc00000") +clif:fail:wasmer.wast:112 # AssertReturn - result I32(2143289345) ("0x7fc00001") does not match expected I32(2143289344) ("0x7fc00000") +clif:fail:wasmer.wast:113 # AssertReturn - result I32(2143289345) ("0x7fc00001") does not match expected I32(2143289344) ("0x7fc00000") +clif:fail:wasmer.wast:114 # AssertReturn - result I32(2143289345) ("0x7fc00001") does not match expected I32(2143289344) ("0x7fc00000") +clif:fail:wasmer.wast:115 # 
AssertReturn - result I32(2143289345) ("0x7fc00001") does not match expected I32(2143289344) ("0x7fc00000") +clif:fail:wasmer.wast:117 # AssertReturn - result I64(9221120237041090561) ("0x7ff8000000000001") does not match expected I64(9221120237041090560) ("0x7ff8000000000000") +clif:fail:wasmer.wast:118 # AssertReturn - result I64(9221120237041090561) ("0x7ff8000000000001") does not match expected I64(9221120237041090560) ("0x7ff8000000000000") +clif:fail:wasmer.wast:119 # AssertReturn - result I64(9221120237041090561) ("0x7ff8000000000001") does not match expected I64(9221120237041090560) ("0x7ff8000000000000") +clif:fail:wasmer.wast:120 # AssertReturn - result I64(9221120237041090561) ("0x7ff8000000000001") does not match expected I64(9221120237041090560) ("0x7ff8000000000000") +clif:fail:wasmer.wast:121 # AssertReturn - result I64(9221120237041090561) ("0x7ff8000000000001") does not match expected I64(9221120237041090560) ("0x7ff8000000000000") +clif:fail:wasmer.wast:122 # AssertReturn - result I64(9221120237041090561) ("0x7ff8000000000001") does not match expected I64(9221120237041090560) ("0x7ff8000000000000") +clif:fail:wasmer.wast:123 # AssertReturn - result I64(9221120237041090561) ("0x7ff8000000000001") does not match expected I64(9221120237041090560) ("0x7ff8000000000000") +clif:fail:wasmer.wast:124 # AssertReturn - result I64(9221120237041090561) ("0x7ff8000000000001") does not match expected I64(9221120237041090560) ("0x7ff8000000000000") +clif:fail:wasmer.wast:125 # AssertReturn - result I64(9221120237041090561) ("0x7ff8000000000001") does not match expected I64(9221120237041090560) ("0x7ff8000000000000") +clif:fail:wasmer.wast:126 # AssertReturn - result I64(9221120237041090561) ("0x7ff8000000000001") does not match expected I64(9221120237041090560) ("0x7ff8000000000000") +clif:fail:wasmer.wast:127 # AssertReturn - result I64(9221120237041090561) ("0x7ff8000000000001") does not match expected I64(9221120237041090560) ("0x7ff8000000000000") + # LLVM 
llvm:fail:linking.wast:388 # AssertReturn - Call failed RuntimeError: WebAssembly trap occurred during runtime: incorrect `call_indirect` signature @@ -302,6 +326,12 @@ llvm:skip:unwind.wast:*:windows llvm:skip:simd.wast:352:unix # Module - caught panic Any llvm:skip:simd_binaryen.wast:*:unix # Module - caught panic Any +# NaN canonicalization +llvm:fail:wasmer.wast:111 # AssertReturn - result I32(2143289345) ("0x7fc00001") does not match expected I32(2143289344) ("0x7fc00000") +llvm:fail:wasmer.wast:114 # AssertReturn - result I32(2143289345) ("0x7fc00001") does not match expected I32(2143289344) ("0x7fc00000") +llvm:fail:wasmer.wast:123 # AssertReturn - result I64(9221120237041090561) ("0x7ff8000000000001") does not match expected I64(9221120237041090560) ("0x7ff8000000000000") +llvm:fail:wasmer.wast:126 # AssertReturn - result I64(9221120237041090561) ("0x7ff8000000000001") does not match expected I64(9221120237041090560) ("0x7ff8000000000000") + # Singlepass singlepass:skip:simd.wast:* # SIMD not implemented singlepass:skip:simd_binaryen.wast:* # SIMD not implemented From ae9b321365e532c02cc93d3d84e593ac1ee0634e Mon Sep 17 00:00:00 2001 From: losfair Date: Mon, 16 Mar 2020 23:41:44 +0800 Subject: [PATCH 04/19] Update changelog. --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index fcfbd3eab48..c1f21bc94f3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ ## **[Unreleased]** +- [#1303](https://github.com/wasmerio/wasmer/pull/1303) NaN canonicalization for singlepass backend. - [#1301](https://github.com/wasmerio/wasmer/pull/1301) Update supported stable Rust version to 1.41.1. - [#1285](https://github.com/wasmerio/wasmer/pull/1285) Greatly improve errors in `wasmer-interface-types` - [#1283](https://github.com/wasmerio/wasmer/pull/1283) Workaround for floating point arguments and return values in `DynamicFunc`s. 
From 29b5223b3eb21f2531e269fbaf99bcedb3581793 Mon Sep 17 00:00:00 2001 From: Nick Lewycky Date: Mon, 16 Mar 2020 11:36:17 -0700 Subject: [PATCH 05/19] Canonicalize NANs produced by f.trunc and f.nearby. --- lib/llvm-backend/src/code.rs | 8 ++++---- lib/spectests/tests/excludes.txt | 8 +------- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/lib/llvm-backend/src/code.rs b/lib/llvm-backend/src/code.rs index cebde5daefa..822447076d5 100644 --- a/lib/llvm-backend/src/code.rs +++ b/lib/llvm-backend/src/code.rs @@ -3701,7 +3701,7 @@ impl<'ctx> FunctionCodeGenerator for LLVMFunctionCodeGenerator<'ct .try_as_basic_value() .left() .unwrap(); - state.push1_extra(res, i); + state.push1_extra(res, i | ExtraInfo::pending_f32_nan()); } Operator::F64Trunc => { let (v, i) = state.pop1_extra()?; @@ -3714,7 +3714,7 @@ impl<'ctx> FunctionCodeGenerator for LLVMFunctionCodeGenerator<'ct .try_as_basic_value() .left() .unwrap(); - state.push1_extra(res, i); + state.push1_extra(res, i | ExtraInfo::pending_f64_nan()); } Operator::F32Nearest => { let (v, i) = state.pop1_extra()?; @@ -3727,7 +3727,7 @@ impl<'ctx> FunctionCodeGenerator for LLVMFunctionCodeGenerator<'ct .try_as_basic_value() .left() .unwrap(); - state.push1_extra(res, i); + state.push1_extra(res, i | ExtraInfo::pending_f32_nan()); } Operator::F64Nearest => { let (v, i) = state.pop1_extra()?; @@ -3740,7 +3740,7 @@ impl<'ctx> FunctionCodeGenerator for LLVMFunctionCodeGenerator<'ct .try_as_basic_value() .left() .unwrap(); - state.push1_extra(res, i); + state.push1_extra(res, i | ExtraInfo::pending_f64_nan()); } Operator::F32Abs => { let (v, i) = state.pop1_extra()?; diff --git a/lib/spectests/tests/excludes.txt b/lib/spectests/tests/excludes.txt index d64f5e25936..88568745532 100644 --- a/lib/spectests/tests/excludes.txt +++ b/lib/spectests/tests/excludes.txt @@ -326,12 +326,6 @@ llvm:skip:unwind.wast:*:windows llvm:skip:simd.wast:352:unix # Module - caught panic Any llvm:skip:simd_binaryen.wast:*:unix # Module - 
caught panic Any -# NaN canonicalization -llvm:fail:wasmer.wast:111 # AssertReturn - result I32(2143289345) ("0x7fc00001") does not match expected I32(2143289344) ("0x7fc00000") -llvm:fail:wasmer.wast:114 # AssertReturn - result I32(2143289345) ("0x7fc00001") does not match expected I32(2143289344) ("0x7fc00000") -llvm:fail:wasmer.wast:123 # AssertReturn - result I64(9221120237041090561) ("0x7ff8000000000001") does not match expected I64(9221120237041090560) ("0x7ff8000000000000") -llvm:fail:wasmer.wast:126 # AssertReturn - result I64(9221120237041090561) ("0x7ff8000000000001") does not match expected I64(9221120237041090560) ("0x7ff8000000000000") - # Singlepass singlepass:skip:simd.wast:* # SIMD not implemented singlepass:skip:simd_binaryen.wast:* # SIMD not implemented @@ -515,4 +509,4 @@ singlepass:fail:traps.wast:53:*:aarch64 # AssertTrap - expected trap, got [] singlepass:fail:traps.wast:54:*:aarch64 # AssertTrap - expected trap, got [] singlepass:fail:traps.wast:55:*:aarch64 # AssertTrap - expected trap, got [] singlepass:fail:traps.wast:56:*:aarch64 # AssertTrap - expected trap, got [] -singlepass:fail:traps.wast:57:*:aarch64 # AssertTrap - expected trap, got [] \ No newline at end of file +singlepass:fail:traps.wast:57:*:aarch64 # AssertTrap - expected trap, got [] From 3ee7f43b1c0a0cbaeb0fa8fdd4eca3b4e0e0802a Mon Sep 17 00:00:00 2001 From: losfair Date: Tue, 17 Mar 2020 13:09:22 +0800 Subject: [PATCH 06/19] Enable nan canonicalization for cranelift backend. 
--- lib/clif-backend/src/code.rs | 30 +++++++++++++++++++++--------- lib/clif-backend/src/lib.rs | 11 ++++++++--- lib/clif-backend/src/trampoline.rs | 6 ++---- 3 files changed, 31 insertions(+), 16 deletions(-) diff --git a/lib/clif-backend/src/code.rs b/lib/clif-backend/src/code.rs index ada71d0a394..3b9bdf3940c 100644 --- a/lib/clif-backend/src/code.rs +++ b/lib/clif-backend/src/code.rs @@ -19,7 +19,7 @@ use std::mem; use std::sync::{Arc, RwLock}; use wasmer_runtime_core::error::CompileError; use wasmer_runtime_core::{ - backend::{CacheGen, Token}, + backend::{CacheGen, CompilerConfig, Token}, cache::{Artifact, Error as CacheError}, codegen::*, memory::MemoryType, @@ -36,7 +36,7 @@ use wasmparser::Type as WpType; static BACKEND_ID: &str = "cranelift"; pub struct CraneliftModuleCodeGenerator { - isa: Box, + isa: Option>, signatures: Option>>, pub clif_signatures: Map, function_signatures: Option>>, @@ -47,9 +47,8 @@ impl ModuleCodeGenerator for CraneliftModuleCodeGenerator { fn new() -> Self { - let isa = get_isa(); CraneliftModuleCodeGenerator { - isa, + isa: None, clif_signatures: Map::new(), functions: vec![], function_signatures: None, @@ -100,7 +99,7 @@ impl ModuleCodeGenerator position: Position::default(), func_env: FunctionEnvironment { module_info: Arc::clone(&module_info), - target_config: self.isa.frontend_config().clone(), + target_config: self.isa.as_ref().unwrap().frontend_config().clone(), clif_signatures: self.clif_signatures.clone(), }, loc, @@ -162,9 +161,9 @@ impl ModuleCodeGenerator } let (func_resolver_builder, debug_metadata, handler_data) = - FuncResolverBuilder::new(&*self.isa, func_bodies, module_info)?; + FuncResolverBuilder::new(&**self.isa.as_ref().unwrap(), func_bodies, module_info)?; - let trampolines = Arc::new(Trampolines::new(&*self.isa, module_info)); + let trampolines = Arc::new(Trampolines::new(&**self.isa.as_ref().unwrap(), module_info)); let signatures_empty = Map::new(); let signatures = if self.signatures.is_some() { @@ 
-191,9 +190,19 @@ impl ModuleCodeGenerator )) } + fn feed_compiler_config(&mut self, config: &CompilerConfig) -> Result<(), CodegenError> { + self.isa = Some(get_isa(Some(config))); + Ok(()) + } + fn feed_signatures(&mut self, signatures: Map) -> Result<(), CodegenError> { self.signatures = Some(Arc::new(signatures)); - let call_conv = self.isa.frontend_config().default_call_conv; + let call_conv = self + .isa + .as_ref() + .unwrap() + .frontend_config() + .default_call_conv; for (_sig_idx, func_sig) in self.signatures.as_ref().unwrap().iter() { self.clif_signatures .push(convert_func_sig(func_sig, call_conv)); @@ -1302,7 +1311,10 @@ fn generate_signature( } fn pointer_type(mcg: &CraneliftModuleCodeGenerator) -> ir::Type { - ir::Type::int(u16::from(mcg.isa.frontend_config().pointer_bits())).unwrap() + ir::Type::int(u16::from( + mcg.isa.as_ref().unwrap().frontend_config().pointer_bits(), + )) + .unwrap() } /// Declare local variables for the signature parameters that correspond to WebAssembly locals. 
diff --git a/lib/clif-backend/src/lib.rs b/lib/clif-backend/src/lib.rs index 95fd334a4ad..c4fb65736c3 100644 --- a/lib/clif-backend/src/lib.rs +++ b/lib/clif-backend/src/lib.rs @@ -29,6 +29,7 @@ use cranelift_codegen::{ settings::{self, Configurable}, }; use target_lexicon::Triple; +use wasmer_runtime_core::{backend::CompilerConfig, codegen::SimpleStreamingCompilerGen}; #[macro_use] extern crate serde_derive; @@ -36,7 +37,7 @@ extern crate serde_derive; extern crate rayon; extern crate serde; -fn get_isa() -> Box { +fn get_isa(config: Option<&CompilerConfig>) -> Box { let flags = { let mut builder = settings::builder(); builder.set("opt_level", "speed_and_size").unwrap(); @@ -48,6 +49,12 @@ fn get_isa() -> Box { builder.set("enable_verifier", "false").unwrap(); } + if let Some(config) = config { + if config.nan_canonicalization { + builder.set("enable_nan_canonicalization", "true").unwrap(); + } + } + let flags = settings::Flags::new(builder); debug_assert_eq!(flags.opt_level(), settings::OptLevel::SpeedAndSize); flags @@ -58,8 +65,6 @@ fn get_isa() -> Box { /// The current version of this crate pub const VERSION: &str = env!("CARGO_PKG_VERSION"); -use wasmer_runtime_core::codegen::SimpleStreamingCompilerGen; - /// Streaming compiler implementation for the Cranelift backed. Compiles web assembly binary into /// machine code. 
pub type CraneliftCompiler = SimpleStreamingCompilerGen< diff --git a/lib/clif-backend/src/trampoline.rs b/lib/clif-backend/src/trampoline.rs index 70854c1ea90..6b6c5d7ea08 100644 --- a/lib/clif-backend/src/trampoline.rs +++ b/lib/clif-backend/src/trampoline.rs @@ -212,8 +212,7 @@ fn wasm_ty_to_clif(ty: Type) -> ir::types::Type { } fn generate_trampoline_signature() -> ir::Signature { - let isa = super::get_isa(); - let call_convention = isa.default_call_conv(); + let call_convention = super::get_isa(None).default_call_conv(); let mut sig = ir::Signature::new(call_convention); let ptr_param = ir::AbiParam { @@ -229,8 +228,7 @@ fn generate_trampoline_signature() -> ir::Signature { } fn generate_export_signature(func_sig: &FuncSig) -> ir::Signature { - let isa = super::get_isa(); - let call_convention = isa.default_call_conv(); + let call_convention = super::get_isa(None).default_call_conv(); let mut export_clif_sig = ir::Signature::new(call_convention); let func_sig_iter = func_sig.params().iter().map(|wasm_ty| ir::AbiParam { From 4357c1504616f1989c67d4e81192ab305f211bb4 Mon Sep 17 00:00:00 2001 From: losfair Date: Tue, 17 Mar 2020 13:52:11 +0800 Subject: [PATCH 07/19] Fix missing canonicalizations. 
--- lib/singlepass-backend/src/codegen_x64.rs | 802 ++++++++++++---------- 1 file changed, 422 insertions(+), 380 deletions(-) diff --git a/lib/singlepass-backend/src/codegen_x64.rs b/lib/singlepass-backend/src/codegen_x64.rs index dbfb66c431a..8782e0cc48c 100644 --- a/lib/singlepass-backend/src/codegen_x64.rs +++ b/lib/singlepass-backend/src/codegen_x64.rs @@ -294,6 +294,15 @@ enum CanonicalizeType { F64, } +impl CanonicalizeType { + fn to_size(&self) -> Size { + match self { + CanonicalizeType::F32 => Size::S32, + CanonicalizeType::F64 => Size::S64, + } + } +} + trait PopMany { fn peek1(&self) -> Result<&T, CodegenError>; fn pop1(&mut self) -> Result; @@ -330,6 +339,19 @@ impl PopMany for Vec { } } +trait WpTypeExt { + fn is_float(&self) -> bool; +} + +impl WpTypeExt for WpType { + fn is_float(&self) -> bool { + match self { + WpType::F32 | WpType::F64 => true, + _ => false, + } + } +} + enum FuncPtrInner {} #[repr(transparent)] #[derive(Copy, Clone, Debug)] @@ -2784,6 +2806,8 @@ impl FunctionCodeGenerator for X64FunctionCode { module_info: &ModuleInfo, _source_loc: u32, ) -> Result<(), CodegenError> { + assert!(self.fp_stack.len() <= self.value_stack.len()); + let a = self.assembler.as_mut().unwrap(); match ev { @@ -2951,12 +2975,9 @@ impl FunctionCodeGenerator for X64FunctionCode { Location::GPR(tmp), ); let ty = type_to_wp_type(module_info.globals[local_index].desc.ty); - match ty { - WpType::F32 | WpType::F64 => { - self.fp_stack - .push(FloatValue::new(self.value_stack.len() - 1)); - } - _ => {} + if ty.is_float() { + self.fp_stack + .push(FloatValue::new(self.value_stack.len() - 1)); } self.machine.acquire_locations( a, @@ -2979,12 +3000,9 @@ impl FunctionCodeGenerator for X64FunctionCode { Location::GPR(tmp), ); let ty = type_to_wp_type(module_info.imported_globals[import_index].1.ty); - match ty { - WpType::F32 | WpType::F64 => { - self.fp_stack - .push(FloatValue::new(self.value_stack.len() - 1)); - } - _ => {} + if ty.is_float() { + self.fp_stack + 
.push(FloatValue::new(self.value_stack.len() - 1)); } self.machine.acquire_locations( a, @@ -3053,36 +3071,24 @@ impl FunctionCodeGenerator for X64FunctionCode { Location::Memory(tmp, (global_index as i32) * 8), Location::GPR(tmp), ); - match ty { - WpType::F32 | WpType::F64 => { - let fp = self.fp_stack.pop1()?; - if a.arch_supports_canonicalize_nan() - && self.config.nan_canonicalization - && fp.canonicalization.is_some() - { - Self::canonicalize_nan( - a, - &mut self.machine, - match ty { - WpType::F32 => Size::S32, - WpType::F64 => Size::S64, - _ => unreachable!(), - }, - loc, - Location::Memory(tmp, LocalGlobal::offset_data() as i32), - ); - } else { - Self::emit_relaxed_binop( - a, - &mut self.machine, - Assembler::emit_mov, - Size::S64, - loc, - Location::Memory(tmp, LocalGlobal::offset_data() as i32), - ); - } - } - _ => { + if ty.is_float() { + let fp = self.fp_stack.pop1()?; + if a.arch_supports_canonicalize_nan() + && self.config.nan_canonicalization + && fp.canonicalization.is_some() + { + Self::canonicalize_nan( + a, + &mut self.machine, + match ty { + WpType::F32 => Size::S32, + WpType::F64 => Size::S64, + _ => unreachable!(), + }, + loc, + Location::Memory(tmp, LocalGlobal::offset_data() as i32), + ); + } else { Self::emit_relaxed_binop( a, &mut self.machine, @@ -3092,6 +3098,15 @@ impl FunctionCodeGenerator for X64FunctionCode { Location::Memory(tmp, LocalGlobal::offset_data() as i32), ); } + } else { + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + Location::Memory(tmp, LocalGlobal::offset_data() as i32), + ); } self.machine.release_temp_gpr(tmp); } @@ -3111,12 +3126,9 @@ impl FunctionCodeGenerator for X64FunctionCode { ret, ); self.value_stack.push(ret); - match self.local_types[local_index] { - WpType::F32 | WpType::F64 => { - self.fp_stack - .push(FloatValue::new(self.value_stack.len() - 1)); - } - _ => {} + if self.local_types[local_index].is_float() { + self.fp_stack + 
.push(FloatValue::new(self.value_stack.len() - 1)); } } Operator::LocalSet { local_index } => { @@ -3124,36 +3136,24 @@ impl FunctionCodeGenerator for X64FunctionCode { let loc = get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); - match self.local_types[local_index] { - WpType::F32 | WpType::F64 => { - let fp = self.fp_stack.pop1()?; - if a.arch_supports_canonicalize_nan() - && self.config.nan_canonicalization - && fp.canonicalization.is_some() - { - Self::canonicalize_nan( - a, - &mut self.machine, - match self.local_types[local_index] { - WpType::F32 => Size::S32, - WpType::F64 => Size::S64, - _ => unreachable!(), - }, - loc, - self.locals[local_index], - ); - } else { - Self::emit_relaxed_binop( - a, - &mut self.machine, - Assembler::emit_mov, - Size::S64, - loc, - self.locals[local_index], - ); - } - } - _ => { + if self.local_types[local_index].is_float() { + let fp = self.fp_stack.pop1()?; + if a.arch_supports_canonicalize_nan() + && self.config.nan_canonicalization + && fp.canonicalization.is_some() + { + Self::canonicalize_nan( + a, + &mut self.machine, + match self.local_types[local_index] { + WpType::F32 => Size::S32, + WpType::F64 => Size::S64, + _ => unreachable!(), + }, + loc, + self.locals[local_index], + ); + } else { Self::emit_relaxed_binop( a, &mut self.machine, @@ -3163,42 +3163,39 @@ impl FunctionCodeGenerator for X64FunctionCode { self.locals[local_index], ); } + } else { + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + self.locals[local_index], + ); } } Operator::LocalTee { local_index } => { let local_index = local_index as usize; let loc = *self.value_stack.last().unwrap(); - match self.local_types[local_index] { - WpType::F32 | WpType::F64 => { - let fp = self.fp_stack.peek1()?; - if a.arch_supports_canonicalize_nan() - && self.config.nan_canonicalization - && fp.canonicalization.is_some() - { - Self::canonicalize_nan( - a, - &mut self.machine, - match 
self.local_types[local_index] { - WpType::F32 => Size::S32, - WpType::F64 => Size::S64, - _ => unreachable!(), - }, - loc, - self.locals[local_index], - ); - } else { - Self::emit_relaxed_binop( - a, - &mut self.machine, - Assembler::emit_mov, - Size::S64, - loc, - self.locals[local_index], - ); - } - } - _ => { + if self.local_types[local_index].is_float() { + let fp = self.fp_stack.peek1()?; + if a.arch_supports_canonicalize_nan() + && self.config.nan_canonicalization + && fp.canonicalization.is_some() + { + Self::canonicalize_nan( + a, + &mut self.machine, + match self.local_types[local_index] { + WpType::F32 => Size::S32, + WpType::F64 => Size::S64, + _ => unreachable!(), + }, + loc, + self.locals[local_index], + ); + } else { Self::emit_relaxed_binop( a, &mut self.machine, @@ -3208,6 +3205,15 @@ impl FunctionCodeGenerator for X64FunctionCode { self.locals[local_index], ); } + } else { + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + self.locals[local_index], + ); } } Operator::I32Const { value } => { @@ -4600,8 +4606,6 @@ impl FunctionCodeGenerator for X64FunctionCode { } Operator::F32Copysign => { - // Preserve canonicalization state. 
- let loc_b = get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); let loc_a = @@ -4613,10 +4617,31 @@ impl FunctionCodeGenerator for X64FunctionCode { )[0]; self.value_stack.push(ret); + let (fp_src1, fp_src2) = self.fp_stack.pop2()?; + self.fp_stack + .push(FloatValue::new(self.value_stack.len() - 1)); + let tmp1 = self.machine.acquire_temp_gpr().unwrap(); let tmp2 = self.machine.acquire_temp_gpr().unwrap(); - a.emit_mov(Size::S32, loc_a, Location::GPR(tmp1)); - a.emit_mov(Size::S32, loc_b, Location::GPR(tmp2)); + + if a.arch_supports_canonicalize_nan() && self.config.nan_canonicalization { + for (fp, loc, tmp) in [(fp_src1, loc_a, tmp1), (fp_src2, loc_b, tmp2)].iter() { + match fp.canonicalization { + Some(_) => { + Self::canonicalize_nan( + a, + &mut self.machine, + Size::S32, + *loc, + Location::GPR(*tmp), + ); + } + None => { + a.emit_mov(Size::S32, *loc, Location::GPR(*tmp)); + } + } + } + } a.emit_and( Size::S32, Location::Imm32(0x7fffffffu32), @@ -5120,8 +5145,6 @@ impl FunctionCodeGenerator for X64FunctionCode { } Operator::F64Copysign => { - // Preserve canonicalization state. 
- let loc_b = get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); let loc_a = @@ -5133,12 +5156,33 @@ impl FunctionCodeGenerator for X64FunctionCode { )[0]; self.value_stack.push(ret); + let (fp_src1, fp_src2) = self.fp_stack.pop2()?; + self.fp_stack + .push(FloatValue::new(self.value_stack.len() - 1)); + let tmp1 = self.machine.acquire_temp_gpr().unwrap(); let tmp2 = self.machine.acquire_temp_gpr().unwrap(); - let c = self.machine.acquire_temp_gpr().unwrap(); - a.emit_mov(Size::S64, loc_a, Location::GPR(tmp1)); - a.emit_mov(Size::S64, loc_b, Location::GPR(tmp2)); + if a.arch_supports_canonicalize_nan() && self.config.nan_canonicalization { + for (fp, loc, tmp) in [(fp_src1, loc_a, tmp1), (fp_src2, loc_b, tmp2)].iter() { + match fp.canonicalization { + Some(_) => { + Self::canonicalize_nan( + a, + &mut self.machine, + Size::S64, + *loc, + Location::GPR(*tmp), + ); + } + None => { + a.emit_mov(Size::S64, *loc, Location::GPR(*tmp)); + } + } + } + } + + let c = self.machine.acquire_temp_gpr().unwrap(); a.emit_mov( Size::S64, @@ -6861,15 +6905,12 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); - match return_types[0] { - WpType::F32 | WpType::F64 => { - a.emit_mov(Size::S64, Location::XMM(XMM::XMM0), ret); - self.fp_stack - .push(FloatValue::new(self.value_stack.len() - 1)); - } - _ => { - a.emit_mov(Size::S64, Location::GPR(GPR::RAX), ret); - } + if return_types[0].is_float() { + a.emit_mov(Size::S64, Location::XMM(XMM::XMM0), ret); + self.fp_stack + .push(FloatValue::new(self.value_stack.len() - 1)); + } else { + a.emit_mov(Size::S64, Location::GPR(GPR::RAX), ret); } } } @@ -7017,15 +7058,12 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); - match return_types[0] { - WpType::F32 | WpType::F64 => { - a.emit_mov(Size::S64, Location::XMM(XMM::XMM0), ret); - self.fp_stack - .push(FloatValue::new(self.value_stack.len() - 1)); - } - _ => { - 
a.emit_mov(Size::S64, Location::GPR(GPR::RAX), ret); - } + if return_types[0].is_float() { + a.emit_mov(Size::S64, Location::XMM(XMM::XMM0), ret); + self.fp_stack + .push(FloatValue::new(self.value_stack.len() - 1)); + } else { + a.emit_mov(Size::S64, Location::GPR(GPR::RAX), ret); } } } @@ -7074,36 +7112,24 @@ impl FunctionCodeGenerator for X64FunctionCode { if !was_unreachable && frame.returns.len() > 0 { let loc = *self.value_stack.last().unwrap(); - match frame.returns[0] { - WpType::F32 | WpType::F64 => { - let fp = self.fp_stack.peek1()?; - if a.arch_supports_canonicalize_nan() - && self.config.nan_canonicalization - && fp.canonicalization.is_some() - { - Self::canonicalize_nan( - a, - &mut self.machine, - match frame.returns[0] { - WpType::F32 => Size::S32, - WpType::F64 => Size::S64, - _ => unreachable!(), - }, - loc, - Location::GPR(GPR::RAX), - ); - } else { - Self::emit_relaxed_binop( - a, - &mut self.machine, - Assembler::emit_mov, - Size::S64, - loc, - Location::GPR(GPR::RAX), - ); - } - } - _ => { + if frame.returns[0].is_float() { + let fp = self.fp_stack.peek1()?; + if a.arch_supports_canonicalize_nan() + && self.config.nan_canonicalization + && fp.canonicalization.is_some() + { + Self::canonicalize_nan( + a, + &mut self.machine, + match frame.returns[0] { + WpType::F32 => Size::S32, + WpType::F64 => Size::S64, + _ => unreachable!(), + }, + loc, + Location::GPR(GPR::RAX), + ); + } else { Self::emit_relaxed_binop( a, &mut self.machine, @@ -7113,6 +7139,15 @@ impl FunctionCodeGenerator for X64FunctionCode { Location::GPR(GPR::RAX), ); } + } else { + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + Location::GPR(GPR::RAX), + ); } } @@ -7177,11 +7212,7 @@ impl FunctionCodeGenerator for X64FunctionCode { if a.arch_supports_canonicalize_nan() && self.config.nan_canonicalization => { - let sz = match fp { - CanonicalizeType::F32 => Size::S32, - CanonicalizeType::F64 => Size::S64, - }; - 
Self::canonicalize_nan(a, &mut self.machine, sz, v_a, ret); + Self::canonicalize_nan(a, &mut self.machine, fp.to_size(), v_a, ret); } _ => { if v_a != ret { @@ -7203,11 +7234,7 @@ impl FunctionCodeGenerator for X64FunctionCode { if a.arch_supports_canonicalize_nan() && self.config.nan_canonicalization => { - let sz = match fp { - CanonicalizeType::F32 => Size::S32, - CanonicalizeType::F64 => Size::S64, - }; - Self::canonicalize_nan(a, &mut self.machine, sz, v_b, ret); + Self::canonicalize_nan(a, &mut self.machine, fp.to_size(), v_b, ret); } _ => { if v_b != ret { @@ -7630,6 +7657,8 @@ impl FunctionCodeGenerator for X64FunctionCode { get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); let target_addr = get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let fp = self.fp_stack.pop1()?; + let config_nan_canonicalization = self.config.nan_canonicalization; Self::emit_memory_op( module_info, @@ -7642,17 +7671,31 @@ impl FunctionCodeGenerator for X64FunctionCode { false, 4, |a, m, addr| { - Self::emit_relaxed_binop( - a, - m, - Assembler::emit_mov, - Size::S32, - target_value, - Location::Memory(addr, 0), - ); - Ok(()) - }, - )?; + if !a.arch_supports_canonicalize_nan() + || !config_nan_canonicalization + || fp.canonicalization.is_none() + { + Self::emit_relaxed_binop( + a, + m, + Assembler::emit_mov, + Size::S32, + target_value, + Location::Memory(addr, 0), + ); + } else { + Self::canonicalize_nan( + a, + m, + Size::S32, + target_value, + Location::Memory(addr, 0), + ); + } + + Ok(()) + }, + )?; } Operator::I32Store8 { ref memarg } => { let target_value = @@ -8032,6 +8075,8 @@ impl FunctionCodeGenerator for X64FunctionCode { get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); let target_addr = get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let fp = self.fp_stack.pop1()?; + let config_nan_canonicalization = self.config.nan_canonicalization; 
Self::emit_memory_op( module_info, @@ -8044,14 +8089,27 @@ impl FunctionCodeGenerator for X64FunctionCode { false, 8, |a, m, addr| { - Self::emit_relaxed_binop( - a, - m, - Assembler::emit_mov, - Size::S64, - target_value, - Location::Memory(addr, 0), - ); + if !a.arch_supports_canonicalize_nan() + || !config_nan_canonicalization + || fp.canonicalization.is_none() + { + Self::emit_relaxed_binop( + a, + m, + Assembler::emit_mov, + Size::S64, + target_value, + Location::Memory(addr, 0), + ); + } else { + Self::canonicalize_nan( + a, + m, + Size::S64, + target_value, + Location::Memory(addr, 0), + ); + } Ok(()) }, )?; @@ -8162,36 +8220,24 @@ impl FunctionCodeGenerator for X64FunctionCode { }); } let loc = *self.value_stack.last().unwrap(); - match frame.returns[0] { - WpType::F32 | WpType::F64 => { - let fp = self.fp_stack.peek1()?; - if a.arch_supports_canonicalize_nan() - && self.config.nan_canonicalization - && fp.canonicalization.is_some() - { - Self::canonicalize_nan( - a, - &mut self.machine, - match frame.returns[0] { - WpType::F32 => Size::S32, - WpType::F64 => Size::S64, - _ => unreachable!(), - }, - loc, - Location::GPR(GPR::RAX), - ); - } else { - Self::emit_relaxed_binop( - a, - &mut self.machine, - Assembler::emit_mov, - Size::S64, - loc, - Location::GPR(GPR::RAX), - ); - } - } - _ => { + if frame.returns[0].is_float() { + let fp = self.fp_stack.peek1()?; + if a.arch_supports_canonicalize_nan() + && self.config.nan_canonicalization + && fp.canonicalization.is_some() + { + Self::canonicalize_nan( + a, + &mut self.machine, + match frame.returns[0] { + WpType::F32 => Size::S32, + WpType::F64 => Size::S64, + _ => unreachable!(), + }, + loc, + Location::GPR(GPR::RAX), + ); + } else { Self::emit_relaxed_binop( a, &mut self.machine, @@ -8201,6 +8247,15 @@ impl FunctionCodeGenerator for X64FunctionCode { Location::GPR(GPR::RAX), ); } + } else { + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + 
Location::GPR(GPR::RAX), + ); } } let released = &self.value_stack[frame.value_stack_depth..]; @@ -8219,38 +8274,35 @@ impl FunctionCodeGenerator for X64FunctionCode { } let loc = *self.value_stack.last().unwrap(); - match frame.returns[0] { - WpType::F32 | WpType::F64 => { - let fp = self.fp_stack.peek1()?; - if a.arch_supports_canonicalize_nan() - && self.config.nan_canonicalization - && fp.canonicalization.is_some() - { - Self::canonicalize_nan( - a, - &mut self.machine, - match frame.returns[0] { - WpType::F32 => Size::S32, - WpType::F64 => Size::S64, - _ => unreachable!(), - }, - loc, - Location::GPR(GPR::RAX), - ); - } else { - Self::emit_relaxed_binop( - a, - &mut self.machine, - Assembler::emit_mov, - Size::S64, - loc, - Location::GPR(GPR::RAX), - ); - } - } - _ => { - a.emit_mov(Size::S64, loc, Location::GPR(GPR::RAX)); + if frame.returns[0].is_float() { + let fp = self.fp_stack.peek1()?; + if a.arch_supports_canonicalize_nan() + && self.config.nan_canonicalization + && fp.canonicalization.is_some() + { + Self::canonicalize_nan( + a, + &mut self.machine, + match frame.returns[0] { + WpType::F32 => Size::S32, + WpType::F64 => Size::S64, + _ => unreachable!(), + }, + loc, + Location::GPR(GPR::RAX), + ); + } else { + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + Location::GPR(GPR::RAX), + ); } + } else { + a.emit_mov(Size::S64, loc, Location::GPR(GPR::RAX)); } } @@ -8282,38 +8334,35 @@ impl FunctionCodeGenerator for X64FunctionCode { }); } let loc = *self.value_stack.last().unwrap(); - match frame.returns[0] { - WpType::F32 | WpType::F64 => { - let fp = self.fp_stack.peek1()?; - if a.arch_supports_canonicalize_nan() - && self.config.nan_canonicalization - && fp.canonicalization.is_some() - { - Self::canonicalize_nan( - a, - &mut self.machine, - match frame.returns[0] { - WpType::F32 => Size::S32, - WpType::F64 => Size::S64, - _ => unreachable!(), - }, - loc, - Location::GPR(GPR::RAX), - ); - } else { - 
Self::emit_relaxed_binop( - a, - &mut self.machine, - Assembler::emit_mov, - Size::S64, - loc, - Location::GPR(GPR::RAX), - ); - } - } - _ => { - a.emit_mov(Size::S64, loc, Location::GPR(GPR::RAX)); + if frame.returns[0].is_float() { + let fp = self.fp_stack.peek1()?; + if a.arch_supports_canonicalize_nan() + && self.config.nan_canonicalization + && fp.canonicalization.is_some() + { + Self::canonicalize_nan( + a, + &mut self.machine, + match frame.returns[0] { + WpType::F32 => Size::S32, + WpType::F64 => Size::S64, + _ => unreachable!(), + }, + loc, + Location::GPR(GPR::RAX), + ); + } else { + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + Location::GPR(GPR::RAX), + ); } + } else { + a.emit_mov(Size::S64, loc, Location::GPR(GPR::RAX)); } } let released = &self.value_stack[frame.value_stack_depth..]; @@ -8365,38 +8414,35 @@ impl FunctionCodeGenerator for X64FunctionCode { }); } let loc = *self.value_stack.last().unwrap(); - match frame.returns[0] { - WpType::F32 | WpType::F64 => { - let fp = self.fp_stack.peek1()?; - if a.arch_supports_canonicalize_nan() - && self.config.nan_canonicalization - && fp.canonicalization.is_some() - { - Self::canonicalize_nan( - a, - &mut self.machine, - match frame.returns[0] { - WpType::F32 => Size::S32, - WpType::F64 => Size::S64, - _ => unreachable!(), - }, - loc, - Location::GPR(GPR::RAX), - ); - } else { - Self::emit_relaxed_binop( - a, - &mut self.machine, - Assembler::emit_mov, - Size::S64, - loc, - Location::GPR(GPR::RAX), - ); - } - } - _ => { - a.emit_mov(Size::S64, loc, Location::GPR(GPR::RAX)); + if frame.returns[0].is_float() { + let fp = self.fp_stack.peek1()?; + if a.arch_supports_canonicalize_nan() + && self.config.nan_canonicalization + && fp.canonicalization.is_some() + { + Self::canonicalize_nan( + a, + &mut self.machine, + match frame.returns[0] { + WpType::F32 => Size::S32, + WpType::F64 => Size::S64, + _ => unreachable!(), + }, + loc, + Location::GPR(GPR::RAX), + ); 
+ } else { + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + Location::GPR(GPR::RAX), + ); } + } else { + a.emit_mov(Size::S64, loc, Location::GPR(GPR::RAX)); } } let released = &self.value_stack[frame.value_stack_depth..]; @@ -8415,61 +8461,7 @@ impl FunctionCodeGenerator for X64FunctionCode { }); } let loc = *self.value_stack.last().unwrap(); - match frame.returns[0] { - WpType::F32 | WpType::F64 => { - let fp = self.fp_stack.peek1()?; - if a.arch_supports_canonicalize_nan() - && self.config.nan_canonicalization - && fp.canonicalization.is_some() - { - Self::canonicalize_nan( - a, - &mut self.machine, - match frame.returns[0] { - WpType::F32 => Size::S32, - WpType::F64 => Size::S64, - _ => unreachable!(), - }, - loc, - Location::GPR(GPR::RAX), - ); - } else { - Self::emit_relaxed_binop( - a, - &mut self.machine, - Assembler::emit_mov, - Size::S64, - loc, - Location::GPR(GPR::RAX), - ); - } - } - _ => { - a.emit_mov(Size::S64, loc, Location::GPR(GPR::RAX)); - } - } - } - let released = &self.value_stack[frame.value_stack_depth..]; - self.machine.release_locations_keep_state(a, released); - a.emit_jmp(Condition::None, frame.label); - } - - a.emit_label(table_label); - for x in table { - a.emit_jmp(Condition::None, x); - } - self.unreachable_depth = 1; - } - Operator::Drop => { - get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); - } - Operator::End => { - let frame = self.control_stack.pop().unwrap(); - - if !was_unreachable && frame.returns.len() > 0 { - let loc = *self.value_stack.last().unwrap(); - match frame.returns[0] { - WpType::F32 | WpType::F64 => { + if frame.returns[0].is_float() { let fp = self.fp_stack.peek1()?; if a.arch_supports_canonicalize_nan() && self.config.nan_canonicalization @@ -8496,8 +8488,52 @@ impl FunctionCodeGenerator for X64FunctionCode { Location::GPR(GPR::RAX), ); } + } else { + a.emit_mov(Size::S64, loc, Location::GPR(GPR::RAX)); } - _ => { + } + let 
released = &self.value_stack[frame.value_stack_depth..]; + self.machine.release_locations_keep_state(a, released); + a.emit_jmp(Condition::None, frame.label); + } + + a.emit_label(table_label); + for x in table { + a.emit_jmp(Condition::None, x); + } + self.unreachable_depth = 1; + } + Operator::Drop => { + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + if let Some(x) = self.fp_stack.last() { + if x.depth == self.value_stack.len() { + self.fp_stack.pop1()?; + } + } + } + Operator::End => { + let frame = self.control_stack.pop().unwrap(); + + if !was_unreachable && frame.returns.len() > 0 { + let loc = *self.value_stack.last().unwrap(); + if frame.returns[0].is_float() { + let fp = self.fp_stack.peek1()?; + if a.arch_supports_canonicalize_nan() + && self.config.nan_canonicalization + && fp.canonicalization.is_some() + { + Self::canonicalize_nan( + a, + &mut self.machine, + match frame.returns[0] { + WpType::F32 => Size::S32, + WpType::F64 => Size::S64, + _ => unreachable!(), + }, + loc, + Location::GPR(GPR::RAX), + ); + } else { Self::emit_relaxed_binop( a, &mut self.machine, @@ -8507,6 +8543,15 @@ impl FunctionCodeGenerator for X64FunctionCode { Location::GPR(GPR::RAX), ); } + } else { + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + Location::GPR(GPR::RAX), + ); } } @@ -8558,13 +8603,10 @@ impl FunctionCodeGenerator for X64FunctionCode { )[0]; a.emit_mov(Size::S64, Location::GPR(GPR::RAX), loc); self.value_stack.push(loc); - match frame.returns[0] { - WpType::F32 | WpType::F64 => { - self.fp_stack - .push(FloatValue::new(self.value_stack.len() - 1)); - // we already canonicalized at the `Br*` instruction or here previously. - } - _ => {} + if frame.returns[0].is_float() { + self.fp_stack + .push(FloatValue::new(self.value_stack.len() - 1)); + // we already canonicalized at the `Br*` instruction or here previously. 
} } } From e0538d32fa402b918fc6160eef7a6f2b2d8078d3 Mon Sep 17 00:00:00 2001 From: losfair Date: Tue, 17 Mar 2020 13:52:42 +0800 Subject: [PATCH 08/19] Remove clif spectest excludes. --- lib/spectests/tests/excludes.txt | 24 ------------------------ 1 file changed, 24 deletions(-) diff --git a/lib/spectests/tests/excludes.txt b/lib/spectests/tests/excludes.txt index 88568745532..7d099560b4b 100644 --- a/lib/spectests/tests/excludes.txt +++ b/lib/spectests/tests/excludes.txt @@ -266,30 +266,6 @@ clif:fail:data.wast:266:windows # AssertUnlinkable - caught panic Any clif:fail:data.wast:186:windows # AssertUnlinkable - caught panic Any clif:fail:data.wast:194:windows # AssertUnlinkable - caught panic Any -# NaN canonicalization -clif:fail:wasmer.wast:105 # AssertReturn - result I32(2143289345) ("0x7fc00001") does not match expected I32(2143289344) ("0x7fc00000") -clif:fail:wasmer.wast:106 # AssertReturn - result I32(2143289345) ("0x7fc00001") does not match expected I32(2143289344) ("0x7fc00000") -clif:fail:wasmer.wast:107 # AssertReturn - result I32(2143289345) ("0x7fc00001") does not match expected I32(2143289344) ("0x7fc00000") -clif:fail:wasmer.wast:108 # AssertReturn - result I32(2143289345) ("0x7fc00001") does not match expected I32(2143289344) ("0x7fc00000") -clif:fail:wasmer.wast:109 # AssertReturn - result I32(2143289345) ("0x7fc00001") does not match expected I32(2143289344) ("0x7fc00000") -clif:fail:wasmer.wast:110 # AssertReturn - result I32(2143289345) ("0x7fc00001") does not match expected I32(2143289344) ("0x7fc00000") -clif:fail:wasmer.wast:111 # AssertReturn - result I32(2143289345) ("0x7fc00001") does not match expected I32(2143289344) ("0x7fc00000") -clif:fail:wasmer.wast:112 # AssertReturn - result I32(2143289345) ("0x7fc00001") does not match expected I32(2143289344) ("0x7fc00000") -clif:fail:wasmer.wast:113 # AssertReturn - result I32(2143289345) ("0x7fc00001") does not match expected I32(2143289344) ("0x7fc00000") -clif:fail:wasmer.wast:114 # 
AssertReturn - result I32(2143289345) ("0x7fc00001") does not match expected I32(2143289344) ("0x7fc00000") -clif:fail:wasmer.wast:115 # AssertReturn - result I32(2143289345) ("0x7fc00001") does not match expected I32(2143289344) ("0x7fc00000") -clif:fail:wasmer.wast:117 # AssertReturn - result I64(9221120237041090561) ("0x7ff8000000000001") does not match expected I64(9221120237041090560) ("0x7ff8000000000000") -clif:fail:wasmer.wast:118 # AssertReturn - result I64(9221120237041090561) ("0x7ff8000000000001") does not match expected I64(9221120237041090560) ("0x7ff8000000000000") -clif:fail:wasmer.wast:119 # AssertReturn - result I64(9221120237041090561) ("0x7ff8000000000001") does not match expected I64(9221120237041090560) ("0x7ff8000000000000") -clif:fail:wasmer.wast:120 # AssertReturn - result I64(9221120237041090561) ("0x7ff8000000000001") does not match expected I64(9221120237041090560) ("0x7ff8000000000000") -clif:fail:wasmer.wast:121 # AssertReturn - result I64(9221120237041090561) ("0x7ff8000000000001") does not match expected I64(9221120237041090560) ("0x7ff8000000000000") -clif:fail:wasmer.wast:122 # AssertReturn - result I64(9221120237041090561) ("0x7ff8000000000001") does not match expected I64(9221120237041090560) ("0x7ff8000000000000") -clif:fail:wasmer.wast:123 # AssertReturn - result I64(9221120237041090561) ("0x7ff8000000000001") does not match expected I64(9221120237041090560) ("0x7ff8000000000000") -clif:fail:wasmer.wast:124 # AssertReturn - result I64(9221120237041090561) ("0x7ff8000000000001") does not match expected I64(9221120237041090560) ("0x7ff8000000000000") -clif:fail:wasmer.wast:125 # AssertReturn - result I64(9221120237041090561) ("0x7ff8000000000001") does not match expected I64(9221120237041090560) ("0x7ff8000000000000") -clif:fail:wasmer.wast:126 # AssertReturn - result I64(9221120237041090561) ("0x7ff8000000000001") does not match expected I64(9221120237041090560) ("0x7ff8000000000000") -clif:fail:wasmer.wast:127 # AssertReturn - 
result I64(9221120237041090561) ("0x7ff8000000000001") does not match expected I64(9221120237041090560) ("0x7ff8000000000000") - # LLVM llvm:fail:linking.wast:388 # AssertReturn - Call failed RuntimeError: WebAssembly trap occurred during runtime: incorrect `call_indirect` signature From 1ddf3a1c4e3fcc144ef88fb0b08276af83b9562b Mon Sep 17 00:00:00 2001 From: losfair Date: Tue, 17 Mar 2020 14:13:31 +0800 Subject: [PATCH 09/19] Add mem/local NaN tests. --- lib/spectests/spectests/wasmer.wast | 46 +++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/lib/spectests/spectests/wasmer.wast b/lib/spectests/spectests/wasmer.wast index 24183e2ecaf..4c3ca1bae0c 100644 --- a/lib/spectests/spectests/wasmer.wast +++ b/lib/spectests/spectests/wasmer.wast @@ -12,6 +12,8 @@ ) ) + (memory 1) + ;; https://github.com/wasmerio/wasmer/pull/1191 (func (export "call-indirect-from-spilled-stack") (result i32) (i64.add (i64.const 0) (i64.const 0)) @@ -65,6 +67,24 @@ (func (export "nan-canonicalization-f32-sqrt") (param i32) (result i32) (i32.reinterpret_f32 (f32.sqrt (f32.reinterpret_i32 (get_local 0)))) ) + (func (export "nan-canonicalization-f32-mem") (param i32) (result i32) + (f32.store (i32.const 0) (f32.reinterpret_i32 (get_local 0))) + (i32.reinterpret_f32 (f32.load (i32.const 0))) + ) + (func (export "nan-canonicalization-f32-mem-cncl") (param i32) (result i32) + (f32.store (i32.const 0) (f32.add (f32.reinterpret_i32 (get_local 0)) (f32.const 0))) + (i32.reinterpret_f32 (f32.load (i32.const 0))) + ) + (func (export "nan-canonicalization-f32-local") (param i32) (result i32) + (local f32) + (set_local 1 (f32.reinterpret_i32 (get_local 0))) + (i32.reinterpret_f32 (get_local 1)) + ) + (func (export "nan-canonicalization-f32-local-cncl") (param i32) (result i32) + (local f32) + (set_local 1 (f32.add (f32.reinterpret_i32 (get_local 0)) (f32.const 0))) + (i32.reinterpret_f32 (get_local 1)) + ) (func (export "nan-canonicalization-f64-add") (param i64) (result i64) 
(i64.reinterpret_f64 (f64.add (f64.reinterpret_i64 (get_local 0)) (f64.const 0))) @@ -99,6 +119,24 @@ (func (export "nan-canonicalization-f64-sqrt") (param i64) (result i64) (i64.reinterpret_f64 (f64.sqrt (f64.reinterpret_i64 (get_local 0)))) ) + (func (export "nan-canonicalization-f64-mem") (param i64) (result i64) + (f64.store (i32.const 0) (f64.reinterpret_i64 (get_local 0))) + (i64.reinterpret_f64 (f64.load (i32.const 0))) + ) + (func (export "nan-canonicalization-f64-mem-cncl") (param i64) (result i64) + (f64.store (i32.const 0) (f64.add (f64.reinterpret_i64 (get_local 0)) (f64.const 0))) + (i64.reinterpret_f64 (f64.load (i32.const 0))) + ) + (func (export "nan-canonicalization-f64-local") (param i64) (result i64) + (local f64) + (set_local 1 (f64.reinterpret_i64 (get_local 0))) + (i64.reinterpret_f64 (get_local 1)) + ) + (func (export "nan-canonicalization-f64-local-cncl") (param i64) (result i64) + (local f64) + (set_local 1 (f64.add (f64.reinterpret_i64 (get_local 0)) (f64.const 0))) + (i64.reinterpret_f64 (get_local 1)) + ) ) (assert_return (invoke "call-indirect-from-spilled-stack") (i32.const 0x132)) @@ -113,6 +151,10 @@ (assert_return (invoke "nan-canonicalization-f32-ceil" (i32.const 0x7fc00001)) (i32.const 0x7fc00000)) (assert_return (invoke "nan-canonicalization-f32-trunc" (i32.const 0x7fc00001)) (i32.const 0x7fc00000)) (assert_return (invoke "nan-canonicalization-f32-sqrt" (i32.const 0x7fc00001)) (i32.const 0x7fc00000)) +(assert_return (invoke "nan-canonicalization-f32-mem" (i32.const 0x7fc00001)) (i32.const 0x7fc00001)) +(assert_return (invoke "nan-canonicalization-f32-mem-cncl" (i32.const 0x7fc00001)) (i32.const 0x7fc00000)) +(assert_return (invoke "nan-canonicalization-f32-local" (i32.const 0x7fc00001)) (i32.const 0x7fc00001)) +(assert_return (invoke "nan-canonicalization-f32-local-cncl" (i32.const 0x7fc00001)) (i32.const 0x7fc00000)) (assert_return (invoke "nan-canonicalization-f64-add" (i64.const 0x7ff8000000000001)) (i64.const 
0x7ff8000000000000)) (assert_return (invoke "nan-canonicalization-f64-sub" (i64.const 0x7ff8000000000001)) (i64.const 0x7ff8000000000000)) @@ -125,3 +167,7 @@ (assert_return (invoke "nan-canonicalization-f64-ceil" (i64.const 0x7ff8000000000001)) (i64.const 0x7ff8000000000000)) (assert_return (invoke "nan-canonicalization-f64-trunc" (i64.const 0x7ff8000000000001)) (i64.const 0x7ff8000000000000)) (assert_return (invoke "nan-canonicalization-f64-sqrt" (i64.const 0x7ff8000000000001)) (i64.const 0x7ff8000000000000)) +(assert_return (invoke "nan-canonicalization-f64-mem" (i64.const 0x7ff8000000000001)) (i64.const 0x7ff8000000000001)) +(assert_return (invoke "nan-canonicalization-f64-mem-cncl" (i64.const 0x7ff8000000000001)) (i64.const 0x7ff8000000000000)) +(assert_return (invoke "nan-canonicalization-f64-local" (i64.const 0x7ff8000000000001)) (i64.const 0x7ff8000000000001)) +(assert_return (invoke "nan-canonicalization-f64-local-cncl" (i64.const 0x7ff8000000000001)) (i64.const 0x7ff8000000000000)) \ No newline at end of file From 1d9f0c53e18fc82d85eb475cbc7064acb82e6b26 Mon Sep 17 00:00:00 2001 From: losfair Date: Wed, 18 Mar 2020 00:42:10 +0800 Subject: [PATCH 10/19] Style fixes and fix missing canonicalization at function call. 
--- lib/singlepass-backend/src/codegen_x64.rs | 46 ++++++++++++++++++----- 1 file changed, 37 insertions(+), 9 deletions(-) diff --git a/lib/singlepass-backend/src/codegen_x64.rs b/lib/singlepass-backend/src/codegen_x64.rs index 8782e0cc48c..e19ad958443 100644 --- a/lib/singlepass-backend/src/codegen_x64.rs +++ b/lib/singlepass-backend/src/codegen_x64.rs @@ -270,7 +270,8 @@ impl FloatValue { fn promote(self, depth: usize) -> FloatValue { FloatValue { canonicalization: match self.canonicalization { - Some(_) => Some(CanonicalizeType::F64), + Some(CanonicalizeType::F32) => Some(CanonicalizeType::F64), + Some(CanonicalizeType::F64) => panic!("cannot promote F64"), None => None, }, depth, @@ -280,7 +281,8 @@ impl FloatValue { fn demote(self, depth: usize) -> FloatValue { FloatValue { canonicalization: match self.canonicalization { - Some(_) => Some(CanonicalizeType::F32), + Some(CanonicalizeType::F64) => Some(CanonicalizeType::F32), + Some(CanonicalizeType::F32) => panic!("cannot demote F32"), None => None, }, depth, @@ -288,6 +290,8 @@ impl FloatValue { } } +/// Type of a pending canonicalization floating point value. +/// Sometimes we don't have the type information elsewhere and therefore we need to track it here. 
#[derive(Copy, Clone, Debug)] enum CanonicalizeType { F32, @@ -2685,9 +2689,7 @@ impl FunctionCodeGenerator for X64FunctionCode { } fn feed_local(&mut self, ty: WpType, n: usize, _loc: u32) -> Result<(), CodegenError> { - for _ in 0..n { - self.local_types.push(ty); - } + self.local_types.extend(iter::repeat(ty).take(n)); Ok(()) } @@ -6877,8 +6879,21 @@ impl FunctionCodeGenerator for X64FunctionCode { self.machine.release_locations_only_osr_state(params.len()); - while let Some(x) = self.fp_stack.last() { - if x.depth >= self.value_stack.len() { + while let Some(fp) = self.fp_stack.last() { + if fp.depth >= self.value_stack.len() { + let index = fp.depth - self.value_stack.len(); + if a.arch_supports_canonicalize_nan() + && self.config.nan_canonicalization + && fp.canonicalization.is_some() + { + Self::canonicalize_nan( + a, + &mut self.machine, + fp.canonicalization.unwrap().to_size(), + params[index], + params[index], + ); + } self.fp_stack.pop().unwrap(); } else { break; @@ -6935,8 +6950,21 @@ impl FunctionCodeGenerator for X64FunctionCode { .collect(); self.machine.release_locations_only_regs(¶ms); - while let Some(x) = self.fp_stack.last() { - if x.depth >= self.value_stack.len() { + while let Some(fp) = self.fp_stack.last() { + if fp.depth >= self.value_stack.len() { + let index = fp.depth - self.value_stack.len(); + if a.arch_supports_canonicalize_nan() + && self.config.nan_canonicalization + && fp.canonicalization.is_some() + { + Self::canonicalize_nan( + a, + &mut self.machine, + fp.canonicalization.unwrap().to_size(), + params[index], + params[index], + ); + } self.fp_stack.pop().unwrap(); } else { break; From 72bc9f65312468449af82183ab1ff447d795f272 Mon Sep 17 00:00:00 2001 From: losfair Date: Wed, 18 Mar 2020 00:42:19 +0800 Subject: [PATCH 11/19] Add call canonicalization tests. 
--- lib/spectests/spectests/wasmer.wast | 46 +++++++++++++++++++++++++++-- 1 file changed, 44 insertions(+), 2 deletions(-) diff --git a/lib/spectests/spectests/wasmer.wast b/lib/spectests/spectests/wasmer.wast index 4c3ca1bae0c..1a7647ee1e7 100644 --- a/lib/spectests/spectests/wasmer.wast +++ b/lib/spectests/spectests/wasmer.wast @@ -3,12 +3,16 @@ (module ;; Auxiliary definitions (type $out-i32 (func (result i32))) + (type $f32-id (func (param f32) (result f32))) + (type $f64-id (func (param f64) (result f64))) (func $const-i32 (type $out-i32) (i32.const 0x132)) (table funcref (elem $const-i32 + $nan-canonicalization-f32-func-call-target + $nan-canonicalization-f64-func-call-target ) ) @@ -32,7 +36,7 @@ ) ;; NaN canonicalization tests. - ;; Things that are covered by spectests canonicalization (`fabs`, `fneg`, `fcopysign`, `reinterpet`, `const`) won't be duplicated here. + ;; Things that are covered by spectests canonicalization (`fabs`, `fneg`, `fcopysign`, `reinterpret`, `const`) won't be duplicated here. 
(func (export "nan-canonicalization-f32-add") (param i32) (result i32) (i32.reinterpret_f32 (f32.add (f32.reinterpret_i32 (get_local 0)) (f32.const 0))) @@ -85,6 +89,21 @@ (set_local 1 (f32.add (f32.reinterpret_i32 (get_local 0)) (f32.const 0))) (i32.reinterpret_f32 (get_local 1)) ) + (func $nan-canonicalization-f32-func-call-target (param f32) (result f32) + (get_local 0) + ) + (func (export "nan-canonicalization-f32-func-call") (param i32) (result i32) + (i32.reinterpret_f32 (call $nan-canonicalization-f32-func-call-target (f32.reinterpret_i32 (get_local 0)))) + ) + (func (export "nan-canonicalization-f32-func-call-cncl") (param i32) (result i32) + (i32.reinterpret_f32 (call $nan-canonicalization-f32-func-call-target (f32.add (f32.reinterpret_i32 (get_local 0)) (f32.const 0)))) + ) + (func (export "nan-canonicalization-f32-func-call-indirect") (param i32) (result i32) + (i32.reinterpret_f32 (call_indirect (type $f32-id) (f32.reinterpret_i32 (get_local 0)) (i32.const 1))) + ) + (func (export "nan-canonicalization-f32-func-call-indirect-cncl") (param i32) (result i32) + (i32.reinterpret_f32 (call_indirect (type $f32-id) (f32.add (f32.reinterpret_i32 (get_local 0)) (f32.const 0)) (i32.const 1))) + ) (func (export "nan-canonicalization-f64-add") (param i64) (result i64) (i64.reinterpret_f64 (f64.add (f64.reinterpret_i64 (get_local 0)) (f64.const 0))) @@ -137,6 +156,21 @@ (set_local 1 (f64.add (f64.reinterpret_i64 (get_local 0)) (f64.const 0))) (i64.reinterpret_f64 (get_local 1)) ) + (func $nan-canonicalization-f64-func-call-target (param f64) (result f64) + (get_local 0) + ) + (func (export "nan-canonicalization-f64-func-call") (param i64) (result i64) + (i64.reinterpret_f64 (call $nan-canonicalization-f64-func-call-target (f64.reinterpret_i64 (get_local 0)))) + ) + (func (export "nan-canonicalization-f64-func-call-cncl") (param i64) (result i64) + (i64.reinterpret_f64 (call $nan-canonicalization-f64-func-call-target (f64.add (f64.reinterpret_i64 (get_local 0)) 
(f64.const 0)))) + ) + (func (export "nan-canonicalization-f64-func-call-indirect") (param i64) (result i64) + (i64.reinterpret_f64 (call_indirect (type $f64-id) (f64.reinterpret_i64 (get_local 0)) (i32.const 2))) + ) + (func (export "nan-canonicalization-f64-func-call-indirect-cncl") (param i64) (result i64) + (i64.reinterpret_f64 (call_indirect (type $f64-id) (f64.add (f64.reinterpret_i64 (get_local 0)) (f64.const 0)) (i32.const 2))) + ) ) (assert_return (invoke "call-indirect-from-spilled-stack") (i32.const 0x132)) @@ -155,6 +189,10 @@ (assert_return (invoke "nan-canonicalization-f32-mem-cncl" (i32.const 0x7fc00001)) (i32.const 0x7fc00000)) (assert_return (invoke "nan-canonicalization-f32-local" (i32.const 0x7fc00001)) (i32.const 0x7fc00001)) (assert_return (invoke "nan-canonicalization-f32-local-cncl" (i32.const 0x7fc00001)) (i32.const 0x7fc00000)) +(assert_return (invoke "nan-canonicalization-f32-func-call" (i32.const 0x7fc00001)) (i32.const 0x7fc00001)) +(assert_return (invoke "nan-canonicalization-f32-func-call-cncl" (i32.const 0x7fc00001)) (i32.const 0x7fc00000)) +(assert_return (invoke "nan-canonicalization-f32-func-call-indirect" (i32.const 0x7fc00001)) (i32.const 0x7fc00001)) +(assert_return (invoke "nan-canonicalization-f32-func-call-indirect-cncl" (i32.const 0x7fc00001)) (i32.const 0x7fc00000)) (assert_return (invoke "nan-canonicalization-f64-add" (i64.const 0x7ff8000000000001)) (i64.const 0x7ff8000000000000)) (assert_return (invoke "nan-canonicalization-f64-sub" (i64.const 0x7ff8000000000001)) (i64.const 0x7ff8000000000000)) @@ -170,4 +208,8 @@ (assert_return (invoke "nan-canonicalization-f64-mem" (i64.const 0x7ff8000000000001)) (i64.const 0x7ff8000000000001)) (assert_return (invoke "nan-canonicalization-f64-mem-cncl" (i64.const 0x7ff8000000000001)) (i64.const 0x7ff8000000000000)) (assert_return (invoke "nan-canonicalization-f64-local" (i64.const 0x7ff8000000000001)) (i64.const 0x7ff8000000000001)) -(assert_return (invoke 
"nan-canonicalization-f64-local-cncl" (i64.const 0x7ff8000000000001)) (i64.const 0x7ff8000000000000)) \ No newline at end of file +(assert_return (invoke "nan-canonicalization-f64-local-cncl" (i64.const 0x7ff8000000000001)) (i64.const 0x7ff8000000000000)) +(assert_return (invoke "nan-canonicalization-f64-func-call" (i64.const 0x7ff8000000000001)) (i64.const 0x7ff8000000000001)) +(assert_return (invoke "nan-canonicalization-f64-func-call-cncl" (i64.const 0x7ff8000000000001)) (i64.const 0x7ff8000000000000)) +(assert_return (invoke "nan-canonicalization-f64-func-call-indirect" (i64.const 0x7ff8000000000001)) (i64.const 0x7ff8000000000001)) +(assert_return (invoke "nan-canonicalization-f64-func-call-indirect-cncl" (i64.const 0x7ff8000000000001)) (i64.const 0x7ff8000000000000)) From 8e92e3208a61837d5dfbf89ebaf53c107ccd50f7 Mon Sep 17 00:00:00 2001 From: losfair Date: Wed, 18 Mar 2020 00:47:52 +0800 Subject: [PATCH 12/19] Disable canonicalization for aarch64. --- lib/singlepass-backend/src/codegen_x64.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/lib/singlepass-backend/src/codegen_x64.rs b/lib/singlepass-backend/src/codegen_x64.rs index e19ad958443..0f7ef522110 100644 --- a/lib/singlepass-backend/src/codegen_x64.rs +++ b/lib/singlepass-backend/src/codegen_x64.rs @@ -1149,7 +1149,12 @@ impl ModuleCodeGenerator enforce_stack_check: config.enforce_stack_check, track_state: config.track_state, full_preemption: config.full_preemption, + + // NaN canonicalization is only implemented for x86_64 for now. 
+ #[cfg(target_arch = "x86_64")] nan_canonicalization: config.nan_canonicalization, + #[cfg(not(target_arch = "x86_64"))] + nan_canonicalization: false, })); Ok(()) } From bfc3b8292c387f26ff9055c67e0ea49d97171dcc Mon Sep 17 00:00:00 2001 From: Heyang Zhou Date: Wed, 18 Mar 2020 00:58:50 +0800 Subject: [PATCH 13/19] Apply suggestions from code review Co-Authored-By: nlewycky --- lib/singlepass-backend/src/codegen_x64.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/singlepass-backend/src/codegen_x64.rs b/lib/singlepass-backend/src/codegen_x64.rs index 0f7ef522110..0cf7f2b4af7 100644 --- a/lib/singlepass-backend/src/codegen_x64.rs +++ b/lib/singlepass-backend/src/codegen_x64.rs @@ -238,7 +238,7 @@ pub struct X64FunctionCode { /// Metadata about a floating-point value. #[derive(Copy, Clone, Debug)] struct FloatValue { - /// Do we need to canonicalize the value before its bit pattern is observable? If so, how? + /// Do we need to canonicalize the value before its bit pattern is next observed? If so, how? canonicalization: Option, /// Corresponding depth in the main value stack. From 86dde8cab2f3b0376930bb84678759528d22e9a3 Mon Sep 17 00:00:00 2001 From: losfair Date: Wed, 18 Mar 2020 01:02:59 +0800 Subject: [PATCH 14/19] Add missing movs. --- lib/singlepass-backend/src/codegen_x64.rs | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/lib/singlepass-backend/src/codegen_x64.rs b/lib/singlepass-backend/src/codegen_x64.rs index 0f7ef522110..ae09f6c690e 100644 --- a/lib/singlepass-backend/src/codegen_x64.rs +++ b/lib/singlepass-backend/src/codegen_x64.rs @@ -1149,12 +1149,7 @@ impl ModuleCodeGenerator enforce_stack_check: config.enforce_stack_check, track_state: config.track_state, full_preemption: config.full_preemption, - - // NaN canonicalization is only implemented for x86_64 for now. 
- #[cfg(target_arch = "x86_64")] nan_canonicalization: config.nan_canonicalization, - #[cfg(not(target_arch = "x86_64"))] - nan_canonicalization: false, })); Ok(()) } @@ -4648,6 +4643,9 @@ impl FunctionCodeGenerator for X64FunctionCode { } } } + } else { + a.emit_mov(Size::S32, loc_a, Location::GPR(tmp1)); + a.emit_mov(Size::S32, loc_b, Location::GPR(tmp2)); } a.emit_and( Size::S32, @@ -5187,6 +5185,9 @@ impl FunctionCodeGenerator for X64FunctionCode { } } } + } else { + a.emit_mov(Size::S64, loc_a, Location::GPR(tmp1)); + a.emit_mov(Size::S64, loc_b, Location::GPR(tmp2)); } let c = self.machine.acquire_temp_gpr().unwrap(); From ea0cd7286e37c8803c25a1143eaa0e29c6b14fe1 Mon Sep 17 00:00:00 2001 From: losfair Date: Wed, 18 Mar 2020 01:03:55 +0800 Subject: [PATCH 15/19] Cargo fmt --- lib/singlepass-backend/src/codegen_x64.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/singlepass-backend/src/codegen_x64.rs b/lib/singlepass-backend/src/codegen_x64.rs index ca23a56af17..e66a08f1b9d 100644 --- a/lib/singlepass-backend/src/codegen_x64.rs +++ b/lib/singlepass-backend/src/codegen_x64.rs @@ -4644,8 +4644,8 @@ impl FunctionCodeGenerator for X64FunctionCode { } } } else { - a.emit_mov(Size::S32, loc_a, Location::GPR(tmp1)); - a.emit_mov(Size::S32, loc_b, Location::GPR(tmp2)); + a.emit_mov(Size::S32, loc_a, Location::GPR(tmp1)); + a.emit_mov(Size::S32, loc_b, Location::GPR(tmp2)); } a.emit_and( Size::S32, @@ -5186,7 +5186,7 @@ impl FunctionCodeGenerator for X64FunctionCode { } } } else { - a.emit_mov(Size::S64, loc_a, Location::GPR(tmp1)); + a.emit_mov(Size::S64, loc_a, Location::GPR(tmp1)); a.emit_mov(Size::S64, loc_b, Location::GPR(tmp2)); } From a9cd6d68146ee93ed116484d107f868635e8e748 Mon Sep 17 00:00:00 2001 From: losfair Date: Wed, 18 Mar 2020 01:16:18 +0800 Subject: [PATCH 16/19] Add aarch64 NaN canonicalization spectest excludes. 
--- lib/spectests/tests/excludes.txt | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/lib/spectests/tests/excludes.txt b/lib/spectests/tests/excludes.txt index 4007d781b63..bd876f460de 100644 --- a/lib/spectests/tests/excludes.txt +++ b/lib/spectests/tests/excludes.txt @@ -524,3 +524,35 @@ singlepass:fail:traps.wast:54:*:aarch64 # AssertTrap - expected trap, got [] singlepass:fail:traps.wast:55:*:aarch64 # AssertTrap - expected trap, got [] singlepass:fail:traps.wast:56:*:aarch64 # AssertTrap - expected trap, got [] singlepass:fail:traps.wast:57:*:aarch64 # AssertTrap - expected trap, got [] + +# NaN canonicalization is not yet implemented for aarch64. +singlepass:fail:wasmer.wast:177:*:aarch64 # AssertReturn - result I32(2143289345) ("0x7fc00001") does not match expected I32(2143289344) ("0x7fc00000") +singlepass:fail:wasmer.wast:178:*:aarch64 # AssertReturn - result I32(2143289345) ("0x7fc00001") does not match expected I32(2143289344) ("0x7fc00000") +singlepass:fail:wasmer.wast:179:*:aarch64 # AssertReturn - result I32(2143289345) ("0x7fc00001") does not match expected I32(2143289344) ("0x7fc00000") +singlepass:fail:wasmer.wast:180:*:aarch64 # AssertReturn - result I32(2143289345) ("0x7fc00001") does not match expected I32(2143289344) ("0x7fc00000") +singlepass:fail:wasmer.wast:181:*:aarch64 # AssertReturn - result I32(2143289345) ("0x7fc00001") does not match expected I32(2143289344) ("0x7fc00000") +singlepass:fail:wasmer.wast:182:*:aarch64 # AssertReturn - result I32(2143289345) ("0x7fc00001") does not match expected I32(2143289344) ("0x7fc00000") +singlepass:fail:wasmer.wast:183:*:aarch64 # AssertReturn - result I32(2143289345) ("0x7fc00001") does not match expected I32(2143289344) ("0x7fc00000") +singlepass:fail:wasmer.wast:184:*:aarch64 # AssertReturn - result I32(2143289345) ("0x7fc00001") does not match expected I32(2143289344) ("0x7fc00000") +singlepass:fail:wasmer.wast:185:*:aarch64 # AssertReturn - result 
I32(2143289345) ("0x7fc00001") does not match expected I32(2143289344) ("0x7fc00000") +singlepass:fail:wasmer.wast:186:*:aarch64 # AssertReturn - result I32(2143289345) ("0x7fc00001") does not match expected I32(2143289344) ("0x7fc00000") +singlepass:fail:wasmer.wast:187:*:aarch64 # AssertReturn - result I32(2143289345) ("0x7fc00001") does not match expected I32(2143289344) ("0x7fc00000") +singlepass:fail:wasmer.wast:189:*:aarch64 # AssertReturn - result I32(2143289345) ("0x7fc00001") does not match expected I32(2143289344) ("0x7fc00000") +singlepass:fail:wasmer.wast:191:*:aarch64 # AssertReturn - result I32(2143289345) ("0x7fc00001") does not match expected I32(2143289344) ("0x7fc00000") +singlepass:fail:wasmer.wast:193:*:aarch64 # AssertReturn - result I32(2143289345) ("0x7fc00001") does not match expected I32(2143289344) ("0x7fc00000") +singlepass:fail:wasmer.wast:195:*:aarch64 # AssertReturn - result I32(2143289345) ("0x7fc00001") does not match expected I32(2143289344) ("0x7fc00000") +singlepass:fail:wasmer.wast:197:*:aarch64 # AssertReturn - result I64(9221120237041090561) ("0x7ff8000000000001") does not match expected I64(9221120237041090560) ("0x7ff8000000000000") +singlepass:fail:wasmer.wast:198:*:aarch64 # AssertReturn - result I64(9221120237041090561) ("0x7ff8000000000001") does not match expected I64(9221120237041090560) ("0x7ff8000000000000") +singlepass:fail:wasmer.wast:199:*:aarch64 # AssertReturn - result I64(9221120237041090561) ("0x7ff8000000000001") does not match expected I64(9221120237041090560) ("0x7ff8000000000000") +singlepass:fail:wasmer.wast:200:*:aarch64 # AssertReturn - result I64(9221120237041090561) ("0x7ff8000000000001") does not match expected I64(9221120237041090560) ("0x7ff8000000000000") +singlepass:fail:wasmer.wast:201:*:aarch64 # AssertReturn - result I64(9221120237041090561) ("0x7ff8000000000001") does not match expected I64(9221120237041090560) ("0x7ff8000000000000") +singlepass:fail:wasmer.wast:202:*:aarch64 # AssertReturn - 
result I64(9221120237041090561) ("0x7ff8000000000001") does not match expected I64(9221120237041090560) ("0x7ff8000000000000") +singlepass:fail:wasmer.wast:203:*:aarch64 # AssertReturn - result I64(9221120237041090561) ("0x7ff8000000000001") does not match expected I64(9221120237041090560) ("0x7ff8000000000000") +singlepass:fail:wasmer.wast:204:*:aarch64 # AssertReturn - result I64(9221120237041090561) ("0x7ff8000000000001") does not match expected I64(9221120237041090560) ("0x7ff8000000000000") +singlepass:fail:wasmer.wast:205:*:aarch64 # AssertReturn - result I64(9221120237041090561) ("0x7ff8000000000001") does not match expected I64(9221120237041090560) ("0x7ff8000000000000") +singlepass:fail:wasmer.wast:206:*:aarch64 # AssertReturn - result I64(9221120237041090561) ("0x7ff8000000000001") does not match expected I64(9221120237041090560) ("0x7ff8000000000000") +singlepass:fail:wasmer.wast:207:*:aarch64 # AssertReturn - result I64(9221120237041090561) ("0x7ff8000000000001") does not match expected I64(9221120237041090560) ("0x7ff8000000000000") +singlepass:fail:wasmer.wast:209:*:aarch64 # AssertReturn - result I64(9221120237041090561) ("0x7ff8000000000001") does not match expected I64(9221120237041090560) ("0x7ff8000000000000") +singlepass:fail:wasmer.wast:211:*:aarch64 # AssertReturn - result I64(9221120237041090561) ("0x7ff8000000000001") does not match expected I64(9221120237041090560) ("0x7ff8000000000000") +singlepass:fail:wasmer.wast:213:*:aarch64 # AssertReturn - result I64(9221120237041090561) ("0x7ff8000000000001") does not match expected I64(9221120237041090560) ("0x7ff8000000000000") +singlepass:fail:wasmer.wast:215:*:aarch64 # AssertReturn - result I64(9221120237041090561) ("0x7ff8000000000001") does not match expected I64(9221120237041090560) ("0x7ff8000000000000") \ No newline at end of file From 29a431cfc7547a3b8ee01b25e2cc55a1f7171393 Mon Sep 17 00:00:00 2001 From: losfair Date: Wed, 18 Mar 2020 01:43:05 +0800 Subject: [PATCH 17/19] Remove 4 
spectest excludes that are no longer needed. --- lib/spectests/tests/excludes.txt | 4 ---- 1 file changed, 4 deletions(-) diff --git a/lib/spectests/tests/excludes.txt b/lib/spectests/tests/excludes.txt index bd876f460de..54c886a89a7 100644 --- a/lib/spectests/tests/excludes.txt +++ b/lib/spectests/tests/excludes.txt @@ -244,10 +244,6 @@ clif:fail:exports.wast:167:windows # Module - caught panic Any clif:fail:exports.wast:168:windows # Module - caught panic Any clif:fail:exports.wast:169:windows # Module - caught panic Any clif:fail:exports.wast:170:windows # Module - caught panic Any -clif:fail:f32.wast:2496:windows # "AssertReturnArithmeticNan" - value is not arithmetic nan F32(NaN) -clif:fail:f32.wast:2498:windows # "AssertReturnArithmeticNan" - value is not arithmetic nan F32(NaN) -clif:fail:f64.wast:2496:windows # "AssertReturnArithmeticNan" - value is not arithmetic nan F64(NaN) -clif:fail:f64.wast:2498:windows # "AssertReturnArithmeticNan" - value is not arithmetic nan F64(NaN) clif:fail:func.wast:289:windows # Module - caught panic Any clif:fail:memory.wast:3:windows # Module - caught panic Any clif:fail:memory.wast:4:windows # Module - caught panic Any From 8485ccc88ca71f342ecbb5a70562c148caeda642 Mon Sep 17 00:00:00 2001 From: losfair Date: Wed, 18 Mar 2020 01:48:27 +0800 Subject: [PATCH 18/19] Update comment for `nan_canonicalization`. --- lib/runtime-core/src/backend.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/runtime-core/src/backend.rs b/lib/runtime-core/src/backend.rs index 49b4161e48c..eabe526d8c7 100644 --- a/lib/runtime-core/src/backend.rs +++ b/lib/runtime-core/src/backend.rs @@ -132,8 +132,8 @@ pub struct CompilerConfig { /// When enabled there can be a small amount of runtime performance overhead. pub full_preemption: bool, - /// Whether to enable spec-compliant NaN canonicalization at all places. - /// Enabling this increases runtime overhead. + /// Always choose a unique bit representation for NaN. 
+ /// Enabling this makes execution deterministic but increases runtime overhead. pub nan_canonicalization: bool, pub features: Features, From c25ba627dfa4243d6b3c9523f54a7520e43858c9 Mon Sep 17 00:00:00 2001 From: losfair Date: Wed, 18 Mar 2020 23:41:10 +0800 Subject: [PATCH 19/19] Add comment for call argument list preprocessing. --- lib/singlepass-backend/src/codegen_x64.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/lib/singlepass-backend/src/codegen_x64.rs b/lib/singlepass-backend/src/codegen_x64.rs index e66a08f1b9d..38df4e67b6b 100644 --- a/lib/singlepass-backend/src/codegen_x64.rs +++ b/lib/singlepass-backend/src/codegen_x64.rs @@ -6885,6 +6885,10 @@ impl FunctionCodeGenerator for X64FunctionCode { self.machine.release_locations_only_osr_state(params.len()); + // Pop arguments off the FP stack and canonicalize them if needed. + // + // Canonicalization state will be lost across function calls, so early canonicalization + // is necessary here. while let Some(fp) = self.fp_stack.last() { if fp.depth >= self.value_stack.len() { let index = fp.depth - self.value_stack.len(); @@ -6956,6 +6960,10 @@ impl FunctionCodeGenerator for X64FunctionCode { .collect(); self.machine.release_locations_only_regs(¶ms); + // Pop arguments off the FP stack and canonicalize them if needed. + // + // Canonicalization state will be lost across function calls, so early canonicalization + // is necessary here. while let Some(fp) = self.fp_stack.last() { if fp.depth >= self.value_stack.len() { let index = fp.depth - self.value_stack.len();