From fa64b42a2994cb36d44d3411606993e9bb995638 Mon Sep 17 00:00:00 2001 From: Nick Lewycky Date: Wed, 28 Aug 2019 17:23:26 -0700 Subject: [PATCH 1/9] Initial implementation of atomic load/store and i32 atomic rmw add. --- lib/singlepass-backend/src/codegen_x64.rs | 571 ++++++++++++++++++++-- lib/singlepass-backend/src/emitter_x64.rs | 80 ++- 2 files changed, 611 insertions(+), 40 deletions(-) diff --git a/lib/singlepass-backend/src/codegen_x64.rs b/lib/singlepass-backend/src/codegen_x64.rs index a12b2557f3b..4d997bf434b 100644 --- a/lib/singlepass-backend/src/codegen_x64.rs +++ b/lib/singlepass-backend/src/codegen_x64.rs @@ -34,7 +34,7 @@ use wasmer_runtime_core::{ }, vm::{self, LocalGlobal, LocalTable, INTERNALS_SIZE}, }; -use wasmparser::{Operator, Type as WpType, TypeOrFuncType as WpTypeOrFuncType}; +use wasmparser::{MemoryImmediate, Operator, Type as WpType, TypeOrFuncType as WpTypeOrFuncType}; lazy_static! { /// Performs a System V call to `target` with [stack_top..stack_base] as the argument list, from right to left. @@ -1465,7 +1465,8 @@ impl X64FunctionCode { a: &mut Assembler, m: &mut Machine, addr: Location, - offset: usize, + memarg: &MemoryImmediate, + check_alignment: bool, value_size: usize, cb: F, ) { @@ -1487,7 +1488,6 @@ impl X64FunctionCode { let tmp_addr = m.acquire_temp_gpr().unwrap(); let tmp_base = m.acquire_temp_gpr().unwrap(); - let tmp_bound = m.acquire_temp_gpr().unwrap(); // Load base into temporary register. a.emit_mov( @@ -1500,6 +1500,8 @@ impl X64FunctionCode { ); if need_check { + let tmp_bound = m.acquire_temp_gpr().unwrap(); + a.emit_mov( Size::S64, Location::Memory( @@ -1513,14 +1515,14 @@ impl X64FunctionCode { a.emit_mov(Size::S32, addr, Location::GPR(tmp_addr)); // This branch is used for emitting "faster" code for the special case of (offset + value_size) not exceeding u32 range. - match (offset as u32).checked_add(value_size as u32) { + match (memarg.offset as u32).checked_add(value_size as u32) { Some(x) => { a.emit_add(Size::S64, Location::Imm32(x), Location::GPR(tmp_addr)); } None => { a.emit_add( Size::S64, - Location::Imm32(offset as u32), + Location::Imm32(memarg.offset as u32), Location::GPR(tmp_addr), ); a.emit_add( @@ -1535,20 +1537,39 @@ impl X64FunctionCode { a.emit_add(Size::S64, Location::GPR(tmp_base), Location::GPR(tmp_addr)); a.emit_cmp(Size::S64, Location::GPR(tmp_bound), Location::GPR(tmp_addr)); a.emit_conditional_trap(Condition::Above); - } - m.release_temp_gpr(tmp_bound); + m.release_temp_gpr(tmp_bound); + } // Calculates the real address, and loads from it. 
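// A recap of the bounds check emitted above, as pseudo-Rust (a sketch of what
// the generated code computes, not literal backend code; `memory_base` and
// `memory_bound` stand in for the vmctx fields loaded into tmp_base/tmp_bound):
//
//     let bound = memory_base + memory_bound;          // one past the end of linear memory
//     let end = memory_base + (addr as u64)
//         + (memarg.offset as u64) + (value_size as u64); // one past the access
//     if end > bound { trap(); }                       // the Condition::Above trap
//
// The checked_add above merely folds `offset + value_size` into a single
// 32-bit immediate when the sum cannot overflow u32, saving one ADD.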
a.emit_mov(Size::S32, addr, Location::GPR(tmp_addr)); a.emit_add( Size::S64, - Location::Imm32(offset as u32), + Location::Imm32(memarg.offset as u32), Location::GPR(tmp_addr), ); a.emit_add(Size::S64, Location::GPR(tmp_base), Location::GPR(tmp_addr)); m.release_temp_gpr(tmp_base); + let align = match memarg.flags & 3 { + 0 => 1, + 1 => 2, + 2 => 4, + 3 => 8, + _ => unreachable!("this match is fully covered"), + }; + if check_alignment && align != 1 { + let tmp_aligncheck = m.acquire_temp_gpr().unwrap(); + //let tmp_mask = m.acquire_temp_gpr().unwrap(); + a.emit_mov(Size::S32, Location::GPR(tmp_addr), Location::GPR(tmp_aligncheck)); + //a.emit_mov(Size::S64, Location::Imm64(align - 1), Location::GPR(tmp_mask)); + //a.emit_and(Size::S64, Location::GPR(tmp_mask), Location::GPR(tmp_aligncheck)); + a.emit_and(Size::S64, Location::Imm32(align - 1), Location::GPR(tmp_aligncheck)); + a.emit_conditional_trap(Condition::NotEqual); + //m.release_temp_gpr(tmp_mask); + m.release_temp_gpr(tmp_aligncheck); + } + cb(a, m, tmp_addr); m.release_temp_gpr(tmp_addr); @@ -4144,7 +4165,8 @@ impl FunctionCodeGenerator for X64FunctionCode { a, &mut self.machine, target, - memarg.offset as usize, + memarg, + false, 4, |a, m, addr| { Self::emit_relaxed_binop( @@ -4174,7 +4196,8 @@ impl FunctionCodeGenerator for X64FunctionCode { a, &mut self.machine, target, - memarg.offset as usize, + memarg, + false, 4, |a, m, addr| { Self::emit_relaxed_binop( @@ -4204,7 +4227,8 @@ impl FunctionCodeGenerator for X64FunctionCode { a, &mut self.machine, target, - memarg.offset as usize, + memarg, + false, 1, |a, m, addr| { Self::emit_relaxed_zx_sx( @@ -4235,7 +4259,8 @@ impl FunctionCodeGenerator for X64FunctionCode { a, &mut self.machine, target, - memarg.offset as usize, + memarg, + false, 1, |a, m, addr| { Self::emit_relaxed_zx_sx( @@ -4266,7 +4291,8 @@ impl FunctionCodeGenerator for X64FunctionCode { a, &mut self.machine, target, - memarg.offset as usize, + memarg, + false, 2, |a, m, addr| { Self::emit_relaxed_zx_sx( @@ -4297,7 +4323,8 @@ impl FunctionCodeGenerator for X64FunctionCode { a, &mut self.machine, target, - memarg.offset as usize, + memarg, + false, 2, |a, m, addr| { Self::emit_relaxed_zx_sx( @@ -4324,7 +4351,8 @@ impl FunctionCodeGenerator for X64FunctionCode { a, &mut self.machine, target_addr, - memarg.offset as usize, + memarg, + false, 4, |a, m, addr| { Self::emit_relaxed_binop( @@ -4350,7 +4378,8 @@ impl FunctionCodeGenerator for X64FunctionCode { a, &mut self.machine, target_addr, - memarg.offset as usize, + memarg, + false, 4, |a, m, addr| { Self::emit_relaxed_binop( @@ -4376,7 +4405,8 @@ impl FunctionCodeGenerator for X64FunctionCode { a, &mut self.machine, target_addr, - memarg.offset as usize, + memarg, + false, 1, |a, m, addr| { Self::emit_relaxed_binop( @@ -4402,7 +4432,8 @@ impl FunctionCodeGenerator for X64FunctionCode { a, &mut self.machine, target_addr, - memarg.offset as usize, + memarg, + false, 2, |a, m, addr| { Self::emit_relaxed_binop( @@ -4432,7 +4463,8 @@ impl FunctionCodeGenerator for X64FunctionCode { a, &mut self.machine, target, - memarg.offset as usize, + memarg, + false, 8, |a, m, addr| { Self::emit_relaxed_binop( @@ -4462,7 +4494,8 @@ impl FunctionCodeGenerator for X64FunctionCode { a, &mut self.machine, target, - memarg.offset as usize, + memarg, + false, 8, |a, m, addr| { Self::emit_relaxed_binop( @@ -4492,7 +4525,8 @@ impl FunctionCodeGenerator for X64FunctionCode { a, &mut self.machine, target, - memarg.offset as usize, + memarg, + false, 1, |a, m, addr| { Self::emit_relaxed_zx_sx( 
@@ -4523,7 +4557,8 @@ impl FunctionCodeGenerator for X64FunctionCode { a, &mut self.machine, target, - memarg.offset as usize, + memarg, + false, 1, |a, m, addr| { Self::emit_relaxed_zx_sx( @@ -4554,7 +4589,8 @@ impl FunctionCodeGenerator for X64FunctionCode { a, &mut self.machine, target, - memarg.offset as usize, + memarg, + false, 2, |a, m, addr| { Self::emit_relaxed_zx_sx( @@ -4585,7 +4621,8 @@ impl FunctionCodeGenerator for X64FunctionCode { a, &mut self.machine, target, - memarg.offset as usize, + memarg, + false, 2, |a, m, addr| { Self::emit_relaxed_zx_sx( @@ -4616,7 +4653,8 @@ impl FunctionCodeGenerator for X64FunctionCode { a, &mut self.machine, target, - memarg.offset as usize, + memarg, + false, 4, |a, m, addr| { match ret { @@ -4657,7 +4695,8 @@ impl FunctionCodeGenerator for X64FunctionCode { a, &mut self.machine, target, - memarg.offset as usize, + memarg, + false, 4, |a, m, addr| { Self::emit_relaxed_zx_sx( @@ -4684,7 +4723,8 @@ impl FunctionCodeGenerator for X64FunctionCode { a, &mut self.machine, target_addr, - memarg.offset as usize, + memarg, + false, 8, |a, m, addr| { Self::emit_relaxed_binop( @@ -4710,7 +4750,8 @@ impl FunctionCodeGenerator for X64FunctionCode { a, &mut self.machine, target_addr, - memarg.offset as usize, + memarg, + false, 8, |a, m, addr| { Self::emit_relaxed_binop( @@ -4736,7 +4777,8 @@ impl FunctionCodeGenerator for X64FunctionCode { a, &mut self.machine, target_addr, - memarg.offset as usize, + memarg, + false, 1, |a, m, addr| { Self::emit_relaxed_binop( @@ -4762,7 +4804,8 @@ impl FunctionCodeGenerator for X64FunctionCode { a, &mut self.machine, target_addr, - memarg.offset as usize, + memarg, + false, 2, |a, m, addr| { Self::emit_relaxed_binop( @@ -4788,7 +4831,8 @@ impl FunctionCodeGenerator for X64FunctionCode { a, &mut self.machine, target_addr, - memarg.offset as usize, + memarg, + false, 4, |a, m, addr| { Self::emit_relaxed_binop( @@ -4977,6 +5021,471 @@ impl FunctionCodeGenerator for X64FunctionCode { } } } + Operator::Fence { flags: _ } => { + // Fence is a nop. + // + // Fence was added to preserve information about fences from + // source languages. If in the future Wasm extends the memory + // model, and if we hadn't recorded what fences used to be there, + // it would lead to data races that weren't present in the + // original source language. 
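// On x86-64 the nop lowering is also sound in practice: plain loads and
// stores are already strongly ordered under x86-TSO, and every atomic RMW
// emitted below is LOCK-prefixed (a full barrier). Were the memory model
// ever strengthened, a conservative lowering could emit an explicit barrier
// instead; this is a sketch only, and `emit_mfence` is a hypothetical
// emitter method, not one defined in this patch:
//
//     Operator::Fence { flags: _ } => {
//         a.emit_mfence(); // would expand to dynasm!(self ; mfence)
//     }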
+ } + Operator::I32AtomicLoad { ref memarg } => { + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I32, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + target, + memarg, + true, + 4, + |a, m, addr| { + Self::emit_relaxed_binop( + a, + m, + Assembler::emit_mov, + Size::S32, + Location::Memory(addr, 0), + ret, + ); + }, + ); + } + Operator::I32AtomicLoad8U { ref memarg } => { + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I32, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + target, + memarg, + true, + 1, + |a, m, addr| { + Self::emit_relaxed_zx_sx( + a, + m, + Assembler::emit_movzx, + Size::S8, + Location::Memory(addr, 0), + Size::S32, + ret, + ); + }, + ); + } + Operator::I32AtomicLoad16U { ref memarg } => { + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I32, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + target, + memarg, + true, + 2, + |a, m, addr| { + Self::emit_relaxed_zx_sx( + a, + m, + Assembler::emit_movzx, + Size::S16, + Location::Memory(addr, 0), + Size::S32, + ret, + ); + }, + ); + } + Operator::I32AtomicStore { ref memarg } => { + let target_value = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target_addr = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + target_addr, + memarg, + true, + 4, + |a, m, addr| { + Self::emit_relaxed_binop( + a, + m, + Assembler::emit_xchg, + Size::S32, + target_value, + Location::Memory(addr, 0), + ); + }, + ); + } + Operator::I32AtomicStore8 { ref memarg } => { + let target_value = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target_addr = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + target_addr, + memarg, + true, + 1, + |a, m, addr| { + Self::emit_relaxed_binop( + a, + m, + Assembler::emit_xchg, + Size::S8, + target_value, + Location::Memory(addr, 0), + ); + }, + ); + } + Operator::I32AtomicStore16 { ref memarg } => { + let target_value = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target_addr = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + target_addr, + memarg, + true, + 2, + |a, m, addr| { + Self::emit_relaxed_binop( + a, + m, + Assembler::emit_xchg, + Size::S16, + target_value, + Location::Memory(addr, 0), + ); + }, + ); + } + Operator::I64AtomicLoad { ref memarg } => { + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, 
MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + target, + memarg, + true, + 8, + |a, m, addr| { + Self::emit_relaxed_binop( + a, + m, + Assembler::emit_mov, + Size::S64, + Location::Memory(addr, 0), + ret, + ); + }, + ); + } + Operator::I64AtomicLoad8U { ref memarg } => { + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + target, + memarg, + true, + 1, + |a, m, addr| { + Self::emit_relaxed_zx_sx( + a, + m, + Assembler::emit_movzx, + Size::S8, + Location::Memory(addr, 0), + Size::S64, + ret, + ); + }, + ); + } + Operator::I64AtomicLoad16U { ref memarg } => { + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + target, + memarg, + true, + 2, + |a, m, addr| { + Self::emit_relaxed_zx_sx( + a, + m, + Assembler::emit_movzx, + Size::S16, + Location::Memory(addr, 0), + Size::S64, + ret, + ); + }, + ); + } + Operator::I64AtomicLoad32U { ref memarg } => { + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + target, + memarg, + true, + 4, + |a, m, addr| { + match ret { + Location::GPR(_) => {} + Location::Memory(base, offset) => { + a.emit_mov( + Size::S32, + Location::Imm32(0), + Location::Memory(base, offset + 4), + ); // clear upper bits + } + _ => unreachable!(), + } + Self::emit_relaxed_binop( + a, + m, + Assembler::emit_mov, + Size::S32, + Location::Memory(addr, 0), + ret, + ); + }, + ); + } + Operator::I64AtomicStore { ref memarg } => { + let target_value = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target_addr = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + target_addr, + memarg, + true, + 8, + |a, m, addr| { + Self::emit_relaxed_binop( + a, + m, + Assembler::emit_xchg, + Size::S64, + target_value, + Location::Memory(addr, 0), + ); + }, + ); + } + Operator::I64AtomicStore8 { ref memarg } => { + let target_value = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target_addr = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + target_addr, + memarg, + true, + 1, + |a, m, addr| { + Self::emit_relaxed_binop( + a, + m, + Assembler::emit_xchg, + Size::S8, + target_value, + Location::Memory(addr, 0), + ); + }, + ); + } + Operator::I64AtomicStore16 { ref memarg } => { + let target_value = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target_addr = + 
get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + target_addr, + memarg, + true, + 2, + |a, m, addr| { + Self::emit_relaxed_binop( + a, + m, + Assembler::emit_xchg, + Size::S16, + target_value, + Location::Memory(addr, 0), + ); + }, + ); + } + Operator::I64AtomicStore32 { ref memarg } => { + let target_value = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target_addr = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + target_addr, + memarg, + true, + 4, + |a, m, addr| { + Self::emit_relaxed_binop( + a, + m, + Assembler::emit_xchg, + Size::S32, + target_value, + Location::Memory(addr, 0), + ); + }, + ); + } + Operator::I32AtomicRmwAdd { ref memarg } => { + let loc = get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I32, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + let value = self.machine.acquire_temp_gpr().unwrap(); + a.emit_mov( + Size::S32, + loc, + Location::GPR(value)); + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + target, + memarg, + true, + 4, + |a, _m, addr| { + a.emit_lock_xadd(Size::S32, Location::GPR(value), Location::Memory(addr, 0)) + } + ); + a.emit_mov( + Size::S32, + Location::GPR(value), + ret); + self.machine.release_temp_gpr(value); + } _ => { return Err(CodegenError { message: format!("not yet implemented: {:?}", op), diff --git a/lib/singlepass-backend/src/emitter_x64.rs b/lib/singlepass-backend/src/emitter_x64.rs index fa2ad485308..9344197ffd7 100644 --- a/lib/singlepass-backend/src/emitter_x64.rs +++ b/lib/singlepass-backend/src/emitter_x64.rs @@ -94,6 +94,8 @@ pub trait Emitter { fn emit_popcnt(&mut self, sz: Size, src: Location, dst: Location); fn emit_movzx(&mut self, sz_src: Size, src: Location, sz_dst: Size, dst: Location); fn emit_movsx(&mut self, sz_src: Size, src: Location, sz_dst: Size, dst: Location); + fn emit_xchg(&mut self, sz: Size, src: Location, dst: Location); + fn emit_lock_xadd(&mut self, sz: Size, src: Location, dst: Location); fn emit_btc_gpr_imm8_32(&mut self, src: u8, dst: GPR); fn emit_btc_gpr_imm8_64(&mut self, src: u8, dst: GPR); @@ -562,7 +564,7 @@ impl Emitter for Assembler { (Size::S64, Location::Memory(src, disp), Location::GPR(dst)) => { dynasm!(self ; lea Rq(dst as u8), [Rq(src as u8) + disp]); } - _ => unreachable!(), + _ => panic!("LEA {:?} {:?} {:?}", sz, src, dst), } } fn emit_lea_label(&mut self, label: Self::Label, dst: Location) { @@ -570,7 +572,7 @@ impl Emitter for Assembler { Location::GPR(x) => { dynasm!(self ; lea Rq(x as u8), [=>label]); } - _ => unreachable!(), + _ => panic!("LEA label={:?} {:?}", label, dst), } } fn emit_cdq(&mut self) { @@ -602,7 +604,7 @@ impl Emitter for Assembler { match loc { Location::GPR(x) => dynasm!(self ; jmp Rq(x as u8)), Location::Memory(base, disp) => dynasm!(self ; jmp QWORD [Rq(base as u8) + disp]), - _ => unreachable!(), + _ => panic!("JMP {:?}", loc), } } fn emit_conditional_trap(&mut self, condition: Condition) { @@ -634,7 +636,7 @@ impl Emitter for Assembler { Condition::Equal => dynasm!(self ; sete Rb(dst as u8)), Condition::NotEqual => dynasm!(self 
; setne Rb(dst as u8)), Condition::Signed => dynasm!(self ; sets Rb(dst as u8)), - _ => unreachable!(), + _ => panic!("SET {:?} {:?}", condition, dst), } } fn emit_push(&mut self, sz: Size, src: Location) { @@ -644,7 +646,7 @@ impl Emitter for Assembler { (Size::S64, Location::Memory(src, disp)) => { dynasm!(self ; push QWORD [Rq(src as u8) + disp]) } - _ => panic!("push {:?} {:?}", sz, src), + _ => panic!("PUSH {:?} {:?}", sz, src), } } fn emit_pop(&mut self, sz: Size, dst: Location) { @@ -653,12 +655,12 @@ impl Emitter for Assembler { (Size::S64, Location::Memory(dst, disp)) => { dynasm!(self ; pop QWORD [Rq(dst as u8) + disp]) } - _ => panic!("pop {:?} {:?}", sz, dst), + _ => panic!("POP {:?} {:?}", sz, dst), } } fn emit_cmp(&mut self, sz: Size, left: Location, right: Location) { binop_all_nofp!(cmp, self, sz, left, right, { - panic!("{:?} {:?} {:?}", sz, left, right); + panic!("CMP {:?} {:?} {:?}", sz, left, right); }); } fn emit_add(&mut self, sz: Size, src: Location, dst: Location) { @@ -743,7 +745,7 @@ impl Emitter for Assembler { (Size::S16, Location::Memory(src, disp), Size::S64, Location::GPR(dst)) => { dynasm!(self ; movzx Rq(dst as u8), WORD [Rq(src as u8) + disp]); } - _ => unreachable!(), + _ => panic!("MOVZX {:?} {:?} {:?} {:?}", sz_src, src, sz_dst, dst), } } fn emit_movsx(&mut self, sz_src: Size, src: Location, sz_dst: Size, dst: Location) { @@ -778,7 +780,67 @@ impl Emitter for Assembler { (Size::S32, Location::Memory(src, disp), Size::S64, Location::GPR(dst)) => { dynasm!(self ; movsx Rq(dst as u8), DWORD [Rq(src as u8) + disp]); } - _ => unreachable!(), + _ => panic!("MOVSX {:?} {:?} {:?} {:?}", sz_src, src, sz_dst, dst), + } + } + + fn emit_xchg(&mut self, sz: Size, src: Location, dst: Location) { + match (sz, src, dst) { + (Size::S8, Location::GPR(src), Location::GPR(dst)) => { + dynasm!(self ; xchg Rb(dst as u8), Rb(src as u8)); + } + (Size::S16, Location::GPR(src), Location::GPR(dst)) => { + dynasm!(self ; xchg Rw(dst as u8), Rw(src as u8)); + } + (Size::S32, Location::GPR(src), Location::GPR(dst)) => { + dynasm!(self ; xchg Rd(dst as u8), Rd(src as u8)); + } + (Size::S64, Location::GPR(src), Location::GPR(dst)) => { + dynasm!(self ; xchg Rq(dst as u8), Rq(src as u8)); + } + (Size::S8, Location::Memory(src, disp), Location::GPR(dst)) => { + dynasm!(self ; xchg Rb(dst as u8), [Rq(src as u8) + disp]); + } + (Size::S8, Location::GPR(src), Location::Memory(dst, disp)) => { + dynasm!(self ; xchg [Rq(dst as u8) + disp], Rb(src as u8)); + } + (Size::S16, Location::Memory(src, disp), Location::GPR(dst)) => { + dynasm!(self ; xchg Rw(dst as u8), [Rq(src as u8) + disp]); + } + (Size::S16, Location::GPR(src), Location::Memory(dst, disp)) => { + dynasm!(self ; xchg [Rq(dst as u8) + disp], Rw(src as u8)); + } + (Size::S32, Location::Memory(src, disp), Location::GPR(dst)) => { + dynasm!(self ; xchg Rd(dst as u8), [Rq(src as u8) + disp]); + } + (Size::S32, Location::GPR(src), Location::Memory(dst, disp)) => { + dynasm!(self ; xchg [Rq(dst as u8) + disp], Rd(src as u8)); + } + (Size::S64, Location::Memory(src, disp), Location::GPR(dst)) => { + dynasm!(self ; xchg Rq(dst as u8), [Rq(src as u8) + disp]); + } + (Size::S64, Location::GPR(src), Location::Memory(dst, disp)) => { + dynasm!(self ; xchg [Rq(dst as u8) + disp], Rq(src as u8)); + } + _ => panic!("XCHG {:?} {:?} {:?}", sz, src, dst), + } + } + + fn emit_lock_xadd(&mut self, sz: Size, src: Location, dst: Location) { + match (sz, src, dst) { + (Size::S8, Location::GPR(src), Location::Memory(dst, disp)) => { + dynasm!(self ; lock 
xadd [Rq(dst as u8) + disp], Rb(src as u8)); + } + (Size::S16, Location::GPR(src), Location::Memory(dst, disp)) => { + dynasm!(self ; lock xadd [Rq(dst as u8) + disp], Rw(src as u8)); + } + (Size::S32, Location::GPR(src), Location::Memory(dst, disp)) => { + dynasm!(self ; lock xadd [Rq(dst as u8) + disp], Rd(src as u8)); + } + (Size::S64, Location::GPR(src), Location::Memory(dst, disp)) => { + dynasm!(self ; lock xadd [Rq(dst as u8) + disp], Rq(src as u8)); + } + _ => panic!("LOCK XADD {:?} {:?} {:?}", sz, src, dst), } } From f63d4aa52397a4752bb39d857a176a4ee96ed27f Mon Sep 17 00:00:00 2001 From: Nick Lewycky Date: Fri, 30 Aug 2019 18:00:10 -0700 Subject: [PATCH 2/9] Add i32 rmw add and sub. --- lib/singlepass-backend/src/codegen_x64.rs | 182 ++++++++++++++++++++++ lib/singlepass-backend/src/emitter_x64.rs | 14 ++ 2 files changed, 196 insertions(+) diff --git a/lib/singlepass-backend/src/codegen_x64.rs b/lib/singlepass-backend/src/codegen_x64.rs index 4d997bf434b..196e92760db 100644 --- a/lib/singlepass-backend/src/codegen_x64.rs +++ b/lib/singlepass-backend/src/codegen_x64.rs @@ -5486,6 +5486,188 @@ impl FunctionCodeGenerator for X64FunctionCode { ret); self.machine.release_temp_gpr(value); } + Operator::I32AtomicRmw8UAdd { ref memarg } => { + let loc = get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I32, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + let value = self.machine.acquire_temp_gpr().unwrap(); + a.emit_movzx( + Size::S8, + loc, + Size::S32, + Location::GPR(value)); + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + target, + memarg, + true, + 4, + |a, _m, addr| { + a.emit_lock_xadd(Size::S8, Location::GPR(value), Location::Memory(addr, 0)) + } + ); + a.emit_mov( + Size::S32, + Location::GPR(value), + ret); + self.machine.release_temp_gpr(value); + } + Operator::I32AtomicRmw16UAdd { ref memarg } => { + let loc = get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I32, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + let value = self.machine.acquire_temp_gpr().unwrap(); + a.emit_movzx( + Size::S16, + loc, + Size::S32, + Location::GPR(value)); + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + target, + memarg, + true, + 4, + |a, _m, addr| { + a.emit_lock_xadd(Size::S16, Location::GPR(value), Location::Memory(addr, 0)) + } + ); + a.emit_mov( + Size::S32, + Location::GPR(value), + ret); + self.machine.release_temp_gpr(value); + } + Operator::I32AtomicRmwSub { ref memarg } => { + let loc = get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I32, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + let value = self.machine.acquire_temp_gpr().unwrap(); + a.emit_mov( + Size::S32, + loc, + Location::GPR(value)); + a.emit_neg(Size::S32, Location::GPR(value)); + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut 
self.machine, + target, + memarg, + true, + 4, + |a, _m, addr| { + a.emit_lock_xadd(Size::S32, Location::GPR(value), Location::Memory(addr, 0)) + } + ); + a.emit_mov( + Size::S32, + Location::GPR(value), + ret); + self.machine.release_temp_gpr(value); + } + Operator::I32AtomicRmw8USub { ref memarg } => { + let loc = get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I32, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + let value = self.machine.acquire_temp_gpr().unwrap(); + a.emit_movzx( + Size::S8, + loc, + Size::S32, + Location::GPR(value)); + a.emit_neg(Size::S8, Location::GPR(value)); + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + target, + memarg, + true, + 4, + |a, _m, addr| { + a.emit_lock_xadd(Size::S8, Location::GPR(value), Location::Memory(addr, 0)) + } + ); + a.emit_mov( + Size::S32, + Location::GPR(value), + ret); + self.machine.release_temp_gpr(value); + } + Operator::I32AtomicRmw16USub { ref memarg } => { + let loc = get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I32, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + let value = self.machine.acquire_temp_gpr().unwrap(); + a.emit_movzx( + Size::S16, + loc, + Size::S32, + Location::GPR(value)); + a.emit_neg(Size::S16, Location::GPR(value)); + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + target, + memarg, + true, + 4, + |a, _m, addr| { + a.emit_lock_xadd(Size::S16, Location::GPR(value), Location::Memory(addr, 0)) + } + ); + a.emit_mov( + Size::S32, + Location::GPR(value), + ret); + self.machine.release_temp_gpr(value); + } _ => { return Err(CodegenError { message: format!("not yet implemented: {:?}", op), diff --git a/lib/singlepass-backend/src/emitter_x64.rs b/lib/singlepass-backend/src/emitter_x64.rs index 9344197ffd7..4bea37d3e2a 100644 --- a/lib/singlepass-backend/src/emitter_x64.rs +++ b/lib/singlepass-backend/src/emitter_x64.rs @@ -78,6 +78,7 @@ pub trait Emitter { fn emit_cmp(&mut self, sz: Size, left: Location, right: Location); fn emit_add(&mut self, sz: Size, src: Location, dst: Location); fn emit_sub(&mut self, sz: Size, src: Location, dst: Location); + fn emit_neg(&mut self, sz: Size, value: Location); fn emit_imul(&mut self, sz: Size, src: Location, dst: Location); fn emit_imul_imm32_gpr64(&mut self, src: u32, dst: GPR); fn emit_div(&mut self, sz: Size, divisor: Location); @@ -669,6 +670,19 @@ impl Emitter for Assembler { fn emit_sub(&mut self, sz: Size, src: Location, dst: Location) { binop_all_nofp!(sub, self, sz, src, dst, { unreachable!() }); } + fn emit_neg(&mut self, sz: Size, value: Location) { + match (sz, value) { + (Size::S8, Location::GPR(value)) => { dynasm!(self ; neg Rb(value as u8)) } + (Size::S8, Location::Memory(value, disp)) => { dynasm!(self ; neg [Rq(value as u8) + disp]) } + (Size::S16, Location::GPR(value)) => { dynasm!(self ; neg Rw(value as u8)) } + (Size::S16, Location::Memory(value, disp)) => { dynasm!(self ; neg [Rq(value as u8) + disp]) } + (Size::S32, Location::GPR(value)) => { dynasm!(self ; neg Rd(value as u8)) } + (Size::S32, 
Location::Memory(value, disp)) => { dynasm!(self ; neg [Rq(value as u8) + disp]) } + (Size::S64, Location::GPR(value)) => { dynasm!(self ; neg Rq(value as u8)) } + (Size::S64, Location::Memory(value, disp)) => { dynasm!(self ; neg [Rq(value as u8) + disp]) } + _ => panic!("NEG {:?} {:?}", sz, value), + } + } fn emit_imul(&mut self, sz: Size, src: Location, dst: Location) { binop_gpr_gpr!(imul, self, sz, src, dst, { binop_mem_gpr!(imul, self, sz, src, dst, { unreachable!() }) From 5b177436ed16b7c88916d242739b3310a32ec910 Mon Sep 17 00:00:00 2001 From: Nick Lewycky Date: Wed, 18 Sep 2019 11:58:21 -0700 Subject: [PATCH 3/9] Add emitter for LOCK CMPXCHG so that we can emit compare-and-swap loops. --- lib/singlepass-backend/src/emitter_x64.rs | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/lib/singlepass-backend/src/emitter_x64.rs b/lib/singlepass-backend/src/emitter_x64.rs index 4bea37d3e2a..83099f25970 100644 --- a/lib/singlepass-backend/src/emitter_x64.rs +++ b/lib/singlepass-backend/src/emitter_x64.rs @@ -97,6 +97,7 @@ pub trait Emitter { fn emit_movsx(&mut self, sz_src: Size, src: Location, sz_dst: Size, dst: Location); fn emit_xchg(&mut self, sz: Size, src: Location, dst: Location); fn emit_lock_xadd(&mut self, sz: Size, src: Location, dst: Location); + fn emit_lock_cmpxchg(&mut self, sz: Size, src: Location, dst: Location); fn emit_btc_gpr_imm8_32(&mut self, src: u8, dst: GPR); fn emit_btc_gpr_imm8_64(&mut self, src: u8, dst: GPR); @@ -858,6 +859,24 @@ impl Emitter for Assembler { } } + fn emit_lock_cmpxchg(&mut self, sz: Size, src: Location, dst: Location) { + match (sz, src, dst) { + (Size::S8, Location::GPR(src), Location::Memory(dst, disp)) => { + dynasm!(self ; lock cmpxchg [Rq(dst as u8) + disp], Rb(src as u8)); + } + (Size::S16, Location::GPR(src), Location::Memory(dst, disp)) => { + dynasm!(self ; lock cmpxchg [Rq(dst as u8) + disp], Rw(src as u8)); + } + (Size::S32, Location::GPR(src), Location::Memory(dst, disp)) => { + dynasm!(self ; lock cmpxchg [Rq(dst as u8) + disp], Rd(src as u8)); + } + (Size::S64, Location::GPR(src), Location::Memory(dst, disp)) => { + dynasm!(self ; lock cmpxchg [Rq(dst as u8) + disp], Rq(src as u8)); + } + _ => panic!("LOCK CMPXCHG {:?} {:?} {:?}", sz, src, dst), + } + } + fn emit_btc_gpr_imm8_32(&mut self, src: u8, dst: GPR) { dynasm!(self ; btc Rd(dst as u8), BYTE src as i8); } From 13d490fb08112463d55583bc32bd0343a80d8c73 Mon Sep 17 00:00:00 2001 From: Nick Lewycky Date: Fri, 20 Sep 2019 23:39:15 -0700 Subject: [PATCH 4/9] Use a compare-and-swap loop for AND. BUG: This might allocate RAX twice. 
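Background: x86-64 has a fetch-and-add (LOCK XADD) but no fetch-and-AND, so
rmw.and has to be synthesized from LOCK CMPXCHG. The loop shape this patch
emits, sketched in assembly (register choices illustrative):

    retry:
      mov   eax, [addr]          ; compare := current value (CMPXCHG pins RAX)
      mov   ecx, operand
      and   ecx, eax             ; new := old & operand
      lock cmpxchg [addr], ecx   ; if [addr] == eax { [addr] := ecx; ZF := 1 }
      jne   retry                ; lost the race; reload and retry

The BUG note refers to `compare` being hard-wired to GPR::RAX while the
register allocator may still hand RAX out as an ordinary temporary; patch 5
addresses this by introducing `reserve_temp_gpr`.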
--- lib/singlepass-backend/src/codegen_x64.rs | 157 +++++++++++++--------- 1 file changed, 96 insertions(+), 61 deletions(-) diff --git a/lib/singlepass-backend/src/codegen_x64.rs b/lib/singlepass-backend/src/codegen_x64.rs index 196e92760db..b9f059c188b 100644 --- a/lib/singlepass-backend/src/codegen_x64.rs +++ b/lib/singlepass-backend/src/codegen_x64.rs @@ -1561,10 +1561,18 @@ impl X64FunctionCode { if check_alignment && align != 1 { let tmp_aligncheck = m.acquire_temp_gpr().unwrap(); //let tmp_mask = m.acquire_temp_gpr().unwrap(); - a.emit_mov(Size::S32, Location::GPR(tmp_addr), Location::GPR(tmp_aligncheck)); + a.emit_mov( + Size::S32, + Location::GPR(tmp_addr), + Location::GPR(tmp_aligncheck), + ); //a.emit_mov(Size::S64, Location::Imm64(align - 1), Location::GPR(tmp_mask)); //a.emit_and(Size::S64, Location::GPR(tmp_mask), Location::GPR(tmp_aligncheck)); - a.emit_and(Size::S64, Location::Imm32(align - 1), Location::GPR(tmp_aligncheck)); + a.emit_and( + Size::S64, + Location::Imm32(align - 1), + Location::GPR(tmp_aligncheck), + ); a.emit_conditional_trap(Condition::NotEqual); //m.release_temp_gpr(tmp_mask); m.release_temp_gpr(tmp_aligncheck); @@ -5452,7 +5460,8 @@ impl FunctionCodeGenerator for X64FunctionCode { ); } Operator::I32AtomicRmwAdd { ref memarg } => { - let loc = get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); let target = get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); let ret = self.machine.acquire_locations( @@ -5463,10 +5472,7 @@ impl FunctionCodeGenerator for X64FunctionCode { self.value_stack.push(ret); let value = self.machine.acquire_temp_gpr().unwrap(); - a.emit_mov( - Size::S32, - loc, - Location::GPR(value)); + a.emit_mov(Size::S32, loc, Location::GPR(value)); Self::emit_memory_op( module_info, &self.config, @@ -5478,16 +5484,14 @@ impl FunctionCodeGenerator for X64FunctionCode { 4, |a, _m, addr| { a.emit_lock_xadd(Size::S32, Location::GPR(value), Location::Memory(addr, 0)) - } + }, ); - a.emit_mov( - Size::S32, - Location::GPR(value), - ret); + a.emit_mov(Size::S32, Location::GPR(value), ret); self.machine.release_temp_gpr(value); } Operator::I32AtomicRmw8UAdd { ref memarg } => { - let loc = get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); let target = get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); let ret = self.machine.acquire_locations( @@ -5498,11 +5502,7 @@ impl FunctionCodeGenerator for X64FunctionCode { self.value_stack.push(ret); let value = self.machine.acquire_temp_gpr().unwrap(); - a.emit_movzx( - Size::S8, - loc, - Size::S32, - Location::GPR(value)); + a.emit_movzx(Size::S8, loc, Size::S32, Location::GPR(value)); Self::emit_memory_op( module_info, &self.config, @@ -5514,16 +5514,14 @@ impl FunctionCodeGenerator for X64FunctionCode { 4, |a, _m, addr| { a.emit_lock_xadd(Size::S8, Location::GPR(value), Location::Memory(addr, 0)) - } + }, ); - a.emit_mov( - Size::S32, - Location::GPR(value), - ret); + a.emit_mov(Size::S32, Location::GPR(value), ret); self.machine.release_temp_gpr(value); } Operator::I32AtomicRmw16UAdd { ref memarg } => { - let loc = get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); let target = get_location_released(a, 
&mut self.machine, self.value_stack.pop().unwrap()); let ret = self.machine.acquire_locations( @@ -5534,11 +5532,7 @@ impl FunctionCodeGenerator for X64FunctionCode { self.value_stack.push(ret); let value = self.machine.acquire_temp_gpr().unwrap(); - a.emit_movzx( - Size::S16, - loc, - Size::S32, - Location::GPR(value)); + a.emit_movzx(Size::S16, loc, Size::S32, Location::GPR(value)); Self::emit_memory_op( module_info, &self.config, @@ -5550,16 +5544,14 @@ impl FunctionCodeGenerator for X64FunctionCode { 4, |a, _m, addr| { a.emit_lock_xadd(Size::S16, Location::GPR(value), Location::Memory(addr, 0)) - } + }, ); - a.emit_mov( - Size::S32, - Location::GPR(value), - ret); + a.emit_mov(Size::S32, Location::GPR(value), ret); self.machine.release_temp_gpr(value); } Operator::I32AtomicRmwSub { ref memarg } => { - let loc = get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); let target = get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); let ret = self.machine.acquire_locations( @@ -5570,10 +5562,7 @@ impl FunctionCodeGenerator for X64FunctionCode { self.value_stack.push(ret); let value = self.machine.acquire_temp_gpr().unwrap(); - a.emit_mov( - Size::S32, - loc, - Location::GPR(value)); + a.emit_mov(Size::S32, loc, Location::GPR(value)); a.emit_neg(Size::S32, Location::GPR(value)); Self::emit_memory_op( module_info, @@ -5586,16 +5575,14 @@ impl FunctionCodeGenerator for X64FunctionCode { 4, |a, _m, addr| { a.emit_lock_xadd(Size::S32, Location::GPR(value), Location::Memory(addr, 0)) - } + }, ); - a.emit_mov( - Size::S32, - Location::GPR(value), - ret); + a.emit_mov(Size::S32, Location::GPR(value), ret); self.machine.release_temp_gpr(value); } Operator::I32AtomicRmw8USub { ref memarg } => { - let loc = get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); let target = get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); let ret = self.machine.acquire_locations( @@ -5623,16 +5610,14 @@ impl FunctionCodeGenerator for X64FunctionCode { 4, |a, _m, addr| { a.emit_lock_xadd(Size::S8, Location::GPR(value), Location::Memory(addr, 0)) - } + }, ); - a.emit_mov( - Size::S32, - Location::GPR(value), - ret); + a.emit_mov(Size::S32, Location::GPR(value), ret); self.machine.release_temp_gpr(value); } Operator::I32AtomicRmw16USub { ref memarg } => { - let loc = get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); let target = get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); let ret = self.machine.acquire_locations( @@ -5643,11 +5628,7 @@ impl FunctionCodeGenerator for X64FunctionCode { self.value_stack.push(ret); let value = self.machine.acquire_temp_gpr().unwrap(); - a.emit_movzx( - Size::S16, - loc, - Size::S32, - Location::GPR(value)); + a.emit_movzx(Size::S16, loc, Size::S32, Location::GPR(value)); a.emit_neg(Size::S16, Location::GPR(value)); Self::emit_memory_op( module_info, @@ -5660,12 +5641,66 @@ impl FunctionCodeGenerator for X64FunctionCode { 4, |a, _m, addr| { a.emit_lock_xadd(Size::S16, Location::GPR(value), Location::Memory(addr, 0)) - } + }, ); - a.emit_mov( - Size::S32, - Location::GPR(value), - ret); + a.emit_mov(Size::S32, Location::GPR(value), ret); + 
self.machine.release_temp_gpr(value); + } + Operator::I32AtomicRmwAnd { ref memarg } => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I32, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + let retry = a.get_label(); + + let value = self.machine.acquire_temp_gpr().unwrap(); + let compare = GPR::RAX; + + a.emit_label(retry); + + a.emit_mov(Size::S32, loc, Location::GPR(value)); + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + target, + memarg, + true, + 4, + |a, _m, addr| { + a.emit_mov(Size::S32, Location::Memory(addr, 0), Location::GPR(compare)) + }, + ); + a.emit_and(Size::S32, Location::GPR(compare), Location::GPR(value)); + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + target, + memarg, + true, + 4, + |a, _m, addr| { + a.emit_lock_cmpxchg( + Size::S32, + Location::GPR(value), + Location::Memory(addr, 0), + ) + }, + ); + + a.emit_jmp(Condition::NotEqual, retry); + + a.emit_mov(Size::S32, Location::GPR(value), ret); self.machine.release_temp_gpr(value); } _ => { From 2f10f29792240a79dca2a3d348be04352e8cac61 Mon Sep 17 00:00:00 2001 From: Nick Lewycky Date: Mon, 23 Sep 2019 14:52:05 -0700 Subject: [PATCH 5/9] Initial working implementation of I32AtomicRmwAnd! Adds the ability to reserve a specific temp-gpr register. Needed for CMPXCHG which always uses RAX. --- lib/singlepass-backend/src/codegen_x64.rs | 79 +++++++++++------------ lib/singlepass-backend/src/machine.rs | 9 ++- 2 files changed, 44 insertions(+), 44 deletions(-) diff --git a/lib/singlepass-backend/src/codegen_x64.rs b/lib/singlepass-backend/src/codegen_x64.rs index b9f059c188b..ab97a21dd62 100644 --- a/lib/singlepass-backend/src/codegen_x64.rs +++ b/lib/singlepass-backend/src/codegen_x64.rs @@ -1514,22 +1514,24 @@ impl X64FunctionCode { a.emit_add(Size::S64, Location::GPR(tmp_base), Location::GPR(tmp_bound)); a.emit_mov(Size::S32, addr, Location::GPR(tmp_addr)); - // This branch is used for emitting "faster" code for the special case of (offset + value_size) not exceeding u32 range. - match (memarg.offset as u32).checked_add(value_size as u32) { - Some(x) => { - a.emit_add(Size::S64, Location::Imm32(x), Location::GPR(tmp_addr)); - } - None => { - a.emit_add( - Size::S64, - Location::Imm32(memarg.offset as u32), - Location::GPR(tmp_addr), - ); - a.emit_add( - Size::S64, - Location::Imm32(value_size as u32), - Location::GPR(tmp_addr), - ); + if memarg.offset != 0 && value_size != 0 { + // This branch is used for emitting "faster" code for the special case of (offset + value_size) not exceeding u32 range. + match (memarg.offset as u32).checked_add(value_size as u32) { + Some(x) => { + a.emit_add(Size::S64, Location::Imm32(x), Location::GPR(tmp_addr)); + } + None => { + a.emit_add( + Size::S64, + Location::Imm32(memarg.offset as u32), + Location::GPR(tmp_addr), + ); + a.emit_add( + Size::S64, + Location::Imm32(value_size as u32), + Location::GPR(tmp_addr), + ); + } } } @@ -1543,11 +1545,13 @@ impl X64FunctionCode { // Calculates the real address, and loads from it. 
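// On the guard just added: `value_size != 0` is vacuous as far as this
// backend's call sites go (they pass 1, 2, 4 or 8), so the condition
// effectively reduces to `memarg.offset != 0`. When the static offset is
// zero the two ADD-immediates are skipped, and the bounds check then
// compares the first byte of the access rather than one past its last byte.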
a.emit_mov(Size::S32, addr, Location::GPR(tmp_addr)); - a.emit_add( - Size::S64, - Location::Imm32(memarg.offset as u32), - Location::GPR(tmp_addr), - ); + if memarg.offset != 0 { + a.emit_add( + Size::S64, + Location::Imm32(memarg.offset as u32), + Location::GPR(tmp_addr), + ); + } a.emit_add(Size::S64, Location::GPR(tmp_base), Location::GPR(tmp_addr)); m.release_temp_gpr(tmp_base); @@ -5658,28 +5662,14 @@ impl FunctionCodeGenerator for X64FunctionCode { )[0]; self.value_stack.push(ret); - let retry = a.get_label(); - - let value = self.machine.acquire_temp_gpr().unwrap(); - let compare = GPR::RAX; + let compare = self.machine.reserve_temp_gpr(GPR::RAX); + let value = if loc == Location::GPR(GPR::R14) { GPR::R13 } else { GPR::R14 }; + a.emit_push(Size::S64, Location::GPR(value)); + a.emit_mov(Size::S32, loc, Location::GPR(value)); + let retry = a.get_label(); a.emit_label(retry); - a.emit_mov(Size::S32, loc, Location::GPR(value)); - Self::emit_memory_op( - module_info, - &self.config, - a, - &mut self.machine, - target, - memarg, - true, - 4, - |a, _m, addr| { - a.emit_mov(Size::S32, Location::Memory(addr, 0), Location::GPR(compare)) - }, - ); - a.emit_and(Size::S32, Location::GPR(compare), Location::GPR(value)); Self::emit_memory_op( module_info, &self.config, @@ -5690,18 +5680,21 @@ impl FunctionCodeGenerator for X64FunctionCode { true, 4, |a, _m, addr| { + a.emit_mov(Size::S32, Location::Memory(addr, 0), Location::GPR(compare)); + a.emit_mov(Size::S32, Location::GPR(compare), ret); + a.emit_and(Size::S32, Location::GPR(compare), Location::GPR(value)); a.emit_lock_cmpxchg( Size::S32, Location::GPR(value), Location::Memory(addr, 0), - ) + ); }, ); a.emit_jmp(Condition::NotEqual, retry); - a.emit_mov(Size::S32, Location::GPR(value), ret); - self.machine.release_temp_gpr(value); + a.emit_pop(Size::S64, Location::GPR(value)); + self.machine.release_temp_gpr(compare); } _ => { return Err(CodegenError { diff --git a/lib/singlepass-backend/src/machine.rs b/lib/singlepass-backend/src/machine.rs index a0793f37983..dfeb010dfc0 100644 --- a/lib/singlepass-backend/src/machine.rs +++ b/lib/singlepass-backend/src/machine.rs @@ -83,7 +83,14 @@ impl Machine { /// Releases a temporary GPR. pub fn release_temp_gpr(&mut self, gpr: GPR) { - assert_eq!(self.used_gprs.remove(&gpr), true); + assert!(self.used_gprs.remove(&gpr)); + } + + /// Specify that a given register is in use. + pub fn reserve_temp_gpr(&mut self, gpr: GPR) -> GPR { + assert!(!self.used_gprs.contains(&gpr)); + self.used_gprs.insert(gpr); + gpr } /// Picks an unused XMM register. From e9f1bd0146c61fa6920465816cef2bf88d317678 Mon Sep 17 00:00:00 2001 From: Nick Lewycky Date: Mon, 23 Sep 2019 16:08:49 -0700 Subject: [PATCH 6/9] Refactor out a compare-and-swap loop function. --- lib/singlepass-backend/src/codegen_x64.rs | 84 +++++++++++++++-------- 1 file changed, 55 insertions(+), 29 deletions(-) diff --git a/lib/singlepass-backend/src/codegen_x64.rs b/lib/singlepass-backend/src/codegen_x64.rs index ab97a21dd62..43bfa9103a6 100644 --- a/lib/singlepass-backend/src/codegen_x64.rs +++ b/lib/singlepass-backend/src/codegen_x64.rs @@ -1587,6 +1587,55 @@ impl X64FunctionCode { m.release_temp_gpr(tmp_addr); } + /// Emits a memory operation. 
+ fn emit_cas_loop_op( + module_info: &ModuleInfo, + config: &CodegenConfig, + a: &mut Assembler, + m: &mut Machine, + loc: Location, + target: Location, + ret: Location, + memarg: &MemoryImmediate, + value_size: usize, + cb: F, + ) { + let compare = m.reserve_temp_gpr(GPR::RAX); + let value = if loc == Location::GPR(GPR::R14) { + GPR::R13 + } else { + GPR::R14 + }; + a.emit_push(Size::S64, Location::GPR(value)); + + a.emit_mov(Size::S32, loc, Location::GPR(value)); + + let retry = a.get_label(); + a.emit_label(retry); + + Self::emit_memory_op( + module_info, + config, + a, + m, + target, + memarg, + true, + value_size, + |a, m, addr| { + a.emit_mov(Size::S32, Location::Memory(addr, 0), Location::GPR(compare)); + a.emit_mov(Size::S32, Location::GPR(compare), ret); + cb(a, m, compare, value); + a.emit_lock_cmpxchg(Size::S32, Location::GPR(value), Location::Memory(addr, 0)); + }, + ); + + a.emit_jmp(Condition::NotEqual, retry); + + a.emit_pop(Size::S64, Location::GPR(value)); + m.release_temp_gpr(compare); + } + // Checks for underflow/overflow/nan before IxxTrunc{U/S}F32. fn emit_f32_int_conv_check( a: &mut Assembler, @@ -5597,11 +5646,7 @@ impl FunctionCodeGenerator for X64FunctionCode { self.value_stack.push(ret); let value = self.machine.acquire_temp_gpr().unwrap(); - a.emit_movzx( - Size::S8, - loc, - Size::S32, - Location::GPR(value)); + a.emit_movzx(Size::S8, loc, Size::S32, Location::GPR(value)); a.emit_neg(Size::S8, Location::GPR(value)); Self::emit_memory_op( module_info, @@ -5662,39 +5707,20 @@ impl FunctionCodeGenerator for X64FunctionCode { )[0]; self.value_stack.push(ret); - let compare = self.machine.reserve_temp_gpr(GPR::RAX); - let value = if loc == Location::GPR(GPR::R14) { GPR::R13 } else { GPR::R14 }; - a.emit_push(Size::S64, Location::GPR(value)); - a.emit_mov(Size::S32, loc, Location::GPR(value)); - - let retry = a.get_label(); - a.emit_label(retry); - - Self::emit_memory_op( + Self::emit_cas_loop_op( module_info, &self.config, a, &mut self.machine, + loc, target, + ret, memarg, - true, 4, - |a, _m, addr| { - a.emit_mov(Size::S32, Location::Memory(addr, 0), Location::GPR(compare)); - a.emit_mov(Size::S32, Location::GPR(compare), ret); - a.emit_and(Size::S32, Location::GPR(compare), Location::GPR(value)); - a.emit_lock_cmpxchg( - Size::S32, - Location::GPR(value), - Location::Memory(addr, 0), - ); + |a, _m, src, dst| { + a.emit_and(Size::S32, Location::GPR(src), Location::GPR(dst)); }, ); - - a.emit_jmp(Condition::NotEqual, retry); - - a.emit_pop(Size::S64, Location::GPR(value)); - self.machine.release_temp_gpr(compare); } _ => { return Err(CodegenError { From 2ada54a3e67d3d5b7c50af3cb484dce5eff25f22 Mon Sep 17 00:00:00 2001 From: Nick Lewycky Date: Mon, 23 Sep 2019 22:01:08 -0700 Subject: [PATCH 7/9] Add atomic.rmw operations, excluding xchg and cmpxchg. Sizes are now ordered, to facilitate an assertion that one size is less (smaller) than another. panic! error messages are provided for remaining emitter functions. --- lib/singlepass-backend/src/codegen_x64.rs | 850 +++++++++++++++++++++- lib/singlepass-backend/src/emitter_x64.rs | 90 ++- 2 files changed, 905 insertions(+), 35 deletions(-) diff --git a/lib/singlepass-backend/src/codegen_x64.rs b/lib/singlepass-backend/src/codegen_x64.rs index 43bfa9103a6..9b4412d6672 100644 --- a/lib/singlepass-backend/src/codegen_x64.rs +++ b/lib/singlepass-backend/src/codegen_x64.rs @@ -1588,7 +1588,7 @@ impl X64FunctionCode { } /// Emits a memory operation. 
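// (Patch 7 widens this helper to separate memory/stack widths; the new
// `assert!(memory_sz <= stack_sz)` relies on `Size` now deriving an order,
// per the commit message "Sizes are now ordered". A sketch of the assumed
// declaration-order derive in emitter_x64.rs:
//
//     #[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd)]
//     pub enum Size { S8, S16, S32, S64 }
//
// which makes S8 < S16 < S32 < S64 hold.)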
- fn emit_cas_loop_op( + fn emit_compare_and_swap( module_info: &ModuleInfo, config: &CodegenConfig, a: &mut Assembler, @@ -1598,8 +1598,12 @@ impl X64FunctionCode { ret: Location, memarg: &MemoryImmediate, value_size: usize, + memory_sz: Size, + stack_sz: Size, cb: F, ) { + assert!(memory_sz <= stack_sz); + let compare = m.reserve_temp_gpr(GPR::RAX); let value = if loc == Location::GPR(GPR::R14) { GPR::R13 @@ -1608,7 +1612,7 @@ impl X64FunctionCode { }; a.emit_push(Size::S64, Location::GPR(value)); - a.emit_mov(Size::S32, loc, Location::GPR(value)); + a.emit_mov(stack_sz, loc, Location::GPR(value)); let retry = a.get_label(); a.emit_label(retry); @@ -1623,10 +1627,10 @@ impl X64FunctionCode { true, value_size, |a, m, addr| { - a.emit_mov(Size::S32, Location::Memory(addr, 0), Location::GPR(compare)); - a.emit_mov(Size::S32, Location::GPR(compare), ret); + a.emit_mov(memory_sz, Location::Memory(addr, 0), Location::GPR(compare)); + a.emit_mov(stack_sz, Location::GPR(compare), ret); cb(a, m, compare, value); - a.emit_lock_cmpxchg(Size::S32, Location::GPR(value), Location::Memory(addr, 0)); + a.emit_lock_cmpxchg(memory_sz, Location::GPR(value), Location::Memory(addr, 0)); }, ); @@ -5542,6 +5546,36 @@ impl FunctionCodeGenerator for X64FunctionCode { a.emit_mov(Size::S32, Location::GPR(value), ret); self.machine.release_temp_gpr(value); } + Operator::I64AtomicRmwAdd { ref memarg } => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + let value = self.machine.acquire_temp_gpr().unwrap(); + a.emit_mov(Size::S64, loc, Location::GPR(value)); + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + target, + memarg, + true, + 8, + |a, _m, addr| { + a.emit_lock_xadd(Size::S64, Location::GPR(value), Location::Memory(addr, 0)) + }, + ); + a.emit_mov(Size::S64, Location::GPR(value), ret); + self.machine.release_temp_gpr(value); + } Operator::I32AtomicRmw8UAdd { ref memarg } => { let loc = get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); @@ -5564,7 +5598,7 @@ impl FunctionCodeGenerator for X64FunctionCode { target, memarg, true, - 4, + 1, |a, _m, addr| { a.emit_lock_xadd(Size::S8, Location::GPR(value), Location::Memory(addr, 0)) }, @@ -5594,7 +5628,7 @@ impl FunctionCodeGenerator for X64FunctionCode { target, memarg, true, - 4, + 2, |a, _m, addr| { a.emit_lock_xadd(Size::S16, Location::GPR(value), Location::Memory(addr, 0)) }, @@ -5602,6 +5636,96 @@ impl FunctionCodeGenerator for X64FunctionCode { a.emit_mov(Size::S32, Location::GPR(value), ret); self.machine.release_temp_gpr(value); } + Operator::I64AtomicRmw8UAdd { ref memarg } => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + let value = self.machine.acquire_temp_gpr().unwrap(); + a.emit_movzx(Size::S8, loc, Size::S64, Location::GPR(value)); + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + target, + memarg, + true, + 1, + |a, _m, addr| { + 
a.emit_lock_xadd(Size::S8, Location::GPR(value), Location::Memory(addr, 0)) + }, + ); + a.emit_mov(Size::S64, Location::GPR(value), ret); + self.machine.release_temp_gpr(value); + } + Operator::I64AtomicRmw16UAdd { ref memarg } => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + let value = self.machine.acquire_temp_gpr().unwrap(); + a.emit_movzx(Size::S16, loc, Size::S64, Location::GPR(value)); + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + target, + memarg, + true, + 2, + |a, _m, addr| { + a.emit_lock_xadd(Size::S16, Location::GPR(value), Location::Memory(addr, 0)) + }, + ); + a.emit_mov(Size::S64, Location::GPR(value), ret); + self.machine.release_temp_gpr(value); + } + Operator::I64AtomicRmw32UAdd { ref memarg } => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + let value = self.machine.acquire_temp_gpr().unwrap(); + a.emit_mov(Size::S32, loc, Location::GPR(value)); + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + target, + memarg, + true, + 4, + |a, _m, addr| { + a.emit_lock_xadd(Size::S32, Location::GPR(value), Location::Memory(addr, 0)) + }, + ); + a.emit_mov(Size::S64, Location::GPR(value), ret); + self.machine.release_temp_gpr(value); + } Operator::I32AtomicRmwSub { ref memarg } => { let loc = get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); @@ -5633,6 +5757,37 @@ impl FunctionCodeGenerator for X64FunctionCode { a.emit_mov(Size::S32, Location::GPR(value), ret); self.machine.release_temp_gpr(value); } + Operator::I64AtomicRmwSub { ref memarg } => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + let value = self.machine.acquire_temp_gpr().unwrap(); + a.emit_mov(Size::S64, loc, Location::GPR(value)); + a.emit_neg(Size::S64, Location::GPR(value)); + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + target, + memarg, + true, + 8, + |a, _m, addr| { + a.emit_lock_xadd(Size::S64, Location::GPR(value), Location::Memory(addr, 0)) + }, + ); + a.emit_mov(Size::S64, Location::GPR(value), ret); + self.machine.release_temp_gpr(value); + } Operator::I32AtomicRmw8USub { ref memarg } => { let loc = get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); @@ -5656,7 +5811,7 @@ impl FunctionCodeGenerator for X64FunctionCode { target, memarg, true, - 4, + 1, |a, _m, addr| { a.emit_lock_xadd(Size::S8, Location::GPR(value), Location::Memory(addr, 0)) }, @@ -5687,7 +5842,7 @@ impl FunctionCodeGenerator for X64FunctionCode { target, memarg, true, - 4, + 2, |a, _m, addr| { a.emit_lock_xadd(Size::S16, Location::GPR(value), Location::Memory(addr, 0)) 
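// Shape shared by the narrow (8/16/32-bit) rmw.add/rmw.sub arms, sketched
// here for the 8-bit sub case (lines taken from the arms in this patch):
//
//     a.emit_movzx(Size::S8, loc, Size::S64, Location::GPR(value)); // zero-extend the operand
//     a.emit_neg(Size::S8, Location::GPR(value));   // sub == add of the two's complement
//     // ...bounds/alignment-checked address in `addr`, value_size = 1...
//     a.emit_lock_xadd(Size::S8, Location::GPR(value), Location::Memory(addr, 0));
//     a.emit_mov(Size::S64, Location::GPR(value), ret);
//
// LOCK XADD writes the *old* memory value back into its source register, so
// after the exchange `value` holds exactly what the wasm rmw op must return,
// and its upper bits are still zero from the initial MOVZX, which provides
// the unsigned extension the *U ops require.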
}, ); a.emit_mov(Size::S32, Location::GPR(value), ret); self.machine.release_temp_gpr(value); } + Operator::I64AtomicRmw8USub { ref memarg } => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + let value = self.machine.acquire_temp_gpr().unwrap(); + a.emit_movzx(Size::S8, loc, Size::S64, Location::GPR(value)); + a.emit_neg(Size::S8, Location::GPR(value)); + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + target, + memarg, + true, + 1, + |a, _m, addr| { + a.emit_lock_xadd(Size::S8, Location::GPR(value), Location::Memory(addr, 0)) + }, + ); + a.emit_mov(Size::S64, Location::GPR(value), ret); + self.machine.release_temp_gpr(value); + } + Operator::I64AtomicRmw16USub { ref memarg } => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + let value = self.machine.acquire_temp_gpr().unwrap(); + a.emit_movzx(Size::S16, loc, Size::S64, Location::GPR(value)); + a.emit_neg(Size::S16, Location::GPR(value)); + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + target, + memarg, + true, + 2, + |a, _m, addr| { + a.emit_lock_xadd(Size::S16, Location::GPR(value), Location::Memory(addr, 0)) + }, + ); + a.emit_mov(Size::S64, Location::GPR(value), ret); + self.machine.release_temp_gpr(value); + } + Operator::I64AtomicRmw32USub { ref memarg } => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + let value = self.machine.acquire_temp_gpr().unwrap(); + a.emit_mov(Size::S32, loc, Location::GPR(value)); + a.emit_neg(Size::S32, Location::GPR(value)); + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + target, + memarg, + true, + 4, + |a, _m, addr| { + a.emit_lock_xadd(Size::S32, Location::GPR(value), Location::Memory(addr, 0)) + }, + ); + a.emit_mov(Size::S64, Location::GPR(value), ret); + self.machine.release_temp_gpr(value); + } Operator::I32AtomicRmwAnd { ref memarg } => { let loc = get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); @@ -5707,21 +5955,20 @@ impl FunctionCodeGenerator for X64FunctionCode { )[0]; self.value_stack.push(ret); - Self::emit_cas_loop_op( + Self::emit_compare_and_swap( module_info, &self.config, a, &mut self.machine, loc, target, ret, memarg, 4, + Size::S32, + Size::S32, |a, _m, src, dst| { a.emit_and(Size::S32, Location::GPR(src), Location::GPR(dst)); }, ); } + Operator::I64AtomicRmwAnd { ref memarg } => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine,
self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + Self::emit_compare_and_swap( + module_info, + &self.config, + a, + &mut self.machine, + loc, + target, + ret, + memarg, + 8, + Size::S64, + Size::S64, + |a, _m, src, dst| { + a.emit_and(Size::S64, Location::GPR(src), Location::GPR(dst)); + }, + ); + } + Operator::I32AtomicRmw8UAnd { ref memarg } => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I32, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + Self::emit_compare_and_swap( + module_info, + &self.config, + a, + &mut self.machine, + loc, + target, + ret, + memarg, + 1, + Size::S8, + Size::S32, + |a, _m, src, dst| { + a.emit_and(Size::S32, Location::GPR(src), Location::GPR(dst)); + }, + ); + } + Operator::I32AtomicRmw16UAnd { ref memarg } => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I32, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + Self::emit_compare_and_swap( + module_info, + &self.config, + a, + &mut self.machine, + loc, + target, + ret, + memarg, + 1, + Size::S16, + Size::S32, + |a, _m, src, dst| { + a.emit_and(Size::S32, Location::GPR(src), Location::GPR(dst)); + }, + ); + } + Operator::I64AtomicRmw8UAnd { ref memarg } => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + Self::emit_compare_and_swap( + module_info, + &self.config, + a, + &mut self.machine, + loc, + target, + ret, + memarg, + 1, + Size::S8, + Size::S64, + |a, _m, src, dst| { + a.emit_and(Size::S64, Location::GPR(src), Location::GPR(dst)); + }, + ); + } + Operator::I64AtomicRmw16UAnd { ref memarg } => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + Self::emit_compare_and_swap( + module_info, + &self.config, + a, + &mut self.machine, + loc, + target, + ret, + memarg, + 1, + Size::S16, + Size::S64, + |a, _m, src, dst| { + a.emit_and(Size::S64, Location::GPR(src), Location::GPR(dst)); + }, + ); + } + Operator::I64AtomicRmw32UAnd { ref memarg } => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + Self::emit_compare_and_swap( + module_info, + &self.config, 
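+                    // A value_size of 1 is still bounds-safe for the narrow variants:
+                    // atomics are natural-alignment checked and the memory bound is
+                    // page-aligned, so an in-bounds first byte implies the whole access
+                    // is in bounds.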
+ a, + &mut self.machine, + loc, + target, + ret, + memarg, + 1, + Size::S32, + Size::S64, + |a, _m, src, dst| { + a.emit_and(Size::S64, Location::GPR(src), Location::GPR(dst)); + }, + ); + } + Operator::I32AtomicRmwOr { ref memarg } => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I32, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + Self::emit_compare_and_swap( + module_info, + &self.config, + a, + &mut self.machine, + loc, + target, + ret, + memarg, + 4, + Size::S32, + Size::S32, + |a, _m, src, dst| { + a.emit_or(Size::S32, Location::GPR(src), Location::GPR(dst)); + }, + ); + } + Operator::I64AtomicRmwOr { ref memarg } => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + Self::emit_compare_and_swap( + module_info, + &self.config, + a, + &mut self.machine, + loc, + target, + ret, + memarg, + 8, + Size::S64, + Size::S64, + |a, _m, src, dst| { + a.emit_or(Size::S64, Location::GPR(src), Location::GPR(dst)); + }, + ); + } + Operator::I32AtomicRmw8UOr { ref memarg } => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I32, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + Self::emit_compare_and_swap( + module_info, + &self.config, + a, + &mut self.machine, + loc, + target, + ret, + memarg, + 1, + Size::S8, + Size::S32, + |a, _m, src, dst| { + a.emit_or(Size::S32, Location::GPR(src), Location::GPR(dst)); + }, + ); + } + Operator::I32AtomicRmw16UOr { ref memarg } => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I32, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + Self::emit_compare_and_swap( + module_info, + &self.config, + a, + &mut self.machine, + loc, + target, + ret, + memarg, + 1, + Size::S16, + Size::S32, + |a, _m, src, dst| { + a.emit_or(Size::S32, Location::GPR(src), Location::GPR(dst)); + }, + ); + } + Operator::I64AtomicRmw8UOr { ref memarg } => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + Self::emit_compare_and_swap( + module_info, + &self.config, + a, + &mut self.machine, + loc, + target, + ret, + memarg, + 1, + Size::S8, + Size::S64, + |a, _m, src, dst| { + a.emit_or(Size::S64, Location::GPR(src), Location::GPR(dst)); + }, + ); + } + Operator::I64AtomicRmw16UOr { ref memarg } => { + let loc = + 
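+                // These `or` (and the following `xor`) operators reuse the same
+                // compare-and-swap loop as `and`; only the closure applied inside differs.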
get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + Self::emit_compare_and_swap( + module_info, + &self.config, + a, + &mut self.machine, + loc, + target, + ret, + memarg, + 1, + Size::S16, + Size::S64, + |a, _m, src, dst| { + a.emit_or(Size::S64, Location::GPR(src), Location::GPR(dst)); + }, + ); + } + Operator::I64AtomicRmw32UOr { ref memarg } => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + Self::emit_compare_and_swap( + module_info, + &self.config, + a, + &mut self.machine, + loc, + target, + ret, + memarg, + 1, + Size::S32, + Size::S64, + |a, _m, src, dst| { + a.emit_or(Size::S64, Location::GPR(src), Location::GPR(dst)); + }, + ); + } + Operator::I32AtomicRmwXor { ref memarg } => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I32, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + Self::emit_compare_and_swap( + module_info, + &self.config, + a, + &mut self.machine, + loc, + target, + ret, + memarg, + 4, + Size::S32, + Size::S32, + |a, _m, src, dst| { + a.emit_xor(Size::S32, Location::GPR(src), Location::GPR(dst)); + }, + ); + } + Operator::I64AtomicRmwXor { ref memarg } => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + Self::emit_compare_and_swap( + module_info, + &self.config, + a, + &mut self.machine, + loc, + target, + ret, + memarg, + 8, + Size::S64, + Size::S64, + |a, _m, src, dst| { + a.emit_xor(Size::S64, Location::GPR(src), Location::GPR(dst)); + }, + ); + } + Operator::I32AtomicRmw8UXor { ref memarg } => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I32, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + Self::emit_compare_and_swap( + module_info, + &self.config, + a, + &mut self.machine, + loc, + target, + ret, + memarg, + 1, + Size::S8, + Size::S32, + |a, _m, src, dst| { + a.emit_xor(Size::S32, Location::GPR(src), Location::GPR(dst)); + }, + ); + } + Operator::I32AtomicRmw16UXor { ref memarg } => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I32, 
MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + Self::emit_compare_and_swap( + module_info, + &self.config, + a, + &mut self.machine, + loc, + target, + ret, + memarg, + 1, + Size::S16, + Size::S32, + |a, _m, src, dst| { + a.emit_xor(Size::S32, Location::GPR(src), Location::GPR(dst)); + }, + ); + } + Operator::I64AtomicRmw8UXor { ref memarg } => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + Self::emit_compare_and_swap( + module_info, + &self.config, + a, + &mut self.machine, + loc, + target, + ret, + memarg, + 1, + Size::S8, + Size::S64, + |a, _m, src, dst| { + a.emit_xor(Size::S64, Location::GPR(src), Location::GPR(dst)); + }, + ); + } + Operator::I64AtomicRmw16UXor { ref memarg } => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + Self::emit_compare_and_swap( + module_info, + &self.config, + a, + &mut self.machine, + loc, + target, + ret, + memarg, + 1, + Size::S16, + Size::S64, + |a, _m, src, dst| { + a.emit_xor(Size::S64, Location::GPR(src), Location::GPR(dst)); + }, + ); + } + Operator::I64AtomicRmw32UXor { ref memarg } => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + Self::emit_compare_and_swap( + module_info, + &self.config, + a, + &mut self.machine, + loc, + target, + ret, + memarg, + 1, + Size::S32, + Size::S64, + |a, _m, src, dst| { + a.emit_xor(Size::S64, Location::GPR(src), Location::GPR(dst)); + }, + ); + } _ => { return Err(CodegenError { message: format!("not yet implemented: {:?}", op), diff --git a/lib/singlepass-backend/src/emitter_x64.rs b/lib/singlepass-backend/src/emitter_x64.rs index 83099f25970..5544d08c537 100644 --- a/lib/singlepass-backend/src/emitter_x64.rs +++ b/lib/singlepass-backend/src/emitter_x64.rs @@ -28,7 +28,7 @@ pub enum Condition { Signed, } -#[derive(Copy, Clone, Debug, Eq, PartialEq)] +#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd)] pub enum Size { S8, S16, @@ -584,7 +584,9 @@ impl Emitter for Assembler { dynasm!(self ; cqo); } fn emit_xor(&mut self, sz: Size, src: Location, dst: Location) { - binop_all_nofp!(xor, self, sz, src, dst, { unreachable!() }); + binop_all_nofp!(xor, self, sz, src, dst, { + panic!("XOR {:?} {:?} {:?}", sz, src, dst) + }); } fn emit_jmp(&mut self, condition: Condition, label: Self::Label) { match condition { @@ -666,72 +668,110 @@ impl Emitter for Assembler { }); } fn emit_add(&mut self, sz: Size, src: Location, dst: Location) { - binop_all_nofp!(add, self, sz, src, dst, { unreachable!() }); + binop_all_nofp!(add, self, sz, src, dst, { + panic!("ADD {:?} {:?} {:?}", sz, src, dst) + }); } fn emit_sub(&mut self, sz: Size, src: Location, dst: 
Location) { - binop_all_nofp!(sub, self, sz, src, dst, { unreachable!() }); + binop_all_nofp!(sub, self, sz, src, dst, { + panic!("SUB {:?} {:?} {:?}", sz, src, dst) + }); } fn emit_neg(&mut self, sz: Size, value: Location) { match (sz, value) { - (Size::S8, Location::GPR(value)) => { dynasm!(self ; neg Rb(value as u8)) } - (Size::S8, Location::Memory(value, disp)) => { dynasm!(self ; neg [Rq(value as u8) + disp]) } - (Size::S16, Location::GPR(value)) => { dynasm!(self ; neg Rw(value as u8)) } - (Size::S16, Location::Memory(value, disp)) => { dynasm!(self ; neg [Rq(value as u8) + disp]) } - (Size::S32, Location::GPR(value)) => { dynasm!(self ; neg Rd(value as u8)) } - (Size::S32, Location::Memory(value, disp)) => { dynasm!(self ; neg [Rq(value as u8) + disp]) } - (Size::S64, Location::GPR(value)) => { dynasm!(self ; neg Rq(value as u8)) } - (Size::S64, Location::Memory(value, disp)) => { dynasm!(self ; neg [Rq(value as u8) + disp]) } + (Size::S8, Location::GPR(value)) => dynasm!(self ; neg Rb(value as u8)), + (Size::S8, Location::Memory(value, disp)) => { + dynasm!(self ; neg [Rq(value as u8) + disp]) + } + (Size::S16, Location::GPR(value)) => dynasm!(self ; neg Rw(value as u8)), + (Size::S16, Location::Memory(value, disp)) => { + dynasm!(self ; neg [Rq(value as u8) + disp]) + } + (Size::S32, Location::GPR(value)) => dynasm!(self ; neg Rd(value as u8)), + (Size::S32, Location::Memory(value, disp)) => { + dynasm!(self ; neg [Rq(value as u8) + disp]) + } + (Size::S64, Location::GPR(value)) => dynasm!(self ; neg Rq(value as u8)), + (Size::S64, Location::Memory(value, disp)) => { + dynasm!(self ; neg [Rq(value as u8) + disp]) + } _ => panic!("NEG {:?} {:?}", sz, value), } } fn emit_imul(&mut self, sz: Size, src: Location, dst: Location) { binop_gpr_gpr!(imul, self, sz, src, dst, { - binop_mem_gpr!(imul, self, sz, src, dst, { unreachable!() }) + binop_mem_gpr!(imul, self, sz, src, dst, { + panic!("IMUL {:?} {:?} {:?}", sz, src, dst) + }) }); } fn emit_imul_imm32_gpr64(&mut self, src: u32, dst: GPR) { dynasm!(self ; imul Rq(dst as u8), Rq(dst as u8), src as i32); } fn emit_div(&mut self, sz: Size, divisor: Location) { - unop_gpr_or_mem!(div, self, sz, divisor, { unreachable!() }); + unop_gpr_or_mem!(div, self, sz, divisor, { + panic!("DIV {:?} {:?}", sz, divisor) + }); } fn emit_idiv(&mut self, sz: Size, divisor: Location) { - unop_gpr_or_mem!(idiv, self, sz, divisor, { unreachable!() }); + unop_gpr_or_mem!(idiv, self, sz, divisor, { + panic!("IDIV {:?} {:?}", sz, divisor) + }); } fn emit_shl(&mut self, sz: Size, src: Location, dst: Location) { - binop_shift!(shl, self, sz, src, dst, { unreachable!() }); + binop_shift!(shl, self, sz, src, dst, { + panic!("SHL {:?} {:?} {:?}", sz, src, dst) + }); } fn emit_shr(&mut self, sz: Size, src: Location, dst: Location) { - binop_shift!(shr, self, sz, src, dst, { unreachable!() }); + binop_shift!(shr, self, sz, src, dst, { + panic!("SHR {:?} {:?} {:?}", sz, src, dst) + }); } fn emit_sar(&mut self, sz: Size, src: Location, dst: Location) { - binop_shift!(sar, self, sz, src, dst, { unreachable!() }); + binop_shift!(sar, self, sz, src, dst, { + panic!("SAR {:?} {:?} {:?}", sz, src, dst) + }); } fn emit_rol(&mut self, sz: Size, src: Location, dst: Location) { - binop_shift!(rol, self, sz, src, dst, { unreachable!() }); + binop_shift!(rol, self, sz, src, dst, { + panic!("ROL {:?} {:?} {:?}", sz, src, dst) + }); } fn emit_ror(&mut self, sz: Size, src: Location, dst: Location) { - binop_shift!(ror, self, sz, src, dst, { unreachable!() }); + binop_shift!(ror, 
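+        // As above, panicking with the operand values replaces the bare unreachable!()
+        // so that unsupported size/location combinations are diagnosable.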
self, sz, src, dst, { + panic!("ROR {:?} {:?} {:?}", sz, src, dst) + }); } fn emit_and(&mut self, sz: Size, src: Location, dst: Location) { - binop_all_nofp!(and, self, sz, src, dst, { unreachable!() }); + binop_all_nofp!(and, self, sz, src, dst, { + panic!("AND {:?} {:?} {:?}", sz, src, dst) + }); } fn emit_or(&mut self, sz: Size, src: Location, dst: Location) { - binop_all_nofp!(or, self, sz, src, dst, { unreachable!() }); + binop_all_nofp!(or, self, sz, src, dst, { + panic!("OR {:?} {:?} {:?}", sz, src, dst) + }); } fn emit_lzcnt(&mut self, sz: Size, src: Location, dst: Location) { binop_gpr_gpr!(lzcnt, self, sz, src, dst, { - binop_mem_gpr!(lzcnt, self, sz, src, dst, { unreachable!() }) + binop_mem_gpr!(lzcnt, self, sz, src, dst, { + panic!("LZCNT {:?} {:?} {:?}", sz, src, dst) + }) }); } fn emit_tzcnt(&mut self, sz: Size, src: Location, dst: Location) { binop_gpr_gpr!(tzcnt, self, sz, src, dst, { - binop_mem_gpr!(tzcnt, self, sz, src, dst, { unreachable!() }) + binop_mem_gpr!(tzcnt, self, sz, src, dst, { + panic!("TZCNT {:?} {:?} {:?}", sz, src, dst) + }) }); } fn emit_popcnt(&mut self, sz: Size, src: Location, dst: Location) { binop_gpr_gpr!(popcnt, self, sz, src, dst, { - binop_mem_gpr!(popcnt, self, sz, src, dst, { unreachable!() }) + binop_mem_gpr!(popcnt, self, sz, src, dst, { + panic!("POPCNT {:?} {:?} {:?}", sz, src, dst) + }) }); } fn emit_movzx(&mut self, sz_src: Size, src: Location, sz_dst: Size, dst: Location) { From df1af2e69ca1cee61030a8458251106b2fd23340 Mon Sep 17 00:00:00 2001 From: Nick Lewycky Date: Tue, 24 Sep 2019 15:44:17 -0700 Subject: [PATCH 8/9] Finish atomic operations for singlepass, excluding wait and notify. --- lib/singlepass-backend/src/codegen_x64.rs | 588 +++++++++++++++++++++- lib/spectests/tests/excludes.txt | 46 +- 2 files changed, 615 insertions(+), 19 deletions(-) diff --git a/lib/singlepass-backend/src/codegen_x64.rs b/lib/singlepass-backend/src/codegen_x64.rs index 9b4412d6672..9322531f836 100644 --- a/lib/singlepass-backend/src/codegen_x64.rs +++ b/lib/singlepass-backend/src/codegen_x64.rs @@ -1514,24 +1514,23 @@ impl X64FunctionCode { a.emit_add(Size::S64, Location::GPR(tmp_base), Location::GPR(tmp_bound)); a.emit_mov(Size::S32, addr, Location::GPR(tmp_addr)); - if memarg.offset != 0 && value_size != 0 { - // This branch is used for emitting "faster" code for the special case of (offset + value_size) not exceeding u32 range. - match (memarg.offset as u32).checked_add(value_size as u32) { - Some(x) => { - a.emit_add(Size::S64, Location::Imm32(x), Location::GPR(tmp_addr)); - } - None => { - a.emit_add( - Size::S64, - Location::Imm32(memarg.offset as u32), - Location::GPR(tmp_addr), - ); - a.emit_add( - Size::S64, - Location::Imm32(value_size as u32), - Location::GPR(tmp_addr), - ); - } + // This branch is used for emitting "faster" code for the special case of (offset + value_size) not exceeding u32 range. 
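+        // Unlike the old `memarg.offset != 0 && value_size != 0` guard, only a sum of
+        // zero skips the add; an offset of 0 with a nonzero access width still widens
+        // the bounds check by `value_size`.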
+ match (memarg.offset as u32).checked_add(value_size as u32) { + Some(0) => {} + Some(x) => { + a.emit_add(Size::S64, Location::Imm32(x), Location::GPR(tmp_addr)); + } + None => { + a.emit_add( + Size::S64, + Location::Imm32(memarg.offset as u32), + Location::GPR(tmp_addr), + ); + a.emit_add( + Size::S64, + Location::Imm32(value_size as u32), + Location::GPR(tmp_addr), + ); } } @@ -6552,6 +6551,559 @@ impl FunctionCodeGenerator for X64FunctionCode { }, ); } + Operator::I32AtomicRmwXchg { ref memarg } => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I32, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + let value = self.machine.acquire_temp_gpr().unwrap(); + a.emit_mov(Size::S32, loc, Location::GPR(value)); + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + target, + memarg, + true, + 4, + |a, _m, addr| { + a.emit_xchg(Size::S32, Location::GPR(value), Location::Memory(addr, 0)) + }, + ); + a.emit_mov(Size::S32, Location::GPR(value), ret); + self.machine.release_temp_gpr(value); + } + Operator::I64AtomicRmwXchg { ref memarg } => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + let value = self.machine.acquire_temp_gpr().unwrap(); + a.emit_mov(Size::S64, loc, Location::GPR(value)); + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + target, + memarg, + true, + 8, + |a, _m, addr| { + a.emit_xchg(Size::S64, Location::GPR(value), Location::Memory(addr, 0)) + }, + ); + a.emit_mov(Size::S64, Location::GPR(value), ret); + self.machine.release_temp_gpr(value); + } + Operator::I32AtomicRmw8UXchg { ref memarg } => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I32, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + let value = self.machine.acquire_temp_gpr().unwrap(); + a.emit_movzx(Size::S8, loc, Size::S32, Location::GPR(value)); + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + target, + memarg, + true, + 1, + |a, _m, addr| { + a.emit_xchg(Size::S8, Location::GPR(value), Location::Memory(addr, 0)) + }, + ); + a.emit_mov(Size::S32, Location::GPR(value), ret); + self.machine.release_temp_gpr(value); + } + Operator::I32AtomicRmw16UXchg { ref memarg } => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I32, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + let value = self.machine.acquire_temp_gpr().unwrap(); + a.emit_movzx(Size::S16, loc, Size::S32, Location::GPR(value)); + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + 
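+                    // `xchg` with a memory operand is implicitly locked on x86, so these
+                    // swaps need neither a LOCK prefix nor a cmpxchg retry loop.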
target, + memarg, + true, + 2, + |a, _m, addr| { + a.emit_xchg(Size::S16, Location::GPR(value), Location::Memory(addr, 0)) + }, + ); + a.emit_mov(Size::S32, Location::GPR(value), ret); + self.machine.release_temp_gpr(value); + } + Operator::I64AtomicRmw8UXchg { ref memarg } => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + let value = self.machine.acquire_temp_gpr().unwrap(); + a.emit_movzx(Size::S8, loc, Size::S64, Location::GPR(value)); + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + target, + memarg, + true, + 1, + |a, _m, addr| { + a.emit_xchg(Size::S8, Location::GPR(value), Location::Memory(addr, 0)) + }, + ); + a.emit_mov(Size::S64, Location::GPR(value), ret); + self.machine.release_temp_gpr(value); + } + Operator::I64AtomicRmw16UXchg { ref memarg } => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + let value = self.machine.acquire_temp_gpr().unwrap(); + a.emit_movzx(Size::S16, loc, Size::S64, Location::GPR(value)); + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + target, + memarg, + true, + 2, + |a, _m, addr| { + a.emit_xchg(Size::S16, Location::GPR(value), Location::Memory(addr, 0)) + }, + ); + a.emit_mov(Size::S64, Location::GPR(value), ret); + self.machine.release_temp_gpr(value); + } + Operator::I64AtomicRmw32UXchg { ref memarg } => { + let loc = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + let value = self.machine.acquire_temp_gpr().unwrap(); + a.emit_mov(Size::S32, loc, Location::GPR(value)); + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + target, + memarg, + true, + 4, + |a, _m, addr| { + a.emit_xchg(Size::S32, Location::GPR(value), Location::Memory(addr, 0)) + }, + ); + a.emit_mov(Size::S64, Location::GPR(value), ret); + self.machine.release_temp_gpr(value); + } + Operator::I32AtomicRmwCmpxchg { ref memarg } => { + let new = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let cmp = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I32, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + let compare = self.machine.reserve_temp_gpr(GPR::RAX); + let value = if cmp == Location::GPR(GPR::R14) { + if new == Location::GPR(GPR::R13) { + GPR::R12 + } else { + GPR::R13 + } + } else { + GPR::R14 + }; + a.emit_push(Size::S64, Location::GPR(value)); + a.emit_mov(Size::S32, cmp, Location::GPR(compare)); + 
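+                // `lock cmpxchg` implicitly compares against RAX, which is why the
+                // expected value is staged in the reserved RAX (`compare`); the
+                // replacement value lives in the scratch register pushed above.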
a.emit_mov(Size::S32, new, Location::GPR(value)); + + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + target, + memarg, + true, + 4, + |a, _m, addr| { + a.emit_lock_cmpxchg( + Size::S32, + Location::GPR(value), + Location::Memory(addr, 0), + ); + a.emit_mov(Size::S32, Location::GPR(compare), ret); + }, + ); + a.emit_pop(Size::S64, Location::GPR(value)); + self.machine.release_temp_gpr(compare); + } + Operator::I64AtomicRmwCmpxchg { ref memarg } => { + let new = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let cmp = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + let compare = self.machine.reserve_temp_gpr(GPR::RAX); + let value = if cmp == Location::GPR(GPR::R14) { + if new == Location::GPR(GPR::R13) { + GPR::R12 + } else { + GPR::R13 + } + } else { + GPR::R14 + }; + a.emit_push(Size::S64, Location::GPR(value)); + a.emit_mov(Size::S64, cmp, Location::GPR(compare)); + a.emit_mov(Size::S64, new, Location::GPR(value)); + + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + target, + memarg, + true, + 8, + |a, _m, addr| { + a.emit_lock_cmpxchg( + Size::S64, + Location::GPR(value), + Location::Memory(addr, 0), + ); + a.emit_mov(Size::S64, Location::GPR(compare), ret); + }, + ); + a.emit_pop(Size::S64, Location::GPR(value)); + self.machine.release_temp_gpr(compare); + } + Operator::I32AtomicRmw8UCmpxchg { ref memarg } => { + let new = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let cmp = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I32, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + let compare = self.machine.reserve_temp_gpr(GPR::RAX); + let value = if cmp == Location::GPR(GPR::R14) { + if new == Location::GPR(GPR::R13) { + GPR::R12 + } else { + GPR::R13 + } + } else { + GPR::R14 + }; + a.emit_push(Size::S64, Location::GPR(value)); + a.emit_mov(Size::S32, cmp, Location::GPR(compare)); + a.emit_mov(Size::S32, new, Location::GPR(value)); + + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + target, + memarg, + true, + 1, + |a, _m, addr| { + a.emit_lock_cmpxchg( + Size::S8, + Location::GPR(value), + Location::Memory(addr, 0), + ); + a.emit_movzx(Size::S8, Location::GPR(compare), Size::S32, ret); + }, + ); + a.emit_pop(Size::S64, Location::GPR(value)); + self.machine.release_temp_gpr(compare); + } + Operator::I32AtomicRmw16UCmpxchg { ref memarg } => { + let new = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let cmp = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I32, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + let compare = self.machine.reserve_temp_gpr(GPR::RAX); + let value = if cmp == Location::GPR(GPR::R14) { + if 
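+                    // (the scratch register is drawn from R12-R14 so it aliases neither
+                    // `cmp` nor `new`, and is pushed/popped because it may hold a live value)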
new == Location::GPR(GPR::R13) { + GPR::R12 + } else { + GPR::R13 + } + } else { + GPR::R14 + }; + a.emit_push(Size::S64, Location::GPR(value)); + a.emit_mov(Size::S32, cmp, Location::GPR(compare)); + a.emit_mov(Size::S32, new, Location::GPR(value)); + + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + target, + memarg, + true, + 1, + |a, _m, addr| { + a.emit_lock_cmpxchg( + Size::S16, + Location::GPR(value), + Location::Memory(addr, 0), + ); + a.emit_movzx(Size::S16, Location::GPR(compare), Size::S32, ret); + }, + ); + a.emit_pop(Size::S64, Location::GPR(value)); + self.machine.release_temp_gpr(compare); + } + Operator::I64AtomicRmw8UCmpxchg { ref memarg } => { + let new = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let cmp = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + let compare = self.machine.reserve_temp_gpr(GPR::RAX); + let value = if cmp == Location::GPR(GPR::R14) { + if new == Location::GPR(GPR::R13) { + GPR::R12 + } else { + GPR::R13 + } + } else { + GPR::R14 + }; + a.emit_push(Size::S64, Location::GPR(value)); + a.emit_mov(Size::S64, cmp, Location::GPR(compare)); + a.emit_mov(Size::S64, new, Location::GPR(value)); + + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + target, + memarg, + true, + 1, + |a, _m, addr| { + a.emit_lock_cmpxchg( + Size::S8, + Location::GPR(value), + Location::Memory(addr, 0), + ); + a.emit_movzx(Size::S8, Location::GPR(compare), Size::S64, ret); + }, + ); + a.emit_pop(Size::S64, Location::GPR(value)); + self.machine.release_temp_gpr(compare); + } + Operator::I64AtomicRmw16UCmpxchg { ref memarg } => { + let new = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let cmp = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + let compare = self.machine.reserve_temp_gpr(GPR::RAX); + let value = if cmp == Location::GPR(GPR::R14) { + if new == Location::GPR(GPR::R13) { + GPR::R12 + } else { + GPR::R13 + } + } else { + GPR::R14 + }; + a.emit_push(Size::S64, Location::GPR(value)); + a.emit_mov(Size::S64, cmp, Location::GPR(compare)); + a.emit_mov(Size::S64, new, Location::GPR(value)); + + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + target, + memarg, + true, + 1, + |a, _m, addr| { + a.emit_lock_cmpxchg( + Size::S16, + Location::GPR(value), + Location::Memory(addr, 0), + ); + a.emit_movzx(Size::S16, Location::GPR(compare), Size::S64, ret); + }, + ); + a.emit_pop(Size::S64, Location::GPR(value)); + self.machine.release_temp_gpr(compare); + } + Operator::I64AtomicRmw32UCmpxchg { ref memarg } => { + let new = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let cmp = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let target = + get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations( + a, + 
&[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); + + let compare = self.machine.reserve_temp_gpr(GPR::RAX); + let value = if cmp == Location::GPR(GPR::R14) { + if new == Location::GPR(GPR::R13) { + GPR::R12 + } else { + GPR::R13 + } + } else { + GPR::R14 + }; + a.emit_push(Size::S64, Location::GPR(value)); + a.emit_mov(Size::S64, cmp, Location::GPR(compare)); + a.emit_mov(Size::S64, new, Location::GPR(value)); + + Self::emit_memory_op( + module_info, + &self.config, + a, + &mut self.machine, + target, + memarg, + true, + 1, + |a, _m, addr| { + a.emit_lock_cmpxchg( + Size::S32, + Location::GPR(value), + Location::Memory(addr, 0), + ); + a.emit_mov(Size::S32, Location::GPR(compare), ret); + }, + ); + a.emit_pop(Size::S64, Location::GPR(value)); + self.machine.release_temp_gpr(compare); + } _ => { return Err(CodegenError { message: format!("not yet implemented: {:?}", op), diff --git a/lib/spectests/tests/excludes.txt b/lib/spectests/tests/excludes.txt index 19a1c51a8c2..56c6a2d5613 100644 --- a/lib/spectests/tests/excludes.txt +++ b/lib/spectests/tests/excludes.txt @@ -867,7 +867,6 @@ llvm:skip:simd_binaryen.wast:*:unix # Module - caught panic Any # Singlepass -singlepass:skip:atomic.wast:* # Threads not implemented singlepass:skip:simd.wast:* # SIMD not implemented singlepass:skip:simd_binaryen.wast:* # SIMD not implemented @@ -904,6 +903,51 @@ singlepass:fail:address.wast:586 # AssertTrap - expected trap, got Runtime:Error singlepass:fail:address.wast:588 # AssertTrap - expected trap, got [] singlepass:fail:address.wast:589 # AssertTrap - expected trap, got [] singlepass:fail:align.wast:864 # AssertTrap - expected trap, got Runtime:Error unknown error +singlepass:fail:atomic.wast:380 # AssertTrap - expected trap, got Runtime:Error unknown error +singlepass:fail:atomic.wast:381 # AssertTrap - expected trap, got Runtime:Error unknown error +singlepass:fail:atomic.wast:382 # AssertTrap - expected trap, got Runtime:Error unknown error +singlepass:fail:atomic.wast:383 # AssertTrap - expected trap, got Runtime:Error unknown error +singlepass:fail:atomic.wast:384 # AssertTrap - expected trap, got Runtime:Error unknown error +singlepass:fail:atomic.wast:385 # AssertTrap - expected trap, got Runtime:Error unknown error +singlepass:fail:atomic.wast:386 # AssertTrap - expected trap, got Runtime:Error unknown error +singlepass:fail:atomic.wast:387 # AssertTrap - expected trap, got Runtime:Error unknown error +singlepass:fail:atomic.wast:388 # AssertTrap - expected trap, got Runtime:Error unknown error +singlepass:fail:atomic.wast:389 # AssertTrap - expected trap, got Runtime:Error unknown error +singlepass:fail:atomic.wast:390 # AssertTrap - expected trap, got Runtime:Error unknown error +singlepass:fail:atomic.wast:391 # AssertTrap - expected trap, got Runtime:Error unknown error +singlepass:fail:atomic.wast:392 # AssertTrap - expected trap, got Runtime:Error unknown error +singlepass:fail:atomic.wast:393 # AssertTrap - expected trap, got Runtime:Error unknown error +singlepass:fail:atomic.wast:394 # AssertTrap - expected trap, got Runtime:Error unknown error +singlepass:fail:atomic.wast:395 # AssertTrap - expected trap, got Runtime:Error unknown error +singlepass:fail:atomic.wast:396 # AssertTrap - expected trap, got Runtime:Error unknown error +singlepass:fail:atomic.wast:397 # AssertTrap - expected trap, got Runtime:Error unknown error +singlepass:fail:atomic.wast:398 # AssertTrap - expected trap, got Runtime:Error unknown error 
+singlepass:fail:atomic.wast:399 # AssertTrap - expected trap, got Runtime:Error unknown error
+singlepass:fail:atomic.wast:400 # AssertTrap - expected trap, got Runtime:Error unknown error
+singlepass:fail:atomic.wast:401 # AssertTrap - expected trap, got Runtime:Error unknown error
+singlepass:fail:atomic.wast:402 # AssertTrap - expected trap, got Runtime:Error unknown error
+singlepass:fail:atomic.wast:403 # AssertTrap - expected trap, got Runtime:Error unknown error
+singlepass:fail:atomic.wast:404 # AssertTrap - expected trap, got Runtime:Error unknown error
+singlepass:fail:atomic.wast:405 # AssertTrap - expected trap, got Runtime:Error unknown error
+singlepass:fail:atomic.wast:406 # AssertTrap - expected trap, got Runtime:Error unknown error
+singlepass:fail:atomic.wast:407 # AssertTrap - expected trap, got Runtime:Error unknown error
+singlepass:fail:atomic.wast:408 # AssertTrap - expected trap, got Runtime:Error unknown error
+singlepass:fail:atomic.wast:409 # AssertTrap - expected trap, got Runtime:Error unknown error
+singlepass:fail:atomic.wast:410 # AssertTrap - expected trap, got Runtime:Error unknown error
+singlepass:fail:atomic.wast:411 # AssertTrap - expected trap, got Runtime:Error unknown error
+singlepass:fail:atomic.wast:412 # AssertTrap - expected trap, got Runtime:Error unknown error
+singlepass:fail:atomic.wast:413 # AssertTrap - expected trap, got Runtime:Error unknown error
+singlepass:fail:atomic.wast:414 # AssertTrap - expected trap, got Runtime:Error unknown error
+singlepass:fail:atomic.wast:415 # AssertTrap - expected trap, got Runtime:Error unknown error
+singlepass:fail:atomic.wast:416 # AssertTrap - expected trap, got Runtime:Error unknown error
+singlepass:fail:atomic.wast:417 # AssertTrap - expected trap, got Runtime:Error unknown error
+singlepass:fail:atomic.wast:418 # AssertTrap - expected trap, got Runtime:Error unknown error
+singlepass:fail:atomic.wast:419 # AssertTrap - expected trap, got Runtime:Error unknown error
+singlepass:fail:atomic.wast:420 # AssertTrap - expected trap, got Runtime:Error unknown error
+singlepass:fail:atomic.wast:421 # AssertTrap - expected trap, got Runtime:Error unknown error
+singlepass:fail:atomic.wast:422 # AssertTrap - expected trap, got Runtime:Error unknown error
+singlepass:fail:atomic.wast:423 # AssertTrap - expected trap, got Runtime:Error unknown error
+singlepass:fail:atomic.wast:424 # AssertTrap - expected trap, got Runtime:Error unknown error
 singlepass:fail:call.wast:289 # AssertTrap - expected trap, got Runtime:Error unknown error
 singlepass:fail:call_indirect.wast:469 # AssertTrap - expected trap, got Runtime:Error unknown error
 singlepass:fail:call_indirect.wast:470 # AssertTrap - expected trap, got Runtime:Error unknown error

From ae798bdac676206fd175829fab4658443826561b Mon Sep 17 00:00:00 2001
From: Nick Lewycky
Date: Tue, 24 Sep 2019 15:47:14 -0700
Subject: [PATCH 9/9] Add changelog entry.

---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index c9106293688..c009d19f178 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,7 @@ Blocks of changes will be separated by version increments.
 
 ## **[Unreleased]**
 
+- [#831](https://github.com/wasmerio/wasmer/pull/831) Add support for atomic operations, excluding wait and notify, to singlepass.
- [#822](https://github.com/wasmerio/wasmer/pull/822) Update Cranelift fork version to `0.43.1` - [#829](https://github.com/wasmerio/wasmer/pull/829) Fix deps on `make bench-*` commands; benchmarks don't compile other backends now - [#807](https://github.com/wasmerio/wasmer/pull/807) Implement Send for `Instance`, breaking change on `ImportObject`, remove method `get_namespace` replaced with `with_namespace` and `maybe_with_namespace`
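
A note on the cmpxchg idiom used throughout patch 8: `emit_compare_and_swap` and the
`lock cmpxchg` sequences above implement the standard x86 read-modify-write retry loop.
In plain Rust terms the generated code behaves like the sketch below; `atomic_rmw_model`
and the other names here are illustrative only (they do not exist in this codebase), and
this is a minimal model, assuming a 32-bit cell:

use std::sync::atomic::{AtomicU32, Ordering};

// Model of the emitted loop: read the current value, compute the new one in a
// scratch register, then compare-and-swap; on a lost race, `cmpxchg` leaves the
// fresh memory value in RAX, which becomes the next expected value.
fn atomic_rmw_model(cell: &AtomicU32, op: impl Fn(u32) -> u32) -> u32 {
    let mut old = cell.load(Ordering::Relaxed);
    loop {
        let new = op(old);
        match cell.compare_exchange_weak(old, new, Ordering::SeqCst, Ordering::SeqCst) {
            Ok(prev) => return prev,     // success: a wasm rmw op returns the old value
            Err(actual) => old = actual, // contention: retry with the fresh value
        }
    }
}

fn main() {
    let cell = AtomicU32::new(0b1100);
    let prev = atomic_rmw_model(&cell, |v| v & 0b1010); // i32.atomic.rmw.and
    assert_eq!(prev, 0b1100);
    assert_eq!(cell.load(Ordering::SeqCst), 0b1000);
}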