diff --git a/lib/compiler-singlepass/src/codegen.rs b/lib/compiler-singlepass/src/codegen.rs
index c9f98b30b64..2e68a8fc0d9 100644
--- a/lib/compiler-singlepass/src/codegen.rs
+++ b/lib/compiler-singlepass/src/codegen.rs
@@ -737,8 +737,8 @@ impl<'a, M: Machine> FuncGen<'a, M> {
             .collect();
 
         // Save used GPRs. Preserve correct stack alignment
-        let mut used_stack = self.machine.push_used_gpr();
         let used_gprs = self.machine.get_used_gprs();
+        let mut used_stack = self.machine.push_used_gpr(&used_gprs);
         for r in used_gprs.iter() {
             let content = self.state.register_values[self.machine.index_from_gpr(*r).0].clone();
             if content == MachineValue::Undefined {
@@ -752,7 +752,7 @@ impl<'a, M: Machine> FuncGen<'a, M> {
         // Save used SIMD registers.
         let used_simds = self.machine.get_used_simd();
         if used_simds.len() > 0 {
-            used_stack += self.machine.push_used_simd();
+            used_stack += self.machine.push_used_simd(&used_simds);
 
             for r in used_simds.iter().rev() {
                 let content =
@@ -842,7 +842,8 @@ impl<'a, M: Machine> FuncGen<'a, M> {
                         self.state.stack_values.push(MachineValue::Undefined);
                     }
                 }
-                self.machine.move_location(params_size[i], *param, loc);
+                self.machine
+                    .move_location_for_native(params_size[i], *param, loc);
             }
             _ => {
                 return Err(CodegenError {
@@ -914,14 +915,14 @@ impl<'a, M: Machine> FuncGen<'a, M> {
 
         // Restore SIMDs.
         if !used_simds.is_empty() {
-            self.machine.pop_used_simd();
+            self.machine.pop_used_simd(&used_simds);
             for _ in 0..used_simds.len() {
                 self.state.stack_values.pop().unwrap();
             }
         }
 
         // Restore GPRs.
-        self.machine.pop_used_gpr();
+        self.machine.pop_used_gpr(&used_gprs);
         for _ in used_gprs.iter().rev() {
             self.state.stack_values.pop().unwrap();
         }
diff --git a/lib/compiler-singlepass/src/emitter_arm64.rs b/lib/compiler-singlepass/src/emitter_arm64.rs
index 08505560d94..c5d31f9c764 100644
--- a/lib/compiler-singlepass/src/emitter_arm64.rs
+++ b/lib/compiler-singlepass/src/emitter_arm64.rs
@@ -401,6 +401,11 @@ impl EmitterARM64 for Assembler {
                 let addr = addr.into_index() as u32;
                 dynasm!(self ; stur D(reg), [X(addr), offset]);
             }
+            (Size::S32, Location::SIMD(reg)) => {
+                let reg = reg.into_index() as u32;
+                let addr = addr.into_index() as u32;
+                dynasm!(self ; stur S(reg), [X(addr), offset]);
+            }
             _ => panic!(
                 "singlepass can't emit STUR {:?}, {:?}, {:?}, {:?}",
                 sz, reg, addr, offset
@@ -425,6 +430,11 @@ impl EmitterARM64 for Assembler {
                 let addr = addr.into_index() as u32;
                 dynasm!(self ; ldur D(reg), [X(addr), offset]);
             }
+            (Size::S32, Location::SIMD(reg)) => {
+                let reg = reg.into_index() as u32;
+                let addr = addr.into_index() as u32;
+                dynasm!(self ; ldur S(reg), [X(addr), offset]);
+            }
             _ => panic!(
                 "singlepass can't emit LDUR {:?}, {:?}, {:?}, {:?}",
                 sz, reg, addr, offset
diff --git a/lib/compiler-singlepass/src/machine.rs b/lib/compiler-singlepass/src/machine.rs
index a68daa91ec5..1ad5d04254e 100644
--- a/lib/compiler-singlepass/src/machine.rs
+++ b/lib/compiler-singlepass/src/machine.rs
@@ -83,9 +83,9 @@ pub trait Machine {
     /// reserve a GPR
     fn reserve_gpr(&mut self, gpr: Self::GPR);
     /// Push used gprs to the stack. Return the bytes taken on the stack
-    fn push_used_gpr(&mut self) -> usize;
+    fn push_used_gpr(&mut self, gprs: &Vec<Self::GPR>) -> usize;
     /// Pop used gprs from the stack
-    fn pop_used_gpr(&mut self);
+    fn pop_used_gpr(&mut self, gprs: &Vec<Self::GPR>);
     /// Picks an unused SIMD register.
     ///
     /// This method does not mark the register as used
@@ -101,9 +101,9 @@ pub trait Machine {
     /// Releases a temporary XMM register.
     fn release_simd(&mut self, simd: Self::SIMD);
     /// Push used simd regs to the stack. Return bytes taken on the stack
-    fn push_used_simd(&mut self) -> usize;
+    fn push_used_simd(&mut self, simds: &Vec<Self::SIMD>) -> usize;
     /// Pop used simd regs from the stack
-    fn pop_used_simd(&mut self);
+    fn pop_used_simd(&mut self, simds: &Vec<Self::SIMD>);
     /// Return a rounded stack adjustment value (must be a multiple of 16 bytes on ARM64, for example)
     fn round_stack_adjust(&self, value: usize) -> usize;
     /// Set the source location of the Wasm to the given offset.
@@ -140,7 +140,12 @@ pub trait Machine {
     /// GPR Reg used for local pointer on the stack
     fn local_pointer(&self) -> Self::GPR;
     /// push a value on the stack for a native call
-    fn push_location_for_native(&mut self, loc: Location<Self::GPR, Self::SIMD>);
+    fn move_location_for_native(
+        &mut self,
+        size: Size,
+        loc: Location<Self::GPR, Self::SIMD>,
+        dest: Location<Self::GPR, Self::SIMD>,
+    );
     /// Determine whether a local should be allocated on the stack.
     fn is_local_on_stack(&self, idx: usize) -> bool;
     /// Determine a local's location.
diff --git a/lib/compiler-singlepass/src/machine_arm64.rs b/lib/compiler-singlepass/src/machine_arm64.rs
index 9b678b7e453..7ea534a0e10 100644
--- a/lib/compiler-singlepass/src/machine_arm64.rs
+++ b/lib/compiler-singlepass/src/machine_arm64.rs
@@ -1175,8 +1175,7 @@ impl Machine for MachineARM64 {
         self.used_gprs.insert(gpr);
     }
 
-    fn push_used_gpr(&mut self) -> usize {
-        let used_gprs = self.get_used_gprs();
+    fn push_used_gpr(&mut self, used_gprs: &Vec<GPR>) -> usize {
         if used_gprs.len() % 2 == 1 {
             self.emit_push(Size::S64, Location::GPR(GPR::XzrSp));
         }
@@ -1185,8 +1184,7 @@
         }
         ((used_gprs.len() + 1) / 2) * 16
     }
-    fn pop_used_gpr(&mut self) {
-        let used_gprs = self.get_used_gprs();
+    fn pop_used_gpr(&mut self, used_gprs: &Vec<GPR>) {
         for r in used_gprs.iter().rev() {
             self.emit_pop(Size::S64, Location::GPR(*r));
         }
@@ -1237,8 +1235,7 @@
         assert_eq!(self.used_simd.remove(&simd), true);
     }
 
-    fn push_used_simd(&mut self) -> usize {
-        let used_neons = self.get_used_simd();
+    fn push_used_simd(&mut self, used_neons: &Vec<NEON>) -> usize {
        let stack_adjust = if used_neons.len() & 1 == 1 {
             (used_neons.len() * 8) as u32 + 8
         } else {
@@ -1255,8 +1252,7 @@
        }
         stack_adjust as usize
     }
-    fn pop_used_simd(&mut self) {
-        let used_neons = self.get_used_simd();
+    fn pop_used_simd(&mut self, used_neons: &Vec<NEON>) {
         for (i, r) in used_neons.iter().enumerate() {
             self.assembler.emit_ldr(
                 Size::S64,
@@ -1407,13 +1403,17 @@
         );
     }
 
     // push a value on the stack for a native call
-    fn push_location_for_native(&mut self, loc: Location) {
+    fn move_location_for_native(&mut self, size: Size, loc: Location, dest: Location) {
         match loc {
-            Location::Imm64(_) => {
-                self.move_location(Size::S64, loc, Location::GPR(GPR::X17));
-                self.emit_push(Size::S64, Location::GPR(GPR::X17));
+            Location::Imm64(_)
+            | Location::Imm32(_)
+            | Location::Imm8(_)
+            | Location::Memory(_, _)
+            | Location::Memory2(_, _, _, _) => {
+                self.move_location(size, loc, Location::GPR(GPR::X17));
+                self.move_location(size, Location::GPR(GPR::X17), dest);
             }
-            _ => self.emit_push(Size::S64, loc),
+            _ => self.move_location(size, loc, dest),
         }
     }
@@ -1965,6 +1965,14 @@
             self.assembler.emit_fmax(sz, input, input, tmp);
             self.move_location(sz, tmp, output);
         }
+        (Size::S32, Location::Memory(_, _), _) | (Size::S64, Location::Memory(_, _), _) => {
+            let src = self.location_to_neon(sz, input, &mut tempn, ImmType::None, true);
+            let tmp = self.location_to_neon(sz, output, &mut tempn, ImmType::None, false);
+            self.assembler.emit_fmax(sz, src, src, tmp);
+            if tmp != output {
+                self.move_location(sz, tmp, output);
+            }
+        }
         _ => panic!(
             "singlepass can't emit canonicalize_nan {:?} {:?} {:?}",
             sz, input, output
diff --git a/lib/compiler-singlepass/src/machine_x64.rs b/lib/compiler-singlepass/src/machine_x64.rs
index 2d1c0f5f50d..a0598858155 100644
--- a/lib/compiler-singlepass/src/machine_x64.rs
+++ b/lib/compiler-singlepass/src/machine_x64.rs
@@ -1626,15 +1626,13 @@ impl Machine for MachineX86_64 {
         self.used_gprs.insert(gpr);
     }
 
-    fn push_used_gpr(&mut self) -> usize {
-        let used_gprs = self.get_used_gprs();
+    fn push_used_gpr(&mut self, used_gprs: &Vec<GPR>) -> usize {
         for r in used_gprs.iter() {
             self.assembler.emit_push(Size::S64, Location::GPR(*r));
         }
         used_gprs.len() * 8
     }
-    fn pop_used_gpr(&mut self) {
-        let used_gprs = self.get_used_gprs();
+    fn pop_used_gpr(&mut self, used_gprs: &Vec<GPR>) {
         for r in used_gprs.iter().rev() {
             self.assembler.emit_pop(Size::S64, Location::GPR(*r));
         }
@@ -1682,8 +1680,7 @@
         assert_eq!(self.used_simd.remove(&simd), true);
     }
 
-    fn push_used_simd(&mut self) -> usize {
-        let used_xmms = self.get_used_simd();
+    fn push_used_simd(&mut self, used_xmms: &Vec<XMM>) -> usize {
         self.adjust_stack((used_xmms.len() * 8) as u32);
 
         for (i, r) in used_xmms.iter().enumerate() {
@@ -1696,8 +1693,7 @@
         used_xmms.len() * 8
     }
 
-    fn pop_used_simd(&mut self) {
-        let used_xmms = self.get_used_simd();
+    fn pop_used_simd(&mut self, used_xmms: &Vec<XMM>) {
         for (i, r) in used_xmms.iter().enumerate() {
             self.move_location(
                 Size::S64,
@@ -1806,34 +1802,23 @@
         );
     }
 
     // push a value on the stack for a native call
-    fn push_location_for_native(&mut self, loc: Location) {
+    fn move_location_for_native(&mut self, _size: Size, loc: Location, dest: Location) {
         match loc {
-            Location::Imm64(_) => {
-                // x86_64 does not support `mov imm64, mem`. We must first place the immdiate value
-                // into a register and then write the register to the memory. Now the problem is
-                // that there might not be any registers available to clobber. In order to make
-                // this work out we spill a register thus retaining both the original value of the
-                // register and producing the required data at the top of the stack.
-                //
-                // FIXME(#2723): figure out how to not require spilling a register here. It should
-                // definitely be possible to `pick_gpr`/`pick_temp_gpr` to grab an otherwise unused
-                // register and just clobber its value here.
-                self.assembler.emit_push(Size::S64, Location::GPR(GPR::R9));
-                self.move_location(Size::S64, loc, Location::GPR(GPR::R9));
-                self.assembler.emit_xchg(
-                    Size::S64,
-                    Location::GPR(GPR::R9),
-                    Location::Memory(GPR::RSP, 0),
-                );
-            }
-            Location::SIMD(_) => {
-                // Dummy value slot to be filled with `mov`.
-                self.assembler.emit_push(Size::S64, Location::GPR(GPR::RAX));
-
-                // XMM registers can be directly stored to memory.
-                self.move_location(Size::S64, loc, Location::Memory(GPR::RSP, 0));
+            Location::Imm64(_) | Location::Memory(_, _) | Location::Memory2(_, _, _, _) => {
+                let tmp = self.pick_temp_gpr();
+                if let Some(x) = tmp {
+                    self.assembler.emit_mov(Size::S64, loc, Location::GPR(x));
+                    self.assembler.emit_mov(Size::S64, Location::GPR(x), dest);
+                } else {
+                    self.assembler
+                        .emit_mov(Size::S64, Location::GPR(GPR::RAX), dest);
+                    self.assembler
+                        .emit_mov(Size::S64, loc, Location::GPR(GPR::RAX));
+                    self.assembler
+                        .emit_xchg(Size::S64, Location::GPR(GPR::RAX), dest);
+                }
             }
-            _ => self.assembler.emit_push(Size::S64, loc),
+            _ => self.assembler.emit_mov(Size::S64, loc, dest),
         }
     }
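
The codegen.rs hunks are the crux of the patch: the call-sequence code used to let `push_used_gpr`/`pop_used_gpr` each query the used-register set for themselves, and now the caller snapshots the set once and hands the same list to both sides (likewise for SIMD), so the pop can never disagree with the push if register state mutates in between. A minimal sketch of that discipline, using a hypothetical `MockMachine` rather than the wasmer `Machine` trait (the pair-rounding arithmetic mirrors the ARM64 implementation above):

```rust
use std::collections::BTreeSet;

struct MockMachine {
    used_gprs: BTreeSet<u8>, // register indices standing in for real GPRs
}

impl MockMachine {
    fn get_used_gprs(&self) -> Vec<u8> {
        self.used_gprs.iter().copied().collect()
    }

    // Mirrors the ARM64 rule: registers are pushed in 8-byte pairs,
    // rounded up so the stack pointer stays 16-byte aligned.
    fn push_used_gpr(&mut self, gprs: &[u8]) -> usize {
        ((gprs.len() + 1) / 2) * 16
    }

    fn pop_used_gpr(&mut self, gprs: &[u8]) -> usize {
        ((gprs.len() + 1) / 2) * 16
    }
}

fn main() {
    let mut m = MockMachine {
        used_gprs: BTreeSet::from([0u8, 1, 2]),
    };

    // Snapshot once; both sides of the call sequence use the same list.
    let snapshot = m.get_used_gprs();
    let pushed = m.push_used_gpr(&snapshot);

    // Suppose a register is released between the push and the pop...
    m.used_gprs.remove(&2);

    // ...popping with the snapshot still restores exactly what was pushed.
    // Re-querying get_used_gprs() here would walk a shorter list and
    // desynchronize the stack.
    let popped = m.pop_used_gpr(&snapshot);
    assert_eq!(pushed, popped);
    assert_eq!(pushed, 32); // 3 GPRs -> two 16-byte slots
}
```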
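On x86_64, `move_location_for_native` now asks `pick_temp_gpr` for a scratch register and, only when none is free, falls back to a three-instruction shuffle through RAX, which is essentially what the removed `FIXME(#2723)` comment asked for. The fallback works because `xchg` both writes the destination and restores RAX in a single step: stash RAX in the destination, load the value into RAX, then exchange. A sketch of why the sequence is register-preserving, with plain `u64` cells standing in for a register and a memory slot:

```rust
// Models: mov dest, rax ; mov rax, value ; xchg rax, dest
fn move_via_scratch(rax: &mut u64, dest: &mut u64, value: u64) {
    // 1. Stash RAX's current contents in the destination slot.
    *dest = *rax;
    // 2. Load the value to be moved into RAX.
    *rax = value;
    // 3. XCHG: the destination receives the value and RAX gets its old
    //    contents back. Net effect: dest = value, RAX preserved.
    std::mem::swap(rax, dest);
}

fn main() {
    let mut rax = 0xdead_beef_u64;
    let mut dest = 0u64;
    move_via_scratch(&mut rax, &mut dest, 42);
    assert_eq!(dest, 42);
    assert_eq!(rax, 0xdead_beef); // the scratch register survives unchanged
}
```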
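The new `canonicalize_nan` arm on ARM64 handles memory operands by routing them through NEON registers and `emit_fmax(sz, src, src, tmp)`. The idea behind the max-with-itself trick: ordinary numbers pass through unchanged, while NaN inputs come out as a quiet NaN (with default-NaN mode, the single canonical bit pattern), which is what NaN canonicalization needs for deterministic Wasm results. A software sketch of the observable effect, using a hypothetical helper rather than the emitted NEON code:

```rust
// Canonical 32-bit quiet NaN: sign 0, exponent all ones, only the top
// mantissa bit set.
const CANONICAL_NAN32: u32 = 0x7fc0_0000;

fn canonicalize_nan32(x: f32) -> f32 {
    if x.is_nan() {
        f32::from_bits(CANONICAL_NAN32)
    } else {
        x
    }
}

fn main() {
    // A NaN carrying a nonzero payload, as hardware ops may produce.
    let payload_nan = f32::from_bits(0x7fc0_1234);
    assert_eq!(canonicalize_nan32(payload_nan).to_bits(), CANONICAL_NAN32);

    // Non-NaN values pass through untouched, like fmax(x, x).
    assert_eq!(canonicalize_nan32(1.5), 1.5);
}
```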