From aa2ee5b63730ece75d49843e88111aa5e449bcaa Mon Sep 17 00:00:00 2001 From: Edoardo Marangoni Date: Wed, 27 Mar 2024 15:36:00 +0100 Subject: [PATCH 1/5] feat(singlepass): use SIMD insts for popcount --- lib/compiler-singlepass/src/emitter_arm64.rs | 59 +++++++++ lib/compiler-singlepass/src/machine_arm64.rs | 120 ++++++++----------- 2 files changed, 109 insertions(+), 70 deletions(-) diff --git a/lib/compiler-singlepass/src/emitter_arm64.rs b/lib/compiler-singlepass/src/emitter_arm64.rs index aa5ee80f2c6..aa7d7c90443 100644 --- a/lib/compiler-singlepass/src/emitter_arm64.rs +++ b/lib/compiler-singlepass/src/emitter_arm64.rs @@ -466,6 +466,15 @@ pub trait EmitterARM64 { dst: Location, ) -> Result<(), CompileError>; + fn emit_fmov( + &mut self, + src_size: Size, + src: Location, + dst_size: Size, + dst: Location, + ) -> Result<(), CompileError>; + fn emit_cnt(&mut self, src: NEON, dst: NEON) -> Result<(), CompileError>; + fn emit_addv(&mut self, src: NEON, dst: NEON) -> Result<(), CompileError>; fn emit_read_fpcr(&mut self, reg: GPR) -> Result<(), CompileError>; fn emit_write_fpcr(&mut self, reg: GPR) -> Result<(), CompileError>; fn emit_read_fpsr(&mut self, reg: GPR) -> Result<(), CompileError>; @@ -3217,6 +3226,56 @@ impl EmitterARM64 for Assembler { dynasm!(self ; msr 0b1_011_0100_0100_001, X(reg as u32)); Ok(()) } + + fn emit_cnt(&mut self, src: NEON, dst: NEON) -> Result<(), CompileError> { + dynasm!(self ; cnt V(dst.into_index() as u32).B8, V(src.into_index() as u32).B8); + Ok(()) + } + + fn emit_addv(&mut self, src: NEON, dst: NEON) -> Result<(), CompileError> { + dynasm!(self ; addv B(dst.into_index() as u32), V(src.into_index() as u32).B8); + Ok(()) + } + + fn emit_fmov( + &mut self, + src_size: Size, + src: Location, + dst_size: Size, + dst: Location, + ) -> Result<(), CompileError> { + let res = match (src, dst) { + (AbstractLocation::GPR(src), AbstractLocation::SIMD(dst)) => { + match (src_size, dst_size) { + (Size::S32, Size::S32) => Ok(dynasm!(self ; fmov S(dst as u32), W(src as u32))), + (Size::S64, Size::S64) => Ok(dynasm!(self ; fmov D(dst as u32), X(src as u32))), + _ => Err(()), + } + } + (AbstractLocation::SIMD(src), AbstractLocation::GPR(dst)) => { + match (src_size, dst_size) { + (Size::S32, Size::S32) => Ok(dynasm!(self ; fmov W(dst as u32), S(src as u32))), + (Size::S64, Size::S64) => Ok(dynasm!(self ; fmov X(dst as u32), D(src as u32))), + _ => Err(()), + } + } + // (AbstractLocation::SIMD(src), AbstractLocation::SIMD(dst)) => todo!(), + // (AbstractLocation::SIMD(src), AbstractLocation::Imm32(dst)) => todo!(), + // (AbstractLocation::SIMD(src), AbstractLocation::Imm64(dst)) => todo!(), + _ => Err(()), + }; + + match res { + Ok(_) => Ok(()), + Err(_) => codegen_error!( + "singlepass can't generate fmov instruction for src_size: {:?}, src: {:?}, dst_size: {:?}, dst: {:?}", + src_size, + src, + dst_size, + dst, + ), + } + } } pub fn gen_std_trampoline_arm64( diff --git a/lib/compiler-singlepass/src/machine_arm64.rs b/lib/compiler-singlepass/src/machine_arm64.rs index f5e2b0c1b18..3a3d14aa002 100644 --- a/lib/compiler-singlepass/src/machine_arm64.rs +++ b/lib/compiler-singlepass/src/machine_arm64.rs @@ -3072,49 +3072,38 @@ impl Machine for MachineARM64 { Ok(()) } fn i32_popcnt(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> { - // no opcode for that. - // 2 solutions: using NEON CNT, that count bits per Byte, or using clz with some shift and loop let mut temps = vec![]; - let src = self.location_to_reg(Size::S32, loc, &mut temps, ImmType::None, true, None)?; - let dest = self.location_to_reg(Size::S32, ret, &mut temps, ImmType::None, false, None)?; - let src = if src == loc { - let tmp = self.acquire_temp_gpr().ok_or_else(|| { - CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned()) - })?; - temps.push(tmp); - self.assembler - .emit_mov(Size::S32, src, Location::GPR(tmp))?; - Location::GPR(tmp) - } else { - src - }; - let tmp = { - let tmp = self.acquire_temp_gpr().ok_or_else(|| { - CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned()) - })?; - temps.push(tmp); - Location::GPR(tmp) - }; - let label_loop = self.assembler.get_label(); - let label_exit = self.assembler.get_label(); - self.assembler - .emit_mov(Size::S32, Location::GPR(GPR::XzrSp), dest)?; // 0 => dest - self.assembler.emit_cbz_label(Size::S32, src, label_exit)?; // src==0, exit - self.assembler.emit_label(label_loop)?; // loop: + + let src_gpr = + self.location_to_reg(Size::S32, loc, &mut temps, ImmType::None, true, None)?; + let dst_gpr = + self.location_to_reg(Size::S32, ret, &mut temps, ImmType::None, false, None)?; + + let mut neon_temps = vec![]; + let neon_temp = self.acquire_temp_simd().ok_or_else(|| { + CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned()) + })?; + neon_temps.push(neon_temp); + self.assembler - .emit_add(Size::S32, dest, Location::Imm8(1), dest)?; // dest += 1 - self.assembler.emit_clz(Size::S32, src, tmp)?; // clz src => tmp - self.assembler.emit_lsl(Size::S32, src, tmp, src)?; // src << tmp => src + .emit_fmov(Size::S32, src_gpr, Size::S32, Location::SIMD(neon_temp))?; + self.assembler.emit_cnt(neon_temp, neon_temp)?; + self.assembler.emit_addv(neon_temp, neon_temp)?; self.assembler - .emit_lsl(Size::S32, src, Location::Imm8(1), src)?; // src << 1 => src - self.assembler.emit_cbnz_label(Size::S32, src, label_loop)?; // if src!=0 goto loop - self.assembler.emit_label(label_exit)?; - if ret != dest { - self.move_location(Size::S32, dest, ret)?; + .emit_fmov(Size::S32, Location::SIMD(neon_temp), Size::S32, dst_gpr)?; + + if ret != dst_gpr { + self.move_location(Size::S32, dst_gpr, ret)?; } + for r in temps { self.release_gpr(r); } + + for r in neon_temps { + self.release_simd(r); + } + Ok(()) } fn i32_shl( @@ -5185,46 +5174,37 @@ impl Machine for MachineARM64 { } fn i64_popcnt(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> { let mut temps = vec![]; - let src = self.location_to_reg(Size::S64, loc, &mut temps, ImmType::None, true, None)?; - let dest = self.location_to_reg(Size::S64, ret, &mut temps, ImmType::None, false, None)?; - let src = if src == loc { - let tmp = self.acquire_temp_gpr().ok_or_else(|| { - CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned()) - })?; - temps.push(tmp); - self.assembler - .emit_mov(Size::S64, src, Location::GPR(tmp))?; - Location::GPR(tmp) - } else { - src - }; - let tmp = { - let tmp = self.acquire_temp_gpr().ok_or_else(|| { - CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned()) - })?; - temps.push(tmp); - Location::GPR(tmp) - }; - let label_loop = self.assembler.get_label(); - let label_exit = self.assembler.get_label(); - self.assembler - .emit_mov(Size::S32, Location::GPR(GPR::XzrSp), dest)?; // dest <= 0 - self.assembler.emit_cbz_label(Size::S64, src, label_exit)?; // src == 0, then goto label_exit - self.assembler.emit_label(label_loop)?; + + let src_gpr = + self.location_to_reg(Size::S64, loc, &mut temps, ImmType::None, true, None)?; + let dst_gpr = + self.location_to_reg(Size::S64, ret, &mut temps, ImmType::None, false, None)?; + + let mut neon_temps = vec![]; + let neon_temp = self.acquire_temp_simd().ok_or_else(|| { + CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned()) + })?; + neon_temps.push(neon_temp); + self.assembler - .emit_add(Size::S32, dest, Location::Imm8(1), dest)?; // dest += 1 - self.assembler.emit_clz(Size::S64, src, tmp)?; // clz src => tmp - self.assembler.emit_lsl(Size::S64, src, tmp, src)?; // src << tmp => src + .emit_fmov(Size::S64, src_gpr, Size::S64, Location::SIMD(neon_temp))?; + self.assembler.emit_cnt(neon_temp, neon_temp)?; + self.assembler.emit_addv(neon_temp, neon_temp)?; self.assembler - .emit_lsl(Size::S64, src, Location::Imm8(1), src)?; // src << 1 => src - self.assembler.emit_cbnz_label(Size::S64, src, label_loop)?; // src != 0, then goto label_loop - self.assembler.emit_label(label_exit)?; - if ret != dest { - self.move_location(Size::S64, dest, ret)?; + .emit_fmov(Size::S64, Location::SIMD(neon_temp), Size::S64, dst_gpr)?; + + if ret != dst_gpr { + self.move_location(Size::S64, dst_gpr, ret)?; } + for r in temps { self.release_gpr(r); } + + for r in neon_temps { + self.release_simd(r); + } + Ok(()) } fn i64_shl( From 4a437464fbe91b172d208662bd536907ce06493d Mon Sep 17 00:00:00 2001 From: Edoardo Marangoni Date: Wed, 27 Mar 2024 18:27:44 +0100 Subject: [PATCH 2/5] feat(singlepass): add NEON feature for arm64 --- lib/compiler-singlepass/src/compiler.rs | 2 +- lib/compiler-singlepass/src/machine.rs | 6 +- lib/compiler-singlepass/src/machine_arm64.rs | 224 ++++++++++++++----- lib/types/src/compilation/target.rs | 18 +- 4 files changed, 187 insertions(+), 63 deletions(-) diff --git a/lib/compiler-singlepass/src/compiler.rs b/lib/compiler-singlepass/src/compiler.rs index 961712556f5..5e723a3cfc8 100644 --- a/lib/compiler-singlepass/src/compiler.rs +++ b/lib/compiler-singlepass/src/compiler.rs @@ -180,7 +180,7 @@ impl Compiler for SinglepassCompiler { generator.finalize(input) } Architecture::Aarch64(_) => { - let machine = MachineARM64::new(); + let machine = MachineARM64::new(Some(target.clone())); let mut generator = FuncGen::new( module, &self.config, diff --git a/lib/compiler-singlepass/src/machine.rs b/lib/compiler-singlepass/src/machine.rs index d4602ae77fe..e3ca24ba302 100644 --- a/lib/compiler-singlepass/src/machine.rs +++ b/lib/compiler-singlepass/src/machine.rs @@ -2411,7 +2411,7 @@ pub fn gen_std_trampoline( machine.gen_std_trampoline(sig, calling_convention) } Architecture::Aarch64(_) => { - let machine = MachineARM64::new(); + let machine = MachineARM64::new(Some(target.clone())); machine.gen_std_trampoline(sig, calling_convention) } _ => Err(CompileError::UnsupportedTarget( @@ -2433,7 +2433,7 @@ pub fn gen_std_dynamic_import_trampoline( machine.gen_std_dynamic_import_trampoline(vmoffsets, sig, calling_convention) } Architecture::Aarch64(_) => { - let machine = MachineARM64::new(); + let machine = MachineARM64::new(Some(target.clone())); machine.gen_std_dynamic_import_trampoline(vmoffsets, sig, calling_convention) } _ => Err(CompileError::UnsupportedTarget( @@ -2455,7 +2455,7 @@ pub fn gen_import_call_trampoline( machine.gen_import_call_trampoline(vmoffsets, index, sig, calling_convention) } Architecture::Aarch64(_) => { - let machine = MachineARM64::new(); + let machine = MachineARM64::new(Some(target.clone())); machine.gen_import_call_trampoline(vmoffsets, index, sig, calling_convention) } _ => Err(CompileError::UnsupportedTarget( diff --git a/lib/compiler-singlepass/src/machine_arm64.rs b/lib/compiler-singlepass/src/machine_arm64.rs index 3a3d14aa002..68a9d1552a6 100644 --- a/lib/compiler-singlepass/src/machine_arm64.rs +++ b/lib/compiler-singlepass/src/machine_arm64.rs @@ -4,9 +4,9 @@ use gimli::{write::CallFrameInstruction, AArch64}; use wasmer_compiler::wasmparser::ValType as WpType; use wasmer_types::{ - CallingConvention, CompileError, CustomSection, FunctionBody, FunctionIndex, FunctionType, - InstructionAddressMap, Relocation, RelocationKind, RelocationTarget, SourceLoc, TrapCode, - TrapInformation, VMOffsets, + CallingConvention, CompileError, CpuFeature, CustomSection, FunctionBody, FunctionIndex, + FunctionType, InstructionAddressMap, Relocation, RelocationKind, RelocationTarget, SourceLoc, + Target, TrapCode, TrapInformation, VMOffsets, }; use crate::arm64_decl::new_machine_state; @@ -114,6 +114,8 @@ pub struct MachineARM64 { pushed: bool, /// Vector of unwind operations with offset unwind_ops: Vec<(usize, UnwindOps)>, + /// The actual compilation target. + target: Option, } #[allow(dead_code)] @@ -138,7 +140,7 @@ enum ImmType { #[allow(dead_code)] impl MachineARM64 { - pub fn new() -> Self { + pub fn new(target: Option) -> Self { MachineARM64 { assembler: Assembler::new(0), used_gprs: 0, @@ -148,6 +150,7 @@ impl MachineARM64 { src_loc: 0, pushed: false, unwind_ops: vec![], + target, } } fn compatible_imm(&self, imm: i64, ty: ImmType) -> bool { @@ -3072,38 +3075,90 @@ impl Machine for MachineARM64 { Ok(()) } fn i32_popcnt(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> { - let mut temps = vec![]; - - let src_gpr = - self.location_to_reg(Size::S32, loc, &mut temps, ImmType::None, true, None)?; - let dst_gpr = - self.location_to_reg(Size::S32, ret, &mut temps, ImmType::None, false, None)?; - - let mut neon_temps = vec![]; - let neon_temp = self.acquire_temp_simd().ok_or_else(|| { - CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned()) - })?; - neon_temps.push(neon_temp); + if self.target.is_some() + && self + .target + .as_ref() + .unwrap() + .cpu_features() + .contains(CpuFeature::NEON) + { + let mut temps = vec![]; + + let src_gpr = + self.location_to_reg(Size::S32, loc, &mut temps, ImmType::None, true, None)?; + let dst_gpr = + self.location_to_reg(Size::S32, ret, &mut temps, ImmType::None, false, None)?; + + let mut neon_temps = vec![]; + let neon_temp = self.acquire_temp_simd().ok_or_else(|| { + CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned()) + })?; + neon_temps.push(neon_temp); - self.assembler - .emit_fmov(Size::S32, src_gpr, Size::S32, Location::SIMD(neon_temp))?; - self.assembler.emit_cnt(neon_temp, neon_temp)?; - self.assembler.emit_addv(neon_temp, neon_temp)?; - self.assembler - .emit_fmov(Size::S32, Location::SIMD(neon_temp), Size::S32, dst_gpr)?; + self.assembler + .emit_fmov(Size::S32, src_gpr, Size::S32, Location::SIMD(neon_temp))?; + self.assembler.emit_cnt(neon_temp, neon_temp)?; + self.assembler.emit_addv(neon_temp, neon_temp)?; + self.assembler + .emit_fmov(Size::S32, Location::SIMD(neon_temp), Size::S32, dst_gpr)?; - if ret != dst_gpr { - self.move_location(Size::S32, dst_gpr, ret)?; - } + if ret != dst_gpr { + self.move_location(Size::S32, dst_gpr, ret)?; + } - for r in temps { - self.release_gpr(r); - } + for r in temps { + self.release_gpr(r); + } - for r in neon_temps { - self.release_simd(r); + for r in neon_temps { + self.release_simd(r); + } + } else { + let mut temps = vec![]; + let src = + self.location_to_reg(Size::S32, loc, &mut temps, ImmType::None, true, None)?; + let dest = + self.location_to_reg(Size::S32, ret, &mut temps, ImmType::None, false, None)?; + let src = if src == loc { + let tmp = self.acquire_temp_gpr().ok_or_else(|| { + CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned()) + })?; + temps.push(tmp); + self.assembler + .emit_mov(Size::S32, src, Location::GPR(tmp))?; + Location::GPR(tmp) + } else { + src + }; + let tmp = { + let tmp = self.acquire_temp_gpr().ok_or_else(|| { + CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned()) + })?; + temps.push(tmp); + Location::GPR(tmp) + }; + let label_loop = self.assembler.get_label(); + let label_exit = self.assembler.get_label(); + self.assembler + .emit_mov(Size::S32, Location::GPR(GPR::XzrSp), dest)?; // 0 => dest + self.assembler.emit_cbz_label(Size::S32, src, label_exit)?; // src==0, exit + self.assembler.emit_label(label_loop)?; // loop: + self.assembler + .emit_add(Size::S32, dest, Location::Imm8(1), dest)?; // dest += 1 + self.assembler.emit_clz(Size::S32, src, tmp)?; // clz src => tmp + self.assembler.emit_lsl(Size::S32, src, tmp, src)?; // src << tmp => src + self.assembler + .emit_lsl(Size::S32, src, Location::Imm8(1), src)?; // src << 1 => src + self.assembler.emit_cbnz_label(Size::S32, src, label_loop)?; // if src!=0 goto loop + self.assembler.emit_label(label_exit)?; + if ret != dest { + self.move_location(Size::S32, dest, ret)?; + } + for r in temps { + self.release_gpr(r); + } } - Ok(()) } fn i32_shl( @@ -5173,36 +5228,89 @@ impl Machine for MachineARM64 { Ok(()) } fn i64_popcnt(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> { - let mut temps = vec![]; - - let src_gpr = - self.location_to_reg(Size::S64, loc, &mut temps, ImmType::None, true, None)?; - let dst_gpr = - self.location_to_reg(Size::S64, ret, &mut temps, ImmType::None, false, None)?; - - let mut neon_temps = vec![]; - let neon_temp = self.acquire_temp_simd().ok_or_else(|| { - CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned()) - })?; - neon_temps.push(neon_temp); + if self.target.is_some() + && self + .target + .as_ref() + .unwrap() + .cpu_features() + .contains(CpuFeature::NEON) + { + let mut temps = vec![]; + + let src_gpr = + self.location_to_reg(Size::S64, loc, &mut temps, ImmType::None, true, None)?; + let dst_gpr = + self.location_to_reg(Size::S64, ret, &mut temps, ImmType::None, false, None)?; + + let mut neon_temps = vec![]; + let neon_temp = self.acquire_temp_simd().ok_or_else(|| { + CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned()) + })?; + neon_temps.push(neon_temp); - self.assembler - .emit_fmov(Size::S64, src_gpr, Size::S64, Location::SIMD(neon_temp))?; - self.assembler.emit_cnt(neon_temp, neon_temp)?; - self.assembler.emit_addv(neon_temp, neon_temp)?; - self.assembler - .emit_fmov(Size::S64, Location::SIMD(neon_temp), Size::S64, dst_gpr)?; + self.assembler + .emit_fmov(Size::S64, src_gpr, Size::S64, Location::SIMD(neon_temp))?; + self.assembler.emit_cnt(neon_temp, neon_temp)?; + self.assembler.emit_addv(neon_temp, neon_temp)?; + self.assembler + .emit_fmov(Size::S64, Location::SIMD(neon_temp), Size::S64, dst_gpr)?; - if ret != dst_gpr { - self.move_location(Size::S64, dst_gpr, ret)?; - } + if ret != dst_gpr { + self.move_location(Size::S64, dst_gpr, ret)?; + } - for r in temps { - self.release_gpr(r); - } + for r in temps { + self.release_gpr(r); + } - for r in neon_temps { - self.release_simd(r); + for r in neon_temps { + self.release_simd(r); + } + } else { + let mut temps = vec![]; + let src = + self.location_to_reg(Size::S64, loc, &mut temps, ImmType::None, true, None)?; + let dest = + self.location_to_reg(Size::S64, ret, &mut temps, ImmType::None, false, None)?; + let src = if src == loc { + let tmp = self.acquire_temp_gpr().ok_or_else(|| { + CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned()) + })?; + temps.push(tmp); + self.assembler + .emit_mov(Size::S64, src, Location::GPR(tmp))?; + Location::GPR(tmp) + } else { + src + }; + let tmp = { + let tmp = self.acquire_temp_gpr().ok_or_else(|| { + CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned()) + })?; + temps.push(tmp); + Location::GPR(tmp) + }; + let label_loop = self.assembler.get_label(); + let label_exit = self.assembler.get_label(); + self.assembler + .emit_mov(Size::S32, Location::GPR(GPR::XzrSp), dest)?; // dest <= 0 + self.assembler.emit_cbz_label(Size::S64, src, label_exit)?; // src == 0, then goto label_exit + self.assembler.emit_label(label_loop)?; + self.assembler + .emit_add(Size::S32, dest, Location::Imm8(1), dest)?; // dest += 1 + self.assembler.emit_clz(Size::S64, src, tmp)?; // clz src => tmp + self.assembler.emit_lsl(Size::S64, src, tmp, src)?; // src << tmp => src + self.assembler + .emit_lsl(Size::S64, src, Location::Imm8(1), src)?; // src << 1 => src + self.assembler.emit_cbnz_label(Size::S64, src, label_loop)?; // src != 0, then goto label_loop + self.assembler.emit_label(label_exit)?; + if ret != dest { + self.move_location(Size::S64, dest, ret)?; + } + for r in temps { + self.release_gpr(r); + } } Ok(()) @@ -8761,7 +8869,7 @@ mod test { #[test] fn tests_arm64() -> Result<(), CompileError> { - let mut machine = MachineARM64::new(); + let mut machine = MachineARM64::new(None); test_move_location(&mut machine, Size::S32)?; test_move_location(&mut machine, Size::S64)?; diff --git a/lib/types/src/compilation/target.rs b/lib/types/src/compilation/target.rs index 176e1907cce..0d3d98f1449 100644 --- a/lib/types/src/compilation/target.rs +++ b/lib/types/src/compilation/target.rs @@ -48,6 +48,7 @@ pub enum CpuFeature { AVX512F, LZCNT, // ARM features + NEON, // Risc-V features } @@ -101,7 +102,20 @@ impl CpuFeature { } features } - #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))] + + #[cfg(target_arch = "aarch64")] + /// Retrieves the features for the current Host + pub fn for_host() -> EnumSet { + let mut features = EnumSet::new(); + + if std::arch::is_aarch64_feature_detected!("neon") { + features.insert(Self::NEON); + } + + features + } + + #[cfg(not(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64")))] /// Retrieves the features for the current Host pub fn for_host() -> EnumSet { // We default to an empty hash set @@ -140,6 +154,7 @@ impl FromStr for CpuFeature { "avx512vl" => Ok(Self::AVX512VL), "avx512f" => Ok(Self::AVX512F), "lzcnt" => Ok(Self::LZCNT), + "neon" => Ok(Self::NEON), _ => Err(ParseCpuFeatureError::Missing(s.to_string())), } } @@ -162,6 +177,7 @@ impl ToString for CpuFeature { Self::AVX512VL => "avx512vl", Self::AVX512F => "avx512f", Self::LZCNT => "lzcnt", + Self::NEON => "neon", } .to_string() } From f553d9fe871e4ad091c57f423e5edc9461dc9fac Mon Sep 17 00:00:00 2001 From: Edoardo Marangoni Date: Thu, 28 Mar 2024 11:47:49 +0100 Subject: [PATCH 3/5] fix(singlepass): use `has_neon` flag --- lib/compiler-singlepass/src/machine_arm64.rs | 27 ++++++++------------ 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/lib/compiler-singlepass/src/machine_arm64.rs b/lib/compiler-singlepass/src/machine_arm64.rs index 68a9d1552a6..885853ac496 100644 --- a/lib/compiler-singlepass/src/machine_arm64.rs +++ b/lib/compiler-singlepass/src/machine_arm64.rs @@ -105,7 +105,6 @@ pub struct MachineARM64 { used_simd: u32, trap_table: TrapTable, /// Map from byte offset into wasm function to range of native instructions. - /// // Ordered by increasing InstructionAddressMap::srcloc. instructions_address_map: Vec, /// The source location for the current operator. @@ -116,6 +115,8 @@ pub struct MachineARM64 { unwind_ops: Vec<(usize, UnwindOps)>, /// The actual compilation target. target: Option, + /// A boolean flag signaling if this machine supports NEON. + has_neon: bool, } #[allow(dead_code)] @@ -141,6 +142,11 @@ enum ImmType { #[allow(dead_code)] impl MachineARM64 { pub fn new(target: Option) -> Self { + let has_neon = match target { + Some(target) => target.cpu_features().contains(CpuFeature::NEON), + None => false, + }; + MachineARM64 { assembler: Assembler::new(0), used_gprs: 0, @@ -151,6 +157,7 @@ impl MachineARM64 { pushed: false, unwind_ops: vec![], target, + has_neon, } } fn compatible_imm(&self, imm: i64, ty: ImmType) -> bool { @@ -3075,14 +3082,7 @@ impl Machine for MachineARM64 { Ok(()) } fn i32_popcnt(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> { - if self.target.is_some() - && self - .target - .as_ref() - .unwrap() - .cpu_features() - .contains(CpuFeature::NEON) - { + if self.has_neon { let mut temps = vec![]; let src_gpr = @@ -5228,14 +5228,7 @@ impl Machine for MachineARM64 { Ok(()) } fn i64_popcnt(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> { - if self.target.is_some() - && self - .target - .as_ref() - .unwrap() - .cpu_features() - .contains(CpuFeature::NEON) - { + if self.has_neon { let mut temps = vec![]; let src_gpr = From 733cf1fba398a4260f89d0fc57ee789f160e1f5d Mon Sep 17 00:00:00 2001 From: Edoardo Marangoni Date: Thu, 28 Mar 2024 11:54:52 +0100 Subject: [PATCH 4/5] fix(singlepass): use `ref` to match on target --- lib/compiler-singlepass/src/machine_arm64.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/compiler-singlepass/src/machine_arm64.rs b/lib/compiler-singlepass/src/machine_arm64.rs index 885853ac496..3cb8bce7dc8 100644 --- a/lib/compiler-singlepass/src/machine_arm64.rs +++ b/lib/compiler-singlepass/src/machine_arm64.rs @@ -143,7 +143,7 @@ enum ImmType { impl MachineARM64 { pub fn new(target: Option) -> Self { let has_neon = match target { - Some(target) => target.cpu_features().contains(CpuFeature::NEON), + Some(ref target) => target.cpu_features().contains(CpuFeature::NEON), None => false, }; From 338b15d7732111d710fc76385c741da322d84f4d Mon Sep 17 00:00:00 2001 From: Edoardo Marangoni Date: Thu, 28 Mar 2024 12:20:09 +0100 Subject: [PATCH 5/5] fix(singlepass): lint --- lib/compiler-singlepass/src/machine_arm64.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/compiler-singlepass/src/machine_arm64.rs b/lib/compiler-singlepass/src/machine_arm64.rs index 3cb8bce7dc8..2518dec0a0a 100644 --- a/lib/compiler-singlepass/src/machine_arm64.rs +++ b/lib/compiler-singlepass/src/machine_arm64.rs @@ -113,8 +113,6 @@ pub struct MachineARM64 { pushed: bool, /// Vector of unwind operations with offset unwind_ops: Vec<(usize, UnwindOps)>, - /// The actual compilation target. - target: Option, /// A boolean flag signaling if this machine supports NEON. has_neon: bool, } @@ -142,6 +140,9 @@ enum ImmType { #[allow(dead_code)] impl MachineARM64 { pub fn new(target: Option) -> Self { + // If and when needed, checks for other supported features should be + // added as boolean fields in the struct to make checking if such + // features are available as cheap as possible. let has_neon = match target { Some(ref target) => target.cpu_features().contains(CpuFeature::NEON), None => false, @@ -156,7 +157,6 @@ impl MachineARM64 { src_loc: 0, pushed: false, unwind_ops: vec![], - target, has_neon, } }