diff --git a/CHANGELOG.md b/CHANGELOG.md
index cac16c9d02f..d55570a776d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -22,6 +22,7 @@ Looking for changes that affect our C API? See the [C API Changelog](lib/c-api/C
 - [#2478](https://github.com/wasmerio/wasmer/pull/2478) Rename `wasm_instance_new()`’s “traps” argument to “trap”.

 ### Fixed
+- [#2599](https://github.com/wasmerio/wasmer/pull/2599) Fixed Universal engine for Linux/Aarch64 target.
 - [#2587](https://github.com/wasmerio/wasmer/pull/2587) Fixed deriving `WasmerEnv` when aliasing `Result`.
 - [#2518](https://github.com/wasmerio/wasmer/pull/2518) Remove temporary file used to creating an artifact when creating a Dylib engine artifact.
 - [#2494](https://github.com/wasmerio/wasmer/pull/2494) Fixed `WasmerEnv` access when using `call_indirect` with the Singlepass compiler.
diff --git a/lib/compiler-cranelift/src/compiler.rs b/lib/compiler-cranelift/src/compiler.rs
index ec31c15df72..c836b9ce106 100644
--- a/lib/compiler-cranelift/src/compiler.rs
+++ b/lib/compiler-cranelift/src/compiler.rs
@@ -294,6 +294,7 @@ impl Compiler for CraneliftCompiler {
             function_call_trampolines,
             dynamic_function_trampolines,
             dwarf,
+            None,
         ))
     }
 }
diff --git a/lib/compiler-llvm/src/compiler.rs b/lib/compiler-llvm/src/compiler.rs
index d98ccd8a8c3..f13d47cbb54 100644
--- a/lib/compiler-llvm/src/compiler.rs
+++ b/lib/compiler-llvm/src/compiler.rs
@@ -12,9 +12,10 @@ use rayon::iter::ParallelBridge;
 use rayon::prelude::{IntoParallelIterator, IntoParallelRefIterator, ParallelIterator};
 use std::sync::Arc;
 use wasmer_compiler::{
-    Compilation, CompileError, CompileModuleInfo, Compiler, CustomSection, CustomSectionProtection,
-    Dwarf, FunctionBodyData, ModuleMiddleware, ModuleTranslationState, RelocationTarget,
-    SectionBody, SectionIndex, Symbol, SymbolRegistry, Target,
+    Architecture, Compilation, CompileError, CompileModuleInfo, Compiler, CustomSection,
+    CustomSectionProtection, Dwarf, FunctionBodyData, ModuleMiddleware, ModuleTranslationState,
+    RelocationTarget, SectionBody, SectionIndex, Symbol, SymbolRegistry, Target,
+    TrampolinesSection,
 };
 use wasmer_types::entity::{EntityRef, PrimaryMap};
 use wasmer_types::{FunctionIndex, LocalFunctionIndex, SignatureIndex};
@@ -303,6 +304,37 @@ impl Compiler for LLVMCompiler {
             })
             .collect::<PrimaryMap<_, _>>();

+        let trampolines = match target.triple().architecture {
+            Architecture::Aarch64(_) => {
+                let nj = 16;
+                // We create a jump to an absolute 64-bit address, using x17 as a scratch
+                // register. The SystemV ABI declares both x16 and x17 as intra-procedural
+                // scratch registers, but Apple asks that x16 not be used.
+                // LDR x17, #8    51 00 00 58
+                // BR x17         20 02 1f d6
+                // JMPADDR        00 00 00 00 00 00 00 00
+                let onejump = [
+                    0x51, 0x00, 0x00, 0x58, 0x20, 0x02, 0x1f, 0xd6, 0, 0, 0, 0, 0, 0, 0, 0,
+                ];
+                let trampolines = Some(TrampolinesSection::new(
+                    SectionIndex::from_u32(module_custom_sections.len() as u32),
+                    nj,
+                    onejump.len(),
+                ));
+                let mut alljmps = vec![];
+                for _ in 0..nj {
+                    alljmps.extend(onejump.iter().copied());
+                }
+                module_custom_sections.push(CustomSection {
+                    protection: CustomSectionProtection::ReadExecute,
+                    bytes: SectionBody::new_with_vec(alljmps),
+                    relocations: vec![],
+                });
+                trampolines
+            }
+            _ => None,
+        };
+
         let dwarf = if !frame_section_bytes.is_empty() {
             let dwarf = Some(Dwarf::new(SectionIndex::from_u32(
                 module_custom_sections.len() as u32,
@@ -367,6 +399,7 @@ impl Compiler for LLVMCompiler {
             function_call_trampolines,
             dynamic_function_trampolines,
             dwarf,
+            trampolines,
         ))
     }
 }
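For readers who do not decode AArch64 by hand: each 16-byte `onejump` slot above is `LDR x17, #8` followed by `BR x17` and an 8-byte literal that the linker fills in later, so branching to a slot jumps on to whatever absolute address is stored at its offset 8. The snippet below is a minimal standalone sketch of that layout (plain Rust, not part of the Wasmer API; `empty_slot` and `patch_slot` are made-up helper names for illustration).

```rust
// Standalone sketch of the 16-byte trampoline slot emitted above (not Wasmer API).
// Each slot is: LDR x17, #8 ; BR x17 ; followed by the 64-bit absolute target.
const SLOT_SIZE: usize = 16;

/// Build one empty slot: the two fixed instructions plus a zeroed address word.
fn empty_slot() -> [u8; SLOT_SIZE] {
    [
        0x51, 0x00, 0x00, 0x58, // LDR x17, #8  (load the quadword 8 bytes ahead into x17)
        0x20, 0x02, 0x1f, 0xd6, // BR x17       (jump to the loaded address)
        0, 0, 0, 0, 0, 0, 0, 0, // placeholder for the absolute target address
    ]
}

/// Point a slot at `target` by patching the 8-byte address word that LDR reads.
fn patch_slot(slot: &mut [u8; SLOT_SIZE], target: u64) {
    slot[8..16].copy_from_slice(&target.to_le_bytes());
}

fn main() {
    let mut slot = empty_slot();
    patch_slot(&mut slot, 0x1234_5678_9abc_def0);
    assert_eq!(&slot[8..], &0x1234_5678_9abc_def0u64.to_le_bytes());
    println!("{:02x?}", slot);
}
```

The compiler above reserves 16 such slots per module (`nj = 16`); the linker in `link.rs` hands them out lazily and reuses a slot when the same target address is requested again.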
diff --git a/lib/compiler-llvm/src/config.rs b/lib/compiler-llvm/src/config.rs
index 550d641af3d..c6b65197261 100644
--- a/lib/compiler-llvm/src/config.rs
+++ b/lib/compiler-llvm/src/config.rs
@@ -102,11 +102,6 @@ impl LLVM {
     fn target_triple(&self, target: &Target) -> TargetTriple {
         // Hack: we're using is_pic to determine whether this is a native
         // build or not.
-        let binary_format = if self.is_pic {
-            target.triple().binary_format
-        } else {
-            target_lexicon::BinaryFormat::Elf
-        };
         let operating_system =
             if target.triple().operating_system == wasmer_compiler::OperatingSystem::Darwin
                 && !self.is_pic
@@ -117,10 +112,18 @@ impl LLVM {
                 // MachO, they check whether the OS is set to Darwin.
                 //
                 // Since both linux and darwin use SysV ABI, this should work.
-                wasmer_compiler::OperatingSystem::Linux
+                // This does not hold for Aarch64, though, where the ABI is slightly different.
+                match target.triple().architecture {
+                    _ => wasmer_compiler::OperatingSystem::Linux,
+                }
             } else {
                 target.triple().operating_system
             };
+        let binary_format = if self.is_pic {
+            target.triple().binary_format
+        } else {
+            target_lexicon::BinaryFormat::Elf
+        };
         let triple = Triple {
             architecture: target.triple().architecture,
             vendor: target.triple().vendor.clone(),
diff --git a/lib/compiler-llvm/src/object_file.rs b/lib/compiler-llvm/src/object_file.rs
index 3c3a99fb9f2..87b21866448 100644
--- a/lib/compiler-llvm/src/object_file.rs
+++ b/lib/compiler-llvm/src/object_file.rs
@@ -168,6 +168,19 @@ where
         (object::RelocationKind::Elf(object::elf::R_X86_64_PC64), 0) => {
             RelocationKind::X86PCRel8
         }
+        (object::RelocationKind::Elf(object::elf::R_AARCH64_MOVW_UABS_G0_NC), 0) => {
+            RelocationKind::Arm64Movw0
+        }
+        (object::RelocationKind::Elf(object::elf::R_AARCH64_MOVW_UABS_G1_NC), 0) => {
+            RelocationKind::Arm64Movw1
+        }
+        (object::RelocationKind::Elf(object::elf::R_AARCH64_MOVW_UABS_G2_NC), 0) => {
+            RelocationKind::Arm64Movw2
+        }
+        (object::RelocationKind::Elf(object::elf::R_AARCH64_MOVW_UABS_G3), 0) => {
+            RelocationKind::Arm64Movw3
+        }
+        (object::RelocationKind::PltRelative, 26) => RelocationKind::Arm64Call,
         _ => {
             return Err(CompileError::Codegen(format!(
                 "unknown relocation {:?}",
diff --git a/lib/compiler-singlepass/src/compiler.rs b/lib/compiler-singlepass/src/compiler.rs
index f47ac7019c2..37feb7c8e63 100644
--- a/lib/compiler-singlepass/src/compiler.rs
+++ b/lib/compiler-singlepass/src/compiler.rs
@@ -167,6 +167,7 @@ impl Compiler for SinglepassCompiler {
             function_call_trampolines,
             dynamic_function_trampolines,
             None,
+            None,
         ))
     }
 }
diff --git a/lib/compiler/src/function.rs b/lib/compiler/src/function.rs
index c649ae0e3dd..8eeafb1e1b6 100644
--- a/lib/compiler/src/function.rs
+++ b/lib/compiler/src/function.rs
@@ -109,6 +109,33 @@ impl Dwarf {
     }
 }

+/// Trampolines section used by the ARM short-jump (26-bit) relocations
+#[cfg_attr(feature = "enable-serde", derive(Deserialize, Serialize))]
+#[cfg_attr(
+    feature = "enable-rkyv",
+    derive(RkyvSerialize, RkyvDeserialize, Archive)
+)]
+#[derive(Debug, PartialEq, Eq, Clone, MemoryUsage)]
+pub struct TrampolinesSection {
+    /// SectionIndex for the actual Trampolines code
+    pub section_index: SectionIndex,
+    /// Number of jump slots in the section
+    pub slots: usize,
+    /// Slot size
+    pub size: usize,
+}
+
+impl TrampolinesSection {
+    /// Creates a `TrampolinesSection` from its section index, the number of slots and the slot size.
+    pub fn new(section_index: SectionIndex, slots: usize, size: usize) -> Self {
+        Self {
+            section_index,
+            slots,
+            size,
+        }
+    }
+}
+
 /// The result of compiling a WebAssembly module's functions.
 #[cfg_attr(feature = "enable-serde", derive(Deserialize, Serialize))]
 #[derive(Debug, PartialEq, Eq)]
@@ -155,6 +182,9 @@ pub struct Compilation {

     /// Section ids corresponding to the Dwarf debug info
     debug: Option<Dwarf>,
+
+    /// Trampolines for the architectures that need them
+    trampolines: Option<TrampolinesSection>,
 }

 impl Compilation {
@@ -165,6 +195,7 @@
         function_call_trampolines: PrimaryMap<SignatureIndex, FunctionBody>,
         dynamic_function_trampolines: PrimaryMap<FunctionIndex, FunctionBody>,
         debug: Option<Dwarf>,
+        trampolines: Option<TrampolinesSection>,
     ) -> Self {
         Self {
             functions,
@@ -172,6 +203,7 @@
             function_call_trampolines,
             dynamic_function_trampolines,
             debug,
+            trampolines,
         }
     }

@@ -249,6 +281,11 @@
     pub fn get_debug(&self) -> Option<Dwarf> {
         self.debug.clone()
     }
+
+    /// Returns the Trampolines info.
+    pub fn get_trampolines(&self) -> Option<TrampolinesSection> {
+        self.trampolines.clone()
+    }
 }

 impl<'a> IntoIterator for &'a Compilation {
diff --git a/lib/compiler/src/lib.rs b/lib/compiler/src/lib.rs
index 04aadd09be4..27669f27c24 100644
--- a/lib/compiler/src/lib.rs
+++ b/lib/compiler/src/lib.rs
@@ -74,7 +74,7 @@ pub use crate::error::{
 };
 pub use crate::function::{
     Compilation, CompiledFunction, CompiledFunctionFrameInfo, CustomSections, Dwarf, FunctionBody,
-    Functions,
+    Functions, TrampolinesSection,
 };
 pub use crate::jump_table::{JumpTable, JumpTableOffsets};
 pub use crate::module::CompileModuleInfo;
diff --git a/lib/compiler/src/relocation.rs b/lib/compiler/src/relocation.rs
index c591809df3b..8e73d8c34a2 100644
--- a/lib/compiler/src/relocation.rs
+++ b/lib/compiler/src/relocation.rs
@@ -50,6 +50,14 @@ pub enum RelocationKind {
     Arm32Call,
     /// Arm64 call target
     Arm64Call,
+    /// Arm64 movk/movz part 0
+    Arm64Movw0,
+    /// Arm64 movk/movz part 1
+    Arm64Movw1,
+    /// Arm64 movk/movz part 2
+    Arm64Movw2,
+    /// Arm64 movk/movz part 3
+    Arm64Movw3,
     // /// RISC-V call target
     // RiscvCall,
     /// Elf x86_64 32 bit signed PC relative offset to two GOT entries for GD symbol.
@@ -72,6 +80,10 @@ impl fmt::Display for RelocationKind {
             Self::X86CallPLTRel4 => write!(f, "CallPLTRel4"),
             Self::X86GOTPCRel4 => write!(f, "GOTPCRel4"),
             Self::Arm32Call | Self::Arm64Call => write!(f, "Call"),
+            Self::Arm64Movw0 => write!(f, "Arm64MovwG0"),
+            Self::Arm64Movw1 => write!(f, "Arm64MovwG1"),
+            Self::Arm64Movw2 => write!(f, "Arm64MovwG2"),
+            Self::Arm64Movw3 => write!(f, "Arm64MovwG3"),
             Self::ElfX86_64TlsGd => write!(f, "ElfX86_64TlsGd"),
             // Self::MachOX86_64Tlv => write!(f, "MachOX86_64Tlv"),
         }
@@ -121,7 +133,11 @@ impl Relocation {
     /// The function returns the relocation address and the delta.
     pub fn for_address(&self, start: usize, target_func_address: u64) -> (usize, u64) {
         match self.kind {
-            RelocationKind::Abs8 => {
+            RelocationKind::Abs8
+            | RelocationKind::Arm64Movw0
+            | RelocationKind::Arm64Movw1
+            | RelocationKind::Arm64Movw2
+            | RelocationKind::Arm64Movw3 => {
                 let reloc_address = start + self.offset as usize;
                 let reloc_addend = self.addend as isize;
                 let reloc_abs = target_func_address
@@ -155,6 +171,14 @@ impl Relocation {
                     .wrapping_add(reloc_addend as u32);
                 (reloc_address, reloc_delta_u32 as u64)
             }
+            RelocationKind::Arm64Call => {
+                let reloc_address = start + self.offset as usize;
+                let reloc_addend = self.addend as isize;
+                let reloc_delta_u32 = target_func_address
+                    .wrapping_sub(reloc_address as u64)
+                    .wrapping_add(reloc_addend as u64);
+                (reloc_address, reloc_delta_u32)
+            }
             // RelocationKind::X86PCRelRodata4 => {
             //     (start, target_func_address)
             // }
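The `for_address` arms above show that the four `Arm64Movw*` relocations reuse the absolute-address path of `Abs8`: the returned delta is simply the target address plus the addend, and the linker later feeds a different 16-bit slice of it to each relocation. The sketch below (plain Rust, illustrative only; the MOVZ/MOVK template words are generic encodings targeting x0, not values taken from this patch) shows how those slices land in bits [20:5] of the patched instructions, mirroring the `Arm64Movw0..3` arms added to `apply_relocation` further down.

```rust
// Sketch (not Wasmer API): how one 64-bit absolute value feeds the four
// Arm64Movw relocations. MOVZ/MOVK instructions carry a 16-bit immediate in
// bits [20:5], so each relocation ORs a different 16-bit slice of the value,
// shifted left by 5, into the instruction word it patches.
fn movw_imm16(abs: u64, part: u32) -> u32 {
    // part 0..=3 corresponds to Arm64Movw0..Arm64Movw3 (G0..G3).
    ((abs >> (16 * part)) & 0xffff) as u32
}

fn patch_movw(instr: u32, abs: u64, part: u32) -> u32 {
    // Same operation as the Arm64Movw arms in apply_relocation: keep the
    // opcode/register bits, insert the immediate at bits [20:5].
    instr | (movw_imm16(abs, part) << 5)
}

fn main() {
    let abs: u64 = 0x1122_3344_5566_7788;
    // MOVZ x0, #0 and MOVK x0, #0 (LSL #16/#32/#48) with empty immediate fields.
    let templates = [0xd280_0000u32, 0xf2a0_0000, 0xf2c0_0000, 0xf2e0_0000];
    for (part, t) in templates.iter().enumerate() {
        println!("G{}: {:#010x}", part, patch_movw(*t, abs, part as u32));
    }
}
```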
diff --git a/lib/engine-universal/src/artifact.rs b/lib/engine-universal/src/artifact.rs
index de6e6cc24fb..6e0d4bac908 100644
--- a/lib/engine-universal/src/artifact.rs
+++ b/lib/engine-universal/src/artifact.rs
@@ -122,6 +122,7 @@ impl UniversalArtifact {
             custom_sections: compilation.get_custom_sections(),
             custom_section_relocations: compilation.get_custom_section_relocations(),
             debug: compilation.get_debug(),
+            trampolines: compilation.get_trampolines(),
         };
         let serializable = SerializableModule {
             compilation: serializable_compilation,
@@ -194,6 +195,7 @@ impl UniversalArtifact {
             serializable.compilation.function_relocations.clone(),
             &custom_sections,
             &serializable.compilation.custom_section_relocations,
+            &serializable.compilation.trampolines,
         );

         // Compute indices into the shared signature table.
@@ -221,6 +223,7 @@
             }
             None => None,
         };
+
         // Make all code compiled thus far executable.
         inner_engine.publish_compiled_code();

diff --git a/lib/engine-universal/src/link.rs b/lib/engine-universal/src/link.rs
index e900fc78e5a..cb7b7245194 100644
--- a/lib/engine-universal/src/link.rs
+++ b/lib/engine-universal/src/link.rs
@@ -1,21 +1,82 @@
 //! Linking for Universal-compiled code.

-use std::ptr::write_unaligned;
+use std::collections::HashMap;
+use std::ptr::{read_unaligned, write_unaligned};
 use wasmer_compiler::{
     JumpTable, JumpTableOffsets, Relocation, RelocationKind, RelocationTarget, Relocations,
-    SectionIndex,
+    SectionIndex, TrampolinesSection,
 };
 use wasmer_engine::FunctionExtent;
 use wasmer_types::entity::{EntityRef, PrimaryMap};
 use wasmer_types::{LocalFunctionIndex, ModuleInfo};
 use wasmer_vm::SectionBodyPtr;

+/// Adds a new trampoline address, given the base address of the section. Returns the address of the jump.
+/// The trampoline itself still has to be written.
+fn trampolines_add(
+    map: &mut HashMap<usize, usize>,
+    trampoline: &TrampolinesSection,
+    address: usize,
+    baseaddress: usize,
+) -> usize {
+    if let Some(target) = map.get(&address) {
+        return *target;
+    }
+    let ret = map.len();
+    if ret == trampoline.slots {
+        panic!("No more slots in Trampolines");
+    }
+    map.insert(address, baseaddress + ret * trampoline.size);
+    baseaddress + ret * trampoline.size
+}
+
+fn use_trampoline(
+    address: usize,
+    allocated_sections: &PrimaryMap<SectionIndex, SectionBodyPtr>,
+    trampolines: &Option<TrampolinesSection>,
+    map: &mut HashMap<usize, usize>,
+) -> Option<usize> {
+    match trampolines {
+        Some(trampolines) => Some(trampolines_add(
+            map,
+            trampolines,
+            address,
+            *allocated_sections[trampolines.section_index] as usize,
+        )),
+        _ => None,
+    }
+}
+
+fn fill_trampolin_map(
+    allocated_sections: &PrimaryMap<SectionIndex, SectionBodyPtr>,
+    trampolines: &Option<TrampolinesSection>,
+) -> HashMap<usize, usize> {
+    let mut map: HashMap<usize, usize> = HashMap::new();
+    match trampolines {
+        Some(trampolines) => {
+            let baseaddress = *allocated_sections[trampolines.section_index] as usize;
+            for i in 0..trampolines.slots {
+                let jmpslot: usize = unsafe {
+                    read_unaligned((baseaddress + i * trampolines.size + 8) as *mut usize)
+                };
+                if jmpslot != 0 {
+                    map.insert(jmpslot, baseaddress + i * trampolines.size);
+                }
+            }
+        }
+        _ => {}
+    };
+    map
+}
+
 fn apply_relocation(
     body: usize,
     r: &Relocation,
     allocated_functions: &PrimaryMap<LocalFunctionIndex, FunctionExtent>,
     jt_offsets: &PrimaryMap<LocalFunctionIndex, JumpTableOffsets>,
     allocated_sections: &PrimaryMap<SectionIndex, SectionBodyPtr>,
+    trampolines: &Option<TrampolinesSection>,
+    trampolines_map: &mut HashMap<usize, usize>,
 ) {
     let target_func_address: usize = match r.reloc_target {
         RelocationTarget::LocalFunc(index) => *allocated_functions[index].ptr as usize,
@@ -53,6 +114,56 @@ fn apply_relocation(
             write_unaligned(reloc_address as *mut u32, reloc_delta as _);
         },
         RelocationKind::X86PCRelRodata4 => {}
+        RelocationKind::Arm64Call => unsafe {
+            let (reloc_address, mut reloc_delta) = r.for_address(body, target_func_address as u64);
+            if (reloc_delta as i64).abs() >= 0x1000_0000 {
+                let new_address = match use_trampoline(
+                    target_func_address,
+                    allocated_sections,
+                    trampolines,
+                    trampolines_map,
+                ) {
+                    Some(new_address) => new_address,
+                    _ => panic!(
+                        "Relocation too big for {:?} for {:?} with {:x}, current val {:x}",
+                        r.kind,
+                        r.reloc_target,
+                        reloc_delta,
+                        read_unaligned(reloc_address as *mut u32)
+                    ),
+                };
+                write_unaligned((new_address + 8) as *mut u64, target_func_address as u64); // write the jump address
+                let (_, new_delta) = r.for_address(body, new_address as u64);
+                reloc_delta = new_delta;
+            }
+            let reloc_delta = (((reloc_delta / 4) as u32) & 0x3ff_ffff)
+                | read_unaligned(reloc_address as *mut u32);
+            write_unaligned(reloc_address as *mut u32, reloc_delta);
+        },
+        RelocationKind::Arm64Movw0 => unsafe {
+            let (reloc_address, reloc_delta) = r.for_address(body, target_func_address as u64);
+            let reloc_delta =
+                (((reloc_delta & 0xffff) as u32) << 5) | read_unaligned(reloc_address as *mut u32);
+            write_unaligned(reloc_address as *mut u32, reloc_delta);
+        },
+        RelocationKind::Arm64Movw1 => unsafe {
+            let (reloc_address, reloc_delta) = r.for_address(body, target_func_address as u64);
+            let reloc_delta = ((((reloc_delta >> 16) & 0xffff) as u32) << 5)
+                | read_unaligned(reloc_address as *mut u32);
+            write_unaligned(reloc_address as *mut u32, reloc_delta);
+        },
+        RelocationKind::Arm64Movw2 => unsafe {
+            let (reloc_address, reloc_delta) = r.for_address(body, target_func_address as u64);
+            let reloc_delta = ((((reloc_delta >> 32) & 0xffff) as u32) << 5)
+                | read_unaligned(reloc_address as *mut u32);
+            write_unaligned(reloc_address as *mut u32, reloc_delta);
+        },
+        RelocationKind::Arm64Movw3 => unsafe {
+            let (reloc_address, reloc_delta) = r.for_address(body, target_func_address as u64);
+            let reloc_delta = ((((reloc_delta >> 48) & 0xffff) as u32) << 5)
+                | read_unaligned(reloc_address as *mut u32);
+            write_unaligned(reloc_address as *mut u32, reloc_delta);
+        },
         kind => panic!(
             "Relocation kind unsupported in the current architecture {}",
             kind
@@ -69,17 +180,35 @@ pub fn link_module(
     function_relocations: Relocations,
     allocated_sections: &PrimaryMap<SectionIndex, SectionBodyPtr>,
     section_relocations: &PrimaryMap<SectionIndex, Vec<Relocation>>,
+    trampolines: &Option<TrampolinesSection>,
 ) {
+    let mut trampolines_map = fill_trampolin_map(allocated_sections, trampolines);
     for (i, section_relocs) in section_relocations.iter() {
         let body = *allocated_sections[i] as usize;
         for r in section_relocs {
-            apply_relocation(body, r, allocated_functions, jt_offsets, allocated_sections);
+            apply_relocation(
+                body,
+                r,
+                allocated_functions,
+                jt_offsets,
+                allocated_sections,
+                trampolines,
+                &mut trampolines_map,
+            );
         }
     }
     for (i, function_relocs) in function_relocations.iter() {
         let body = *allocated_functions[i].ptr as usize;
         for r in function_relocs {
-            apply_relocation(body, r, allocated_functions, jt_offsets, allocated_sections);
+            apply_relocation(
+                body,
+                r,
+                allocated_functions,
+                jt_offsets,
+                allocated_sections,
+                trampolines,
+                &mut trampolines_map,
+            );
        }
    }
 }
diff --git a/lib/engine-universal/src/serialize.rs b/lib/engine-universal/src/serialize.rs
index cc09de9110e..6e3c768d452 100644
--- a/lib/engine-universal/src/serialize.rs
+++ b/lib/engine-universal/src/serialize.rs
@@ -8,7 +8,7 @@ use rkyv::{
 };
 use wasmer_compiler::{
     CompileModuleInfo, CompiledFunctionFrameInfo, CustomSection, Dwarf, FunctionBody,
-    JumpTableOffsets, Relocation, SectionIndex,
+    JumpTableOffsets, Relocation, SectionIndex, TrampolinesSection,
 };
 use wasmer_engine::{DeserializeError, SerializeError};
 use wasmer_types::entity::PrimaryMap;
@@ -27,6 +27,8 @@ pub struct SerializableCompilation {
     pub custom_section_relocations: PrimaryMap<SectionIndex, Vec<Relocation>>,
     // The section indices corresponding to the Dwarf debug info
     pub debug: Option<Dwarf>,
+    // The trampolines for the Arm architecture
+    pub trampolines: Option<TrampolinesSection>,
 }

 /// Serializable struct that is able to serialize from and to
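Putting the pieces together, the `Arm64Call` handling in `link.rs` patches the 26-bit B/BL immediate directly when the callee is close enough, and otherwise writes the absolute target into a free trampoline slot and branches to the slot instead. The sketch below illustrates that decision using the architectural ±128 MiB limit of the 26-bit word offset (plain Rust, not Wasmer API; `alloc_slot` is a hypothetical stand-in for the slot bookkeeping done by `trampolines_add`).

```rust
// Sketch of the far-call decision made for Arm64Call above (not Wasmer API).
// B/BL encode a signed 26-bit word offset, so a direct branch reaches
// roughly +/-128 MiB from the patched instruction.
const BRANCH_RANGE: i64 = 1 << 27; // 2^25 instructions * 4 bytes

fn branch_target(pc: usize, target: usize, mut alloc_slot: impl FnMut(usize) -> usize) -> usize {
    let delta = target as i64 - pc as i64;
    if delta.abs() < BRANCH_RANGE {
        target // near enough: patch the B/BL immediate directly
    } else {
        // Too far: store `target` in a trampoline slot's address word and
        // branch to the slot, which then jumps on to the real target.
        alloc_slot(target)
    }
}

fn main() {
    // Pretend the trampoline island lives right next to the caller.
    let island = 0x2000_0000usize;
    let mut next = 0;
    let mut alloc = |_t: usize| {
        let slot = island + next * 16;
        next += 1;
        slot
    };
    // ~16 MiB away: reachable with a plain B/BL.
    assert_eq!(branch_target(0x2000_1000, 0x2100_0000, &mut alloc), 0x2100_0000);
    // ~1.8 GiB away: redirected through the first trampoline slot.
    assert_eq!(branch_target(0x2000_1000, 0x9000_0000, &mut alloc), island);
}
```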