From 12135eb0621d8650d1490a99e00b3d26cbb639ad Mon Sep 17 00:00:00 2001 From: ptitSeb Date: Tue, 28 Sep 2021 10:01:16 +0200 Subject: [PATCH 01/10] fix(compiler) macOS Aarch64 ABI is not SystemV --- lib/compiler-llvm/src/config.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lib/compiler-llvm/src/config.rs b/lib/compiler-llvm/src/config.rs index 550d641af3d..2f13b91e774 100644 --- a/lib/compiler-llvm/src/config.rs +++ b/lib/compiler-llvm/src/config.rs @@ -117,7 +117,11 @@ impl LLVM { // MachO, they check whether the OS is set to Darwin. // // Since both linux and darwin use SysV ABI, this should work. - wasmer_compiler::OperatingSystem::Linux + // but not in the case of Aarch64, there the ABI is slightly different + match target.triple().architecture { + Architecture::Aarch64(_) => target.triple().operating_system, + _ => wasmer_compiler::OperatingSystem::Linux, + } } else { target.triple().operating_system }; From 21660e6d051bbabe763481602d413e8b78abb808 Mon Sep 17 00:00:00 2001 From: ptitSeb Date: Tue, 28 Sep 2021 10:03:25 +0200 Subject: [PATCH 02/10] feat(compiler) Added preliminary support for Arm64Call relocation --- lib/compiler-llvm/src/object_file.rs | 1 + lib/compiler/src/relocation.rs | 8 ++++++++ lib/engine-universal/src/link.rs | 7 ++++++- 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/lib/compiler-llvm/src/object_file.rs b/lib/compiler-llvm/src/object_file.rs index 3c3a99fb9f2..678c1ac442d 100644 --- a/lib/compiler-llvm/src/object_file.rs +++ b/lib/compiler-llvm/src/object_file.rs @@ -168,6 +168,7 @@ where (object::RelocationKind::Elf(object::elf::R_X86_64_PC64), 0) => { RelocationKind::X86PCRel8 } + (object::RelocationKind::PltRelative, 26) => RelocationKind::Arm64Call, _ => { return Err(CompileError::Codegen(format!( "unknown relocation {:?}", diff --git a/lib/compiler/src/relocation.rs b/lib/compiler/src/relocation.rs index c591809df3b..c5ceddc7699 100644 --- a/lib/compiler/src/relocation.rs +++ b/lib/compiler/src/relocation.rs @@ -155,6 +155,14 @@ impl Relocation { .wrapping_add(reloc_addend as u32); (reloc_address, reloc_delta_u32 as u64) } + RelocationKind::Arm64Call => { + let reloc_address = start + self.offset as usize; + let reloc_addend = self.addend as isize; + let reloc_delta_u32 = (target_func_address as u32) + .wrapping_sub(reloc_address as u32) + .wrapping_add(reloc_addend as u32); + (reloc_address, reloc_delta_u32 as u64) + } // RelocationKind::X86PCRelRodata4 => { // (start, target_func_address) // } diff --git a/lib/engine-universal/src/link.rs b/lib/engine-universal/src/link.rs index e900fc78e5a..cc8f4967d99 100644 --- a/lib/engine-universal/src/link.rs +++ b/lib/engine-universal/src/link.rs @@ -1,6 +1,6 @@ //! Linking for Universal-compiled code. 
-use std::ptr::write_unaligned; +use std::ptr::{write_unaligned, read_unaligned}; use wasmer_compiler::{ JumpTable, JumpTableOffsets, Relocation, RelocationKind, RelocationTarget, Relocations, SectionIndex, @@ -53,6 +53,11 @@ fn apply_relocation( write_unaligned(reloc_address as *mut u32, reloc_delta as _); }, RelocationKind::X86PCRelRodata4 => {} + RelocationKind::Arm64Call => unsafe { + let (reloc_address, reloc_delta) = r.for_address(body, target_func_address as u64); + let reloc_delta = reloc_delta as u32 | read_unaligned(reloc_address as *mut u32); + write_unaligned(reloc_address as *mut u32, reloc_delta); + }, kind => panic!( "Relocation kind unsupported in the current architecture {}", kind From 4f3b0a95a3800d5396cf3d31268898631924a9bd Mon Sep 17 00:00:00 2001 From: ptitSeb Date: Tue, 28 Sep 2021 11:25:21 +0200 Subject: [PATCH 03/10] feat(compiler) - Fixed Arm64Call relocation --- lib/compiler/src/relocation.rs | 8 ++++---- lib/engine-universal/src/link.rs | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/lib/compiler/src/relocation.rs b/lib/compiler/src/relocation.rs index c5ceddc7699..7c3382719b3 100644 --- a/lib/compiler/src/relocation.rs +++ b/lib/compiler/src/relocation.rs @@ -158,10 +158,10 @@ impl Relocation { RelocationKind::Arm64Call => { let reloc_address = start + self.offset as usize; let reloc_addend = self.addend as isize; - let reloc_delta_u32 = (target_func_address as u32) - .wrapping_sub(reloc_address as u32) - .wrapping_add(reloc_addend as u32); - (reloc_address, reloc_delta_u32 as u64) + let reloc_delta_u32 = target_func_address + .wrapping_sub(reloc_address as u64) + .wrapping_add(reloc_addend as u64); + (reloc_address, reloc_delta_u32) } // RelocationKind::X86PCRelRodata4 => { // (start, target_func_address) diff --git a/lib/engine-universal/src/link.rs b/lib/engine-universal/src/link.rs index cc8f4967d99..17db8bddce1 100644 --- a/lib/engine-universal/src/link.rs +++ b/lib/engine-universal/src/link.rs @@ -1,6 +1,6 @@ //! Linking for Universal-compiled code. 
-use std::ptr::{write_unaligned, read_unaligned}; +use std::ptr::{read_unaligned, write_unaligned}; use wasmer_compiler::{ JumpTable, JumpTableOffsets, Relocation, RelocationKind, RelocationTarget, Relocations, SectionIndex, @@ -55,7 +55,7 @@ fn apply_relocation( RelocationKind::X86PCRelRodata4 => {} RelocationKind::Arm64Call => unsafe { let (reloc_address, reloc_delta) = r.for_address(body, target_func_address as u64); - let reloc_delta = reloc_delta as u32 | read_unaligned(reloc_address as *mut u32); + let reloc_delta = (reloc_delta / 4) as u32 | read_unaligned(reloc_address as *mut u32); write_unaligned(reloc_address as *mut u32, reloc_delta); }, kind => panic!( From 9b6a9ad16f0f0bc7dc11cf777b839ca7506da869 Mon Sep 17 00:00:00 2001 From: ptitSeb Date: Tue, 28 Sep 2021 13:53:42 +0200 Subject: [PATCH 04/10] feat(compiler) Fix Arm64Call relocation with negative offset --- lib/engine-universal/src/link.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/engine-universal/src/link.rs b/lib/engine-universal/src/link.rs index 17db8bddce1..35fe319073a 100644 --- a/lib/engine-universal/src/link.rs +++ b/lib/engine-universal/src/link.rs @@ -55,7 +55,8 @@ fn apply_relocation( RelocationKind::X86PCRelRodata4 => {} RelocationKind::Arm64Call => unsafe { let (reloc_address, reloc_delta) = r.for_address(body, target_func_address as u64); - let reloc_delta = (reloc_delta / 4) as u32 | read_unaligned(reloc_address as *mut u32); + let reloc_delta = (((reloc_delta / 4) as u32) & 0x3ff_ffff) + | read_unaligned(reloc_address as *mut u32); write_unaligned(reloc_address as *mut u32, reloc_delta); }, kind => panic!( From 20f0c66fe6349b04389f614038639be6edcc900e Mon Sep 17 00:00:00 2001 From: ptitSeb Date: Tue, 5 Oct 2021 14:08:55 +0200 Subject: [PATCH 05/10] feat(compiler) Added Trampolines and more Relocations for Arm64 (llvm-universal on linux-aarch64 are OK now) --- lib/compiler-cranelift/src/compiler.rs | 1 + lib/compiler-llvm/src/compiler.rs | 37 ++++++++- lib/compiler-llvm/src/object_file.rs | 12 +++ .../src/translator/intrinsics.rs | 10 +++ lib/compiler/src/function.rs | 65 ++++++++++++++++ lib/compiler/src/lib.rs | 2 +- lib/compiler/src/relocation.rs | 18 ++++- lib/engine-universal/src/artifact.rs | 21 +++++ lib/engine-universal/src/engine.rs | 13 ++++ lib/engine-universal/src/link.rs | 78 ++++++++++++++++++- lib/engine-universal/src/serialize.rs | 4 +- 11 files changed, 251 insertions(+), 10 deletions(-) diff --git a/lib/compiler-cranelift/src/compiler.rs b/lib/compiler-cranelift/src/compiler.rs index ec31c15df72..c836b9ce106 100644 --- a/lib/compiler-cranelift/src/compiler.rs +++ b/lib/compiler-cranelift/src/compiler.rs @@ -294,6 +294,7 @@ impl Compiler for CraneliftCompiler { function_call_trampolines, dynamic_function_trampolines, dwarf, + None, )) } } diff --git a/lib/compiler-llvm/src/compiler.rs b/lib/compiler-llvm/src/compiler.rs index d98ccd8a8c3..4ddb4abc763 100644 --- a/lib/compiler-llvm/src/compiler.rs +++ b/lib/compiler-llvm/src/compiler.rs @@ -12,9 +12,9 @@ use rayon::iter::ParallelBridge; use rayon::prelude::{IntoParallelIterator, IntoParallelRefIterator, ParallelIterator}; use std::sync::Arc; use wasmer_compiler::{ - Compilation, CompileError, CompileModuleInfo, Compiler, CustomSection, CustomSectionProtection, - Dwarf, FunctionBodyData, ModuleMiddleware, ModuleTranslationState, RelocationTarget, - SectionBody, SectionIndex, Symbol, SymbolRegistry, Target, + Architecture, Compilation, CompileError, CompileModuleInfo, Compiler, CustomSection, + 
CustomSectionProtection, Dwarf, FunctionBodyData, ModuleMiddleware, ModuleTranslationState, + RelocationTarget, SectionBody, SectionIndex, Symbol, SymbolRegistry, Target, Trampolines, }; use wasmer_types::entity::{EntityRef, PrimaryMap}; use wasmer_types::{FunctionIndex, LocalFunctionIndex, SignatureIndex}; @@ -303,6 +303,36 @@ impl Compiler for LLVMCompiler { }) .collect::>(); + let trampolines = match target.triple().architecture { + Architecture::Aarch64(_) => { + let nj = 16; + let trampolines = Some(Trampolines::new( + SectionIndex::from_u32(module_custom_sections.len() as u32), + nj, + 16, + )); + // We create a jump to an absolute 64bits address + // using x16 as a scratch register, SystemV declare both x16 and x17 as Intra-Procedural scratch register + // LDR x16, #8 50 00 00 58 + // BR x16 00 02 1f d6 + // JMPADDR 00 00 00 00 00 00 00 00 + let onejump = vec![ + 0x50, 0x00, 0x00, 0x58, 0x00, 0x02, 0x1f, 0xd6, 0, 0, 0, 0, 0, 0, 0, 0, + ]; + let mut alljmps = Vec::::new(); + for _ in 0..nj { + alljmps.extend(onejump.iter().copied()); + } + module_custom_sections.push(CustomSection { + protection: CustomSectionProtection::ReadExecute, + bytes: SectionBody::new_with_vec(alljmps), + relocations: vec![], + }); + trampolines + } + _ => None, + }; + let dwarf = if !frame_section_bytes.is_empty() { let dwarf = Some(Dwarf::new(SectionIndex::from_u32( module_custom_sections.len() as u32, @@ -367,6 +397,7 @@ impl Compiler for LLVMCompiler { function_call_trampolines, dynamic_function_trampolines, dwarf, + trampolines, )) } } diff --git a/lib/compiler-llvm/src/object_file.rs b/lib/compiler-llvm/src/object_file.rs index 678c1ac442d..87b21866448 100644 --- a/lib/compiler-llvm/src/object_file.rs +++ b/lib/compiler-llvm/src/object_file.rs @@ -168,6 +168,18 @@ where (object::RelocationKind::Elf(object::elf::R_X86_64_PC64), 0) => { RelocationKind::X86PCRel8 } + (object::RelocationKind::Elf(object::elf::R_AARCH64_MOVW_UABS_G0_NC), 0) => { + RelocationKind::Arm64Movw0 + } + (object::RelocationKind::Elf(object::elf::R_AARCH64_MOVW_UABS_G1_NC), 0) => { + RelocationKind::Arm64Movw1 + } + (object::RelocationKind::Elf(object::elf::R_AARCH64_MOVW_UABS_G2_NC), 0) => { + RelocationKind::Arm64Movw2 + } + (object::RelocationKind::Elf(object::elf::R_AARCH64_MOVW_UABS_G3), 0) => { + RelocationKind::Arm64Movw3 + } (object::RelocationKind::PltRelative, 26) => RelocationKind::Arm64Call, _ => { return Err(CompileError::Codegen(format!( diff --git a/lib/compiler-llvm/src/translator/intrinsics.rs b/lib/compiler-llvm/src/translator/intrinsics.rs index 8f92f4042f7..fe141532b6d 100644 --- a/lib/compiler-llvm/src/translator/intrinsics.rs +++ b/lib/compiler-llvm/src/translator/intrinsics.rs @@ -455,6 +455,16 @@ impl<'ctx> Intrinsics<'ctx> { false, ); + let _linkage = if module + .get_triple() + .as_str() + .to_string_lossy() + .starts_with("aarch64") + { + Some(Linkage::External) + } else { + None + }; let intrinsics = Self { ctlz_i32: module.add_function("llvm.ctlz.i32", ret_i32_take_i32_i1, None), ctlz_i64: module.add_function("llvm.ctlz.i64", ret_i64_take_i64_i1, None), diff --git a/lib/compiler/src/function.rs b/lib/compiler/src/function.rs index c649ae0e3dd..4e74fc6ae22 100644 --- a/lib/compiler/src/function.rs +++ b/lib/compiler/src/function.rs @@ -4,6 +4,7 @@ //! A `Compilation` contains the compiled function bodies for a WebAssembly //! module (`CompiledFunction`). 
+use crate::lib::std::collections::HashMap;
 use crate::lib::std::vec::Vec;
 use crate::section::{CustomSection, SectionIndex};
 use crate::trap::TrapInformation;
@@ -109,6 +110,60 @@ impl Dwarf {
     }
 }
 
+/// Trampolines section used by ARM short jump (26 bits)
+#[cfg_attr(feature = "enable-serde", derive(Deserialize, Serialize))]
+#[cfg_attr(
+    feature = "enable-rkyv",
+    derive(RkyvSerialize, RkyvDeserialize, Archive)
+)]
+#[derive(Debug, PartialEq, Eq, Clone, MemoryUsage)]
+pub struct Trampolines {
+    /// SectionIndex for the actual Trampolines code
+    pub trampolines: SectionIndex,
+    /// Number of jump slots in the section
+    slots: usize,
+    /// Slot size
+    size: usize,
+    /// Map containing the already-emitted jumps
+    map: HashMap<usize, usize>,
+}
+
+impl Trampolines {
+    /// Creates a `Trampolines` struct with the index of its section, the number of slots, and the slot size
+    pub fn new(trampolines: SectionIndex, slots: usize, size: usize) -> Self {
+        let map = HashMap::new();
+        Self {
+            trampolines,
+            slots,
+            size,
+            map,
+        }
+    }
+
+    /// Check if an address already has a trampoline
+    pub fn exist(&self, address: usize) -> bool {
+        self.map.contains_key(&address)
+    }
+    /// Get the trampoline address for an address
+    pub fn get(&self, address: usize) -> usize {
+        *self.map.get(&address).unwrap()
+    }
+    /// Add a new trampoline address, given the base address of the section. Returns the address of the jump.
+    /// The trampoline itself still has to be written.
+    pub fn add(&mut self, address: usize, baseaddress: usize) -> usize {
+        if self.map.contains_key(&address) {
+            *self.map.get(&address).unwrap()
+        } else {
+            let ret = self.map.len();
+            if ret == self.slots {
+                panic!("No more slots in Trampolines");
+            }
+            self.map.insert(address, baseaddress + ret * self.size);
+            baseaddress + ret * self.size
+        }
+    }
+}
+
 /// The result of compiling a WebAssembly module's functions.
 #[cfg_attr(feature = "enable-serde", derive(Deserialize, Serialize))]
 #[derive(Debug, PartialEq, Eq)]
@@ -155,6 +210,9 @@ pub struct Compilation {
 
     /// Section ids corresponding to the Dwarf debug info
     debug: Option,
+
+    /// Trampolines for the architectures that need them
+    trampolines: Option<Trampolines>,
 }
 
 impl Compilation {
@@ -165,6 +223,7 @@ impl Compilation {
         function_call_trampolines: PrimaryMap,
         dynamic_function_trampolines: PrimaryMap,
         debug: Option,
+        trampolines: Option<Trampolines>,
     ) -> Self {
         Self {
             functions,
@@ -172,6 +231,7 @@ impl Compilation {
             function_call_trampolines,
             dynamic_function_trampolines,
             debug,
+            trampolines,
         }
     }
 
@@ -249,6 +309,11 @@ impl Compilation {
     pub fn get_debug(&self) -> Option {
         self.debug.clone()
     }
+
+    /// Returns the Trampolines info.
+ pub fn get_trampolines(&self) -> Option { + self.trampolines.clone() + } } impl<'a> IntoIterator for &'a Compilation { diff --git a/lib/compiler/src/lib.rs b/lib/compiler/src/lib.rs index 04aadd09be4..5604b96f88a 100644 --- a/lib/compiler/src/lib.rs +++ b/lib/compiler/src/lib.rs @@ -74,7 +74,7 @@ pub use crate::error::{ }; pub use crate::function::{ Compilation, CompiledFunction, CompiledFunctionFrameInfo, CustomSections, Dwarf, FunctionBody, - Functions, + Functions, Trampolines, }; pub use crate::jump_table::{JumpTable, JumpTableOffsets}; pub use crate::module::CompileModuleInfo; diff --git a/lib/compiler/src/relocation.rs b/lib/compiler/src/relocation.rs index 7c3382719b3..8e73d8c34a2 100644 --- a/lib/compiler/src/relocation.rs +++ b/lib/compiler/src/relocation.rs @@ -50,6 +50,14 @@ pub enum RelocationKind { Arm32Call, /// Arm64 call target Arm64Call, + /// Arm64 movk/z part 0 + Arm64Movw0, + /// Arm64 movk/z part 1 + Arm64Movw1, + /// Arm64 movk/z part 2 + Arm64Movw2, + /// Arm64 movk/z part 3 + Arm64Movw3, // /// RISC-V call target // RiscvCall, /// Elf x86_64 32 bit signed PC relative offset to two GOT entries for GD symbol. @@ -72,6 +80,10 @@ impl fmt::Display for RelocationKind { Self::X86CallPLTRel4 => write!(f, "CallPLTRel4"), Self::X86GOTPCRel4 => write!(f, "GOTPCRel4"), Self::Arm32Call | Self::Arm64Call => write!(f, "Call"), + Self::Arm64Movw0 => write!(f, "Arm64MovwG0"), + Self::Arm64Movw1 => write!(f, "Arm64MovwG1"), + Self::Arm64Movw2 => write!(f, "Arm64MovwG2"), + Self::Arm64Movw3 => write!(f, "Arm64MovwG3"), Self::ElfX86_64TlsGd => write!(f, "ElfX86_64TlsGd"), // Self::MachOX86_64Tlv => write!(f, "MachOX86_64Tlv"), } @@ -121,7 +133,11 @@ impl Relocation { /// The function returns the relocation address and the delta. pub fn for_address(&self, start: usize, target_func_address: u64) -> (usize, u64) { match self.kind { - RelocationKind::Abs8 => { + RelocationKind::Abs8 + | RelocationKind::Arm64Movw0 + | RelocationKind::Arm64Movw1 + | RelocationKind::Arm64Movw2 + | RelocationKind::Arm64Movw3 => { let reloc_address = start + self.offset as usize; let reloc_addend = self.addend as isize; let reloc_abs = target_func_address diff --git a/lib/engine-universal/src/artifact.rs b/lib/engine-universal/src/artifact.rs index de6e6cc24fb..ddc38adcf45 100644 --- a/lib/engine-universal/src/artifact.rs +++ b/lib/engine-universal/src/artifact.rs @@ -122,6 +122,7 @@ impl UniversalArtifact { custom_sections: compilation.get_custom_sections(), custom_section_relocations: compilation.get_custom_section_relocations(), debug: compilation.get_debug(), + trampolines: compilation.get_trampolines(), }; let serializable = SerializableModule { compilation: serializable_compilation, @@ -187,6 +188,8 @@ impl UniversalArtifact { &serializable.compilation.custom_sections, )?; + let mut trampolines = serializable.compilation.trampolines.clone(); + link_module( &serializable.compile_info.module, &finished_functions, @@ -194,6 +197,7 @@ impl UniversalArtifact { serializable.compilation.function_relocations.clone(), &custom_sections, &serializable.compilation.custom_section_relocations, + &mut trampolines, ); // Compute indices into the shared signature table. 
@@ -221,11 +225,28 @@ impl UniversalArtifact {
             }
             None => None,
         };
+
+        //let trampolines_section = match &serializable.compilation.trampolines {
+        //    Some(trampolines) => {
+        //        let trampolines_section_size = serializable.compilation.custom_sections
+        //            [trampolines.trampolines]
+        //            .bytes
+        //            .len();
+        //        let trampolines_section_pointer = custom_sections[trampolines.trampolines];
+        //        Some(unsafe {
+        //            std::slice::from_raw_parts(*trampolines_section_pointer, trampolines_section_size)
+        //        })
+        //    }
+        //    None => None,
+        //};
+
         // Make all code compiled thus far executable.
         inner_engine.publish_compiled_code();
 
         inner_engine.publish_eh_frame(eh_frame)?;
 
+        //inner_engine.publish_trampolines(trampolines_section)?;
+
         let finished_function_lengths = finished_functions
             .values()
             .map(|extent| extent.length)
diff --git a/lib/engine-universal/src/engine.rs b/lib/engine-universal/src/engine.rs
index ecc34931212..78c4ad20fda 100644
--- a/lib/engine-universal/src/engine.rs
+++ b/lib/engine-universal/src/engine.rs
@@ -307,6 +307,19 @@ impl UniversalEngineInner {
         Ok(())
     }
 
+    /// Make Trampolines code executable
+    //pub(crate) fn publish_trampolines(&mut self, trampolines_code: Option<&[u8]>) -> Result<(), CompileError> {
+    //    self.code_memory
+    //        .last_mut()
+    //        .unwrap()
+    //        .unwind_registry_mut()
+    //        .publish(trampolines_code)
+    //        .map_err(|e| {
+    //            CompileError::Resource(format!("Error while publishing the trampolines code: {}", e))
+    //        })?;
+    //    Ok(())
+    //}
+
     /// Shared signature registry.
     pub fn signatures(&self) -> &SignatureRegistry {
         &self.signatures
diff --git a/lib/engine-universal/src/link.rs b/lib/engine-universal/src/link.rs
index 35fe319073a..d7fd35561e7 100644
--- a/lib/engine-universal/src/link.rs
+++ b/lib/engine-universal/src/link.rs
@@ -3,19 +3,34 @@
 use std::ptr::{read_unaligned, write_unaligned};
 use wasmer_compiler::{
     JumpTable, JumpTableOffsets, Relocation, RelocationKind, RelocationTarget, Relocations,
-    SectionIndex,
+    SectionIndex, Trampolines,
 };
 use wasmer_engine::FunctionExtent;
 use wasmer_types::entity::{EntityRef, PrimaryMap};
 use wasmer_types::{LocalFunctionIndex, ModuleInfo};
 use wasmer_vm::SectionBodyPtr;
 
+fn use_trampoline(
+    address: usize,
+    allocated_sections: &PrimaryMap<SectionIndex, SectionBodyPtr>,
+    trampolines: &mut Option<Trampolines>,
+) -> Option<usize> {
+    match trampolines {
+        Some(trampolines) => Some(trampolines.add(
+            address,
+            *allocated_sections[trampolines.trampolines] as usize,
+        )),
+        _ => None,
+    }
+}
+
 fn apply_relocation(
     body: usize,
     r: &Relocation,
     allocated_functions: &PrimaryMap,
     jt_offsets: &PrimaryMap,
     allocated_sections: &PrimaryMap,
+    trampolines: &mut Option<Trampolines>,
 ) {
     let target_func_address: usize = match r.reloc_target {
         RelocationTarget::LocalFunc(index) => *allocated_functions[index].ptr as usize,
@@ -54,11 +69,51 @@ fn apply_relocation(
         },
         RelocationKind::X86PCRelRodata4 => {}
         RelocationKind::Arm64Call => unsafe {
-            let (reloc_address, reloc_delta) = r.for_address(body, target_func_address as u64);
+            let (reloc_address, mut reloc_delta) = r.for_address(body, target_func_address as u64);
+            if reloc_delta as i64 >= 0x1000_0000 || -(reloc_delta as i64) >= 0x1000_0000 {
+                let new_address =
+                    match use_trampoline(target_func_address, allocated_sections, trampolines) {
+                        Some(new_address) => new_address,
+                        _ => panic!(
+                            "Relocation too big for {:?} for {:?} with {:x}, current val {:x}",
+                            r.kind,
+                            r.reloc_target,
+                            reloc_delta,
+                            read_unaligned(reloc_address as *mut u32)
+                        ),
+                    };
+                write_unaligned((new_address + 8) as *mut u64, target_func_address as u64); // write the jump
address + let (_, new_delta) = r.for_address(body, new_address as u64); + reloc_delta = new_delta; + } let reloc_delta = (((reloc_delta / 4) as u32) & 0x3ff_ffff) | read_unaligned(reloc_address as *mut u32); write_unaligned(reloc_address as *mut u32, reloc_delta); }, + RelocationKind::Arm64Movw0 => unsafe { + let (reloc_address, reloc_delta) = r.for_address(body, target_func_address as u64); + let reloc_delta = + (((reloc_delta & 0xffff) as u32) << 5) | read_unaligned(reloc_address as *mut u32); + write_unaligned(reloc_address as *mut u32, reloc_delta); + }, + RelocationKind::Arm64Movw1 => unsafe { + let (reloc_address, reloc_delta) = r.for_address(body, target_func_address as u64); + let reloc_delta = ((((reloc_delta >> 16) & 0xffff) as u32) << 5) + | read_unaligned(reloc_address as *mut u32); + write_unaligned(reloc_address as *mut u32, reloc_delta); + }, + RelocationKind::Arm64Movw2 => unsafe { + let (reloc_address, reloc_delta) = r.for_address(body, target_func_address as u64); + let reloc_delta = ((((reloc_delta >> 32) & 0xffff) as u32) << 5) + | read_unaligned(reloc_address as *mut u32); + write_unaligned(reloc_address as *mut u32, reloc_delta); + }, + RelocationKind::Arm64Movw3 => unsafe { + let (reloc_address, reloc_delta) = r.for_address(body, target_func_address as u64); + let reloc_delta = ((((reloc_delta >> 48) & 0xffff) as u32) << 5) + | read_unaligned(reloc_address as *mut u32); + write_unaligned(reloc_address as *mut u32, reloc_delta); + }, kind => panic!( "Relocation kind unsupported in the current architecture {}", kind @@ -75,17 +130,32 @@ pub fn link_module( function_relocations: Relocations, allocated_sections: &PrimaryMap, section_relocations: &PrimaryMap>, + trampolines: &mut Option, ) { for (i, section_relocs) in section_relocations.iter() { let body = *allocated_sections[i] as usize; for r in section_relocs { - apply_relocation(body, r, allocated_functions, jt_offsets, allocated_sections); + apply_relocation( + body, + r, + allocated_functions, + jt_offsets, + allocated_sections, + trampolines, + ); } } for (i, function_relocs) in function_relocations.iter() { let body = *allocated_functions[i].ptr as usize; for r in function_relocs { - apply_relocation(body, r, allocated_functions, jt_offsets, allocated_sections); + apply_relocation( + body, + r, + allocated_functions, + jt_offsets, + allocated_sections, + trampolines, + ); } } } diff --git a/lib/engine-universal/src/serialize.rs b/lib/engine-universal/src/serialize.rs index cc09de9110e..33986298c77 100644 --- a/lib/engine-universal/src/serialize.rs +++ b/lib/engine-universal/src/serialize.rs @@ -8,7 +8,7 @@ use rkyv::{ }; use wasmer_compiler::{ CompileModuleInfo, CompiledFunctionFrameInfo, CustomSection, Dwarf, FunctionBody, - JumpTableOffsets, Relocation, SectionIndex, + JumpTableOffsets, Relocation, SectionIndex, Trampolines, }; use wasmer_engine::{DeserializeError, SerializeError}; use wasmer_types::entity::PrimaryMap; @@ -27,6 +27,8 @@ pub struct SerializableCompilation { pub custom_section_relocations: PrimaryMap>, // The section indices corresponding to the Dwarf debug info pub debug: Option, + // the Trampoline for Arm arch + pub trampolines: Option, } /// Serializable struct that is able to serialize from and to From 44eef4964abd05dccd6f2d722c372dcf16bb32ed Mon Sep 17 00:00:00 2001 From: ptitSeb Date: Tue, 5 Oct 2021 14:19:08 +0200 Subject: [PATCH 06/10] feat(compiler) Fixed single-pass build --- lib/compiler-singlepass/src/compiler.rs | 1 + 1 file changed, 1 insertion(+) diff --git 
a/lib/compiler-singlepass/src/compiler.rs b/lib/compiler-singlepass/src/compiler.rs index f47ac7019c2..37feb7c8e63 100644 --- a/lib/compiler-singlepass/src/compiler.rs +++ b/lib/compiler-singlepass/src/compiler.rs @@ -167,6 +167,7 @@ impl Compiler for SinglepassCompiler { function_call_trampolines, dynamic_function_trampolines, None, + None, )) } } From 9cf36055f29770410f3fe54ce5f3fda3f25a5e80 Mon Sep 17 00:00:00 2001 From: ptitSeb Date: Thu, 7 Oct 2021 11:24:32 +0200 Subject: [PATCH 07/10] feat(compiler) Don't try to use macOS Aarch64 specific ABI for now (at least coremark works now) --- lib/compiler-llvm/src/config.rs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/lib/compiler-llvm/src/config.rs b/lib/compiler-llvm/src/config.rs index 2f13b91e774..e62e4c911fc 100644 --- a/lib/compiler-llvm/src/config.rs +++ b/lib/compiler-llvm/src/config.rs @@ -102,14 +102,9 @@ impl LLVM { fn target_triple(&self, target: &Target) -> TargetTriple { // Hack: we're using is_pic to determine whether this is a native // build or not. - let binary_format = if self.is_pic { - target.triple().binary_format - } else { - target_lexicon::BinaryFormat::Elf - }; let operating_system = if target.triple().operating_system - == wasmer_compiler::OperatingSystem::Darwin - && !self.is_pic + == wasmer_compiler::OperatingSystem::Darwin + && !self.is_pic { // LLVM detects static relocation + darwin + 64-bit and // force-enables PIC because MachO doesn't support that @@ -119,12 +114,17 @@ impl LLVM { // Since both linux and darwin use SysV ABI, this should work. // but not in the case of Aarch64, there the ABI is slightly different match target.triple().architecture { - Architecture::Aarch64(_) => target.triple().operating_system, + //Architecture::Aarch64(_) => wasmer_compiler::OperatingSystem::Darwin, _ => wasmer_compiler::OperatingSystem::Linux, } } else { target.triple().operating_system }; + let binary_format = if self.is_pic { + target.triple().binary_format + } else { + target_lexicon::BinaryFormat::Elf + }; let triple = Triple { architecture: target.triple().architecture, vendor: target.triple().vendor.clone(), From 204238c9c98be7747e92e917c127ecddcf83c6e7 Mon Sep 17 00:00:00 2001 From: ptitSeb Date: Thu, 7 Oct 2021 11:27:08 +0200 Subject: [PATCH 08/10] feat(compiler) Fixed linting --- lib/compiler-llvm/src/config.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/compiler-llvm/src/config.rs b/lib/compiler-llvm/src/config.rs index e62e4c911fc..28a318a309f 100644 --- a/lib/compiler-llvm/src/config.rs +++ b/lib/compiler-llvm/src/config.rs @@ -103,8 +103,8 @@ impl LLVM { // Hack: we're using is_pic to determine whether this is a native // build or not. 
let operating_system = if target.triple().operating_system - == wasmer_compiler::OperatingSystem::Darwin - && !self.is_pic + == wasmer_compiler::OperatingSystem::Darwin + && !self.is_pic { // LLVM detects static relocation + darwin + 64-bit and // force-enables PIC because MachO doesn't support that From 0c6010ca34cfb330ba4e570baba2118e9f0a7f38 Mon Sep 17 00:00:00 2001 From: ptitSeb Date: Fri, 8 Oct 2021 16:18:44 +0200 Subject: [PATCH 09/10] feat(compiler) Use x17 as scratch instead of x16 on Aarch64 to help with Apple ABI --- lib/compiler-llvm/src/compiler.rs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/lib/compiler-llvm/src/compiler.rs b/lib/compiler-llvm/src/compiler.rs index 4ddb4abc763..f88da132c82 100644 --- a/lib/compiler-llvm/src/compiler.rs +++ b/lib/compiler-llvm/src/compiler.rs @@ -312,12 +312,13 @@ impl Compiler for LLVMCompiler { 16, )); // We create a jump to an absolute 64bits address - // using x16 as a scratch register, SystemV declare both x16 and x17 as Intra-Procedural scratch register - // LDR x16, #8 50 00 00 58 - // BR x16 00 02 1f d6 + // using x17 as a scratch register, SystemV declare both x16 and x17 as Intra-Procedural scratch register + // but Apple ask to just not use x16 + // LDR x17, #8 51 00 00 58 + // BR x17 20 02 1f d6 // JMPADDR 00 00 00 00 00 00 00 00 let onejump = vec![ - 0x50, 0x00, 0x00, 0x58, 0x00, 0x02, 0x1f, 0xd6, 0, 0, 0, 0, 0, 0, 0, 0, + 0x51, 0x00, 0x00, 0x58, 0x20, 0x02, 0x1f, 0xd6, 0, 0, 0, 0, 0, 0, 0, 0, ]; let mut alljmps = Vec::::new(); for _ in 0..nj { From 7965180b3655780f1dda13deb5632eea22830010 Mon Sep 17 00:00:00 2001 From: ptitSeb Date: Fri, 8 Oct 2021 17:26:15 +0200 Subject: [PATCH 10/10] feat(compiler) Added CHANGELOG note about Linux/Aarch64 Universal engine working --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index eb779357c8c..375475aa92d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,7 @@ Looking for changes that affect our C API? See the [C API Changelog](lib/c-api/C - [#2478](https://github.com/wasmerio/wasmer/pull/2478) Rename `wasm_instance_new()`’s “traps” argument to “trap”. ### Fixed +- [#2599](https://github.com/wasmerio/wasmer/pull/2599) Fixed Universal engine for Linux/Aarch64 target. - [#2587](https://github.com/wasmerio/wasmer/pull/2587) Fixed deriving `WasmerEnv` when aliasing `Result`. - [#2518](https://github.com/wasmerio/wasmer/pull/2518) Remove temporary file used to creating an artifact when creating a Dylib engine artifact. - [#2494](https://github.com/wasmerio/wasmer/pull/2494) Fixed `WasmerEnv` access when using `call_indirect` with the Singlepass compiler.
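
A note on the mechanism behind the Arm64Call changes above: an Aarch64 `BL` instruction only encodes a signed 26-bit word offset (roughly ±128 MiB), so when the link step finds a call target farther away than it can reach it routes the call through one of the 16-byte trampoline slots emitted by patch 05 (and switched to x17 in patch 09): `LDR x17` from the literal stored 8 bytes ahead, `BR x17`, then the 64-bit destination. The relocation handler then ORs `(delta / 4) & 0x3ff_ffff` into the branch instruction. The following is a standalone sketch of those two steps, not code from the patches; the addresses, helper names, and the assumption that the placeholder is a plain `BL` with imm26 == 0 (0x9400_0000) are made up for illustration.

// Illustrative sketch only -- not part of the patch series above.
// Shows (1) how a 16-byte trampoline slot is laid out and (2) how a BL
// placeholder is retargeted to it with the same masking as the Arm64Call handler.

/// Build one slot: LDR x17, #8 ; BR x17 ; 64-bit destination address.
fn trampoline_slot(target: u64) -> [u8; 16] {
    let mut slot = [
        0x51, 0x00, 0x00, 0x58, // LDR x17, #8   (load the literal stored 8 bytes ahead)
        0x20, 0x02, 0x1f, 0xd6, // BR  x17       (jump to the loaded address)
        0, 0, 0, 0, 0, 0, 0, 0, // destination, filled in below
    ];
    slot[8..].copy_from_slice(&target.to_le_bytes());
    slot
}

/// Retarget a BL placeholder (assumed to have imm26 == 0) from `from` to `to`,
/// using the same `(delta / 4) & 0x3ff_ffff` arithmetic as the Arm64Call handler.
fn patch_bl(placeholder: u32, from: u64, to: u64) -> u32 {
    let delta = to.wrapping_sub(from);
    placeholder | (((delta / 4) as u32) & 0x3ff_ffff)
}

fn main() {
    let far_target = 0x7f12_3456_7890u64; // destination outside BL's +/-128 MiB range
    let slot_addr = 0x1000_0000u64;       // hypothetical address of the trampoline slot
    let call_site = 0x0fff_ff00u64;       // hypothetical address of the BL being relocated

    let slot = trampoline_slot(far_target);
    let bl = patch_bl(0x9400_0000, call_site, slot_addr);

    println!("trampoline slot: {:02x?}", slot);
    println!("patched BL:      {:#010x}", bl);
}

With these example inputs the printed slot starts with the same `51 00 00 58 20 02 1f d6` byte sequence as `onejump` in patch 09, and the patched branch comes out as 0x94000040, i.e. `BL +0x100`.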